diff --git a/extra/resources/HealthCPU b/extra/resources/HealthCPU index 0f32c5c183..fc9af57104 100755 --- a/extra/resources/HealthCPU +++ b/extra/resources/HealthCPU @@ -1,199 +1,199 @@ #!/bin/sh # # ocf:pacemaker:HealthCPU resource agent # # Copyright 2004-2019 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # # Measures CPUs idling and writes #health-cpu status into the CIB # ################################ # # TODO: Enter default values # Error handling in getting uptime # ################################## ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} ####################################################################### meta_data() { cat <<END <?xml version="1.0"?> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> <resource-agent name="HealthCPU" version="0.1"> <version>1.0</version> <longdesc lang="en"> -Systhem health agent that measures the CPU idling and updates the #health-cpu attribute. +System health agent that measures the CPU idling and updates the #health-cpu attribute. </longdesc> <shortdesc lang="en">System health CPU usage</shortdesc> <parameters> <parameter name="state" unique="1"> <longdesc lang="en"> Location to store the resource state in. </longdesc> <shortdesc lang="en">State file</shortdesc> <content type="string" default="${HA_VARRUN%%/}/health-cpu-${OCF_RESOURCE_INSTANCE}.state" /> </parameter> <parameter name="yellow_limit" unique="1"> <longdesc lang="en"> Lower (!) limit of idle percentage to switch the health attribute to yellow. I.e. the #health-cpu will go yellow if the %idle of the CPU falls below 50%. </longdesc> <shortdesc lang="en">Lower limit for yellow health attribute</shortdesc> <content type="string" default="50"/> </parameter> <parameter name="red_limit" unique="1"> <longdesc lang="en"> Lower (!) limit of idle percentage to switch the health attribute to red. I.e. the #health-cpu will go red if the %idle of the CPU falls below 10%. </longdesc> <shortdesc lang="en">Lower limit for red health attribute</shortdesc> <content type="string" default="10"/> </parameter> </parameters> <actions> <action name="start" timeout="10s" /> <action name="stop" timeout="10s" /> <action name="monitor" timeout="10s" interval="10s" start-delay="0s" /> <action name="meta-data" timeout="5s" /> <action name="validate-all" timeout="10s" /> </actions> </resource-agent> END } ####################################################################### dummy_usage() { cat <<END usage: $0 {start|stop|monitor|validate-all|meta-data} Expects to have a fully populated OCF RA-compliant environment set. END } dummy_start() { dummy_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi touch "${OCF_RESKEY_state}" } dummy_stop() { dummy_monitor if [ $? -eq $OCF_SUCCESS ]; then rm "${OCF_RESKEY_state}" fi return $OCF_SUCCESS } dummy_monitor() { # Monitor _MUST!_ differentiate correctly between running # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING). # That is THREE states, not just yes/no. if [ -f "${OCF_RESKEY_state}" ]; then IDLE=$(top -b -n2 | grep Cpu | tail -1 | awk -F",|.[0-9][ %]id" '{ print $4 }') # echo "System idle: " $IDLE # echo "$OCF_RESKEY_red_limit" # echo $OCF_RESKEY_yellow_limit if [ $IDLE -lt ${OCF_RESKEY_red_limit} ] ; then # echo "System state RED!" attrd_updater -n "#health-cpu" -U "red" -d "30s" return $OCF_SUCCESS fi if [ $IDLE -lt ${OCF_RESKEY_yellow_limit} ] ; then # echo "System state yellow." attrd_updater -n "#health-cpu" -U "yellow" -d "30s" else # echo "System state green." attrd_updater -n "#health-cpu" -U "green" -d "30s" fi return $OCF_SUCCESS fi if false ; then return $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } dummy_validate() { # Is the state directory writable? state_dir=$(dirname "$OCF_RESKEY_state") touch "$state_dir/$$" if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi rm "$state_dir/$$" return $OCF_SUCCESS } : ${OCF_RESKEY_CRM_meta_interval:=0} : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} if [ -z "$OCF_RESKEY_state" ]; then if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then state="${HA_VARRUN%%/}/Dummy-${OCF_RESOURCE_INSTANCE}.state" # Strip off the trailing clone marker OCF_RESKEY_state=$(echo $state | sed s/:[0-9][0-9]*\.state/.state/) else OCF_RESKEY_state="${HA_VARRUN%%/}/Dummy-${OCF_RESOURCE_INSTANCE}.state" fi fi if [ -z "${OCF_RESKEY_red_limit}" ] ; then OCF_RESKEY_red_limit=10 fi if [ -z "${OCF_RESKEY_yellow_limit}" ] ; then OCF_RESKEY_yellow_limit=50 fi case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) dummy_start;; stop) dummy_stop;; monitor) dummy_monitor;; validate-all) dummy_validate;; usage|help) dummy_usage exit $OCF_SUCCESS ;; *) dummy_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/extra/resources/HealthIOWait b/extra/resources/HealthIOWait index 9e292b4f5a..e17d8a83ca 100755 --- a/extra/resources/HealthIOWait +++ b/extra/resources/HealthIOWait @@ -1,178 +1,178 @@ #!/bin/sh # # ocf:pacemaker:HealthIOWait resource agent # # Copyright 2004-2019 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # # Measures CPU iowait % via top and writes #health-iowait status into the CIB # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} ####################################################################### meta_data() { cat <<END <?xml version="1.0"?> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> <resource-agent name="HealthIOWait" version="1.0"> <version>1.0</version> <longdesc lang="en"> -Systhem health agent that measures the CPU iowait via top and updates the #health-iowait attribute. +System health agent that measures the CPU iowait via top and updates the #health-iowait attribute. </longdesc> <shortdesc lang="en">System health based on CPU iowait measurement</shortdesc> <parameters> <parameter name="state" unique="1"> <longdesc lang="en"> Location to store the resource state in. </longdesc> <shortdesc lang="en">State file</shortdesc> <content type="string" default="${HA_VARRUN%%/}/health-iowait-${OCF_RESOURCE_INSTANCE}.state" /> </parameter> <parameter name="yellow_limit" unique="0"> <longdesc lang="en"> Upper limit of iowait percentage to switch the health attribute to yellow. I.e. the #health-iowait will go yellow if the %iowait of the CPU gets higher than 10%. </longdesc> <shortdesc lang="en">Upper limit for yellow health attribute</shortdesc> <content type="string" default="10"/> </parameter> <parameter name="red_limit" unique="0"> <longdesc lang="en"> Upper limit of iowait percentage to switch the health attribute to red. I.e. the #health-iowait will go red if the %iowait of the CPU get higher than 15%. </longdesc> <shortdesc lang="en">Upper limit for red health attribute</shortdesc> <content type="string" default="15"/> </parameter> </parameters> <actions> <action name="start" timeout="10s" /> <action name="stop" timeout="10s" /> <action name="monitor" timeout="10s" interval="10s" start-delay="0s" /> <action name="meta-data" timeout="5s" /> <action name="validate-all" timeout="10s" /> </actions> </resource-agent> END } ####################################################################### agent_usage() { cat <<END usage: $0 {start|stop|monitor|validate-all|meta-data} Expects to have a fully populated OCF RA-compliant environment set. END } agent_start() { agent_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi touch "${OCF_RESKEY_state}" } agent_stop() { agent_monitor if [ $? -eq $OCF_SUCCESS ]; then rm "${OCF_RESKEY_state}" fi return $OCF_SUCCESS } agent_monitor() { # Monitor _MUST!_ differentiate correctly between running # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING). # That is THREE states, not just yes/no. if [ -f "${OCF_RESKEY_state}" ]; then WAIT=$(top -b -n2 | grep Cpu | tail -1 | awk -F",|.[0-9][ %]wa" '{ print $5 }') # echo "System iowait: " $WAIT # echo $OCF_RESKEY_yellow_limit if [ $WAIT -gt ${OCF_RESKEY_red_limit} ] ; then # echo "System state RED!" attrd_updater -n "#health-iowait" -U "red" -d "5s" return $OCF_SUCCESS fi if [ $WAIT -gt ${OCF_RESKEY_yellow_limit} ] ; then # echo "System state yellow." attrd_updater -n "#health-iowait" -U "yellow" -d "5s" else # echo "System state green." attrd_updater -n "#health-iowait" -U "green" -d "5s" fi return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } agent_validate() { # Is the state directory writable? state_dir=$(dirname "$OCF_RESKEY_state") if [ -d "$state_dir" ] && [ -w "$state_dir" ] && [ -x "$state_dir" ]; then return $OCF_ERR_ARGS fi return $OCF_SUCCESS } : ${OCF_RESKEY_CRM_meta_interval:=0} : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} if [ -z "$OCF_RESKEY_state" ]; then if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then state="${HA_VARRUN%%/}/HealthIoWait-${OCF_RESOURCE_INSTANCE}.state" #Strip off the trailing clone marker OCF_RESKEY_state=$(echo $state | sed s/:[0-9][0-9]*\.state/.state/) else OCF_RESKEY_state="${HA_VARRUN%%/}/HealthIoWait-${OCF_RESOURCE_INSTANCE}.state" fi fi if [ -z "${OCF_RESKEY_red_limit}" ] ; then OCF_RESKEY_red_limit=15 fi if [ -z "${OCF_RESKEY_yellow_limit}" ] ; then OCF_RESKEY_yellow_limit=10 fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) agent_start;; stop) agent_stop;; monitor) agent_monitor;; validate-all) agent_validate;; usage|help) agent_usage exit $OCF_SUCCESS ;; *) agent_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/extra/resources/HealthSMART.in b/extra/resources/HealthSMART.in index 7889a8f8a4..f8fc9d08a3 100755 --- a/extra/resources/HealthSMART.in +++ b/extra/resources/HealthSMART.in @@ -1,317 +1,317 @@ #!@BASH_PATH@ # # ocf:pacemaker:HealthSMART resource agent # # Copyright 2009-2019 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # # Checks the S.M.A.R.T. status of all given drives and writes the #health-smart # status into the CIB # ####################################################################### ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # SMARTCTL=/usr/sbin/smartctl ATTRDUP=/usr/sbin/attrd_updater ####################################################################### meta_data() { cat <<END <?xml version="1.0"?> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> <resource-agent name="HealthSMART" version="0.1"> <version>1.0</version> <longdesc lang="en"> -Systhem health agent that checks the S.M.A.R.T. status of the given drives and +System health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. </longdesc> <shortdesc lang="en">SMART health status</shortdesc> <parameters> <parameter name="state" unique="1"> <longdesc lang="en"> Location to store the resource state in. </longdesc> <shortdesc lang="en">State file</shortdesc> <content type="string" default="${HA_VARRUN%%/}/HealthSMART-${OCF_RESOURCE_INSTANCE}.state" /> </parameter> <parameter name="drives" unique="0"> <longdesc lang="en"> The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". </longdesc> <shortdesc lang="en">Drives to check</shortdesc> <content type="string" default="/dev/sda" /> </parameter> <parameter name="devices" unique="0"> <longdesc lang="en"> The device type(s) to assume for the drive(s) being tested as a SPACE separated list. </longdesc> <shortdesc lang="en">Device types</shortdesc> <content type="string" /> </parameter> <parameter name="temp_lower_limit" unique="0"> <longdesc lang="en"> Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red. </longdesc> <shortdesc lang="en">Lower limit for the red smart attribute</shortdesc> <content type="string" default="0"/> </parameter> <parameter name="temp_upper_limit" unique="0"> <longdesc lang="en"> Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. </longdesc> <shortdesc lang="en">Upper limit for red smart attribute</shortdesc> <content type="string" default="60"/> </parameter> <parameter name="temp_warning" unique="0"> <longdesc lang="en"> Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. </longdesc> <shortdesc lang="en">Deg C below/above the upper limits for yellow smart attribute</shortdesc> <content type="string" default="5"/> </parameter> </parameters> <actions> <action name="start" timeout="10s" /> <action name="stop" timeout="10s" /> <action name="monitor" timeout="10s" interval="10s" start-delay="0s" /> <action name="meta-data" timeout="5s" /> <action name="validate-all" timeout="10s" /> </actions> </resource-agent> END } ####################################################################### check_temperature() { if [ $1 -lt ${lower_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C" "$ATTRDUP" -n "#health-smart" -U "red" -d "5s" return 1 fi if [ $1 -gt ${upper_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C" "$ATTRDUP" -n "#health-smart" -U "red" -d "5s" return 1 fi if [ $1 -lt ${lower_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C" "$ATTRDUP" -n "#health-smart" -U "yellow" -d "5s" return 1 fi if [ $1 -gt ${upper_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C" "$ATTRDUP" -n "#health-smart" -U "yellow" -d "5s" return 1 fi } init_smart() { #Set temperature defaults if [ -z "${OCF_RESKEY_temp_warning}" ]; then yellow_threshold=5 else yellow_threshold=${OCF_RESKEY_temp_warning} fi if [ -z "${OCF_RESKEY_temp_lower_limit}" ] ; then lower_red_limit=0 else lower_red_limit=${OCF_RESKEY_temp_lower_limit} fi lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold})) if [ -z "${OCF_RESKEY_temp_upper_limit}" ] ; then upper_red_limit=60 else upper_red_limit=${OCF_RESKEY_temp_upper_limit} fi upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold})) #Set disk defaults if [ -z "${OCF_RESKEY_drives}" ] ; then DRIVES="/dev/sda" else DRIVES=${OCF_RESKEY_drives} fi #Test for presence of smartctl if [ ! -x "$SMARTCTL" ] ; then ocf_log err "${SMARTCTL} not installed." exit $OCF_ERR_INSTALLED fi for DRIVE in $DRIVES; do if [ -n "${OCF_RESKEY_devices}" ]; then for DEVICE in ${OCF_RESKEY_devices}; do "$SMARTCTL" -d "$DEVICE" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi done else "$SMARTCTL" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi fi done } HealthSMART_usage() { cat <<END usage: $0 {start|stop|monitor|validate-all|meta-data} Expects to have a fully populated OCF RA-compliant environment set. END } HealthSMART_start() { HealthSMART_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi touch "${OCF_RESKEY_state}" } HealthSMART_stop() { HealthSMART_monitor if [ $? -eq $OCF_SUCCESS ]; then rm "${OCF_RESKEY_state}" fi return $OCF_SUCCESS } HealthSMART_monitor() { init_smart # Monitor _MUST!_ differentiate correctly between running # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING). # That is THREE states, not just yes/no. if [ -f "${OCF_RESKEY_state}" ]; then # Check overall S.M.A.R.T. status for DRIVE in $DRIVES; do if [ -n "${OCF_RESKEY_devices}" ]; then for DEVICE in ${OCF_RESKEY_devices}; do "$SMARTCTL" -d "$DEVICE" -H ${DRIVE} | grep -q "SMART overall-health self-assessment test result: PASSED" if [ $? -ne 0 ]; then "$ATTRDUP" -n "#health-smart" -U "red" -d "5s" return $OCF_SUCCESS fi done else "$SMARTCTL" -H "${DRIVE}" | grep -q "SMART overall-health self-assessment test result: PASSED" if [ $? -ne 0 ]; then "$ATTRDUP" -n "#health-smart" -U "red" -d "5s" return $OCF_SUCCESS fi fi # Check drive temperature(s) if [ -n "${OCF_RESKEY_devices}" ]; then for DEVICE in ${OCF_RESKEY_devices}; do check_temperature "$("$SMARTCTL" -d "$DEVICE" -A "${DRIVE}" | awk '/^194/ { print $10 }')" if [ $? -ne 0 ]; then return $OCF_SUCCESS fi done else check_temperature "$("$SMARTCTL" -A "${DRIVE}" | awk '/^194/ { print $10 }')" if [ $? -ne 0 ]; then return $OCF_SUCCESS fi fi done "$ATTRDUP" -n "#health-smart" -U "green" -d "5s" return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } HealthSMART_validate() { init_smart # Is the state directory writable? state_dir=$(dirname "$OCF_RESKEY_state") touch "$state_dir/$$" if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi rm "$state_dir/$$" return $OCF_SUCCESS } : ${OCF_RESKEY_CRM_meta_interval:=0} : ${OCF_RESKEY_CRM_meta_globally_unique:="true"} if [ -z "$OCF_RESKEY_state" ]; then if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then state="${HA_VARRUN%%/}/HealthSMART-${OCF_RESOURCE_INSTANCE}.state" # Strip off the trailing clone marker OCF_RESKEY_state=$(echo $state | sed s/:[0-9][0-9]*\.state/.state/) else OCF_RESKEY_state="${HA_VARRUN%%/}/HealthSMART-${OCF_RESOURCE_INSTANCE}.state" fi fi case "$__OCF_ACTION" in start) HealthSMART_start;; stop) HealthSMART_stop;; monitor) HealthSMART_monitor;; validate-all) HealthSMART_validate;; meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) HealthSMART_usage exit $OCF_SUCCESS ;; *) HealthSMART_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: