diff --git a/agents/ocf/HealthCPU.in b/agents/ocf/HealthCPU.in index 14e4b0741f..1a691a98b6 100755 --- a/agents/ocf/HealthCPU.in +++ b/agents/ocf/HealthCPU.in @@ -1,223 +1,221 @@ #!/bin/sh # # ocf:pacemaker:HealthCPU resource agent # # Copyright 2004-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # # Measures CPUs idling and writes #health-cpu status into the CIB # ################################ # # TODO: Enter default values # Error handling in getting uptime # ################################## ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} +: ${OCF_RESKEY_dampening:="30s"} ####################################################################### meta_data() { cat < 1.1 System health agent that measures the CPU idling and updates the #health-cpu attribute. System health CPU usage Location to store the resource state in. State file Lower (!) limit of idle percentage to switch the health attribute to yellow. I.e. the #health-cpu will go yellow if the %idle of the CPU falls below 50%. Lower limit for yellow health attribute Lower (!) limit of idle percentage to switch the health attribute to red. I.e. the #health-cpu will go red if the %idle of the CPU falls below 10%. Lower limit for red health attribute The time to wait (dampening) in seconds for further changes before writing The time to wait (dampening) in seconds for further changes before writing END } ####################################################################### healthcpu_usage() { cat < 1.1 System health agent that measures the CPU iowait via top and updates the #health-iowait attribute. 
System health based on CPU iowait measurement Location to store the resource state in. State file Upper limit of iowait percentage to switch the health attribute to yellow. I.e. the #health-iowait will go yellow if the %iowait of the CPU gets higher than 10%. Upper limit for yellow health attribute Upper limit of iowait percentage to switch the health attribute to red. I.e. the #health-iowait will go red if the %iowait of the CPU get higher than 15%. Upper limit for red health attribute + + +The time to wait (dampening) in seconds for further changes before writing + +The time to wait (dampening) in seconds for further changes +before writing + + + + END } ####################################################################### agent_usage() { cat < 1.1 System health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. SMART health status Location to store the resource state in. State file The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". Drives to check The device type(s) to assume for the drive(s) being tested as a SPACE separated list. Device types Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red. Lower limit for the red smart attribute Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. Upper limit for red smart attribute Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. Deg C below/above the upper limits for yellow smart attribute The path to the smartctl program, used for querying device health. 
The path to the smartctl program The time to wait (dampening) for further changes to occur Dampening interval - + END } #######################################################################

# check_temperature TEMP
#
# Compare the temperature reading TEMP (integer, deg C) with the limits held
# in the globals lower_red_limit, upper_red_limit, lower_yellow_limit and
# upper_yellow_limit (set by init_smart).
#
# When a limit is violated the function logs an info message naming
# ${DRIVE}/${DEVICE}, sets the "#health-smart" node attribute to "red" or
# "yellow" through attrd_updater, and returns 1. Red limits are tested before
# yellow ones so the more severe state wins.
#
# Returns 0 when no limit is violated. A TEMP that is empty or not an integer
# makes every test fail (with a diagnostic from `[`), so it also returns 0.
#
# NOTE(review): the dampening delay is read from OCF_RESKEY_dampen; confirm
# that this matches the parameter name declared in the agent metadata, which
# is not visible in this part of the file.
check_temperature() {
    if [ "$1" -lt "${lower_red_limit}" ]; then
        ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C"
        attrd_updater -n "#health-smart" -B "red" -d "${OCF_RESKEY_dampen}"
        return 1
    fi

    if [ "$1" -gt "${upper_red_limit}" ]; then
        ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C"
        attrd_updater -n "#health-smart" -B "red" -d "${OCF_RESKEY_dampen}"
        return 1
    fi

    if [ "$1" -lt "${lower_yellow_limit}" ]; then
        ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C"
        attrd_updater -n "#health-smart" -B "yellow" -d "${OCF_RESKEY_dampen}"
        return 1
    fi

    if [ "$1" -gt "${upper_yellow_limit}" ]; then
        ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C"
        attrd_updater -n "#health-smart" -B "yellow" -d "${OCF_RESKEY_dampen}"
        return 1
    fi

    return 0
}

# common_checks
#
# Validate the DRIVES and DEVICES arrays before they are used:
#   * DEVICES must be empty or have exactly as many entries as DRIVES;
#   * every entry of DRIVES must start with /dev/.
# On any violation an error is logged and the agent exits with
# $OCF_ERR_ARGS.
common_checks() {
    # Each item in $OCF_RESKEY_drives must have a corresponding item in
    # $OCF_RESKEY_devices with the device type.  Alternately,
    # $OCF_RESKEY_devices can be empty.
    drives_len=${#DRIVES[@]}
    devices_len=${#DEVICES[@]}

    if [ "${drives_len}" -ne "${devices_len}" ] && [ "${devices_len}" -gt 0 ]; then
        ocf_log err "OCF_RESKEY_devices must be empty or the same length as OCF_RESKEY_drives."
        exit "$OCF_ERR_ARGS"
    fi

    # Each item in $OCF_RESKEY_drives must look like a device node.
    for d in "${DRIVES[@]}"; do
        if [[ "$d" != /dev/* ]]; then
            # The entries checked here come from DRIVES, so the message must
            # point at OCF_RESKEY_drives rather than OCF_RESKEY_devices.
            ocf_log err "Device in OCF_RESKEY_drives does not look like a device node: $d"
            exit "$OCF_ERR_ARGS"
        fi
    done
}

# init_smart
#
# Derive the temperature thresholds from the resource parameters and verify
# that S.M.A.R.T. support is enabled on every configured drive.
#
# Sets the globals yellow_threshold (default 5), lower_red_limit (default 0),
# upper_red_limit (default 60), lower_yellow_limit and upper_yellow_limit.
# While iterating it also sets DRIVE and, when OCF_RESKEY_devices is
# non-empty, DEVICE.
#
# Exits with $OCF_ERR_INSTALLED as soon as one drive's `smartctl -i` output
# lacks the line "SMART support is: Enabled".
init_smart() {
    # Temperature defaults apply when the parameter is unset or empty.
    yellow_threshold=${OCF_RESKEY_temp_warning:-5}
    lower_red_limit=${OCF_RESKEY_temp_lower_limit:-0}
    upper_red_limit=${OCF_RESKEY_temp_upper_limit:-60}

    # The yellow band sits yellow_threshold degrees inside each red limit.
    lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold}))
    upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold}))

    local smartctl_args

    for ndx in "${!DRIVES[@]}"; do
        DRIVE=${DRIVES[$ndx]}

        # Pass an explicit device type only when the user configured one.
        if [ -n "${OCF_RESKEY_devices}" ]; then
            DEVICE=${DEVICES[$ndx]}
            smartctl_args=(-d "${DEVICE}" -i "${DRIVE}")
        else
            smartctl_args=(-i "${DRIVE}")
        fi

        if ! "${OCF_RESKEY_smartctl}" "${smartctl_args[@]}" | grep -q "SMART support is: Enabled"; then
            ocf_log err "S.M.A.R.T. not enabled for drive ${DRIVE}"
            exit "$OCF_ERR_INSTALLED"
        fi
    done
}

HealthSMART_usage() { cat <