diff --git a/extra/resources/HealthSMART.in b/extra/resources/HealthSMART.in index efc7ebc764..f3294c6453 100755 --- a/extra/resources/HealthSMART.in +++ b/extra/resources/HealthSMART.in @@ -1,325 +1,372 @@ #!@BASH_PATH@ # # ocf:pacemaker:HealthSMART resource agent # -# Copyright 2009-2021 the Pacemaker project contributors +# Copyright 2009-2022 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # (GPLv2) WITHOUT ANY WARRANTY. # # # Checks the S.M.A.R.T. status of all given drives and writes the #health-smart # status into the CIB # ####################################################################### ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} -# -SMARTCTL=/usr/sbin/smartctl -ATTRDUP=/usr/sbin/attrd_updater # Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_CRM_meta_interval:=0} : ${OCF_RESKEY_CRM_meta_globally_unique:="true"} : ${OCF_RESKEY_temp_warning:=""} : ${OCF_RESKEY_temp_lower_limit:=""} : ${OCF_RESKEY_temp_upper_limit:=""} -: ${OCF_RESKEY_drives:=""} +: ${OCF_RESKEY_drives:="/dev/sda"} : ${OCF_RESKEY_devices:=""} : ${OCF_RESKEY_state:=""} +: ${OCF_RESKEY_smartctl:="/usr/sbin/smartctl"} +: ${OCF_RESKEY_dampen:="5s"} + +# Turn these into arrays so we can iterate them later. +DRIVES=(${OCF_RESKEY_drives}) +DEVICES=(${OCF_RESKEY_devices}) ####################################################################### meta_data() { cat < - - -1.0 + +1.1 System health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. SMART health status - + Location to store the resource state in. State file - + The drive(s) to check as a SPACE separated list. 
Enter the full path to the device, e.g. "/dev/sda". Drives to check - + The device type(s) to assume for the drive(s) being tested as a SPACE separated list. Device types - + Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red. Lower limit for the red smart attribute - + Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. Upper limit for red smart attribute - + Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. Deg C below/above the upper limits for yellow smart attribute + + +The path to the smartctl program, used for querying device health. + +The path to the smartctl program + + + + + +The time to wait (dampening) for further changes to occur + +Dampening interval + + + + END } ####################################################################### check_temperature() { if [ $1 -lt ${lower_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C" - "$ATTRDUP" -n "#health-smart" -U "red" -d "5s" + attrd_updater -n "#health-smart" -U "red" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -gt ${upper_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C" - "$ATTRDUP" -n "#health-smart" -U "red" -d "5s" + attrd_updater -n "#health-smart" -U "red" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -lt ${lower_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C" - "$ATTRDUP" -n "#health-smart" -U "yellow" -d "5s" + attrd_updater -n "#health-smart" -U "yellow" -d "${OCF_RESKEY_dampen}" return 1 fi if [ $1 -gt ${upper_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C" - "$ATTRDUP" -n "#health-smart" -U "yellow" -d "5s" + attrd_updater -n "#health-smart" -U "yellow" -d "${OCF_RESKEY_dampen}" return 1 fi } +common_checks() { + # Each item in $OCF_RESKEY_drives must 
have a corresponding item in + # $OCF_RESKEY_devices with the device type. Alternately, + # $OCF_RESKEY_devices can be empty. + drives_len=${#DRIVES[@]} + devices_len=${#DEVICES[@]} + + if [ "${drives_len}" -ne "${devices_len}" ] && [ "${devices_len}" -gt 0 ]; then + ocf_log err "OCF_RESKEY_devices must be empty or the same length as OCF_RESKEY_drives." + exit $OCF_ERR_ARGS + fi + + # Each item in $OCF_RESKEY_drives must look like a device node. + for d in "${DRIVES[@]}"; do + if [[ "$d" != /dev/* ]]; then + ocf_log err "Device in OCF_RESKEY_drives does not look like a device node: $d" + exit $OCF_ERR_ARGS + fi + done +} + init_smart() { #Set temperature defaults if [ -z "${OCF_RESKEY_temp_warning}" ]; then yellow_threshold=5 else yellow_threshold=${OCF_RESKEY_temp_warning} fi if [ -z "${OCF_RESKEY_temp_lower_limit}" ] ; then lower_red_limit=0 else lower_red_limit=${OCF_RESKEY_temp_lower_limit} fi lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold})) if [ -z "${OCF_RESKEY_temp_upper_limit}" ] ; then upper_red_limit=60 else upper_red_limit=${OCF_RESKEY_temp_upper_limit} fi upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold})) - #Set disk defaults - if [ -z "${OCF_RESKEY_drives}" ] ; then - DRIVES="/dev/sda" - else - DRIVES=${OCF_RESKEY_drives} - fi - - #Test for presence of smartctl - if [ ! -x "$SMARTCTL" ] ; then - ocf_log err "${SMARTCTL} not installed." - exit $OCF_ERR_INSTALLED - fi + for ndx in ${!DRIVES[*]}; do + DRIVE=${DRIVES[$ndx]} - for DRIVE in $DRIVES; do if [ -n "${OCF_RESKEY_devices}" ]; then - for DEVICE in ${OCF_RESKEY_devices}; do - "$SMARTCTL" -d "$DEVICE" -i "${DRIVE}" | grep -q "SMART support is: Enabled" - if [ $? -ne 0 ] ; then - ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} - exit $OCF_ERR_INSTALLED - fi - done + DEVICE=${DEVICES[$ndx]} + + "${OCF_RESKEY_smartctl}" -d "${DEVICE}" -i "${DRIVE}" | grep -q "SMART support is: Enabled" + if [ $? -ne 0 ] ; then + ocf_log err "S.M.A.R.T. 
not enabled for drive "${DRIVE} + exit $OCF_ERR_INSTALLED + fi else - "$SMARTCTL" -i "${DRIVE}" | grep -q "SMART support is: Enabled" + "${OCF_RESKEY_smartctl}" -i "${DRIVE}" | grep -q "SMART support is: Enabled" if [ $? -ne 0 ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi fi done } HealthSMART_usage() { cat <