diff --git a/extra/resources/HealthSMART b/extra/resources/HealthSMART index 2c78a87b30..60fcfe3443 100644 --- a/extra/resources/HealthSMART +++ b/extra/resources/HealthSMART @@ -1,287 +1,326 @@ #!/bin/sh # # -# HealthSMART OCF RA. Checks the S.M.A.R.T. status of all given -# drives and writes the #health-smart status into the CIB +# HealthSMART OCF RA. Checks the S.M.A.R.T. status of all given +# drives and writes the #health-smart status into the CIB # -# Copyright (c) 2009 Michael Schwartzkopff +# Copyright (c) 2009 Michael Schwartzkopff, 2010 Matthew Richardson # # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # -################################ -# -# TODO: -# - All -# - Enable drive parameter with a loop. -# - Error handling if smart does not give temeprature. -# -################################## +####################################################################### ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs # SMARTCTL=/usr/sbin/smartctl +ATTRDUP=/usr/sbin/attrd_updater ####################################################################### meta_data() { - cat < 0.1 Systhem health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. SMART health status Location to store the resource state in. State file - + -The drives to check as a SPACE separated list. Enter only the part after the "/dev/" i.e. "sda". -At the moment /dev/sda is hard coded. Sorry. +The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". Drives to check - + + + + + +The device type(s) to assume for the drive(s) being tested as a SPACE separated list. + +Device types + -Lower limit of the temperature in deg C of the drive(s). Below this limit there status will be red. The yellow limit is 5 deg C more than this value. +Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red. -Lower limit for the temperature of the drive(s) - +Lower limit for the red smart attribute +content type="string" default="0"/> Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. -The yellow limit is 5 deg C below this value. Upper limit for red smart attribute + + +Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. + +Deg C below/above the upper limits for yellow smart attribute + + + END } ####################################################################### +check_temperature() { + + if [ $1 -lt ${lower_red_limit} ] ; then + ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C" + $ATTRDUP -n "#health-smart" -U "red" -d "5s" + return 1 + fi + + if [ $1 -gt ${upper_red_limit} ] ; then + ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C" + $ATTRDUP -n "#health-smart" -U "red" -d "5s" + return 1 + fi + + if [ $1 -lt ${lower_yellow_limit} ] ; then + ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C" + $ATTRDUP -n "#health-smart" -U "yellow" -d "5s" + return 1 + fi + + if [ $1 -gt ${upper_yellow_limit} ] ; then + ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C" + $ATTRDUP -n "#health-smart" -U "yellow" -d "5s" + return 1 + fi +} + + init_smart() { + #Set temperature defaults + if [ -z ${OCF_RESKEY_temp_warning} ]; then + yellow_threshold=5 + else + yellow_threshold=${OCF_RESKEY_temp_warning} + fi - if [ "x${OCF_RESKEY_temp_lower_limit}" = "x" ] ; then - lower_red_limit=0 - lower_yellow_limit=5 - else - lower_red_limit=${OCF_RESKEY_temp_lower_limit} - let lower_yellow_limit=${OCF_RESKEY_temp_lower_limit}+5 - fi - - if [ "x${OCF_RESKEY_temp_upper_limit}" = "x" ] ; then - upper_red_limit=60 - upper_yellow_limit=55 - else - upper_red_limit=${OCF_RESKEY_temp_upper_limit} - let upper_yellow_limit=${OCF_RESKEY_temp_upper_limit}-5 - fi - - if [ "x${OCF_RESKEY_drives}" = "x" ] ; then - DRIVES="sda" - else - DRIVES=${OCF_RESKEY_drives} - fi - - # echo "Drives: "$DRIVES, "Lower limits: "$lower_red_limit, $lower_yellow_limit, "Upper limits: "$upper_red_limit, $upper_yellow_limit - - if [ ! -x $SMARTCTL ] ; then - ocf_log err $SMARTCTL" not installed." - exit $OCF_ERR_INSTALLED - fi - - $SMARTCTL -i $DRIVE | grep -q "SMART support is: Enabled" - ret=$? - if [ $ret -ne "0" ] ; then - ocf_log err "S.M.A.R.T. not enabled for drive /dev/"${DRIVE} - exit $OCF_ERR_INSTALLED - fi + if [ -z ${OCF_RESKEY_temp_lower_limit} ] ; then + lower_red_limit=0 + else + lower_red_limit=${OCF_RESKEY_temp_lower_limit} + fi + lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold})) -} + if [ -z ${OCF_RESKEY_temp_upper_limit} ] ; then + upper_red_limit=60 + else + upper_red_limit=${OCF_RESKEY_temp_upper_limit} + fi + upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold})) + + #Set disk defaults + if [ -z ${OCF_RESKEY_drives} ] ; then + DRIVES="/dev/sda" + else + DRIVES=${OCF_RESKEY_drives} + fi + + #Test for presence of smartctl + if [ ! -x $SMARTCTL ] ; then + ocf_log err "${SMARTCTL} not installed." + exit $OCF_ERR_INSTALLED + fi -# don't exit on TERM, to test that lrmd makes sure that we do exit -trap sigterm_handler TERM -sigterm_handler() { - ocf_log info "They use TERM to bring us down. No such luck." - return + for DRIVE in $DRIVES; do + if [ "${OCF_RESKEY_devices}" ]; then + for DEVICE in ${OCF_RESKEY_devices}; do + $SMARTCTL -d $DEVICE -i ${DRIVE} | grep -q "SMART support is: Enabled" + if [ $? -ne "0" ] ; then + ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} + exit $OCF_ERR_INSTALLED + fi + done + else + $SMARTCTL -i ${DRIVE} | grep -q "SMART support is: Enabled" + if [ $? -ne "0" ] ; then + ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} + exit $OCF_ERR_INSTALLED + fi + fi + done } -dummy_usage() { - cat <