diff --git a/extra/resources/HealthSMART b/extra/resources/HealthSMART index 84b7e95505..7b7cce4b26 100644 --- a/extra/resources/HealthSMART +++ b/extra/resources/HealthSMART @@ -1,326 +1,326 @@ #!/bin/sh # # # HealthSMART OCF RA. Checks the S.M.A.R.T. status of all given # drives and writes the #health-smart status into the CIB # # Copyright (c) 2009 Michael Schwartzkopff, 2010 Matthew Richardson # # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs # SMARTCTL=/usr/sbin/smartctl ATTRDUP=/usr/sbin/attrd_updater ####################################################################### meta_data() { cat < 0.1 Systhem health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. SMART health status Location to store the resource state in. State file The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". Drives to check The device type(s) to assume for the drive(s) being tested as a SPACE separated list. Device types Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red. Lower limit for the red smart attribute -content type="string" default="0"/> + Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. Upper limit for red smart attribute Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. Deg C below/above the upper limits for yellow smart attribute END } ####################################################################### check_temperature() { if [ $1 -lt ${lower_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C" $ATTRDUP -n "#health-smart" -U "red" -d "5s" return 1 fi if [ $1 -gt ${upper_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C" $ATTRDUP -n "#health-smart" -U "red" -d "5s" return 1 fi if [ $1 -lt ${lower_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C" $ATTRDUP -n "#health-smart" -U "yellow" -d "5s" return 1 fi if [ $1 -gt ${upper_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C" $ATTRDUP -n "#health-smart" -U "yellow" -d "5s" return 1 fi } init_smart() { #Set temperature defaults if [ -z ${OCF_RESKEY_temp_warning} ]; then yellow_threshold=5 else yellow_threshold=${OCF_RESKEY_temp_warning} fi if [ -z ${OCF_RESKEY_temp_lower_limit} ] ; then lower_red_limit=0 else lower_red_limit=${OCF_RESKEY_temp_lower_limit} fi lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold})) if [ -z ${OCF_RESKEY_temp_upper_limit} ] ; then upper_red_limit=60 else upper_red_limit=${OCF_RESKEY_temp_upper_limit} fi upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold})) #Set disk defaults if [ -z ${OCF_RESKEY_drives} ] ; then DRIVES="/dev/sda" else DRIVES=${OCF_RESKEY_drives} fi #Test for presence of smartctl if [ ! -x $SMARTCTL ] ; then ocf_log err "${SMARTCTL} not installed." exit $OCF_ERR_INSTALLED fi for DRIVE in $DRIVES; do if [ "${OCF_RESKEY_devices}" ]; then for DEVICE in ${OCF_RESKEY_devices}; do $SMARTCTL -d $DEVICE -i ${DRIVE} | grep -q "SMART support is: Enabled" if [ $? -ne "0" ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi done else $SMARTCTL -i ${DRIVE} | grep -q "SMART support is: Enabled" if [ $? -ne "0" ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi fi done } HealthSMART_usage() { cat < 1.0 This is a SysInfo Resource Agent. It records (in the CIB) various attributes of a node Sample Linux output: arch: i686 os: Linux-2.4.26-gentoo-r14 free_swap: 1999 cpu_info: Intel(R) Celeron(R) CPU 2.40GHz cpu_speed: 4771.02 cpu_cores: 1 cpu_load: 0.00 ram_total: 513 ram_free: 117 root_free: 2.4 Sample Darwin output: arch: i386 os: Darwin-8.6.2 cpu_info: Intel Core Duo cpu_speed: 2.16 cpu_cores: 2 cpu_load: 0.18 ram_total: 2016 ram_free: 787 root_free: 13 Units: free_swap: Mb ram_*: Mb cpu_speed (Linux): bogomips cpu_speed (Darwin): Ghz *_free: GB (or user-defined: disk_unit) SysInfo resource agent PID file PID file Interval to allow values to stabilize Dampening Delay Filesystems or Paths to be queried for free disk space as a SPACE separated list - e.g "/dev/sda1 /tmp". Results will be written to an attribute with leading slashes removed, and other slashes replaced with underscore, and the word 'free' appended - e.g /dev/sda1 -> dev_sda1_free Note: The root filesystem '/' is always queried to an attribute named 'root_free' List of Filesytems/Paths to query for free disk space -content type="string" /> + Unit to report disk free space in. Can be one of: B, K, M, G, T, P (case-insensitive) Unit to report disk free space in -content type="string" default="G"/> + END } ####################################################################### UpdateStat() { name=$1; shift value="$*" printf "%s:\t%s\n" "$name" "$value" ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value" } SysInfoStats() { UpdateStat arch "`uname -m`" UpdateStat os "`uname -s`-`uname -r`" case `uname -s` in "Darwin") mem=`top -l 1 | grep Mem: | awk '{print $10}'` mem_used=`top -l 1 | grep Mem: | awk '{print $8}'` mem=`SysInfo_mem_units $mem` mem_used=`SysInfo_mem_units $mem_used` mem_total=`expr $mem_used + $mem` cpu_type=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Type/ {print $2; exit}'` cpu_speed=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Speed/ {print $2; exit}'` cpu_cores=`system_profiler SPHardwareDataType | awk -F': ' '/^Number Of/ {print $2; exit}'` ;; "Linux") if [ -f /proc/cpuinfo ]; then cpu_type=`awk -F': ' '/model name/ {print $2; exit}' /proc/cpuinfo` cpu_speed=`awk -F': ' '/bogomips/ {print $2; exit}' /proc/cpuinfo` cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l` fi if [ -f /proc/meminfo ]; then # meminfo results are in kB mem=`grep "SwapFree" /proc/meminfo | awk '{print $2"k"}'` if [ ! -z $mem ]; then UpdateStat free_swap `SysInfo_mem_units $mem` fi mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'` mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'` else mem=`top -n 1 | grep Mem: | awk '{print $7}'` fi ;; *) esac if [ x != x"$cpu_type" ]; then UpdateStat cpu_info "$cpu_type" fi if [ x != x"$cpu_speed" ]; then UpdateStat cpu_speed "$cpu_speed" fi if [ x != x"$cpu_cores" ]; then UpdateStat cpu_cores "$cpu_cores" fi loads=`uptime` load15=`echo ${loads} | awk '{print $10}'` UpdateStat cpu_load $load15 if [ ! -z "$mem" ]; then # Massage the memory values UpdateStat ram_total `SysInfo_mem_units $mem_total` UpdateStat ram_free `SysInfo_mem_units $mem` fi # Portability notes: # o tail: explicit "-n" not available in Solaris; instead simplify # 'tail -n ' to the equivalent 'tail -'. for disk in "/" ${OCF_RESKEY_disks}; do unset disk_free disk_label disk_free=`df -h ${disk} | tail -1 | awk '{print $4}'` if [ x != x"$disk_free" ]; then disk_label=`echo $disk | sed -e 's#^/$#root#;s#^/*##;s#/#_#g'` UpdateStat ${disk_label}_free `SysInfo_hdd_units $disk_free` fi done } SysInfo_megabytes() { # Size in megabytes echo $1 | awk '{ n = $0; sub(/[0-9]+(.[0-9]+)?/, ""); split(n, a, $0); n=a[1]; if ($0 == "G" || $0 == "") { n *= 1024 }; if (/^kB?/) { n /= 1024 }; printf "%d\n", n }' # Intentionaly round to an integer } SysInfo_mem_units() { mem=$1 if [ -z $1 ]; then return fi mem=$(SysInfo_megabytes "$1") # Round to the next multiple of 50 r=$(($mem % 50)) if [ $r != 0 ]; then mem=$(($mem + 50 - $r)) fi echo $mem } SysInfo_hdd_units() { # Defauts to size in gigabytes case $OCF_RESKEY_disk_unit in [Pp]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024 / 1024));; [Tt]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024));; [Gg]) echo $(($(SysInfo_megabytes "$1") / 1024));; [Mm]) echo SysInfo_megabytes "$1";; [Kk]) echo $(($(SysInfo_megabytes "$1") * 1024));; [Bb]) echo $(($(SysInfo_megabytes "$1") * 1024 * 1024));; *) ocf_log err "Invalid value for disk_unit: $OCF_RESKEY_disk_unit" echo $(($(SysInfo_megabytes "$1") / 1024));; esac } SysInfo_usage() { cat < $OCF_RESKEY_pidfile SysInfoStats exit $OCF_SUCCESS } SysInfo_stop() { rm $OCF_RESKEY_pidfile exit $OCF_SUCCESS } SysInfo_monitor() { if [ -f $OCF_RESKEY_pidfile ]; then clone=`cat $OCF_RESKEY_pidfile` fi if [ x$clone = x ]; then rm $OCF_RESKEY_pidfile exit $OCF_NOT_RUNNING elif [ $clone = $OCF_RESKEY_clone ]; then SysInfoStats exit $OCF_SUCCESS elif [ x$OCF_RESKEY_CRM_meta_globally_unique = xtrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xTrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xyes -o x$OCF_RESKEY_CRM_meta_globally_unique = xYes ]; then SysInfoStats exit $OCF_SUCCESS fi exit $OCF_NOT_RUNNING } SysInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then SysInfo_usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_pidfile:="$HA_VARRUN/SysInfo-${OCF_RESOURCE_INSTANCE}"} : ${OCF_RESKEY_disk_unit:="G"} : ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) SysInfo_start ;; stop) SysInfo_stop ;; monitor) SysInfo_monitor ;; validate-all) SysInfo_validate ;; usage|help) SysInfo_usage exit $OCF_SUCCESS ;; *) SysInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $?