diff --git a/extra/resources/ClusterMon b/extra/resources/ClusterMon index 8efdf1beae..5d1472d1a9 100644 --- a/extra/resources/ClusterMon +++ b/extra/resources/ClusterMon @@ -1,267 +1,267 @@ #!/bin/bash # # # ClusterMon OCF RA. # Starts crm_mon in background which logs cluster status as # html to the specified file. # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # OCF instance parameters: # OCF_RESKEY_user # OCF_RESKEY_pidfile # OCF_RESKEY_update # OCF_RESKEY_extra_options # OCF_RESKEY_htmlfile ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < - + 1.0 This is a ClusterMon Resource Agent. It outputs current cluster status to the html. Runs crm_mon in the background, recording the cluster status to an HTML file The user we want to run crm_mon as The user we want to run crm_mon as How frequently should we update the cluster status Update interval Additional options to pass to crm_mon. Eg. -n -r Extra options PID file location to ensure only one instance is running PID file Location to write HTML output to. HTML output END } ####################################################################### ClusterMon_usage() { cat </dev/null 2>&1; rc=$? case $rc in 0) exit $OCF_SUCCESS;; 1) exit $OCF_NOT_RUNNING;; *) exit $OCF_ERR_GENERIC;; esac fi fi exit $OCF_NOT_RUNNING } CheckOptions() { while getopts Vi:nrh:cdp: OPTION do case $OPTION in V|n|r|c|d);; i) ocf_log warn "You should not have specified the -i option, since OCF_RESKEY_update is set already!";; h) ocf_log warn "You should not have specified the -h option, since OCF_RESKEY_htmlfile is set already!";; p) ocf_log warn "You should not have specified the -p option, since OCF_RESKEY_pidfile is set already!";; *) return $OCF_ERR_ARGS;; esac done if [ $? -ne 0 ]; then return $OCF_ERR_ARGS fi # We should have eaten all options at this stage shift $(($OPTIND -1)) if [ $# -gt 0 ]; then false else true fi } ClusterMon_validate() { # Existence of the user if [ ! -z $OCF_RESKEY_user ]; then getent passwd "$OCF_RESKEY_user" >/dev/null if [ $? -eq 0 ]; then : Yes, user exists. We can further check his permission on crm_mon if necessary else ocf_log err "The user $OCF_RESKEY_user does not exist!" exit $OCF_ERR_ARGS fi fi # Pidfile better be an absolute path case $OCF_RESKEY_pidfile in /*) ;; *) ocf_log warn "You should have pidfile($OCF_RESKEY_pidfile) of absolute path!" ;; esac # Check the update interval if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then : else ocf_log err "Invalid update interval $OCF_RESKEY_update. It should be positive integer!" exit $OCF_ERR_ARGS fi if CheckOptions $OCF_RESKEY_extra_options; then : else ocf_log err "Invalid options $OCF_RESKEY_extra_options!" exit $OCF_ERR_ARGS fi # Htmlfile better be an absolute path case $OCF_RESKEY_htmlfile in /*) ;; *) ocf_log warn "You should have htmlfile($OCF_RESKEY_htmlfile) of absolute path!" ;; esac echo "Validate OK" return $OCF_SUCCESS } if [ $# -ne 1 ]; then ClusterMon_usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_update:="15000"} : ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} : ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} OCF_RESKEY_update=`expr $OCF_RESKEY_update / 1000` case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ClusterMon_start ;; stop) ClusterMon_stop ;; monitor) ClusterMon_monitor ;; validate-all) ClusterMon_validate ;; usage|help) ClusterMon_usage exit $OCF_SUCCESS ;; *) ClusterMon_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/extra/resources/HealthCPU b/extra/resources/HealthCPU index 32a10ad3e7..c5fbb5372a 100644 --- a/extra/resources/HealthCPU +++ b/extra/resources/HealthCPU @@ -1,222 +1,222 @@ #!/bin/sh # # # HealthCPU OCF RA. Measures CPUs idling and writes # #health-cpu status into the CIB # # Copyright (c) 2009 Michael Schwartzkopff # in collaboration with the Bull company. Merci! # # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ################################ # # TODO: Enter default values # Error handling in getting uptime # ################################## ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < -0.1 +1.0 Systhem health agent that measures the CPU idling and updates the #health-cpu attribute. System health CPU usage Location to store the resource state in. State file Lower (!) limit of idle percentage to switch the health attribute to yellow. I.e. the #health-cpu will go yellow if the %idle of the CPU falls below 50%. Lower limit for yellow health attribute Lower (!) limit of idle percentage to switch the health attribute to red. I.e. the #health-cpu will go red if the %idle of the CPU falls below 10%. Lower limit for red health attribute END } ####################################################################### # don't exit on TERM, to test that lrmd makes sure that we do exit trap sigterm_handler TERM sigterm_handler() { ocf_log info "They use TERM to bring us down. No such luck." return } dummy_usage() { cat < -0.1 +1.0 Systhem health agent that checks the S.M.A.R.T. status of the given drives and updates the #health-smart attribute. SMART health status Location to store the resource state in. State file The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". Drives to check The device type(s) to assume for the drive(s) being tested as a SPACE separated list. Device types Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red. Lower limit for the red smart attribute Upper limit of the temperature if deg C of the drives(s). If the drive reports a temperature higher than this value the status of #health-smart will be red. Upper limit for red smart attribute Number of deg C below/above the upper/lower temp limits at which point the status of #health-smart will change to yellow. Deg C below/above the upper limits for yellow smart attribute END } ####################################################################### check_temperature() { if [ $1 -lt ${lower_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C" $ATTRDUP -n "#health-smart" -U "red" -d "5s" return 1 fi if [ $1 -gt ${upper_red_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C" $ATTRDUP -n "#health-smart" -U "red" -d "5s" return 1 fi if [ $1 -lt ${lower_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C" $ATTRDUP -n "#health-smart" -U "yellow" -d "5s" return 1 fi if [ $1 -gt ${upper_yellow_limit} ] ; then ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C" $ATTRDUP -n "#health-smart" -U "yellow" -d "5s" return 1 fi } init_smart() { #Set temperature defaults if [ -z ${OCF_RESKEY_temp_warning} ]; then yellow_threshold=5 else yellow_threshold=${OCF_RESKEY_temp_warning} fi if [ -z ${OCF_RESKEY_temp_lower_limit} ] ; then lower_red_limit=0 else lower_red_limit=${OCF_RESKEY_temp_lower_limit} fi lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold})) if [ -z ${OCF_RESKEY_temp_upper_limit} ] ; then upper_red_limit=60 else upper_red_limit=${OCF_RESKEY_temp_upper_limit} fi upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold})) #Set disk defaults if [ -z "${OCF_RESKEY_drives}" ] ; then DRIVES="/dev/sda" else DRIVES=${OCF_RESKEY_drives} fi #Test for presence of smartctl if [ ! -x $SMARTCTL ] ; then ocf_log err "${SMARTCTL} not installed." exit $OCF_ERR_INSTALLED fi for DRIVE in $DRIVES; do if [ "${OCF_RESKEY_devices}" ]; then for DEVICE in ${OCF_RESKEY_devices}; do $SMARTCTL -d $DEVICE -i ${DRIVE} | grep -q "SMART support is: Enabled" if [ $? -ne "0" ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi done else $SMARTCTL -i ${DRIVE} | grep -q "SMART support is: Enabled" if [ $? -ne "0" ] ; then ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE} exit $OCF_ERR_INSTALLED fi fi done } HealthSMART_usage() { cat < - + 1.0 This is a SysInfo Resource Agent. It records (in the CIB) various attributes of a node Sample Linux output: arch: i686 os: Linux-2.4.26-gentoo-r14 free_swap: 1999 cpu_info: Intel(R) Celeron(R) CPU 2.40GHz cpu_speed: 4771.02 cpu_cores: 1 cpu_load: 0.00 ram_total: 513 ram_free: 117 root_free: 2.4 #health_disk: red Sample Darwin output: arch: i386 os: Darwin-8.6.2 cpu_info: Intel Core Duo cpu_speed: 2.16 cpu_cores: 2 cpu_load: 0.18 ram_total: 2016 ram_free: 787 root_free: 13 #health_disk: green Units: free_swap: Mb ram_*: Mb cpu_speed (Linux): bogomips cpu_speed (Darwin): Ghz *_free: GB (or user-defined: disk_unit) SysInfo resource agent PID file PID file Interval to allow values to stabilize Dampening Delay Filesystems or Paths to be queried for free disk space as a SPACE separated list - e.g "/dev/sda1 /tmp". Results will be written to an attribute with leading slashes removed, and other slashes replaced with underscore, and the word 'free' appended - e.g for /dev/sda1 it would be 'dev_sda1_free'. Note: The root filesystem '/' is always queried to an attribute named 'root_free' List of Filesytems/Paths to query for free disk space Unit to report disk free space in. Can be one of: B, K, M, G, T, P (case-insensitive) Unit to report disk free space in The amount of free space required in monitored disks. If any of the monitored disks has less than this amount of free space, , with the node attribute "#health_disk" changing to "red", all resources will move away from the node. Set the node-health-strategy property appropriately for this to take effect. If the unit is not specified, it defaults to disk_unit. minimum disk free space required END } ####################################################################### UpdateStat() { name=$1; shift value="$*" printf "%s:\t%s\n" "$name" "$value" if [ "$__OCF_ACTION" = "start" ] ; then ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -B "$value" else ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value" fi } SysInfoStats() { UpdateStat arch "`uname -m`" UpdateStat os "`uname -s`-`uname -r`" case `uname -s` in "Darwin") mem=`top -l 1 | grep Mem: | awk '{print $10}'` mem_used=`top -l 1 | grep Mem: | awk '{print $8}'` mem=`SysInfo_mem_units $mem` mem_used=`SysInfo_mem_units $mem_used` mem_total=`expr $mem_used + $mem` cpu_type=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Type/ {print $2; exit}'` cpu_speed=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Speed/ {print $2; exit}'` cpu_cores=`system_profiler SPHardwareDataType | awk -F': ' '/^Number Of/ {print $2; exit}'` ;; "Linux") if [ -f /proc/cpuinfo ]; then cpu_type=`awk -F': ' '/model name/ {print $2; exit}' /proc/cpuinfo` cpu_speed=`awk -F': ' '/bogomips/ {print $2; exit}' /proc/cpuinfo` cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l` fi if [ -f /proc/meminfo ]; then # meminfo results are in kB mem=`grep "SwapFree" /proc/meminfo | awk '{print $2"k"}'` if [ ! -z $mem ]; then UpdateStat free_swap `SysInfo_mem_units $mem` fi mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'` mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'` else mem=`top -n 1 | grep Mem: | awk '{print $7}'` fi ;; *) esac if [ x != x"$cpu_type" ]; then UpdateStat cpu_info "$cpu_type" fi if [ x != x"$cpu_speed" ]; then UpdateStat cpu_speed "$cpu_speed" fi if [ x != x"$cpu_cores" ]; then UpdateStat cpu_cores "$cpu_cores" fi loads=`uptime` load15=`echo ${loads} | awk '{print $10}'` UpdateStat cpu_load $load15 if [ ! -z "$mem" ]; then # Massage the memory values UpdateStat ram_total `SysInfo_mem_units $mem_total` UpdateStat ram_free `SysInfo_mem_units $mem` fi # Portability notes: # o tail: explicit "-n" not available in Solaris; instead simplify # 'tail -n ' to the equivalent 'tail -'. for disk in "/" ${OCF_RESKEY_disks}; do unset disk_free disk_label disk_free=`df -h ${disk} | tail -1 | awk '{print $4}'` if [ x != x"$disk_free" ]; then disk_label=`echo $disk | sed -e 's#^/$#root#;s#^/*##;s#/#_#g'` disk_free=`SysInfo_hdd_units $disk_free` UpdateStat ${disk_label}_free $disk_free if [ -n "$MIN_FREE" ]; then if [ $disk_free -le $MIN_FREE ]; then UpdateStat "#health_disk" "red" else UpdateStat "#health_disk" "green" fi fi fi done } SysInfo_megabytes() { # Size in megabytes echo $1 | awk '{ n = $0; sub(/[0-9]+(.[0-9]+)?/, ""); split(n, a, $0); n=a[1]; if ($0 == "G" || $0 == "") { n *= 1024 }; if (/^kB?/) { n /= 1024 }; printf "%d\n", n }' # Intentionaly round to an integer } SysInfo_mem_units() { mem=$1 if [ -z $1 ]; then return fi mem=$(SysInfo_megabytes "$1") # Round to the next multiple of 50 r=$(($mem % 50)) if [ $r != 0 ]; then mem=$(($mem + 50 - $r)) fi echo $mem } SysInfo_hdd_units() { # Defauts to size in gigabytes case $OCF_RESKEY_disk_unit in [Pp]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024 / 1024));; [Tt]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024));; [Gg]) echo $(($(SysInfo_megabytes "$1") / 1024));; [Mm]) echo $(SysInfo_megabytes "$1");; [Kk]) echo $(($(SysInfo_megabytes "$1") * 1024));; [Bb]) echo $(($(SysInfo_megabytes "$1") * 1024 * 1024));; *) ocf_log err "Invalid value for disk_unit: $OCF_RESKEY_disk_unit" echo $(($(SysInfo_megabytes "$1") / 1024));; esac } SysInfo_usage() { cat < $OCF_RESKEY_pidfile SysInfoStats exit $OCF_SUCCESS } SysInfo_stop() { rm $OCF_RESKEY_pidfile exit $OCF_SUCCESS } SysInfo_monitor() { if [ -f $OCF_RESKEY_pidfile ]; then clone=`cat $OCF_RESKEY_pidfile` fi if [ x$clone = x ]; then rm $OCF_RESKEY_pidfile exit $OCF_NOT_RUNNING elif [ $clone = $OCF_RESKEY_clone ]; then SysInfoStats exit $OCF_SUCCESS elif [ x$OCF_RESKEY_CRM_meta_globally_unique = xtrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xTrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xyes -o x$OCF_RESKEY_CRM_meta_globally_unique = xYes ]; then SysInfoStats exit $OCF_SUCCESS fi exit $OCF_NOT_RUNNING } SysInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then SysInfo_usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/SysInfo-${OCF_RESOURCE_INSTANCE}"} : ${OCF_RESKEY_disk_unit:="G"} : ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" else OCF_RESKEY_delay="-d 0" fi MIN_FREE="" if [ -n "$OCF_RESKEY_min_disk_free" ]; then ocf_is_decimal "$OCF_RESKEY_min_disk_free" && OCF_RESKEY_min_disk_free="$OCF_RESKEY_min_disk_free$OCF_RESKEY_disk_unit" MIN_FREE=`SysInfo_hdd_units $OCF_RESKEY_min_disk_free` fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) SysInfo_start ;; stop) SysInfo_stop ;; monitor) SysInfo_monitor ;; validate-all) SysInfo_validate ;; usage|help) SysInfo_usage exit $OCF_SUCCESS ;; *) SysInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/extra/resources/SystemHealth b/extra/resources/SystemHealth index 658d446273..3e76fc3221 100644 --- a/extra/resources/SystemHealth +++ b/extra/resources/SystemHealth @@ -1,252 +1,252 @@ #!/bin/sh # # SystemHealth OCF RA. # # Copyright (c) 2009 International Business Machines (IBM), Mark Hamzy # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < -0.1 +1.0 This is a SystemHealth Resource Agent. It is used to monitor the health of a system via IPMI. SystemHealth resource agent END } ####################################################################### SystemHealth_usage() { cat < /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log err "servicelog_notify not found!" return $OCF_ERR_INSTALLED fi which ipmiservicelogd > /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log err "ipmiservicelogd not found!" return $OCF_ERR_INSTALLED fi test -x $OCF_RESKEY_program RC=$? if [ $RC != 0 ]; then ocf_log err "$OCF_RESKEY_program not found!" return $OCF_ERR_INSTALLED fi } SystemHealth_start() { SystemHealth_monitor RC=$? if [ $RC = $OCF_ERR_GENERIC ]; then return $OCF_ERR_GENERIC elif [ $RC = $OCF_SUCCESS ]; then ocf_log warn "starting an already started SystemHealth" return $OCF_SUCCESS fi service ipmi start > /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log err "Could not start service IPMI!" return $OCF_ERR_GENERIC fi ipmiservicelogd smi 0 > /dev/null 2>&1 & RC=$? if [ $RC != 0 ]; then ocf_log err "Could not start ipmiservicelogd!" return $OCF_ERR_GENERIC fi servicelog_notify --add --type=EVENT --command="$OCF_RESKEY_program" --method=num_arg --match='type=4' > /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log err "servicelog_notify register handler failed!" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } SystemHealth_stop() { SystemHealth_monitor RC=$? if [ $RC = $OCF_ERR_GENERIC ]; then return $OCF_ERR_GENERIC elif [ $RC = $OCF_SUCCESS ]; then killall ipmiservicelogd RC1=$? if [ $RC1 != 0 ]; then ocf_log err "Could not stop ipmiservicelogd!" fi servicelog_notify --remove --command="$OCF_RESKEY_program" > /dev/null 2>&1 RC2=$? if [ $RC2 != 0 ]; then ocf_log err "servicelog_notify remove handler failed!" fi if [ $RC1 = 0 -a $RC2 = 0 ]; then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi elif [ $RC = $OCF_NOT_RUNNING ]; then ocf_log warn "stopping an already stopped SystemHealth" return $OCF_SUCCESS else ocf_log err "SystemHealth_stop: should not be here!" return $OCF_ERR_GENERIC fi } SystemHealth_monitor() { # Monitor _MUST!_ differentiate correctly between running # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING). # That is THREE states, not just yes/no. if [ ! -f /var/run/ipmiservicelogd.pid0 ]; then ocf_log debug "ipmiservicelogd is not running!" return $OCF_NOT_RUNNING fi ps -p `cat /var/run/ipmiservicelogd.pid0` > /dev/null 2>&1 RC=$? if [ $RC != 0 ]; then ocf_log debug "ipmiservicelogd's pid `cat /var/run/ipmiservicelogd.pid0` is not running!" rm /var/run/ipmiservicelogd.pid0 return $OCF_ERR_GENERIC fi servicelog_notify --list --command="$OCF_RESKEY_program" > /dev/null 2>&1 RC=$? if [ $RC = 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } SystemHealth_validate() { SystemHealth_check_tools RC=$? if [ $RC != 0 ]; then return $RC fi return $OCF_SUCCESS } : ${OCF_RESKEY_program=/usr/sbin/notifyServicelogEvent} case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) SystemHealth_usage exit $OCF_SUCCESS ;; esac SystemHealth_check_tools RC=$? if [ $RC != 0 ]; then case $__OCF_ACTION in stop) exit $OCF_SUCCESS;; *) exit $RC;; esac fi case $__OCF_ACTION in start) SystemHealth_start;; stop) SystemHealth_stop;; monitor) SystemHealth_monitor;; reload) ocf_log info "Reloading..." SystemHealth_start ;; validate-all) ;; *) SystemHealth_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/extra/resources/ifspeed b/extra/resources/ifspeed index a9390dc312..a41377c371 100644 --- a/extra/resources/ifspeed +++ b/extra/resources/ifspeed @@ -1,458 +1,458 @@ #!/bin/bash # # OCF resource agent which monitors state of network interface and records it # as a value in CIB based on summ of speeds of its active (up, link detected, # not blocked) underlying interfaces. # # Copyright (c) 2011 Vladislav Bogdanov # Partially based on 'ping' RA by Andrew Beekhof # # OCF instance parameters: # OCF_RESKEY_name: name of attribute to set in CIB # OCF_RESKEY_iface: network interface to monitor # OCF_RESKEY_bridge_ports: if not null and OCF_RESKEY_iface is a bridge, list of # bridge ports to consider. # Default is all ports which have designated_bridge=root_id # OCF_RESKEY_weight_base: Relative weight of 1Gbps. This can be used to tune # value of resulting CIB attribute. # # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} # Defaults OCF_RESKEY_name_default="ifspeed" OCF_RESKEY_bridge_ports_default="detect" OCF_RESKEY_weight_base_default=1000 OCF_RESKEY_dampen_default=5 : ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} : ${OCF_RESKEY_bridge_ports=${OCF_RESKEY_bridge_ports_default}} : ${OCF_RESKEY_weight_base=${OCF_RESKEY_weight_base_default}} : ${OCF_RESKEY_dampen=${OCF_RESKEY_dampen_default}} meta_data() { cat < - + 1.0 Every time the monitor action is run, this resource agent records (in the CIB) (relative) speed of network interface it monitors. This RA can monitor physical interfaces, bonds, bridges, vlans and (hopefully) any combination of them. Examples: *) Bridge on top of one 10Gbps interface (eth2) and 802.3ad bonding (bond0) built on two 1Gbps interfaces (eth0 and eth1). *) Active-backup bonding built on top of one physical interface and one vlan on another interface. For STP-enabled bridges this RA tries to some-how guess network topology and by default looks only on ports which are connected to upstream switch. This can be overriden by 'bridge_ports' parameter. Active interfaces in this case are those in "forwarding" state. For balancing bonds this RA summs speeds of underlying "up" slave interfaces (and applies coefficient 0.8 to result). For non-balancing bonds ('active-backup' and probably 'brodcast') only speed of now active slave is used. Network interface speed monitor The name of the attribute to set. This is the name to be used in the constraints. Attribute name Network interface to monitor. Network interface If not null and OCF_RESKEY_iface is a bridge, list of bridge ports to consider. Default is all ports which have designated_bridge=root_id. Bridge ports Relative weight of 1Gbps in interface speed. Can be used to tune how big attribute value will be. Weight of 1Gbps The time to wait (dampening) for further changes to occur. Dampening interval Log what have been done more verbosely. Verbose logging END } usage() { cat </dev/null)" if [ -z "$SP_OUT" ] then modprobe -s ocfs2_stack_user if [ $? != 0 ]; then ocf_log err "Could not load ocfs2_stack_user" return $OCF_ERR_INSTALLED fi fi SP_OUT="$(awk '/^'user'$/{print; exit}' "$LOADED_PLUGINS_FILE" 2>/dev/null)" if [ -z "$SP_OUT" ]; then ocf_log err "Switch to userspace stack unsuccessful" return $OCF_ERR_INSTALLED fi if [ -f "$CLUSTER_STACK_FILE" ]; then echo "$OCF_RESKEY_stack" >"$CLUSTER_STACK_FILE" if [ $? != 0 ]; then ocf_log err "Userspace stack '$OCF_RESKEY_stack' not supported" return $OCF_ERR_INSTALLED fi else ocf_log err "Switch to userspace stack not supported" return $OCF_ERR_INSTALLED fi driver_filesystem ocfs2; rc=$? if [ $rc != 0 ]; then modprobe -s ocfs2 if [ "$?" != 0 ]; then ocf_log err "Unable to load ocfs2 module" return $OCF_ERR_INSTALLED fi fi bringup_daemon return $? } o2cb_stop() { o2cb_monitor; rc=$? case $rc in $OCF_NOT_RUNNING) return $OCF_SUCCESS;; esac ocf_log info "Stopping $OCF_RESOURCE_INSTANCE" kill_daemon if [ $? != 0 ]; then ocf_log err "Unable to unload modules: the cluster is still online" return $OCF_ERR_GENERIC fi unload_filesystem ocfs2 if [ $? = 1 ]; then ocf_log err "Unable to unload ocfs2 module" return $OCF_ERR_GENERIC fi # If we can't find the stack glue, we have nothing to do. [ ! -e "$LOADED_PLUGINS_FILE" ] && return $OCF_SUCCESS while read plugin do unload_module "ocfs2_stack_${plugin}" if [ $? = 1 ]; then ocf_log err "Unable to unload ocfs2_stack_${plugin}" return $OCF_ERR_GENERIC fi done <"$LOADED_PLUGINS_FILE" unload_module "ocfs2_stackglue" if [ $? = 1 ]; then ocf_log err "Unable to unload ocfs2_stackglue" return $OCF_ERR_GENERIC fi # Don't unmount configfs - its always in use by libdlm } o2cb_monitor() { o2cb_validate # Assume that ocfs2_controld will terminate if any of the conditions below are met driver_filesystem configfs; rc=$? if [ $rc != 0 ]; then ocf_log info "configfs not loaded" return $OCF_NOT_RUNNING fi check_filesystem configfs "${OCF_RESKEY_configfs}"; rc=$? if [ $rc != 0 ]; then ocf_log info "configfs not mounted" return $OCF_NOT_RUNNING fi if [ ! -e "$LOADED_PLUGINS_FILE" ]; then ocf_log info "Stack glue driver not loaded" return $OCF_NOT_RUNNING fi grep user "$LOADED_PLUGINS_FILE" >/dev/null 2>&1; rc=$? if [ $rc != 0 ]; then ocf_log err "Wrong stack `cat $LOADED_PLUGINS_FILE`" return $OCF_ERR_INSTALLED fi driver_filesystem ocfs2; rc=$? if [ $rc != 0 ]; then ocf_log info "ocfs2 not loaded" return $OCF_NOT_RUNNING fi status_daemon return $? } o2cb_usage() { echo "usage: $0 {start|stop|monitor|validate-all|meta-data}" echo " Expects to have a fully populated OCF RA-compliant environment set." echo " In particualr, a value for OCF_ROOT" } o2cb_validate() { check_binary ${DAEMON} case ${OCF_RESKEY_CRM_meta_globally_unique} in yes|Yes|true|True|1) ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute" exit $OCF_ERR_CONFIGURED ;; esac return $OCF_SUCCESS } meta_data() { cat < - + 1.0 OCFS2 daemon resource agent This Resource Agent controls the userspace daemon needed by OCFS2. Location where sysfs is mounted Sysfs location Location where configfs is mounted Configfs location Which userspace stack to use. Known values: pcmk, cman Userspace stack Number of seconds to allow the control daemon to come up Daemon Timeout END } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) o2cb_start ;; stop) o2cb_stop ;; monitor) o2cb_monitor ;; validate-all) o2cb_validate ;; usage|help) o2cb_usage exit $OCF_SUCCESS ;; *) o2cb_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/extra/resources/ping b/extra/resources/ping index e2c5e9eefb..26cc0cc5d5 100755 --- a/extra/resources/ping +++ b/extra/resources/ping @@ -1,436 +1,436 @@ #!/bin/sh # # # Ping OCF RA that utilizes the system ping # # Copyright (c) 2009 Andrew Beekhof # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < - + 1.0 Every time the monitor action is run, this resource agent records (in the CIB) the current number of nodes the host can connect to using the system fping (preferred) or ping tool. node connectivity PID file PID file The time to wait (dampening) further changes occur Dampening interval The name of the attributes to set. This is the name to be used in the constraints. Attribute name The number by which to multiply the number of connected ping nodes by Value multiplier A space separated list of ping nodes to count. Host list Number of ping attempts, per host, before declaring it dead no. of ping attempts How long, in seconds, to wait before declaring a ping lost ping timeout in seconds A catch all for any other options that need to be passed to ping. Extra Options Resource is failed if the score is less than failure_score. Default never fails. failure_score Use fping rather than ping, if found. If set to 0, fping will not be used even if present. Use fping if available Enables to use default attrd_updater verbose logging on every call. Verbose logging END } ####################################################################### ping_conditional_log() { level=$1; shift if [ ${OCF_RESKEY_debug} = "true" ]; then ocf_log $level "$*" fi } ping_usage() { cat <$f_out 2>$f_err; rc=$? active=`grep alive $f_out|wc -l` case $rc in 0) ;; 1) for h in `grep unreachable $f_out | awk '{print $1}'`; do ping_conditional_log warn "$h is inactive" done ;; *) ocf_log err "Unexpected result for '$cmd' $rc: `tr '\n' ';' < $f_err`" ;; esac rm -f $f_out $f_err return $active } ping_check() { active=0 for host in $OCF_RESKEY_host_list; do p_exe=ping case `uname` in Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";; Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; *) ocf_log err "Unknown host type: `uname`"; exit $OCF_ERR_INSTALLED;; esac case $host in *:*) p_exe=ping6 esac p_out=`$p_exe $p_args $OCF_RESKEY_options $host 2>&1`; rc=$? case $rc in 0) active=`expr $active + 1`;; 1) ping_conditional_log warn "$host is inactive: $p_out";; *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";; esac done return $active } ping_update() { if use_fping; then fping_check active=$? else ping_check active=$? fi score=`expr $active \* $OCF_RESKEY_multiplier` if [ "$__OCF_ACTION" = "start" ] ; then attrd_updater -n $OCF_RESKEY_name -B $score -d $OCF_RESKEY_dampen $attrd_options else attrd_updater -n $OCF_RESKEY_name -v $score -d $OCF_RESKEY_dampen $attrd_options fi rc=$? case $rc in 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; esac if [ $rc -ne 0 ]; then return $rc fi if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" return 1 fi return 0 } use_fping() { ocf_is_true "$OCF_RESKEY_use_fping" && have_binary fping; } # return values: # 4 IPv4 # 6 IPv6 # 0 indefinite (i.e. hostname) host_family() { case $1 in *[0-9].*[0-9].*[0-9].*[0-9]) return 4 ;; *:*) return 6 ;; *) return 0 ;; esac } # return values same as host_family plus # 99 ambiguous families hosts_family() { # For fping allow only same IP versions or hostnames family=0 for host in $OCF_RESKEY_host_list; do host_family $host f=$? if [ $family -ne 0 -a $f -ne 0 -a $f -ne $family ] ; then family=99 break fi [ $f -ne 0 ] && family=$f done return $family } : ${OCF_RESKEY_name:="pingd"} : ${OCF_RESKEY_dampen:="5s"} : ${OCF_RESKEY_attempts:="3"} : ${OCF_RESKEY_multiplier:="1"} : ${OCF_RESKEY_debug:="false"} : ${OCF_RESKEY_failure_score:="0"} : ${OCF_RESKEY_use_fping:="1"} : ${OCF_RESKEY_CRM_meta_timeout:="20000"} : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} integer=`echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*'` case ${OCF_RESKEY_timeout} in *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=`expr $integer / 1000`;; *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=`expr $integer \* 60`;; *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=`expr $integer \* 60 \* 60`;; *) OCF_RESKEY_timeout=$integer;; esac if [ -z ${OCF_RESKEY_timeout} ]; then if [ x"$OCF_RESKEY_host_list" != x ]; then host_count=`echo $OCF_RESKEY_host_list | awk '{print NF}'` OCF_RESKEY_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts` OCF_RESKEY_timeout=`expr $OCF_RESKEY_timeout / 1100` # Convert to seconds and finish 10% early else OCF_RESKEY_timeout=5 fi fi if [ ${OCF_RESKEY_timeout} -lt 1 ]; then OCF_RESKEY_timeout=5 elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then # ping actually complains if this value is too high, 5 minutes is plenty OCF_RESKEY_timeout=300 fi if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESKEY_name}"} else : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESOURCE_INSTANCE}"} fi # Check the debug option case "${OCF_RESKEY_debug}" in true|True|TRUE|1) OCF_RESKEY_debug=true;; false|False|FALSE|0) OCF_RESKEY_debug=false;; *) ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}" OCF_RESKEY_debug=false ;; esac attrd_options='-q' if [ ${OCF_RESKEY_debug} = "true" ]; then attrd_options='' fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ping_start;; stop) ping_stop;; monitor) ping_monitor;; validate-all) ping_validate;; usage|help) ping_usage exit $OCF_SUCCESS ;; *) ping_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/extra/resources/pingd b/extra/resources/pingd index add152642e..6003c02fe0 100644 --- a/extra/resources/pingd +++ b/extra/resources/pingd @@ -1,200 +1,200 @@ #!/bin/sh # # # pingd OCF Resource Agent # Records (in the CIB) the current number of ping nodes a # cluster node can connect to. # # Copyright (c) 2006 Andrew Beekhof # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} : ${OCF_RESKEY_name:="pingd"} : ${OCF_RESKEY_interval:="1"} : ${OCF_RESKEY_CRM_meta_interval:=0} upgrade1="This agent (ocf:pacemaker:pingd) has been replaced by the more reliable ocf:pacemaker:ping." upgrade2="Attempting automated conversion, run 'crm ra info ocf:pacemaker:ping' for all configuration options" upgrade3="You will need to remove the existing resource and replace it with one that uses 'ocf:pacemaker:ping' directly" case $__OCF_ACTION in start|monitor) if [ "x" != "x$OCF_RESKEY_host_list" ]; then ocf_log err "$upgrade1" ocf_log err "$upgrade2" ocf_log err "Automatic conversion to ocf:pacemaker:ping failed: no hosts were configured to check for connectivity" ocf_log err "$upgrade3" exit $OCF_ERR_ARGS fi recurring=`crm configure show $OCF_RESOURCE_INSTANCE | grep "op monitor.*interval=\"[1-9]" | sed s/.*interval=// | awk -F\" '{print $2}' | sed s/.*interval=// | awk -F\" '{print $2}' | sort | head -n 1` if [ -z $recurring ]; then ocf_log err "$upgrade1" ocf_log err "$upgrade2" ocf_log err "Automatic conversion to ocf:pacemaker:ping failed: no monitor operation configured" ocf_log err "Without an explicit monitor operation for '$OCF_RESOURCE_INSTANCE', connectivity changes will not be noticed" ocf_log err "Preventing startup to ensure the issue is addressed before it matters" exit $OCF_ERR_ARGS fi if [ $OCF_RESKEY_CRM_meta_interval = 0 ]; then ocf_log warn "$upgrade1" ocf_log warn "$upgrade2" if [ $recurring != $OCF_RESKEY_interval ]; then ocf_log warn "Your monitor operation happens every $recurring, which means that the $OCF_RESKEY_name attribute will be updated with a different frequency than the previously configured ( $OCF_RESKEY_interval )" ocf_log warn "Either change the monitor interval to match or, ideally, switch to the ocf:pacemaker:ping agent and avoid all this compatibility nonsense." fi fi ;; meta-data) cat < - + 1.0 This agent (ocf:pacemaker:pingd) has been replaced by the more reliable ocf:pacemaker:ping. It records (in the CIB) the current number of ping nodes (specified in the 'host_list' parameter) a cluster node can connect to. pingd resource agent PID file PID file The user we want to run pingd as The user we want to run pingd as The time to wait (dampening) further changes occur Dampening interval The name of the instance_attributes set to place the value in. Rarely needs to be specified. Set name The name of the attributes to set. This is the name to be used in the constraints. Attribute name The section place the value in. Rarely needs to be specified. Section name The number by which to multiply the number of connected ping nodes by Value multiplier The list of ping nodes to count. Defaults to all configured ping nodes. Rarely needs to be specified. Host list How often, in seconds, to check for node liveliness ping interval in seconds Number of ping attempts, per host, before declaring it dead no. of ping attempts How long, in seconds, to wait before declaring a ping lost ping timeout in seconds A catch all for any other options that need to be passed to pingd. Extra Options END exit $OCF_SUCCESS ;; esac ${OCF_ROOT}/resource.d/pacemaker/ping $1 exit $? diff --git a/extra/resources/remote b/extra/resources/remote index bf6a91c0d1..afd8c79973 100644 --- a/extra/resources/remote +++ b/extra/resources/remote @@ -1,125 +1,125 @@ #!/bin/sh # # # remote OCF RA. This script provides metadata for the internal # pacemaker remote lrmd connection agent. Outside of acting # as a place holder so the remote ra script can be indexed and # providing metadata, this script should never be invoked. The # actual functionality behind the remote lrmd connection lives # within pacemaker's crmd component. # # Copyright (c) 2013 David Vossel # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < -0.1 +1.0 remote resource agent Server location to connect to. This can be an ip address or hostname. Server location tcp port to connect to. tcp port Interval in seconds at which Pacemaker will attempt to reconnect to a remote node after an active connection to the remote node has been severed. When this value is nonzero, Pacemaker will retry the connection indefinitely, at the specified interval. As with any time-based actions, this is not guaranteed to be checked more frequently than the value of the cluster-recheck-interval cluster option. reconnect interval END } ####################################################################### remote_usage() { cat <