Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/agent/sbd.in b/agent/sbd.in
index 82ad2f6..0057023 100644
--- a/agent/sbd.in
+++ b/agent/sbd.in
@@ -1,302 +1,303 @@
#!/bin/bash
#
# This STONITH script drives the shared-storage stonith plugin.
#
# Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Main code
# When no device was handed in through $sbd_device, fall back to SBD_DEVICE
# from the sbd configuration file (if that file exists).
if [[ -z "$sbd_device" && -f @CONFIGDIR@/sbd ]]; then
  . @CONFIGDIR@/sbd
  sbd_device=$SBD_DEVICE
fi

# SBD_DEVS keeps the semicolon-separated list, minus one trailing semicolon.
SBD_DEVS=${sbd_device%;}
# sbd_device becomes "dev1 -d dev2 ..." so it can directly follow a leading
# "-d" on an sbd command line.
sbd_device=${SBD_DEVS//;/ -d }
#######################################
# Relay the output of a background sbd command to stdout and work out
# whether the command succeeded.
# Globals:
#   running          (written) 1 if the command was still alive after the
#                    timed read, otherwise 0
#   unknown_hanging  (written) 1 if the command was still alive and had not
#                    printed an "sbd failed" line, otherwise 0
# Arguments:
#   $1 - pid of the background command
#   $2 - file descriptor its output is read from
#   $3 - non-zero: take the exit status from "wait $1";
#        zero: infer success from the output that was seen
# Outputs:
#   Every line read from the descriptor, unmodified apart from the
#   leading/trailing whitespace trimming done by "read".
# Returns:
#   0 on success; non-zero on failure or when the command is still running.
#######################################
sbd_cmd_output() {
  local pid=$1
  local fd=$2
  local call_wait=$3
  local any_output=0
  local failed=0
  local line

  running=0
  unknown_hanging=0

  # Async IO timeout defaults to 3 seconds; each read waits up to 5 seconds.
  # -r keeps backslashes in the relayed output intact.
  while read -r -t 5 line; do
    printf '%s\n' "$line"
    any_output=1
    # Indicator of failure in case that stderr is retrieved
    if [[ "$line" == *"sbd failed"* ]]; then
      failed=1
    fi
  done <&"$fd"

  # Command still existing
  if kill -0 "$pid" > /dev/null 2>&1; then
    running=1
    # A command that already reported failure is not worth waiting for;
    # anything else is flagged so the caller may poll it again later.
    if (( failed == 0 )); then
      unknown_hanging=1
    fi
    return 1
  fi

  # Command exited: safe now to retrieve any remaining output without
  # specifying a timeout.
  while read -r line; do
    printf '%s\n' "$line"
    any_output=1
    if [[ "$line" == *"sbd failed"* ]]; then
      failed=1
    fi
  done <&"$fd"

  # Determine the exit status.
  # bash's wait command only recognizes the latest child even if the pids of
  # the previous children were saved, hence the caller decides via $3.
  if (( call_wait != 0 )); then
    wait "$pid"
    return $?
  fi

  # Without wait, assume that a command which printed anything other than an
  # explicit failure has succeeded.
  if (( any_output != 0 && failed == 0 )); then
    return 0
  fi
  return 1
}
#######################################
# Run "sbd -d <device> <cmd>" once per configured device and relay the
# output of each invocation to stdout through sbd_cmd_output.
# Globals:
#   SBD_DEVS         (read) semicolon-separated device list
#   unknown_hanging  (read) set by sbd_cmd_output
#   running          (read) set by sbd_cmd_output
# Arguments:
#   $1 - sbd sub-command (word-split on purpose, so it may carry arguments)
# Outputs:
#   Whatever the sbd invocations printed.
# Returns:
#   0 if no invocation failed, otherwise the status of the last invocation
#   that failed.
#######################################
sbd_cmd_get_stdout() {
  local devices=${SBD_DEVS//;/ }
  local cmd="$1"
  local rc=0
  local cmd_rc
  local success_count=0
  local running_count
  local device proc pid fd
  local -a hanging_procs=()

  # Word splitting of $devices and $cmd is intended here.
  # shellcheck disable=SC2086
  for device in $devices; do
    exec {fd}< <(sbd -d "$device" $cmd)
    pid=$!

    sbd_cmd_output "$pid" "$fd" 1
    cmd_rc=$?
    if (( cmd_rc == 0 )); then
      success_count=$((success_count + 1))
    else
      rc=$cmd_rc
    fi

    # Remember commands that are still alive without a reported failure.
    if (( unknown_hanging != 0 )); then
      hanging_procs+=("$pid:$fd")
    fi
  done

  if (( ${#hanging_procs[@]} == 0 || success_count > 0 )); then
    return "$rc"
  fi

  # We didn't get any successful output.
  # Desperately wait for the ones hanging in unknown state. Each pass goes
  # through sbd_cmd_output's timed read for every command still alive.
  while true; do
    running_count=0
    for proc in "${hanging_procs[@]}"; do
      pid=${proc%:*}
      fd=${proc#*:}

      # Only the latest child can be waited for, so infer status from output.
      sbd_cmd_output "$pid" "$fd" 0
      cmd_rc=$?
      if (( cmd_rc == 0 )); then
        success_count=$((success_count + 1))
      else
        rc=$cmd_rc
      fi

      if (( running != 0 )); then
        running_count=$((running_count + 1))
      fi
    done

    if (( success_count > 0 || running_count == 0 )); then
      return "$rc"
    fi
  done
}
# Terminate the agent with status 1, after logging an error through
# ha_log.sh, when $sbd_device is empty. Returns 0 otherwise.
sbd_check_device() {
  [[ -n "$sbd_device" ]] && return 0

  ha_log.sh err "No sbd device(s) found in the configuration."
  exit 1
}
#######################################
# Convert a duration given as a non-negative integer with an optional unit
# (ms/msec, us/usec, s/sec, m/min, h/hr; bare numbers are seconds) into
# whole seconds, rounding sub-second units up.
# Arguments: $1 - duration string
# Outputs:   the number of seconds on stdout
# Returns:   0 on success, 1 if the value cannot be parsed
#######################################
_sbd_duration_to_seconds() {
  local value=${1//[[:space:]]/}
  local number=${value%%[!0-9]*}
  local unit=${value#"$number"}

  [[ -n "$number" ]] || return 1
  # Force base 10 so that leading zeros are not taken as octal.
  number=$((10#$number))

  case "$unit" in
    ""|s|sec) printf '%s\n' "$number" ;;
    ms|msec)  printf '%s\n' $(( (number + 999) / 1000 )) ;;
    us|usec)  printf '%s\n' $(( (number + 999999) / 1000000 )) ;;
    m|min)    printf '%s\n' $(( number * 60 )) ;;
    h|hr)     printf '%s\n' $(( number * 3600 )) ;;
    *)        return 1 ;;
  esac
}

#######################################
# Check that the CIB property stonith-timeout is long enough for the
# msgwait timeout reported by "sbd dump". If it is too short, log the
# problem, write a suggested value into the configuration and exit 1.
# Globals:
#   timeout_bypass (read) any of yes|true|1|YES|TRUE|ja|on|ON skips the check
# Returns:
#   0 when the check is bypassed, passes, or cannot be performed because
#   either timeout could not be determined.
#######################################
sbd_validate_timeout() {
  local crm_timeout sbd_timeout sbd_timeout_min sbd_timeout_suggested

  case "$timeout_bypass" in
    yes|true|1|YES|TRUE|ja|on|ON) return ;;
  esac

  # stonith-timeout in seconds (20s is used when the property is unset).
  crm_timeout=$(_sbd_duration_to_seconds \
    "$(crm_attribute -t crm_config -G -n stonith-timeout -d 20s -q)")

  # First msgwait value found in the dump output of the configured devices.
  sbd_timeout=$(sbd_cmd_get_stdout dump \
    | sed -n 's/.*msgwait.*: \([0-9][0-9]*\).*/\1/p' \
    | head -n 1)
  if [[ -z "$sbd_timeout" || "$sbd_timeout" == "0" ]]; then
    return
  fi

  # Require msgwait plus 20%, but never less than 20 seconds.
  sbd_timeout_min=$(( 10#$sbd_timeout * 12 / 10 ))
  if (( sbd_timeout_min < 20 )); then
    sbd_timeout_min=20
  fi
  # Suggest a further 20% on top of the minimum.
  sbd_timeout_suggested=$(( sbd_timeout_min * 12 / 10 ))

  # An unparsable stonith-timeout cannot be compared; skip the check.
  if [[ -z "$crm_timeout" ]]; then
    return
  fi

  if (( crm_timeout < sbd_timeout_min )); then
    ha_log.sh err "The CIB property stonith-timeout is set too low for sbd to ever succeed"
    ha_log.sh err "Recommended value is $sbd_timeout_suggested, updating configuration."
    crm_attribute -t crm_config -n stonith-timeout -v "$sbd_timeout_suggested"
    exit 1
  fi
}
# Dispatch on the requested operation, given as the first argument.
# Every branch ends the script with an explicit exit status.
case $1 in
gethosts)
# Print the second tab-separated field of the "sbd list" output, sorted and
# de-duplicated; the unquoted backticks collapse the result onto one line.
sbd_check_device
echo `sbd_cmd_get_stdout list | cut -f2 | sort | uniq`
exit 0
;;
off|reset)
# Send an sbd message to $2 (presumably the target node - confirm against the
# caller). The message is the operation name, or "crashdump" when $crashdump
# holds one of the accepted "true" spellings.
sbd_check_device
sbd_validate_timeout
message=$1
case "$crashdump" in
yes|true|1|YES|TRUE|ja|on|ON) message="crashdump" ;;
esac
# Diff hunk: the "-" line is the removed direct invocation. The "+" lines run
# sbd inside a process substitution whose output goes to a new descriptor that
# is not read here, then wait for it, so "exit $?" reports the status of wait.
# $sbd_device is left unquoted because it holds the "dev1 -d dev2" form.
- sbd -d $sbd_device message $2 $message
+ exec {fd}< <(sbd -d $sbd_device message $2 $message)
+ wait $!
exit $?
;;
status)
# Run "sbd list", capturing only its stderr (stdout is discarded). On failure,
# log the captured text minus lines containing "please check the logs".
sbd_check_device
sbd_validate_timeout
error_output=$(sbd -d $sbd_device list 2>&1 >/dev/null)
if [ $? -ne 0 ]; then
error_message=$(echo "$error_output" | grep -v "please check the logs")
ha_log.sh err "sbd list failed: $error_message"
exit 1
fi
exit 0
;;
on)
# Always exits with status 1.
exit 1
;;
getconfignames)
# Names of the configuration parameters this agent reads.
echo "sbd_device crashdump timeout_bypass"
exit 0
;;
getinfo-devid)
echo "Shared storage STONITH device"
exit 0
;;
getinfo-devname)
echo "Shared storage STONITH device"
exit 0
;;
getinfo-devdescr)
# Free-text description; the heredoc body is emitted verbatim.
cat << DESC
sbd uses a shared storage device as a medium to communicate
fencing requests. This allows clusters without network power
switches; the downside is that access to the shared storage
device becomes a Single Point of Failure.
It requires sbd to be configured on all nodes.
Please read http://linux-ha.org/wiki/SBD_Fencing!
DESC
exit 0
;;
getinfo-devurl)
echo "http://linux-ha.org/wiki/SBD_Fencing"
exit 0
;;
getinfo-xml)
# XML description of the three parameters listed by getconfignames.
cat << SSHXML
<parameters>
<parameter name="crashdump">
<content type="string" />
<shortdesc lang="en">
Crashdump instead of regular fence
</shortdesc>
<longdesc lang="en">
If SBD is given a fence command, this option will instead perform a
kernel crash of a reboot or power-off, which on a properly configured
system can lead to a crashdump for analysis.
This is less safe for production environments. Please use with caution
and for debugging purposes only.
</longdesc>
</parameter>
<parameter name="sbd_device" unique="1">
<content type="string" />
<shortdesc lang="en">
SBD device(s)
</shortdesc>
<longdesc lang="en">
The block device used for the SBD partition. Up to three
can be specified if separated by a semicolon. (Please check
the documentation if specifying two.)
If not specified, will default to the value from @CONFIGDIR@/sbd.
</longdesc>
</parameter>
<parameter name="timeout_bypass">
<content type="boolean" />
<shortdesc lang="en">
Permit a seemingly too short stonith-timeout
</shortdesc>
<longdesc lang="en">
The sbd agent will try to detect a too short stonith-timeout (relative
to msgwait) in the Pacemaker configuration and automatically correct
it.
Should that logic fail in your environment or you have legitimate need
to use a shorter timeout, you can disable it via this parameter.
</longdesc>
</parameter>
</parameters>
SSHXML
exit 0
;;
*)
# Any other operation is rejected.
exit 1
;;
esac

File Metadata

Mime Type
text/x-diff
Expires
Thu, Jul 10, 12:37 AM (5 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2009338
Default Alt Text
(7 KB)

Event Timeline