Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4638496
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
7 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/agent/sbd.in b/agent/sbd.in
index 82ad2f6..0057023 100644
--- a/agent/sbd.in
+++ b/agent/sbd.in
@@ -1,302 +1,303 @@
#!/bin/bash
#
# This STONITH script drives the shared-storage stonith plugin.
#
# Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Main code
# When the caller supplied no sbd_device, fall back to SBD_DEVICE from the
# installed sbd configuration file, provided that file exists.
if [ -z "$sbd_device" ] && [ -f @CONFIGDIR@/sbd ]; then
	. @CONFIGDIR@/sbd
	sbd_device=$SBD_DEVICE
fi

# SBD_DEVS keeps the semicolon-separated device list, minus one trailing ";".
SBD_DEVS=${sbd_device%;}
# sbd_device becomes the same list with every ";" turned into " -d ", so that
# "sbd -d $sbd_device" passes each device with its own -d option.
sbd_device=${SBD_DEVS//;/ -d }
#######################################
# Relay the output of a background sbd command and decide whether it succeeded.
#
# Arguments:
#   $1 - pid of the background command
#   $2 - file descriptor its output can be read from
#   $3 - non-zero to obtain the exit status with "wait"; zero to infer the
#        result from the output alone
# Globals written:
#   running         - 1 if the command was still alive once output stopped
#   unknown_hanging - 1 if it was still alive and no failure marker was seen
# Outputs:
#   every line read from the descriptor, on stdout
# Returns:
#   still running:        1
#   exited, $3 non-zero:  the status reported by "wait"
#   exited, $3 zero:      0 if there was output without a failure marker,
#                         1 otherwise
#######################################
sbd_cmd_output() {
	local pid=$1
	local fd=$2
	local call_wait=$3
	local any_output=0
	local failed=0
	local line

	running=0
	unknown_hanging=0

	# Async IO timeout defaults to 3 seconds; give each read up to 5 seconds.
	# -r keeps backslashes in the relayed output intact. IFS is left at its
	# default on purpose so surrounding whitespace is trimmed as before.
	while read -r -t 5 line; do
		printf '%s\n' "$line"
		any_output=1
		# Indicator of failure in case that stderr is retrieved
		if [[ "$line" == *"sbd failed"* ]]; then
			failed=1
		fi
	done <&"$fd"

	# Command still existing
	if kill -0 "$pid" > /dev/null 2>&1; then
		running=1
		# A command that already reported failure is not worth waiting for;
		# only one without a verdict is flagged as hanging in unknown state.
		if [ "$failed" -eq 0 ]; then
			unknown_hanging=1
		fi
		return 1
	fi

	# Command exited.
	# Safe now to retrieve any remaining output without specifying timeout
	while read -r line; do
		printf '%s\n' "$line"
		any_output=1
		if [[ "$line" == *"sbd failed"* ]]; then
			failed=1
		fi
	done <&"$fd"

	# Determine the exit status.
	# bash's wait command only recognizes the latest child even if the pids of
	# the previous children were saved, so the caller decides whether wait is
	# usable for this pid.
	if [ "$call_wait" -ne 0 ]; then
		wait "$pid"
		return $?
	fi

	# Let's assume one that printed anything other than explicit failure to
	# stdout has succeeded.
	if [[ $any_output -ne 0 && $failed -eq 0 ]]; then
		return 0
	fi
	return 1
}
#######################################
# Run "sbd -d DEVICE <command>" once per configured device and relay the
# output through sbd_cmd_output.
#
# Globals read:
#   SBD_DEVS                 - semicolon-separated device list
#   running, unknown_hanging - set by each sbd_cmd_output call
# Arguments:
#   $1 - sbd sub-command to run (e.g. "dump" or "list")
# Outputs:
#   whatever sbd_cmd_output relays, on stdout
# Returns:
#   0 if no invocation failed, otherwise the status of the last failed one
#   (this can be non-zero even when another device succeeded)
#######################################
sbd_cmd_get_stdout() {
	local devices=${SBD_DEVS//;/ }
	local cmd="$1"
	local rc=0
	local success_count=0
	local unknown_hanging_procs=""
	# Working variables are local so they cannot clobber same-named globals.
	local device fd pid cmd_rc proc
	local running_count still_pending

	# $devices is split on whitespace on purpose: one word per device.
	for device in $devices; do
		# $cmd stays unquoted so a multi-word sub-command is still split.
		exec {fd}< <(sbd -d "$device" $cmd)
		pid=$!
		sbd_cmd_output "$pid" "$fd" 1
		cmd_rc=$?
		if [ "$cmd_rc" -eq 0 ]; then
			success_count=$((success_count + 1))
		else
			rc=$cmd_rc
		fi
		if [ "$unknown_hanging" -ne 0 ]; then
			unknown_hanging_procs+="$pid:$fd "
		elif [ "$running" -eq 0 ]; then
			# The command has exited and its output is drained; release the
			# descriptor instead of leaking it.
			exec {fd}<&-
		fi
	done

	if [[ -z "$unknown_hanging_procs" || $success_count -gt 0 ]]; then
		return $rc
	fi

	# We didn't get any successful output
	# Desperately wait for the ones hanging in unknown state
	while true; do
		running_count=0
		still_pending=""
		for proc in $unknown_hanging_procs; do
			pid=${proc%:*}
			fd=${proc#*:}
			sbd_cmd_output "$pid" "$fd" 0
			cmd_rc=$?
			if [ "$cmd_rc" -eq 0 ]; then
				success_count=$((success_count + 1))
			else
				rc=$cmd_rc
			fi
			if [ "$running" -ne 0 ]; then
				running_count=$((running_count + 1))
				still_pending+="$proc "
			else
				# Finished: close its descriptor and stop polling it.
				exec {fd}<&-
			fi
		done
		unknown_hanging_procs=$still_pending
		if [[ $success_count -gt 0 || $running_count -eq 0 ]]; then
			return $rc
		fi
	done
}
# Abort the agent when no sbd device is configured.
#
# Reads the global sbd_device. If it is empty, an error is logged through
# ha_log.sh and the script terminates with exit status 1; otherwise the
# function returns 0.
sbd_check_device() {
	[ -n "$sbd_device" ] && return 0
	ha_log.sh err "No sbd device(s) found in the configuration."
	exit 1
}
# Convert a duration such as "20", "20s", "2min", "1h" or "500ms" into whole
# seconds, printed on stdout. A bare number is taken as seconds; sub-second
# units are truncated. Returns 1 and prints nothing if the value is empty or
# not understood.
_sbd_duration_to_seconds() {
	local value=$1
	local pattern='^([0-9]+)[[:space:]]*([a-zA-Z]*)$'
	local number unit

	[[ "$value" =~ $pattern ]] || return 1
	# Force base 10 so a leading zero is not read as octal.
	number=$((10#${BASH_REMATCH[1]}))
	unit=${BASH_REMATCH[2]}

	case "$unit" in
	""|s|sec) echo "$number" ;;
	m|min) echo $((number * 60)) ;;
	h|hr) echo $((number * 3600)) ;;
	ms|msec) echo $((number / 1000)) ;;
	us|usec) echo $((number / 1000000)) ;;
	*) return 1 ;;
	esac
}

#######################################
# Check that the CIB property stonith-timeout is long enough for sbd.
#
# Skipped entirely when the timeout_bypass parameter is enabled. Otherwise the
# msgwait value from "sbd dump" is read; the minimum acceptable
# stonith-timeout is 1.2 * msgwait, but never less than 20 seconds. If the
# configured value is lower, the property is set to 1.2 * that minimum and
# the script exits with status 1.
#
# Globals read:  timeout_bypass
# Returns:       0 when the check passes or cannot be performed
#######################################
sbd_validate_timeout() {
	local raw_timeout crm_timeout
	local sbd_timeout sbd_timeout_min sbd_timeout_suggested

	case "$timeout_bypass" in
	yes|true|1|YES|TRUE|ja|on|ON) return 0 ;;
	esac

	raw_timeout=$(crm_attribute -t crm_config -G -n stonith-timeout -d 20s -q)
	if ! crm_timeout=$(_sbd_duration_to_seconds "$raw_timeout"); then
		# Do not rewrite the cluster configuration based on a value that
		# could not be read or understood.
		ha_log.sh err "Cannot interpret stonith-timeout value '$raw_timeout'; skipping the timeout check."
		return 0
	fi

	# First msgwait figure reported by "sbd dump".
	sbd_timeout=$(sbd_cmd_get_stdout dump \
		| sed -n 's/.*msgwait.*: \([0-9][0-9]*\).*/\1/p' \
		| head -n 1)
	if [ -z "$sbd_timeout" ] || [ "$sbd_timeout" = "0" ]; then
		return 0
	fi

	sbd_timeout_min=$((10#$sbd_timeout * 12 / 10))
	if [ "$sbd_timeout_min" -lt 20 ]; then
		sbd_timeout_min=20
	fi
	sbd_timeout_suggested=$((sbd_timeout_min * 12 / 10))

	if [ "$crm_timeout" -lt "$sbd_timeout_min" ]; then
		ha_log.sh err "The CIB property stonith-timeout is set too low for sbd to ever succeed"
		ha_log.sh err "Recommended value is $sbd_timeout_suggested, updating configuration."
		crm_attribute -t crm_config -n stonith-timeout -v "$sbd_timeout_suggested"
		exit 1
	fi
	return 0
}
# Dispatch on the requested action, passed as the first argument.
# Every arm ends the script with its own exit status.
case $1 in
# Print the distinct values of the second tab-separated column of "sbd list".
# The unquoted command substitution joins them onto a single line.
gethosts)
sbd_check_device
echo `sbd_cmd_get_stdout list | cut -f2 | sort | uniq`
exit 0
;;
# Send a message to the target named in $2. The message is the action name
# itself, or "crashdump" when the crashdump parameter is enabled.
off|reset)
sbd_check_device
sbd_validate_timeout
message=$1
case "$crashdump" in
yes|true|1|YES|TRUE|ja|on|ON) message="crashdump" ;;
esac
# NOTE(review): the '-' and '+' prefixes below are unified-diff markers. The
# '-' line is the old direct sbd call; the '+' lines replace it by running sbd
# through process substitution and exiting with the status "wait" collects.
- sbd -d $sbd_device message $2 $message
+ exec {fd}< <(sbd -d $sbd_device message $2 $message)
+ wait $!
exit $?
;;
# Run "sbd list", capturing only its stderr (stdout is discarded). On failure,
# log that stderr without lines containing "please check the logs" and exit 1.
status)
sbd_check_device
sbd_validate_timeout
error_output=$(sbd -d $sbd_device list 2>&1 >/dev/null)
if [ $? -ne 0 ]; then
error_message=$(echo "$error_output" | grep -v "please check the logs")
ha_log.sh err "sbd list failed: $error_message"
exit 1
fi
exit 0
;;
# The "on" action always reports failure.
on)
exit 1
;;
# Print the parameter names; they match the parameters listed by getinfo-xml.
getconfignames)
echo "sbd_device crashdump timeout_bypass"
exit 0
;;
# The getinfo-* actions print fixed descriptive text and exit 0.
getinfo-devid)
echo "Shared storage STONITH device"
exit 0
;;
getinfo-devname)
echo "Shared storage STONITH device"
exit 0
;;
getinfo-devdescr)
cat << DESC
sbd uses a shared storage device as a medium to communicate
fencing requests. This allows clusters without network power
switches; the downside is that access to the shared storage
device becomes a Single Point of Failure.
It requires sbd to be configured on all nodes.
Please read http://linux-ha.org/wiki/SBD_Fencing!
DESC
exit 0
;;
getinfo-devurl)
echo "http://linux-ha.org/wiki/SBD_Fencing"
exit 0
;;
# Print the XML description of the three parameters. The heredoc delimiter is
# unquoted, so the shell would expand any $ or backquote in the text.
getinfo-xml)
cat << SSHXML
<parameters>
<parameter name="crashdump">
<content type="string" />
<shortdesc lang="en">
Crashdump instead of regular fence
</shortdesc>
<longdesc lang="en">
If SBD is given a fence command, this option will instead perform a
kernel crash of a reboot or power-off, which on a properly configured
system can lead to a crashdump for analysis.
This is less safe for production environments. Please use with caution
and for debugging purposes only.
</longdesc>
</parameter>
<parameter name="sbd_device" unique="1">
<content type="string" />
<shortdesc lang="en">
SBD device(s)
</shortdesc>
<longdesc lang="en">
The block device used for the SBD partition. Up to three
can be specified if separated by a semicolon. (Please check
the documentation if specifying two.)
If not specified, will default to the value from @CONFIGDIR@/sbd.
</longdesc>
</parameter>
<parameter name="timeout_bypass">
<content type="boolean" />
<shortdesc lang="en">
Permit a seemingly too short stonith-timeout
</shortdesc>
<longdesc lang="en">
The sbd agent will try to detect a too short stonith-timeout (relative
to msgwait) in the Pacemaker configuration and automatically correct
it.
Should that logic fail in your environment or you have legitimate need
to use a shorter timeout, you can disable it via this parameter.
</longdesc>
</parameter>
</parameters>
SSHXML
exit 0
;;
# Any other action is rejected with exit status 1.
*)
exit 1
;;
esac
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Jul 10, 12:37 AM (5 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2009338
Default Alt Text
(7 KB)
Attached To
Mode
rS SBD
Attached
Detach File
Event Timeline
Log In to Comment