diff --git a/doc/man/ra2refentry.xsl b/doc/man/ra2refentry.xsl
index ac148ef6c..d0535fd36 100644
--- a/doc/man/ra2refentry.xsl
+++ b/doc/man/ra2refentry.xsl
@@ -1,644 +1,642 @@
resource-agentsocfheartbeat7 | __re-ra-
- Linux-HA contributors (see the resource agent source for information about individual authors)
+ ClusterLabs contributors (see the resource agent source for information about individual authors)OCF resource agents
-
+
-
-
-
-
-
+
+
+
+
+
-
+
-
+
-
+
-
+
-
+
-
+
-
-
-
+
+
+
-
+ DescriptionThis resource agent may be configured for native
migration if available in the cluster manager. For
Pacemaker, the
allow-migrate="true" meta
attribute enables native migration.Supported Parameters
-
-
-
-
-
- This resource agent does not support any parameters.
-
-
+
+
+
+
+
+ This resource agent does not support any parameters.
+
+
-
-
-
-
+
+
+
+ (
- unique,
+ unique,
-
- required
-
-
- optional
-
+
+ required
+
+
+ optional
+ ,
-
- ,
+
+ ,
-
- ,
+
+ ,
-
- default
-
- "
-
-
-
-
-
- "
-
-
-
- default
- false
-
-
- no default
-
+
+ default
+
+ "
+
+
+
+
+
+ "
+
+
+
+ default
+ false
+
+
+ no default
+ )Supported Actions
-
-
-
-
-
-
- This resource agent does not advertise any supported actions.
-
-
+
+
+
+
+
+
+ This resource agent does not advertise any supported actions.
+
+ This resource agent supports the following actions (operations):
-
-
- Starts the resource.
-
-
- Stops the resource.
-
-
- Performs a status check.
-
-
- Performs a detailed status check.
-
-
- Promotes the resource to the Master role.
-
-
- Demotes the resource to the Slave role.
-
-
- Executes steps necessary for migrating the
- resource
- away from
- the node.
-
-
- Executes steps necessary for migrating the
- resource
- to
- the node.
-
-
- Performs a validation of the resource configuration.
-
-
- Retrieves resource agent metadata (internal use only).
-
-
-
- Suggested minimum timeout:
-
- .
-
-
- Suggested interval:
-
- .
-
+
+
+ Starts the resource.
+
+
+ Stops the resource.
+
+
+ Performs a status check.
+
+
+ Performs a detailed status check.
+
+
+ Promotes the resource to the Master role.
+
+
+ Demotes the resource to the Slave role.
+
+
+ Executes steps necessary for migrating the
+ resource
+ away from
+ the node.
+
+
+ Executes steps necessary for migrating the
+ resource
+ to
+ the node.
+
+
+ Performs a validation of the resource configuration.
+
+
+ Retrieves resource agent metadata (internal use only).
+
+
+
+ Suggested minimum timeout:
+
+ .
+
+
+ Suggested interval:
+
+ .
+ Example CRM Shell
- The following is an example configuration for a
-
- resource using the
- crm8
- shell:
+ The following is an example configuration for a
+
+ resource using the
+ crm8
+ shell:
- primitive p_
-
-
-
- :
-
- :
-
-
-
- \
+ primitive p_
+
+
+
+ :
+
+ :
+
+
+
+ \
params \
-
-
-
- \
-
-
-
-
-
+
+
+
+ \
+
+
+
+
+
meta allow-migrate="true" \
-
-
+
+
-
- ms ms_
-
- p_
-
- \
+
+ ms ms_
+
+ p_
+
+ \
meta notify="true" interleave="true"
-
+ = \
- "
-
- "
+ "
+
+ "
-
+
op \
-
- ="
-
- "
+
+ ="
+
+ " Example PCS
- The following is an example configuration for a
-
- resource using
- pcs8
+ The following is an example configuration for a
+
+ resource using
+ pcs8
- pcs resource create p_
-
-
-
- :
-
- :
-
-
-
- \
+ pcs resource create p_
+
+
+
+ :
+
+ :
+
+
+
+ \
-
-
-
- \
-
-
-
+
+
+
+ \
+
+
+
- --master
+ --master= \
- "
-
- "
+ "
+
+ "
-
+
op \
-
- ="
-
- "
+
+ ="
+
+ " See also
-
-
- http://www.linux-ha.org/wiki/
-
- _(resource_agent)
-
-
+
+
+ http://clusterlabs.org/
+
+
diff --git a/heartbeat/Delay b/heartbeat/Delay
index f9d303bf8..ab0796579 100755
--- a/heartbeat/Delay
+++ b/heartbeat/Delay
@@ -1,223 +1,223 @@
#!/bin/sh
#
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# This script is a test resource for introducing delay.
#
# usage: $0 {start|stop|status|monitor|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_startdelay
# OCF_RESKEY_stopdelay
# OCF_RESKEY_mondelay
#
#
# OCF_RESKEY_startdelay defaults to 20 (seconds)
# OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay
# OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay
#
#
# This is really a test resource script.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the one-line command synopsis of this agent on stdout.
usage() {
  printf 'usage: %s {start|stop|status|monitor|meta-data|validate-all}\n' "$0"
}
# Write the descriptive text of this agent (version, purpose and the
# start/stop/monitor delay parameters) to stdout with cat.
# NOTE(review): the redirection on the "cat" line looks damaged in this view
# (the here-document opener and markup appear to be missing) -- confirm
# against the upstream file before editing the text below.
meta_data() {
cat <1.0
This script is a test resource for introducing delay.
Waits for a defined timespan
How long in seconds to delay on start operation.
Start delay
How long in seconds to delay on stop operation.
Defaults to "startdelay" if unspecified.
Stop delay
How long in seconds to delay on monitor operation.
Defaults to "startdelay" if unspecified.
Monitor delay
END
}
# Query the pseudo resource that tracks this instance ("monitor" action of
# ha_pseudo_resource) and pass its exit status on to the caller.
Delay_stat() {
  ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} monitor
  return $?
}
# Report whether the Delay pseudo resource is active.
# Logs the state and returns $OCF_SUCCESS when Delay_stat succeeds,
# $OCF_NOT_RUNNING otherwise.
Delay_Status() {
  if ! Delay_stat; then
    ocf_log info "Delay is stopped"
    return $OCF_NOT_RUNNING
  fi

  ocf_log info "Delay is running OK"
  return $OCF_SUCCESS
}
# Monitor action: validate the parameters quietly, wait for the configured
# monitor delay, then report the resource state via Delay_Status.
Delay_Monitor() {
  Delay_Validate_All -q
  sleep $OCF_RESKEY_mondelay
  Delay_Status
  return $?
}
# Start action.
# When the pseudo resource is already active this only logs and succeeds.
# Otherwise the parameters are validated, the pseudo resource is started and
# the configured start delay is slept; a failed pseudo-resource start is
# reported as $OCF_ERR_PERM after the delay.
Delay_Start() {
  if Delay_stat; then
    ocf_log info "Delay already running."
    return $OCF_SUCCESS
  fi

  Delay_Validate_All -q
  ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start
  rc=$?
  # The delay is applied regardless of how the pseudo resource start went.
  sleep $OCF_RESKEY_startdelay
  [ $rc -eq 0 ] || return $OCF_ERR_PERM
  return $OCF_SUCCESS
}
# Stop action.
# When the pseudo resource is not active this only logs and succeeds.
# Otherwise the parameters are validated, the pseudo resource is stopped and
# the configured stop delay is slept; a failed pseudo-resource stop is
# reported as $OCF_ERR_PERM after the delay.
Delay_Stop() {
  if ! Delay_stat; then
    ocf_log info "Delay already stopped."
    return $OCF_SUCCESS
  fi

  Delay_Validate_All -q
  ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop
  rc=$?
  # The delay is applied regardless of how the pseudo resource stop went.
  sleep $OCF_RESKEY_stopdelay
  [ $rc -eq 0 ] || return $OCF_ERR_PERM
  return $OCF_SUCCESS
}
# Check that every argument is a valid number. A string is considered valid if:
# 1. It does not contain any character but digits and period ".";
# 2. The period "." does not occur more than once.
# (An empty string satisfies both rules and is therefore accepted.)
#
# Arguments: any number of strings to check
# Returns:   $OCF_SUCCESS when all arguments are valid,
#            $OCF_ERR_ARGS at the first invalid one
#
# The check uses shell pattern matching on the quoted value instead of
# unquoted grep patterns, so neither the value nor the patterns can be
# rewritten by pathname expansion against files in the current directory.
Are_Valid_Numbers() {
  for i in "$@"; do
    case "$i" in
      *[!0-9.]*|*.*.*)
        return $OCF_ERR_ARGS
        ;;
    esac
  done
  return $OCF_SUCCESS
}
# Validate the three delay parameters.
#
# Options:  -q  be quiet when validation succeeds
# Outputs:  "Validate OK" on stdout on success unless -q was given
# Returns:  $OCF_SUCCESS when all parameters are valid
# Exits:    with $OCF_ERR_ARGS (after ocf_exit_reason) when any is invalid
Delay_Validate_All() {
  # getopts keeps its position in the global OPTIND; restart at the first
  # argument so that every call parses its own arguments.
  OPTIND=1
  # Be quiet when specified -q option _and_ validation succeeded
  getopts "q" option
  # Quoted: getopts stores "?" when no option is found, which must not be
  # subject to pathname expansion.
  if test "$option" = "q"; then
    quiet=yes
  else
    quiet=no
  fi
  shift $(($OPTIND - 1))

  # Each parameter is passed as one word so that a value containing
  # whitespace is checked as a whole instead of being split into pieces.
  if Are_Valid_Numbers "$OCF_RESKEY_startdelay" "$OCF_RESKEY_stopdelay" \
    "$OCF_RESKEY_mondelay"; then
    if test "$quiet" = "no"; then
      echo "Validate OK"
    fi
    # _Return_ on validation success
    return $OCF_SUCCESS
  else
    ocf_exit_reason "Some of the instance parameters are invalid"
    # _Exit_ on validation failure
    exit $OCF_ERR_ARGS
  fi
}
# Main program: exactly one argument, the action to perform, is accepted.
[ $# -eq 1 ] || {
  usage
  exit $OCF_ERR_ARGS
}

# Parameter defaults: the start delay falls back to 20, the stop and
# monitor delays fall back to the start delay (only when unset).
: "${OCF_RESKEY_startdelay=20}"
: "${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay}"
: "${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay}"

# Dispatch on the requested action.
case "$1" in
  meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
  start)
    Delay_Start
    ;;
  stop)
    Delay_Stop
    ;;
  monitor)
    Delay_Monitor
    ;;
  status)
    Delay_Status
    ;;
  validate-all)
    Delay_Validate_All
    ;;
  usage)
    usage
    exit $OCF_SUCCESS
    ;;
  *)
    usage
    exit $OCF_ERR_ARGS
    ;;
esac

# Hand the status of the selected action back to the caller.
exit $?
diff --git a/heartbeat/EvmsSCC b/heartbeat/EvmsSCC
index 802bac470..21dfc7bde 100755
--- a/heartbeat/EvmsSCC
+++ b/heartbeat/EvmsSCC
@@ -1,216 +1,216 @@
#!/bin/sh
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# EvmsSCC
# Description: Runs evms_activate in a heartbeat cluster to activate a
# EVMS shared cluster container in the cluster.
# Original Author: Jo De Baer (jdebaer@novell.com)
# Original Release: 06 Nov 2006
#
# usage: ./EvmsSCC {start|stop|status|monitor|meta-data}
#
# The goal of this resource agent is to provoke the creation of device files
# in /dev/evms which correspond to EVMS2 volumes that reside in an EVMS2 shared
# cluster container. As such it should be run as a clone resource in the
# cluster. Logic inside the resource agent will make sure that "evms_activate"
# is run on only one node in the cluster, both at cluster startup time as well
# as when a node joins the cluster.
#
# Typically, resources that need to mount EVMS2 volumes should run after this
# resource agent has finished its run. As such those resources should be made
# "dependent" on this resource agent by the cluster administrator. An example
# of resources that should depend on this resource agent are Filesystem resource
# agents that mount OCFS2 volumes that reside on EVMS2 volumes in a shared
# EVMS2 cluster container.
#
# For this resource agent to do its job correctly, evmsd must be running on
# the node where the agent is started. Usually evmsd is started by the cluster
# software via a respawn statement in /etc/ha.d/ha.cf. If you encounter timing
# issues where evmsd is not yet started but where the cluster already starts
# the EvmsSCC clone, then you should comment out the evmsd respawn statement
# in /etc/ha.d/ha.cf and start evmsd on each node in the cluster via a separate
# clone resource agent. The EvmsSCC resource agent cloneset should then be made
# dependent on this evmsd cloneset. This will guarantee that evmsd is running
# before EvmsSCC is started, on each node in the cluster.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# External commands this agent relies on (verified with check_binary
# before any action runs).
CUT="cut"
EVMSACTIVATE="evms_activate"
# Print the one-line command synopsis of this agent on stdout.
usage() {
  printf 'usage: %s {start|stop|status|monitor|meta-data}\n' "$0"
}
# Write the descriptive text of this agent (version, deprecation notice,
# purpose and the deprecation-warning parameter) to stdout with cat.
# NOTE(review): the redirection on the "cat" line looks damaged in this view
# (the here-document opener and markup appear to be missing) -- confirm
# against the upstream file before editing the text below.
meta_data() {
cat <1.0
Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. --
Resource script for EVMS shared cluster container. It runs evms_activate on one node in the cluster.
Manages EVMS Shared Cluster Containers (SCCs) (deprecated)
If set to true, suppresses the deprecation warning for this agent.
Suppress deprecation warning
END
}
# Status/monitor action.
# Monitoring of EVMS activations is not supported for now, so the answer is
# always "not running"; this also copes with the monitor call that precedes
# a start.
EvmsSCC_status() {
  return $OCF_NOT_RUNNING
}
# Notify action.
# Only the "pre" notification of a "start" operation is acted upon: it is
# logged and handed to EvmsSCC_start_notify_common, which reads the n_active
# and n_start variables declared here. Always returns $OCF_SUCCESS.
EvmsSCC_notify() {
  local n_type="$OCF_RESKEY_CRM_meta_notify_type"
  local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
  local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname"
  local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname"
  local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname"

  if [ "$n_type" = "pre" ] && [ "$n_op" = "start" ]; then
    ocf_log debug "EvmsSCC: Notify: Starting node(s): $n_start."
    EvmsSCC_start_notify_common
  fi

  return $OCF_SUCCESS
}
# Start action.
# Captures the notify meta attributes in local variables (n_active and
# n_start are read by EvmsSCC_start_notify_common), logs the starting nodes
# and runs the common start/notify logic. The result of that logic is not
# propagated: the function always returns $OCF_SUCCESS.
EvmsSCC_start() {
  local n_type="$OCF_RESKEY_CRM_meta_notify_type"
  local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
  local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname"
  local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname"
  local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname"

  ocf_log debug "EvmsSCC: Start: starting node(s): $n_start."
  EvmsSCC_start_notify_common

  return $OCF_SUCCESS
}
# Stop action: nothing is undone, the function simply reports success.
EvmsSCC_stop() {
  return $OCF_SUCCESS
}
# Logic shared by the start and the pre-start notify actions.
#
# Reads n_active and n_start from the calling function (and appends n_start
# to n_active). Returns $OCF_ERR_GENERIC when the local node is in neither
# list. When the local node is the first entry of the starting list, it
# runs ${EVMSACTIVATE} -q and retries (after a pause of up to 39 seconds,
# taken from ocf_maybe_random) until the command succeeds.
EvmsSCC_start_notify_common() {
  local this_node=${HA_CURHOST:-$(uname -n | tr A-Z a-z)}
  local first_starter

  ocf_log debug "EvmsSCC: Start_Notify: I am node $this_node."

  n_active="$n_active $n_start"
  case " $n_active " in
    *" $this_node "*)
      ;;
    *)
      ocf_log err "EvmsSCC: $this_node (local) not on active list!"
      return $OCF_ERR_GENERIC
      ;;
  esac

  # Pick the first node from the starting list: at cluster boot this is one
  # of the many booting nodes, later on it is the node joining the cluster.
  first_starter=$(echo $n_start | cut -d ' ' -f 1)
  ocf_log debug "EvmsSCC: Start_Notify: First node in starting list is $first_starter."

  # Every other node leaves the activation to the first starter.
  if [ "$this_node" != "$first_starter" ]; then
    return $OCF_SUCCESS
  fi

  ocf_log debug "EvmsSCC: Start_Notify: I am running ${EVMSACTIVATE}."
  until ${EVMSACTIVATE} -q 2> /dev/null; do
    SLEEP_TIME=$(($(ocf_maybe_random) % 40))
    ocf_log info "EvmsSCC: Evms call failed - sleeping for $SLEEP_TIME seconds and then trying again."
    sleep $SLEEP_TIME
  done

  return $OCF_SUCCESS
}
# Main program: exactly one argument, the action to perform, is accepted.
[ $# -eq 1 ] || {
  usage
  exit $OCF_ERR_ARGS
}

OP=$1

# These two actions are answered before the deprecation warning and the
# binary checks below are reached.
case "$OP" in
  meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
  usage)
    usage
    exit $OCF_SUCCESS
    ;;
esac

# Be obnoxious, log deprecation warning on every invocation (unless
# suppressed by resource configuration).
ocf_deprecated

check_binary $CUT
check_binary $EVMSACTIVATE

# Dispatch the remaining actions.
case "$OP" in
  start)
    EvmsSCC_start
    ;;
  notify)
    EvmsSCC_notify
    ;;
  stop)
    EvmsSCC_stop
    ;;
  status|monitor)
    EvmsSCC_status
    ;;
  *)
    usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac

# Hand the status of the selected action back to the caller.
exit $?
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 9baf14073..d009329cb 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -1,883 +1,883 @@
#!/bin/sh
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# Filesystem
# Description: Manages a Filesystem on a shared storage medium.
# Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
# Original Release: 25 Oct 2000
#
# usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_device
# OCF_RESKEY_directory
# OCF_RESKEY_fstype
# OCF_RESKEY_options
# OCF_RESKEY_statusfile_prefix
# OCF_RESKEY_run_fsck
# OCF_RESKEY_fast_stop
# OCF_RESKEY_force_clones
#
#OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0
# Or a -U or -L option for mount, or an NFS mount specification
#OCF_RESKEY_directory : the mount point for the filesystem
#OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2
#OCF_RESKEY_options : options to be given to the mount command via -o
#OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring
#OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no
#OCF_RESKEY_fast_stop : fast stop: yes(default)/no
#OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts
# for each brick in a glusterfs setup
#
#
# This assumes you want to manage a filesystem on a shared (SCSI) bus,
# on a replicated device (such as DRBD), or a network filesystem (such
# as NFS or Samba).
#
# Do not put this filesystem in /etc/fstab. This script manages all of
# that for you.
#
# NOTE: If 2 or more nodes mount the same file system read-write, and
# that file system is not designed for that specific purpose
# (such as GFS or OCFS2), and is not a network file system like
# NFS or Samba, then the filesystem is going to become
# corrupted.
#
# As a result, you should use this together with the stonith
# option and redundant, independent communications paths.
#
# If you don't do this, don't blame us when you scramble your
# disk.
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Default directory for status files; it is appended to the mount point.
DFLT_STATUSDIR=".Filesystem_status/"

# Operating system name, consulted by several functions.
HOSTOS=$(uname)

# Status file path:
#   <directory>/<prefix><resource instance>[_<clone number>]_<node name>
# where <prefix> is the statusfile_prefix parameter or, when that is unset
# or empty, the default directory above.
prefix=${OCF_RESKEY_statusfile_prefix:-$DFLT_STATUSDIR}
suffix="${OCF_RESOURCE_INSTANCE}"
if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then
  suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone"
fi
suffix="${suffix}_$(uname -n)"
STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix
#######################################################################
# Print the one-line command synopsis of this agent on stdout.
usage() {
  printf 'usage: %s {start|stop|status|monitor|validate-all|meta-data}\n' "$0"
}
# Write the descriptive text of this agent (version, purpose, monitor
# depths and the description of every parameter) to stdout with cat.
# NOTE(review): the redirection on the "cat" line looks damaged in this view
# (the here-document opener and markup appear to be missing) -- confirm
# against the upstream file before editing the text below.
meta_data() {
cat <1.1
Resource script for Filesystem. It manages a Filesystem on a
shared storage medium.
The standard monitor operation of depth 0 (also known as probe)
checks if the filesystem is mounted. If you want deeper tests,
set OCF_CHECK_LEVEL to one of the following values:
10: read first 16 blocks of the device (raw read)
This doesn't exercise the filesystem at all, but the device on
which the filesystem lives. This is noop for non-block devices
such as NFS, SMBFS, or bind mounts.
20: test if a status file can be written and read
The status file must be writable by root. This is not always the
case with an NFS mount, as NFS exports usually have the
"root_squash" option set. In such a setup, you must either use
read-only monitoring (depth=10), export with "no_root_squash" on
your NFS server, or grant world write permissions on the
directory where the status file is to be placed.
Manages filesystem mounts
The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.
block device
The mount point for the filesystem.
mount point
The type of filesystem to be mounted.
filesystem type
Any extra options to be given as -o options to mount.
For bind mounts, add "bind" here and set fstype to "none".
We will do the right thing for options such as "bind,ro".
options
The prefix to be used for a status file for resource monitoring
with depth 20. If you don't specify this parameter, all status
files will be created in a separate directory.
status file prefix
Specify how to decide whether to run fsck or not.
"auto" : decide to run fsck depending on the fstype(default)
"force" : always run fsck regardless of the fstype
"no" : do not run fsck ever.
run_fsck
Normally, we expect no users of the filesystem and the stop
operation to finish quickly. If you cannot control the filesystem
users easily and want to prevent the stop action from failing,
then set this parameter to "no" and add an appropriate timeout
for the stop operation.
fast stop
The use of a clone setup for local filesystems is forbidden
by default. For special setups like glusterfs, cloning a mount
of a local device with a filesystem like ext4 or xfs independently
on several nodes is a valid use case.
Only set this to "true" if you know what you are doing!
allow running as a clone, regardless of filesystem type
This option allows specifying how to handle processes that are
currently accessing the mount directory.
"true" : Default value, kill processes accessing mount point
"safe" : Kill processes accessing mount point using methods that
avoid functions that could potentially block during process
detection
"false" : Do not kill any processes.
The 'safe' option uses shell logic to walk the /procs/ directory
for pids using the mount point while the default option uses the
fuser cli tool. fuser is known to perform operations that can potentially
block if unresponsive nfs mounts are in use on the system.
Kill processes before unmount
END
}
#
# Flush the kernel's buffers for a block device.
# Meant to be called after unmounting and before mounting; probably not
# needed on 2.4 and later kernels, but harmless there.
#
# $1: device to flush
# Returns the exit status of "$BLOCKDEV --flushbufs" when that tool is
# available and the resource is a block device, 0 otherwise.
#
flushbufs() {
  if have_binary $BLOCKDEV && [ "$blockdevice" = "yes" ]; then
    $BLOCKDEV --flushbufs $1
    return $?
  fi
  return 0
}
# Take advantage of /etc/mtab if present, use portable mount command
# otherwise. Normalize format to "dev mountpoint fstype".
# Succeed when the mount options ($options) contain the word "bind".
is_bind_mount() {
  echo "$options" |
    grep -w bind >/dev/null 2>&1
}
# Print the current mounts, one per line, as "dev mountpoint fstype".
# /proc/mounts is used when it exists and this is not a bind mount,
# otherwise a readable /etc/mtab; without either, the output of the
# $MOUNT command is reduced to the same three columns.
list_mounts() {
  local mounts_file=""

  if [ -e "/proc/mounts" ] && ! is_bind_mount; then
    mounts_file=/proc/mounts
  elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then
    mounts_file=/etc/mtab
  fi

  if [ -z "$mounts_file" ]; then
    $MOUNT | cut -d' ' -f1,3,5
    return
  fi
  cut -d' ' -f1,2,3 < $mounts_file
}
# Find out whether the mounted filesystem sits on a block device.
# Nothing is done when $blockdevice is already "yes" or for the listed
# network/virtual filesystem types. Otherwise $DEVICE is replaced by the
# device currently mounted on $MOUNTPOINT (the configured name could be a
# -L or -U option) and $blockdevice becomes "yes" if that is a block device.
determine_blockdevice() {
  [ $blockdevice = "yes" ] && return

  case "$FSTYPE" in
    nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|none)
      : ;;
    *)
      DEVICE=$(list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1)
      if [ -b "$DEVICE" ]; then
        blockdevice=yes
      fi
      ;;
  esac
}
# Print the mount points of all filesystems mounted below the path given
# as $1 (the path itself is not included), in reverse sort order.
list_submounts() {
  list_mounts |
    grep " $1/" |
    cut -d' ' -f2 |
    sort -r
}
# Warn when a read-only bind mount is requested on a kernel that cannot
# remount bind mounts (2.6 kernels older than 2.6.26).
# Does nothing unless the mount options contain the word "ro".
bind_kernel_check() {
  echo "$options" |
    grep -w ro >/dev/null 2>&1 || return

  # awk exits with 1 for 2.6.x releases whose third component is below 26.
  uname -r | awk -F. '
    $1==2 && $2==6 {
      sub("[^0-9].*","",$3);
      if ($3<26)
        exit(1);
    }'
  [ $? -ne 0 ] &&
    ocf_log warn "kernel $(uname -r) cannot handle read only bind mounts"
}
# Apply additional options to a bind mount.
# For a bind mount whose options are more than just "-o bind", the kernel is
# checked and the mount point is remounted with "bind" replaced by
# "remount" in the options. In every other case the function succeeds
# without doing anything.
bind_mount() {
  if ! is_bind_mount || [ "$options" = "-o bind" ]; then
    return 0
  fi

  bind_kernel_check
  bind_opts=$(echo $options | sed 's/bind/remount/')
  $MOUNT $bind_opts $MOUNTPOINT
}
# Succeed when the configured mount options (OCF_RESKEY_options) contain
# the word given as $1.
is_option() {
  echo $OCF_RESKEY_options |
    grep -w "$1" >/dev/null 2>&1
}
# Decide whether fsck has to run before mounting.
# OCF_RESKEY_run_fsck selects the mode: "force" always runs it, "no" never
# does, and "auto" (or empty) decides by $FSTYPE. Any other value is
# reported, replaced by "auto" and then handled as such.
# Returns 0 when fsck is needed, 1 when it is not.
is_fsck_needed() {
  case $OCF_RESKEY_run_fsck in
    force)
      return 0
      ;;
    no)
      return 1
      ;;
    ""|auto)
      ;;
    *)
      ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'"
      OCF_RESKEY_run_fsck="auto"
      ;;
  esac

  # Automatic mode: the filesystem types listed here are not checked.
  case $FSTYPE in
    ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs)
      return 1
      ;;
  esac
  return 0
}
# Check that the running kernel supports the configured filesystem type.
#
# Globals:  FSTYPE, HOSTOS, MODPROBE (read)
# Returns:  $OCF_SUCCESS when support is present (or cannot/need not be
#           checked), $OCF_ERR_INSTALLED when the type is not listed in
#           /proc/filesystems and loading the kernel module does not help.
fstype_supported()
{
  local support="$FSTYPE"
  local try

  # OpenBSD is the one platform where the /proc/filesystems check is
  # skipped; every other platform goes through the checks below.
  if [ "X${HOSTOS}" = "XOpenBSD" ]; then
    # skip checking /proc/filesystems for obsd
    return $OCF_SUCCESS
  fi

  if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then
    : No FSTYPE specified, rely on the system has the right file-system support already
    return $OCF_SUCCESS
  fi

  # support fuse-filesystems (e.g. GlusterFS)
  case $FSTYPE in
    fuse.*|glusterfs|rozofs) support="fuse";;
  esac

  grep -w "$support"'$' /proc/filesystems >/dev/null
  if [ $? -eq 0 ]; then
    # found the fs type
    return $OCF_SUCCESS
  fi

  # If here, we should attempt to load the module and then
  # check whether the filesystem support exists again.
  $MODPROBE $support >/dev/null
  if [ $? -ne 0 ]; then
    ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module"
    return $OCF_ERR_INSTALLED
  fi

  # It is possible for the module to load and not be completely initialized
  # before we check /proc/filesystems again. Give this a few tries before
  # giving up entirely.
  for try in 1 2 3 4 5; do
    grep -w "$support"'$' /proc/filesystems >/dev/null
    if [ $? -eq 0 ] ; then
      # yes. found the filesystem after doing the modprobe
      return $OCF_SUCCESS
    fi
    ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again"
    sleep 1
  done

  ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems"
  return $OCF_ERR_INSTALLED
}
#
# START: Start up the filesystem
#
# Mounts $DEVICE on $MOUNTPOINT unless Filesystem_status reports that it is
# already mounted. For block devices an fsck is run first when
# is_fsck_needed says so, and the mount point directory is created when it
# is missing.
# Returns $OCF_SUCCESS when mounted (or already mounted) and
# $OCF_ERR_GENERIC when fsck or mount fails; exits the script with
# $OCF_ERR_INSTALLED when filesystem support, the device or the mount point
# directory is missing.
#
Filesystem_start()
{
# See if the device is already mounted.
if Filesystem_status >/dev/null 2>&1 ; then
ocf_log info "Filesystem $MOUNTPOINT is already mounted."
return $OCF_SUCCESS
fi
fstype_supported || exit $OCF_ERR_INSTALLED
# Check the filesystem & auto repair.
# NOTE: Some filesystem types don't need this step... Please modify
# accordingly
if [ $blockdevice = "yes" ]; then
if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then
ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
exit $OCF_ERR_INSTALLED
fi
if is_fsck_needed; then
ocf_log info "Starting filesystem check on $DEVICE"
if [ -z "$FSTYPE" ]; then
$FSCK -p $DEVICE
else
$FSCK -t $FSTYPE -p $DEVICE
fi
# NOTE: if any errors at all are detected, it returns non-zero
# if the error is >= 4 then there is a big problem
# ($? below is the exit status of the fsck command run just above)
if [ $? -ge 4 ]; then
ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE"
return $OCF_ERR_GENERIC
fi
fi
fi
# Create the mount point when it does not exist yet, then verify it.
[ -d "$MOUNTPOINT" ] ||
ocf_run mkdir -p $MOUNTPOINT
if [ ! -d "$MOUNTPOINT" ] ; then
ocf_exit_reason "Couldn't find directory [$MOUNTPOINT] to use as a mount point"
exit $OCF_ERR_INSTALLED
fi
flushbufs $DEVICE
# Mount the filesystem.
# fstype "none" is followed by bind_mount; an empty fstype mounts without -t.
case "$FSTYPE" in
none) $MOUNT $options $DEVICE $MOUNTPOINT &&
bind_mount
;;
"") $MOUNT $options $DEVICE $MOUNTPOINT ;;
*) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;;
esac
# $? is the status of whichever case arm ran above; nothing may be inserted
# between the esac and this test.
if [ $? -ne 0 ]; then
ocf_exit_reason "Couldn't mount device [$DEVICE] as $MOUNTPOINT"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
# end of Filesystem_start
# Print the ids of the processes that use the directory given as $1.
# The method depends on $FORCE_UNMOUNT:
#   true value: fstat on OpenBSD, "$FUSER -m" everywhere else
#   "safe":     scan the symlinks and memory maps below /proc
#   otherwise:  print nothing
get_pids()
{
  local dir=$1
  local procs
  local mmap_procs

  if ocf_is_true "$FORCE_UNMOUNT"; then
    case "X${HOSTOS}" in
      XOpenBSD)
        fstat | grep $dir | awk '{print $3}'
        ;;
      *)
        $FUSER -m $dir 2>/dev/null
        ;;
    esac
  elif [ "$FORCE_UNMOUNT" = "safe" ]; then
    # Processes holding a symlink (cwd, exe, fd, ...) into the directory.
    procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}')
    # Processes with a file from the directory mapped into memory.
    mmap_procs=$(grep " ${dir}" /proc/[0-9]*/maps | awk -F/ '{print $3}')
    printf "${procs}\n${mmap_procs}" | sort | uniq
  fi
}
# Send a signal to every process that get_pids reports for a directory.
# $1: directory (mount point)
# $2: signal name or number, passed to "kill -s"
# When no process is reported, this is only logged.
signal_processes() {
  local mount_dir=$1
  local signame=$2
  local victims victim

  # fuser returns a non-zero return code if none of the
  # specified files is accessed or in case of a fatal
  # error.
  victims=$(get_pids "$mount_dir")
  [ -n "$victims" ] || {
    ocf_log info "No processes on $mount_dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'"
    return
  }

  for victim in $victims; do
    ocf_log info "sending signal $signame to: $(ps -f $victim | tail -1)"
    kill -s $signame $victim
  done
}
# Make one attempt to unmount the mount point given as $1.
# Success is judged by the mount list, not by the exit status of $UMOUNT:
# returns $OCF_SUCCESS when the mount point is no longer listed,
# $OCF_ERR_GENERIC when it still is.
try_umount() {
  local SUB=$1

  $UMOUNT $umount_force $SUB
  if list_mounts | grep -q " $SUB " >/dev/null 2>&1; then
    return $OCF_ERR_GENERIC
  fi

  ocf_log info "unmounted $SUB successfully"
  return $OCF_SUCCESS
}
# Unmount a mount point, signalling the processes that use it if needed.
# $1: mount point
# $2: time budget in seconds
# Half of the budget is spent retrying with TERM, the other half with KILL;
# each failed attempt signals the processes and waits one second.
# Returns $OCF_SUCCESS once an unmount attempt succeeds, $OCF_ERR_GENERIC
# when all attempts are used up.
fs_stop() {
  local SUB=$1 timeout=$2 signame tries_left

  for signame in TERM KILL; do
    tries_left=$((timeout/2)) # try half time with TERM
    while [ $tries_left -gt 0 ]; do
      if try_umount $SUB; then
        return $OCF_SUCCESS
      fi
      ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $signame"
      signal_processes $SUB $signame
      tries_left=$((tries_left-1))
      sleep 1
    done
  done

  return $OCF_ERR_GENERIC
}
#
# STOP: Unmount the filesystem
#
# Unmounts $MOUNTPOINT and every filesystem mounted below it. When the
# filesystem is already unmounted, this succeeds immediately. The result is
# kept in the global variable rc and returned after the buffers of $DEVICE
# have been flushed.
#
Filesystem_stop()
{
# See if the device is currently mounted
Filesystem_status >/dev/null 2>&1
if [ $? -eq $OCF_NOT_RUNNING ]; then
# Already unmounted, wonderful.
rc=$OCF_SUCCESS
else
# Wipe the status file, but continue with a warning if
# removal fails -- the file system might be read only
if [ $OCF_CHECK_LEVEL -eq 20 ]; then
rm -f ${STATUSFILE}
if [ $? -ne 0 ]; then
ocf_log warn "Failed to remove status file ${STATUSFILE}."
fi
fi
# Determine the real blockdevice this is mounted on (if
# possible) prior to unmounting.
determine_blockdevice
# For networked filesystems, there's merit in trying -f:
# (umount_force is a global that try_umount passes to $UMOUNT)
case "$FSTYPE" in
nfs4|nfs|cifs|smbfs) umount_force="-f" ;;
esac
# Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
# The sub-mounts come first, $MOUNTPOINT itself is handled last.
local timeout
for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do
ocf_log info "Trying to unmount $SUB"
if ocf_is_true "$FAST_STOP"; then
timeout=6
else
# OCF_RESKEY_CRM_meta_timeout is divided by 1000 to get the value
# handed to fs_stop; it defaults to 20000 when unset or empty.
timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"}
timeout=$((timeout/1000))
fi
fs_stop $SUB $timeout
# rc is overwritten on every iteration, so the value returned below is
# the result for the last entry of the list ($MOUNTPOINT).
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
ocf_exit_reason "Couldn't unmount $SUB, giving up!"
fi
done
fi
flushbufs $DEVICE
return $rc
}
# end of Filesystem_stop
#
# STATUS: is the filesystem mounted or not?
#
# Looks for $MOUNTPOINT in the mount list and stores the result in the
# global variables rc and msg. The message is logged only when the agent
# was called with the "status" action.
# Returns $OCF_SUCCESS when mounted, $OCF_NOT_RUNNING when not.
#
Filesystem_status()
{
  if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then
    rc=$OCF_SUCCESS
    msg="$MOUNTPOINT is mounted (running)"
  else
    rc=$OCF_NOT_RUNNING
    msg="$MOUNTPOINT is unmounted (stopped)"
  fi

  # Special case "monitor" to check whether the UUID cached and
  # on-disk still match?
  if [ "$OP" = "status" ]; then
    ocf_log info "$msg"
  fi

  return $rc
}
# end of Filesystem_status
# Note: the read/write tests below will stall in case the
# underlying block device (or in the case of a NAS mount, the
# NAS server) has gone away. In that case, if I/O does not
# return to normal in time, the operation hits its timeout
# and it is up to the CRM to initiate appropriate recovery
# actions (such as fencing the node).
#
# MONITOR 10: read the device
#
# Reads one 4k block from $DEVICE with direct I/O. For anything that is not
# a block device the check is a no-op that succeeds with a warning.
# Returns $OCF_SUCCESS or, when dd fails, $OCF_ERR_GENERIC.
#
Filesystem_monitor_10()
{
  if [ "$blockdevice" = "no" ] ; then
    ocf_log warn "$DEVICE is not a block device, monitor 10 is noop"
    return $OCF_SUCCESS
  fi

  dd_opts="iflag=direct bs=4k count=1"
  # Keep what dd writes to stderr; the data read is discarded.
  if ! err_output=$(dd if=$DEVICE $dd_opts 2>&1 >/dev/null); then
    ocf_exit_reason "Failed to read device $DEVICE"
    ocf_log err "dd said: $err_output"
    return $OCF_ERR_GENERIC
  fi

  return $OCF_SUCCESS
}
#
# MONITOR 20: write and read a status file
#
# Writes the resource instance name to $STATUSFILE with dd (creating the
# directory of the file when needed), then checks that the file exists and
# can be read.
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC as soon as one step fails.
#
Filesystem_monitor_20()
{
  case "$blockdevice" in
    no)
      # O_DIRECT not supported on cifs/smbfs
      dd_opts="oflag=sync bs=4k conv=fsync,sync"
      ;;
    *)
      # Writing to the device in O_DIRECT mode is imperative
      # to bypass caches.
      dd_opts="oflag=direct,sync bs=4k conv=fsync,sync"
      ;;
  esac

  status_dir=$(dirname $STATUSFILE)
  [ -d "$status_dir" ] || mkdir -p "$status_dir"

  if ! err_output=$(echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1); then
    ocf_exit_reason "Failed to write status file ${STATUSFILE}"
    ocf_log err "dd said: $err_output"
    return $OCF_ERR_GENERIC
  fi

  if ! test -f ${STATUSFILE}; then
    ocf_exit_reason "Cannot stat the status file ${STATUSFILE}"
    return $OCF_ERR_GENERIC
  fi

  if ! cat ${STATUSFILE} > /dev/null; then
    ocf_exit_reason "Cannot read the status file ${STATUSFILE}"
    return $OCF_ERR_GENERIC
  fi

  return $OCF_SUCCESS
}
# MONITOR: check the mount and, for check levels above 0, run the deeper
# test selected by $OCF_CHECK_LEVEL (10: read the device, 20: write and
# read a status file).
# Returns the status of Filesystem_status when the filesystem is not
# mounted, the result of the deeper test when one was run, and
# $OCF_ERR_CONFIGURED for an unsupported check level.
Filesystem_monitor()
{
  Filesystem_status
  rc=$?
  [ $rc -eq $OCF_SUCCESS ] || return $rc

  if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then
    case "$OCF_CHECK_LEVEL" in
      10)
        Filesystem_monitor_10
        rc=$?
        ;;
      20)
        Filesystem_monitor_20
        rc=$?
        ;;
      *)
        ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"
        rc=$OCF_ERR_CONFIGURED
        ;;
    esac
  fi

  return $rc
}
# end of Filesystem_monitor
#
# VALIDATE_ALL: Are the instance parameters valid?
# FIXME!! The only part that's useful is the return code.
# This code always returns $OCF_SUCCESS (!)
#
# Problems found (missing mount point, no sign of support for $FSTYPE) are
# only logged; the one hard requirement, checked with check_binary, is dd
# when a check level above 0 is configured.
#
Filesystem_validate_all()
{
  if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then
    ocf_log warn "Mountpoint $MOUNTPOINT does not exist"
  fi

  # Check if the $FSTYPE is workable
  # NOTE: Without inserting the $FSTYPE module, this step may be imprecise
  # TODO: This is Linux specific crap.
  if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then
    if ! cut -f2 /proc/filesystems | grep -q ^$FSTYPE$; then
      modpath=/lib/modules/$(uname -r)
      moddep=$modpath/modules.dep
      # Do we have $FSTYPE in modules.dep?
      if ! cut -d' ' -f1 $moddep | grep -q "^$modpath.*$FSTYPE\.k\?o:$"; then
        ocf_log info "It seems we do not have $FSTYPE support"
      fi
    fi
  fi

  # If we are supposed to do monitoring with status files, then
  # we need a utility to write in O_DIRECT mode.
  # Note: really old coreutils versions do not support the "oflag"
  # option for dd, and that is not checked here. On such systems
  # monitor is bound to fail, with dd spewing an error message to
  # the logs, so status file monitoring cannot be used there.
  if [ $OCF_CHECK_LEVEL -gt 0 ]; then
    check_binary dd
  fi

  #TODO: How to check the $options ?
  return $OCF_SUCCESS
}
#
# set the blockdevice variable to "no" or "yes"
#
#
# Decide whether $DEVICE should be treated as a block device and store
# the verdict ("yes"/"no") in the global $blockdevice.
# Reads: FSTYPE, DEVICE, OP; calls is_option.
#
set_blockdevice_var() {
	blockdevice=no

	# these are definitely not block devices
	case $FSTYPE in
	nfs4|nfs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs) return;;
	esac

	# Use the helper's exit status directly; wrapping the call in a
	# command substitution would run whatever it prints as a command.
	if is_option "loop"; then
		return
	fi

	case $DEVICE in
	-*)	# Oh... An option to mount instead... Typically -U or -L
		;;
	/dev/null)	# Special case for BSC
		blockdevice=yes
		;;
	*)
		if [ ! -b "$DEVICE" ] && [ ! -d "$DEVICE" ] && [ "X$OP" != Xstart ]; then
			ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
		fi
		# A directory is not a block device; anything else is assumed to be one.
		if [ ! -d "$DEVICE" ]; then
			blockdevice=yes
		fi
		;;
	esac
}
# Check the arguments passed to this script
if [ $# -ne 1 ]; then
	usage
	exit $OCF_ERR_ARGS
fi

# Check the OCF_RESKEY_ environment variables...
FORCE_UNMOUNT="yes"
if [ -n "${OCF_RESKEY_force_unmount}" ]; then
	FORCE_UNMOUNT=$OCF_RESKEY_force_unmount
fi

DEVICE=$OCF_RESKEY_device
FSTYPE=$OCF_RESKEY_fstype
if [ -n "$OCF_RESKEY_options" ]; then
	options="-o $OCF_RESKEY_options"
fi
FAST_STOP=${OCF_RESKEY_fast_stop:="yes"}

OP=$1

# These operations do not require instance parameters
case $OP in
meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
usage)
	usage
	exit $OCF_SUCCESS
	;;
esac

if [ -z "$DEVICE" ]; then
	ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed"
	exit $OCF_ERR_CONFIGURED
fi

set_blockdevice_var

# Normalize instance parameters:

# It is possible that OCF_RESKEY_directory has one or even multiple trailing "/".
# But the output of `mount` and /proc/mounts do not.
if [ -z "$OCF_RESKEY_directory" ]; then
	# Operands are quoted so an empty $OP/$blockdevice cannot break the test.
	if [ "X$OP" = "Xstart" ] || [ "$blockdevice" = "no" ]; then
		ocf_exit_reason "Please specify the directory"
		exit $OCF_ERR_CONFIGURED
	fi
else
	# Quote the value so whitespace or glob characters in the path survive.
	MOUNTPOINT=$(printf '%s\n' "$OCF_RESKEY_directory" | sed 's/\/*$//')
	: ${MOUNTPOINT:=/}
	# At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
	# TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
	# kill the whole system. Is that a good idea?
fi

# Check to make sure the utilities are found
if [ "X${HOSTOS}" != "XOpenBSD" ]; then
	check_binary $MODPROBE
	check_binary $FUSER
fi
check_binary $FSCK
check_binary $MOUNT
check_binary $UMOUNT

if [ "$OP" != "monitor" ]; then
	ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT"
fi

# Operations that do not need the clone-safety verdict below.
case $OP in
status)
	Filesystem_status
	exit $?
	;;
monitor)
	Filesystem_monitor
	exit $?
	;;
validate-all)
	Filesystem_validate_all
	exit $?
	;;
stop)
	Filesystem_stop
	exit $?
	;;
esac

# CLUSTERSAFE: 0 = refuse to run as a clone, 1 = fine,
# 2 = allowed as a clone, but with warnings.
CLUSTERSAFE=0
is_option "ro" &&
	CLUSTERSAFE=2

case $FSTYPE in
nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs)
	CLUSTERSAFE=1 # this is kind of safe too
	;;
# add here CLUSTERSAFE=0 for all filesystems which are not
# cluster aware and which, even if when mounted read-only,
# could still modify parts of it such as journal/metadata
ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs)
	if ocf_is_true "$OCF_RESKEY_force_clones"; then
		CLUSTERSAFE=2
	else
		CLUSTERSAFE=0 # these are not allowed
	fi
	;;
esac

if ocf_is_clone; then
	case $CLUSTERSAFE in
	0)
		ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
		ocf_log err "DO NOT RUN IT AS A CLONE!"
		ocf_log err "Politely refusing to proceed to avoid data corruption."
		exit $OCF_ERR_CONFIGURED
		;;
	2)
		ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!"
		if ocf_is_true "$OCF_RESKEY_force_clones"; then
			ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so."
		else
			ocf_log warn "But we'll let it run because it is mounted read-only."
			ocf_log warn "Please make sure that it's meta data is read-only too!"
		fi
		;;
	esac
fi

case $OP in
start)
	Filesystem_start
	;;
*)
	usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
exit $?
diff --git a/heartbeat/ICP b/heartbeat/ICP
index c427e7a39..8f187e082 100755
--- a/heartbeat/ICP
+++ b/heartbeat/ICP
@@ -1,296 +1,296 @@
#!/bin/sh
#
#
# ICP
#
# Description: Manages an ICP Vortex clustered host drive as an HA resource
#
#
# Author: Lars Marowsky-Bree
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 SuSE Linux AG
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 LinuxSCSI::0:0 ICP::c0h1::/dev/sdb1 LVM::myvolname
#
# Notice that you will need to get the utility "icpclucon" from the ICP
# support to use this.
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_driveid
# OCF_RESKEY_device
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
#
ICPCLUCON=/usr/sbin/icpclucon
#
#
# usage: print a short help text to stdout.
# The operation list is taken from ICP_methods (lines containing
# "methods" are filtered out) and joined with "|".
#
usage() {
	methods=$(ICP_methods | grep -v methods)
	# Unquoted on purpose: word splitting turns the newlines into blanks.
	methods=$(echo $methods | tr ' ' '|')
	cat <<-!
usage: $0 ($methods)
$0 manages an ICP Vortex clustered host drive.
The 'start' operation reserves the given host drive.
The 'stop' operation releases the given host drive.
The 'status' operation reports whether the host drive is reserved.
The 'monitor' operation reports whether the host drive is reserved.
The 'validate-all' operation reports whether OCF instance parameters are valid.
The 'methods' operation reports on the methods $0 supports
!
}
# meta_data: emit the agent description for the meta-data operation.
# NOTE(review): the here-document looks damaged in this copy -- as written,
# "cat <1.0" reads from a file named "1.0" and the lines up to END are not
# inside a here-document. Presumably this was "cat <<END" followed by the
# OCF metadata XML; confirm against the upstream agent before relying on it.
meta_data() {
cat <1.0
Resource script for ICP. It Manages an ICP Vortex clustered host drive as an
HA resource.
Manages an ICP Vortex clustered host drive
The ICP cluster drive ID.
ICP cluster drive ID
The device name.
device
END
}
#
# methods: What methods/operations do we support?
#
ICP_methods() {
	# One supported operation per line, written to stdout.
	printf '%s\n' \
		start \
		stop \
		status \
		monitor \
		methods \
		validate-all \
		meta-data \
		usage
}
#
# Ask icpclucon whether host drive $1 is reserved by this host.
#
# Returns: $OCF_SUCCESS     - the drive is reserved by this host
#          $OCF_NOT_RUNNING - not reserved by any host, or unrecognised output
#          $OCF_ERR_GENERIC - the icpclucon call itself failed
#
ICP_status() {
	local icp_out

	icp_out=$($ICPCLUCON -v -status "$1")
	if [ $? -ne 0 ]; then
		ocf_log "err" "Hostdrive not reserved by us."
		return $OCF_ERR_GENERIC
	fi

	# Plain shell pattern matching; "expr match" is a GNU extension and
	# is not available in every /bin/sh environment.
	case "$icp_out" in
	*"Drive is reserved by this host"*)
		ocf_log "info" "Volume $1 is reserved by us."
		return $OCF_SUCCESS
		;;
	*"Drive is not reserved by any host"*)
		ocf_log "err" "Volume $1 not reserved by any host."
		return $OCF_NOT_RUNNING
		;;
	*)
		ocf_log "err" "Unknown output from icpclucon. Assuming we do not have a reservation:"
		ocf_log "err" "$icp_out"
		return $OCF_NOT_RUNNING
		;;
	esac
}
# Print "<drive>: running" or "<drive>: not running" for host drive $1
# and return $OCF_SUCCESS or $OCF_NOT_RUNNING accordingly.
ICP_report_status() {
	ICP_status $1 || {
		echo "$1: not running"
		return $OCF_NOT_RUNNING
	}
	echo "$1: running"
	return $OCF_SUCCESS
}
#
# Monitor the host drive - does it really seem to be working?
#
#
# Monitor host drive $1: returns 0 when ICP_status succeeds, otherwise
# logs an error and returns $OCF_NOT_RUNNING.
ICP_monitor() {
	ICP_status $1 && return 0
	ocf_log "err" "ICP host drive $1 is offline"
	return $OCF_NOT_RUNNING
}
# Run "$BLOCKDEV --flushbufs" on device $1; the exit status is that of
# the command.
Clear_bufs() {
	# Build the command line first (same word splitting as a direct call).
	set -- $BLOCKDEV --flushbufs $1
	"$@"
}
#
# Enable ICP host drive
#
# Reserve host drive $1, forcing the reservation if the plain request
# fails, then verify it with ICP_status.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
ICP_start() {
	ocf_log "info" "Activating host drive $1"
	if ! ocf_run $ICPCLUCON -v -reserve $1; then
		ocf_log "info" "Forcing reservation of $1"
		ocf_run $ICPCLUCON -v -force $1 || return $OCF_ERR_GENERIC
	fi

	if ! ICP_status $1; then
		ocf_log "err" "ICP: $1 was not reserved correctly"
		return $OCF_ERR_GENERIC
	fi

	# A reservation isn't as prompt as it should be
	sleep 3
	return $OCF_SUCCESS
}
#
# Release the ICP host drive
#
# Release host drive $1 and verify that ICP_status no longer reports it
# as reserved. Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
ICP_stop() {
	ocf_log "info" "Releasing ICP host drive $1"
	if ! ocf_run $ICPCLUCON -v -release $1; then
		return $OCF_ERR_GENERIC
	fi

	ocf_log "info" "Verifying reservation"
	# A failing status check is the expected outcome after a release.
	ICP_status $1 || return $OCF_SUCCESS

	ocf_log "err" "ICP: $1 was not released correctly"
	return $OCF_ERR_GENERIC
}
#
# Validate the instance parameters held in $driveid and $device.
# Exits the agent with $OCF_ERR_ARGS when icpclucon rejects the drive id
# or when $device is not a block device; returns $OCF_SUCCESS otherwise.
#
ICP_validate_all() {
	check_binary $BLOCKDEV
	check_binary $ICPCLUCON

	if ! $ICPCLUCON -v -status "$driveid" >/dev/null 2>&1; then
		ocf_log err "Invalid driveid $driveid"
		exit $OCF_ERR_ARGS
	fi

	# Quoted: with an empty $device an unquoted "[ ! -b ]" evaluates to
	# false and the check would be skipped silently.
	if [ ! -b "$device" ]; then
		ocf_log err "Device $device is not a block device"
		exit $OCF_ERR_ARGS
	fi

	# Do not know how to check the association of $device with $driveid.

	return $OCF_SUCCESS
}
#
# 'main' starts here...
#
if [ $# -ne 1 ]; then
	usage
	exit $OCF_ERR_ARGS
fi

# These operations do not require OCF instance parameters to be set
case "$1" in
meta-data)
	meta_data
	exit $OCF_SUCCESS;;
methods)
	ICP_methods
	exit $OCF_SUCCESS;;
usage)
	usage
	exit $OCF_SUCCESS;;
*)	;;
esac

if [ -z "$OCF_RESKEY_driveid" ]; then
	ocf_log err "Please specify OCF_RESKEY_driveid"
	exit $OCF_ERR_ARGS
fi

if [ -z "$OCF_RESKEY_device" ]; then
	ocf_log err "Please specify OCF_RESKEY_device"
	exit $OCF_ERR_ARGS
fi

driveid=$OCF_RESKEY_driveid
device=$OCF_RESKEY_device

# What kind of method was invoked?
# For start and stop the buffers are always flushed afterwards, but a
# failure of the reserve/release step must not be hidden by the exit
# status of the flush.
case "$1" in
start)
	ICP_validate_all
	ICP_start $driveid
	rc=$?
	Clear_bufs $device
	flush_rc=$?
	if [ $rc -ne 0 ]; then
		exit $rc
	fi
	exit $flush_rc;;
stop)
	ICP_stop $driveid
	rc=$?
	Clear_bufs $device
	flush_rc=$?
	if [ $rc -ne 0 ]; then
		exit $rc
	fi
	exit $flush_rc;;
status)
	ICP_report_status $driveid
	exit $?;;
monitor)
	ICP_monitor $driveid
	exit $?;;
validate-all)
	ICP_validate_all
	exit $?;;
*)
	usage
	exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/IPaddr b/heartbeat/IPaddr
index 8ada6c4d2..8c9fb20f3 100755
--- a/heartbeat/IPaddr
+++ b/heartbeat/IPaddr
@@ -1,892 +1,892 @@
#!/bin/sh
#
# License: GNU General Public License (GPL)
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# This script manages IP alias IP addresses
#
# It can add an IP alias, or remove one.
#
# usage: $0 {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg adds an IP alias.
#
# Surprisingly, the "stop" arg removes one. :-)
#
# OCF parameters are as below
# OCF_RESKEY_ip
# OCF_RESKEY_broadcast
# OCF_RESKEY_nic
# OCF_RESKEY_cidr_netmask
# OCF_RESKEY_lvs_support ( e.g. true, on, 1 )
# OCF_RESKEY_ARP_INTERVAL_MS
# OCF_RESKEY_ARP_REPEAT
# OCF_RESKEY_ARP_BACKGROUND (e.g. yes )
# OCF_RESKEY_ARP_NETMASK
# OCF_RESKEY_local_start_script
# OCF_RESKEY_local_stop_script
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Helper binaries and state locations, derived from HA_BIN / HA_RSCTMP.
SENDARP="${HA_BIN}/send_arp"
FINDIF="${HA_BIN}/findif"
VLDIR="${HA_RSCTMP}"
SENDARPPIDDIR="${HA_RSCTMP}"
SENDARPPIDFILE="${SENDARPPIDDIR}/send_arp-${OCF_RESKEY_ip}"
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"

#######################################################################

# Platform detection.
SYSTYPE=$(uname -s)
if [ "$SYSTYPE" = "SunOS" ]; then
	# `uname -r` = 5.9 -> SYSVERSION = 9
	SYSVERSION=$(uname -r | cut -d. -f 2)
elif [ "$SYSTYPE" = "Darwin" ]; then
	# Treat Darwin the same as the other BSD variants (matched as *BSD)
	SYSTYPE="${SYSTYPE}BSD"
fi
# meta_data: print the agent description, then exit with $OCF_SUCCESS.
# NOTE(review): the here-document looks damaged in this copy -- as written,
# "cat <1.0" reads from a file named "1.0" and the lines up to END are not
# inside a here-document. Presumably this was "cat <<END" followed by the
# OCF metadata XML; confirm against the upstream agent before relying on it.
meta_data() {
cat <1.0
This script manages IP alias IP addresses
It can add an IP alias, or remove one.
Manages virtual IPv4 addresses (portable version)
The IPv4 address to be configured in dotted quad notation, for example
"192.168.1.1".
IPv4 address
The base network interface on which the IP address will be brought
online.
If left empty, the script will try and determine this from the
routing table.
Do NOT specify an alias interface in the form eth0:1 or anything here;
rather, specify the base interface only.
Prerequisite:
There must be at least one static IP address, which is not managed by
the cluster, assigned to the network interface.
If you can not assign any static IP address on the interface,
modify this kernel parameter:
sysctl -w net.ipv4.conf.all.promote_secondaries=1
(or per device)
Network interface
The netmask for the interface in CIDR format. (ie, 24), or in
dotted quad notation 255.255.255.0).
If unspecified, the script will also try to determine this from the
routing table.
Netmask
Broadcast address associated with the IP. If left empty, the script will
determine this from the netmask.
Broadcast address
You can specify an additional label for your IP address here.
Interface label
Enable support for LVS Direct Routing configurations. In case a IP
address is stopped, only move it to the loopback device to allow the
local node to continue to service requests, but no longer advertise it
on the network.
Enable support for LVS DR
Script called when the IP is released
Script called when the IP is released
Script called when the IP is added
Script called when the IP is added
milliseconds between ARPs
milliseconds between gratuitous ARPs
How many gratuitous ARPs to send out when bringing up a new address
repeat count
run in background (no longer any reason to do this)
run in background
netmask for ARP - in nonstandard hexadecimal format.
netmask for ARP
END
exit $OCF_SUCCESS
}
# The 'ping' command takes highly OS-dependent arguments, so this
# function creates a suitable argument list for the host OS's 'ping'.
# We use a subset of its functionality:
# 1. single packet
# 2. reasonable timeout (say 1 second)
#
# arguments:
# $1: IP address to ping
# result string:
# arguments for ping command
#
# If more flexibility is needed, they could be specified in the environment
# to this function, to adjust the resulting 'ping' arguments.
# David Lee May 2007
# Print the argument list for one ping of address $1, chosen by `uname -s`.
pingargs() {
	_baseip=$1
	_timeout=1	# seconds
	_pktcount=1
	_systype=$(uname -s)

	if [ "$_systype" = "Linux" ]; then
		# Default is perpetual ping: need "-c $_pktcount".
		# -c count -t timetolive -q(uiet) -n(umeric) -W timeout
		_pingargs="-c $_pktcount -q -n $_baseip"
	elif [ "$_systype" = "SunOS" ]; then
		# Default is immediate (or timeout) return.
		_pingargs="$_baseip $_timeout"
	else
		_pingargs="-c $_pktcount $_baseip"
	fi

	echo "$_pingargs"
}
# On Linux systems the (hidden) loopback interface may
# conflict with the requested IP address. If so, this
# unoriginal code will remove the offending loopback address
# and save it in VLDIR so it can be added back in later
# when the IPaddr is released.
#
# Take address $1 off loopback interface $2, remembering the interface
# name in $VLDIR/<address> so it can be put back later.
# The return status is that of delete_route.
lvs_remove_conflicting_loopback() {
	ipaddr="$1"
	ifname="$2"

	ocf_log info "Removing conflicting loopback $ifname."

	# Save the loopback information in $VLDIR/$ipaddr
	if ! echo $ifname > "$VLDIR/$ipaddr"; then
		ocf_log err "Could not save conflicting loopback $ifname." \
			"it will not be restored."
	fi

	# Run the optional site-local stop hook when it is set and executable.
	if [ -n "${OCF_RESKEY_local_stop_script}" ] &&
	   [ -x "${OCF_RESKEY_local_stop_script}" ]; then
		${OCF_RESKEY_local_stop_script} $*
	fi

	delete_interface "$ifname" "$ipaddr"

	# Forcibly remove the route (if it exists) to the loopback.
	delete_route "$ipaddr"
}
#
# On Linux systems the (hidden) loopback interface may
# need to be restored if it has been taken down previously
# by lvs_remove_conflicting_loopback()
#
#
# Put address $1 back on the loopback interface recorded in
# $VLDIR/<address> by lvs_remove_conflicting_loopback.
# Does nothing when no (non-empty) record exists. Exits the agent with
# $OCF_ERR_GENERIC when findif fails; otherwise the record is removed
# after add_interface has been called.
#
lvs_restore_loopback() {
	ipaddr="$1"

	if [ ! -s "$VLDIR/$ipaddr" ]; then
		return
	fi

	ifname=$(cat "$VLDIR/$ipaddr")
	ocf_log info "Restoring loopback IP Address $ipaddr on $ifname."

	CMD="OCF_RESKEY_cidr_netmask=32 OCF_RESKEY_ip=$1 OCF_RESKEY_nic=$ifname $FINDIF"

	# Check findif's own exit status; testing the status of a later
	# pipeline would never notice a failure here.
	NICINFO=$(eval $CMD)
	rc=$?
	if [ $rc -ne 0 ]; then
		echo "ERROR: $CMD failed (rc=$rc)"
		exit $OCF_ERR_GENERIC
	fi

	# Flatten tabs and repeated blanks so cut can split on single spaces.
	NICINFO=$(echo $NICINFO | tr '\t' ' ' | tr -s ' ')
	netmask_text=$(echo "$NICINFO" | cut -f3 -d " ")
	broadcast=$(echo "$NICINFO" | cut -f5 -d " ")

	add_interface "$ipaddr" "$ifname" "$ifname" $netmask_text $broadcast
	rm -f "$VLDIR/$ipaddr"
}
#
# Find out which alias serves the given IP address
# The argument is an IP address, and its output
# is an aliased interface name (e.g., "eth0:0").
#
# Solaris flavour: print the alias interface (e.g. "le0:1") carrying
# IP address $1, parsed from "$IFCONFIG $IFCONFIG_A_OPT".
# NOTE(review): the while loop is the last stage of a pipeline, so in most
# shells it presumably runs in a subshell; the "return" statements inside
# it would then only end the loop and the function itself would still
# return $OCF_ERR_GENERIC. find_interface only uses the printed name.
find_interface_solaris() {
ipaddr="$1"
# awk puts an empty line in front of every line containing ": " (except
# the very first input line), so each interface stanza ends with a blank line.
$IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' |
while read ifname linkstuff
do
: ifname = $ifname
# Second line of the stanza: the address is taken from its second word.
read inet addr junk
: inet = $inet addr = $addr
# Skip the remaining lines of this stanza, up to the empty separator line.
while
read line && [ "X$line" != "X" ]
do
: Nothing
done
# Only names containing a colon are considered.
case $ifname in
*:*) ;;
*) continue;;
esac
# This doesn't look right for a box with multiple NICs.
# It looks like it always selects the first interface on
# a machine. Yet, we appear to use the results for this case too...
# Strip the trailing colon from the interface name.
ifname=`echo "$ifname" | sed s'%:$%%'`
case $addr in
addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;;
$ipaddr) echo $ifname; return $OCF_SUCCESS;;
esac
done
return $OCF_ERR_GENERIC
}
#
# BSD flavour: print the name of the interface that carries IP address $1.
# The name is taken from the most recent header line that contains "UP,"
# and starts with "<letters><digit>:"; nothing is printed when no
# "inet <address>" line matches.
# For backward compatibility the global $ipaddr is used when no argument
# is given.
#
find_interface_bsd() {
	ipaddr="${1:-$ipaddr}"
	$IFCONFIG $IFCONFIG_A_OPT | awk -v ip_addr="$ipaddr" '
	/UP,/ && $0 ~ /^[a-z]+[0-9]:/ {
		if_name=$1; sub(":$","",if_name);
	}
	$1 == "inet" && $2 == ip_addr {
		print if_name
		exit(0)
	}'
}
#
# Find out which alias serves the given IP address
# The argument is an IP address, and its output
# is an aliased interface name (e.g., "eth0:0").
#
# Generic flavour: print the alias interface (e.g. "eth0:0") carrying
# IP address $1, parsed from "$IFCONFIG $IFCONFIG_A_OPT".
# NOTE(review): the while loop is the last stage of a pipeline, so in most
# shells it presumably runs in a subshell; the "return" statements inside
# it would then only end the loop and the function itself would still
# return $OCF_ERR_GENERIC. find_interface only uses the printed name.
find_interface_generic() {
ipaddr="$1"
$IFCONFIG $IFCONFIG_A_OPT |
while read ifname linkstuff
do
: Read gave us ifname = $ifname
# Second line of the stanza: the address is taken from its second word.
read inet addr junk
: Read gave us inet = $inet addr = $addr
# Skip the remaining lines of this stanza, up to the next empty line.
while
read line && [ "X$line" != "X" ]
do
: Nothing
done
# Only names containing a colon are considered; a trailing colon is removed.
case $ifname in
*:*) ifname=`echo $ifname | sed 's/:$//'`;;
*) continue;;
esac
: "comparing $ipaddr to $addr (from ifconfig)"
# The address may be printed either as "addr:<ip>" or as a bare "<ip>".
case $addr in
addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;;
$ipaddr) echo $ifname; return $OCF_SUCCESS;;
esac
done
return $OCF_ERR_GENERIC
}
#
# Find out which alias serves the given IP address
# The argument is an IP address, and its output
# is an aliased interface name (e.g., "eth0:0").
#
# Print the interface serving IP address $1, using the lookup helper
# that matches $SYSTYPE. Always returns $OCF_SUCCESS; an empty output
# line means nothing was found.
find_interface() {
	ipaddr="$1"

	# Pick the platform specific helper first, then call it once.
	case "$SYSTYPE" in
	SunOS)	finder=find_interface_solaris;;
	*BSD)	finder=find_interface_bsd;;
	*)	finder=find_interface_generic;;
	esac

	NIC=$($finder $ipaddr)
	echo $NIC
	return $OCF_SUCCESS
}
#
# Find an unused interface/alias name for us to use for new IP alias
# The argument is an IP address, and the output
# is an aliased interface name (e.g., "eth0:0", "dc0", "le0:0").
#
# Print an unused alias name for base interface $1.
# Candidates are claimed by hard-linking $VLDIR/IPaddr-<nic> to
# $VLDIR/IPaddr-<nic>:<n>; the first <n> below 512 that is neither
# listed by ifconfig nor already linked wins. On *BSD the base name
# itself is printed. Returns $OCF_ERR_GENERIC when $1 is empty or no
# candidate could be claimed.
find_free_interface() {
	NIC="$1"
	if [ -z "$NIC" ]; then
		ocf_log err "No free interface found for $OCF_RESKEY_ip"
		return $OCF_ERR_GENERIC
	fi

	NICBASE="$VLDIR/IPaddr-$NIC"
	touch "$NICBASE"

	case "$SYSTYPE" in
	*BSD)
		echo $NIC
		return $OCF_SUCCESS
		;;
	SunOS)
		j=1
		IFLIST=$($IFCONFIG $IFCONFIG_A_OPT | \
			grep "^$NIC:[0-9]" | sed 's%: .*%%')
		;;
	*)
		j=0
		IFLIST=$($IFCONFIG $IFCONFIG_A_OPT | \
			grep "^$NIC:[0-9]" | sed 's% .*%%')
		# Start at the number of existing link files, unless a link
		# with exactly that number is already present.
		TRYADRCNT=$(ls "${NICBASE}:"* 2>/dev/null | wc -w | tr -d ' ')
		[ -f "${NICBASE}:${TRYADRCNT}" ] || j="${TRYADRCNT}"
		;;
	esac

	# Blank-delimit the list so whole names can be matched below.
	IFLIST=" $(echo $IFLIST) "

	while [ $j -lt 512 ]; do
		case $IFLIST in
		*" "$NIC:$j" "*)
			;;
		*)
			NICLINK="$NICBASE:$j"
			if ln "$NICBASE" "$NICLINK" 2>/dev/null; then
				echo "$NIC:$j"
				return $OCF_SUCCESS
			fi
			;;
		esac
		j=$((j + 1))
	done
	return $OCF_ERR_GENERIC
}
delete_route () {
ipaddr="$1"
case "$SYSTYPE" in
SunOS) return 0;;
*BSD) CMD="$ROUTE -n delete -host $ipaddr";;
*) CMD="$ROUTE -n del -host $ipaddr";;
esac
$CMD
return $?
}
# Take interface $1 (carrying address $2) down with $IFCONFIG, using the
# syntax for $SYSTYPE. The command line is logged first; the return
# status is that of the ifconfig call.
delete_interface () {
	ifname="$1"
	ipaddr="$2"

	# Work out the platform specific tail of the ifconfig command line.
	case "$SYSTYPE" in
	SunOS)
		if [ "$SYSVERSION" -ge 8 ] ; then
			tail="unplumb"
		else
			tail="0 down"
		fi
		;;
	Darwin*)
		tail="$ipaddr delete"
		;;
	*BSD)
		tail="inet $ipaddr delete"
		;;
	*)
		tail="down"
		;;
	esac

	CMD="$IFCONFIG $ifname $tail"
	ocf_log info "$CMD"
	$CMD
}
# Bring up an address with $IFCONFIG.
# Arguments (exactly five, otherwise the agent exits with $OCF_ERR_ARGS):
#   $1 IP address   $2 base interface   $3 interface/alias to configure
#   $4 netmask      $5 broadcast address
# Returns the status of the configuration command(s).
add_interface () {
	if [ $# != 5 ]; then
		ocf_log err "Insufficient arguments to add_interface: $*"
		exit $OCF_ERR_ARGS
	fi

	ipaddr="$1"
	iface_base="$2"
	iface="$3"
	netmask="$4"
	broadcast="$5"

	case "$SYSTYPE" in
	SunOS)
		if [ "$SYSVERSION" -ge 8 ] ; then
			$IFCONFIG $iface plumb || {
				rc=$?
				echo "ERROR: '$IFCONFIG $iface plumb' failed."
				return $rc
			}
		fi
		# At Solaris 10, this single-command version sometimes broke.
		# Almost certainly an S10 bug.
		#	CMD="$IFCONFIG $iface inet $ipaddr $text up"
		# So hack the following workaround:
		CMD="$IFCONFIG $iface inet $ipaddr && $IFCONFIG $iface netmask $netmask && $IFCONFIG $iface up"
		;;
	*BSD)
		# netmask is always set to 255.255.255.255 for an alias
		CMD="$IFCONFIG $iface inet $ipaddr netmask 255.255.255.255 alias"
		;;
	*)
		CMD="$IFCONFIG $iface $ipaddr netmask $netmask broadcast $broadcast"
		;;
	esac

	# Use "eval $CMD" (not "$CMD"): it might be a chain of two or more commands.
	ocf_log info "eval $CMD"
	eval $CMD
	rc=$?
	[ $rc != 0 ] && echo "ERROR: eval $CMD failed (rc=$rc)"
	return $rc
}
#
# Remove the IP alias for the requested IP address...
#
#
# Release $OCF_RESKEY_ip: stop a recorded send_arp process, remove the
# host route and the alias interface, restore a saved loopback address
# (LVS mode) and drop the lock file for the alias.
# Returns $OCF_SUCCESS when the address is not configured or was
# released, $OCF_ERR_GENERIC when delete_interface failed.
#
ip_stop() {
	SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
	NIC=$(find_interface $OCF_RESKEY_ip)

	if [ -f "$SENDARPPIDFILE" ]; then
		xargs kill < "$SENDARPPIDFILE"
		rm -f "$SENDARPPIDFILE"
	fi

	if [ -z "$NIC" ]; then
		: Requested interface not in use
		return $OCF_SUCCESS
	fi

	# lvs_support is quoted (as in ip_status_internal) so an unset value
	# simply means "disabled" instead of a test syntax error.
	if [ "${OCF_RESKEY_lvs_support}" = 1 ]; then
		case $NIC in
		lo*)
			: Requested interface is on loopback
			return $OCF_SUCCESS;;
		esac
	fi

	delete_route "$OCF_RESKEY_ip"
	delete_interface "$NIC" "$OCF_RESKEY_ip"
	rc=$?

	if [ "${OCF_RESKEY_lvs_support}" = 1 ]; then
		lvs_restore_loopback "$OCF_RESKEY_ip"
	fi

	# remove lock file...
	rm -f "$VLDIR/IPaddr-$NIC"

	if [ $rc != 0 ]; then
		ocf_log warn "IP Address $OCF_RESKEY_ip NOT released: rc=$rc"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
#
# Add an IP alias for the requested IP address...
#
# It could be that we already have taken it, in which case it should
# do nothing.
#
# Bring up $OCF_RESKEY_ip on a free alias of $OCF_RESKEY_nic.
# Returns $OCF_SUCCESS straight away when the address is already
# active; otherwise the final status comes from ip_status_internal, or
# is an error code when no alias could be found or add_interface failed.
ip_start() {
	#
	# Do we already service this IP address?
	#
	ip_status_internal
	# Nothing to do, the IP is already active
	[ $? = $OCF_SUCCESS ] && return $OCF_SUCCESS

	NIC_unique=$(find_free_interface $OCF_RESKEY_nic)
	# No alias available for $OCF_RESKEY_ip
	[ -n "$NIC_unique" ] || return $OCF_ERR_GENERIC

	# This logic is mostly to support LVS (If I understand it correctly)
	if [ ${OCF_RESKEY_lvs_support} = 1 ]; then
		NIC_current=$(find_interface $OCF_RESKEY_ip)
		case $NIC_unique in
		lo*)
			if [ x"$NIC_unique" = x"$NIC_current" ]; then
				# Its already "running" and not moving, nothing to do.
				ocf_log err "Could not find a non-loopback device to move $OCF_RESKEY_ip to"
				return $OCF_ERR_GENERIC
			fi
			;;
		*)
			lvs_remove_conflicting_loopback "$OCF_RESKEY_ip" "$NIC_current"
			;;
		esac
	fi

	# Optional site-local start hook, run when it is set and executable.
	if [ -n "${OCF_RESKEY_local_start_script}" ] &&
	   [ -x "${OCF_RESKEY_local_start_script}" ]; then
		${OCF_RESKEY_local_start_script} $*
	fi

	add_interface "$OCF_RESKEY_ip" "$OCF_RESKEY_nic" "$NIC_unique" \
		"$OCF_RESKEY_cidr_netmask" "$OCF_RESKEY_broadcast"
	rc=$?
	if [ $rc != 0 ]; then
		ocf_log err "Could not add $OCF_RESKEY_ip to $OCF_RESKEY_nic: rc=$rc"
		return $rc
	fi

	# The address is active, now notify others about it using sendarp
	if [ "$SYSTYPE" = "DarwinBSD" ] && [ "$NIC_unique" = "lo0" ]; then
		# Darwin can't send ARPs on loopback devices
		SENDARP="x$SENDARP"	# Prevent the binary from being found
	fi

	if [ -x $SENDARP ]; then
		TARGET_INTERFACE=$(echo $NIC_unique | sed 's%:.*%%')
		SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
		ARGS="-i $OCF_RESKEY_ARP_INTERVAL_MS -r $OCF_RESKEY_ARP_REPEAT -p $SENDARPPIDFILE $TARGET_INTERFACE $OCF_RESKEY_ip auto $OCF_RESKEY_ip $OCF_RESKEY_ARP_NETMASK"
		ocf_log debug "Sending Gratuitous Arp for $OCF_RESKEY_ip on $NIC_unique [$TARGET_INTERFACE]"
		case $OCF_RESKEY_ARP_BACKGROUND in
		yes)
			($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?" & ) >&2
			;;
		*)
			$SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?"
			;;
		esac
	fi

	# Report whether the address is really up now.
	ip_status_internal
}
# Report whether $OCF_RESKEY_ip is configured on this host.
# Returns: $OCF_NOT_RUNNING - no interface carries the address (or, with
#                             lvs_support=1, only a loopback one does)
#          $OCF_ERR_GENERIC - the address sits on a different base
#                             interface than $OCF_RESKEY_nic
#          $OCF_SUCCESS     - otherwise
ip_status_internal() {
	NIC=$(find_interface "$OCF_RESKEY_ip")

	if [ "x$NIC" = x ]; then
		return $OCF_NOT_RUNNING
	fi

	if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then
		case $NIC in
		lo*)	return $OCF_NOT_RUNNING;;
		*)	return $OCF_SUCCESS;;
		esac
	fi

	if [ x$OCF_RESKEY_nic != x ]; then
		# Compare base names only, i.e. without any ":alias" suffix.
		simple_OCF_NIC=$(echo $OCF_RESKEY_nic | cut -d: -f1)
		simple_NIC=$(echo $NIC | cut -d: -f1)
		if [ $simple_OCF_NIC != $simple_NIC ]; then
			ocf_log err "$OCF_RESKEY_ip is running an interface ($simple_NIC) instead of the configured one ($simple_OCF_NIC)"
			return $OCF_ERR_GENERIC
		fi
	fi
	return $OCF_SUCCESS
}
# Print "running", "stopped" or "unknown" for the result of
# ip_status_internal and pass that result on as the return status.
ip_status() {
	ip_status_internal
	rc=$?

	state="unknown"
	[ $rc = $OCF_NOT_RUNNING ] && state="stopped"
	[ $rc = $OCF_SUCCESS ] && state="running"

	echo "$state"
	return $rc
}
#
# Determine if this IP address is really being served, or not.
# Note that we must distinguish if *we're* serving it locally...
#
# Monitor $OCF_RESKEY_ip. With OCF_CHECK_LEVEL 0, or when
# ip_status_internal does not return 0, that status is returned as is.
# Otherwise the address is pinged up to ten times: the first successful
# ping yields $OCF_SUCCESS; if all fail, the output of the last ping is
# logged and $OCF_ERR_GENERIC is returned.
ip_monitor() {
	ip_status_internal
	rc=$?
	if [ $OCF_CHECK_LEVEL = 0 -o $rc != 0 ]; then
		return $rc
	fi

	ocf_log info "Checking IP stack"
	PINGARGS="$(pingargs $OCF_RESKEY_ip)"

	j=1
	while [ $j -le 10 ]; do
		if MSG=$($PING $PINGARGS 2>&1); then
			return $OCF_SUCCESS
		fi
		j=$((j + 1))
	done

	ocf_log err "$MSG"
	return $OCF_ERR_GENERIC
}
# NOTE: the return value is inverted with respect to the name.
# Returns 1 when $1 is a decimal number >= 1, and 0 otherwise, so
# callers in this file treat success as "invalid value".
is_positive_integer() {
	if ocf_is_decimal $1 && [ $1 -ge 1 ]; then
		return 1
	fi
	return 0
}
# Validate the instance parameters and fill in missing ones.
# Side effects: defaults the ARP_* parameters and lvs_support, strips an
# ":alias" suffix from OCF_RESKEY_nic, and overwrites OCF_RESKEY_nic (when
# empty), OCF_RESKEY_netmask, OCF_RESKEY_cidr_netmask (exported) and
# OCF_RESKEY_broadcast (when empty) with the values reported by $FINDIF.
# Returns $OCF_SUCCESS, or $OCF_ERR_ARGS / $OCF_ERR_CONFIGURED /
# $OCF_ERR_GENERIC as set in the branches below.
ip_validate_all() {
# Defaults for the gratuitous ARP parameters (applied only when unset).
: ${OCF_RESKEY_ARP_BACKGROUND=yes}
: ${OCF_RESKEY_ARP_NETMASK=ffffffffffff}
: ${OCF_RESKEY_ARP_INTERVAL_MS=500}
: ${OCF_RESKEY_ARP_REPEAT=10}
check_binary $AWK
check_binary $IFCONFIG
check_binary $ROUTE
check_binary $PING
# is_positive_integer returns 0 (success) when the value is NOT a
# positive integer, hence the error branch under "then".
if is_positive_integer $OCF_RESKEY_ARP_INTERVAL_MS
then
ocf_log err "Invalid parameter value: ARP_INTERVAL_MS [$OCF_RESKEY_ARP_INTERVAL_MS]"
return $OCF_ERR_ARGS
fi
if is_positive_integer $OCF_RESKEY_ARP_REPEAT
then
ocf_log err "Invalid parameter value: ARP_REPEAT [$OCF_RESKEY_ARP_REPEAT]"
return $OCF_ERR_ARGS
fi
: ${OCF_RESKEY_lvs_support=0}
# lvs_support=1 is only accepted on Linux and SunOS.
if [ "$SYSTYPE" = "Linux" -o "$SYSTYPE" = "SunOS" ]; then
:
else
if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then
ocf_log err "$SYSTYPE does not support LVS"
return $OCF_ERR_GENERIC
fi
fi
# Rough shape check only: a shell glob, not a strict IPv4 validation.
case $OCF_RESKEY_ip in
"") ocf_log err "Required parameter OCF_RESKEY_ip is missing"
return $OCF_ERR_CONFIGURED;;
[0-9]*.[0-9]*.[0-9]*.*[0-9]) : OK;;
*) ocf_log err "Parameter OCF_RESKEY_ip [$OCF_RESKEY_ip] not an IP address"
return $OCF_ERR_CONFIGURED;;
esac
# Unconditionally do this?
# Drop everything from the first colon on (alias suffix) in the nic name.
case $OCF_RESKEY_nic in
*:*)
OCF_RESKEY_nic=`echo $OCF_RESKEY_nic | sed 's/:.*//'`
;;
esac
# The output of $FINDIF is split on tabs below: field 1 is used as the
# nic, the second word of field 2 as the netmask and the second word of
# field 3 as the broadcast address.
NICINFO=`$FINDIF`
rc=$?
if [ $rc != 0 ]; then
ocf_log err "$FINDIF failed [rc=$rc]."
return $OCF_ERR_GENERIC
fi
tmp=`echo "$NICINFO" | cut -f1`
if
[ "x$OCF_RESKEY_nic" = "x" ]
then
ocf_log info "Using calculated nic for ${OCF_RESKEY_ip}: $tmp"
OCF_RESKEY_nic=$tmp
elif
[ x$tmp != x${OCF_RESKEY_nic} ]
then
ocf_log err "Invalid parameter value: nic [$OCF_RESKEY_nic] Calculated nic: [$tmp]"
return $OCF_ERR_ARGS
fi
tmp=`echo "$NICINFO" | cut -f2 | cut -d ' ' -f2`
if
[ "x$OCF_RESKEY_cidr_netmask" != "x$tmp" ]
then
ocf_log info "Using calculated netmask for ${OCF_RESKEY_ip}: $tmp"
fi
# Always use the calculated version because it might have been specified
# using CIDR notation which not every system accepts
OCF_RESKEY_netmask=$tmp
OCF_RESKEY_cidr_netmask=$tmp; export OCF_RESKEY_cidr_netmask
tmp=`echo "$NICINFO" | cut -f3 | cut -d ' ' -f2`
if
[ "x$OCF_RESKEY_broadcast" = "x" ]
then
ocf_log debug "Using calculated broadcast for ${OCF_RESKEY_ip}: $tmp"
OCF_RESKEY_broadcast=$tmp
elif [ x$tmp != x${OCF_RESKEY_broadcast} ]; then
ocf_log err "Invalid parameter value: broadcast [$OCF_RESKEY_broadcast] Calculated broadcast: [$tmp]"
return $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
}
# Print the usage line to stderr and return $1, so the caller can pass
# the status on to exit.
usage() {
	{ echo $USAGE; } 1>&2
	return $1
}
# Exactly one argument (the operation) is required. usage only returns
# its argument, so the exit has to happen here; otherwise the script
# would carry on with whatever $1 happens to be.
if [ $# -ne 1 ]; then
	usage $OCF_ERR_ARGS
	exit $OCF_ERR_ARGS
fi

: ${OCF_RESKEY_lvs_support=0}
# Normalize the value of lvs_support
case "${OCF_RESKEY_lvs_support}" in
true|on|yes|1)	OCF_RESKEY_lvs_support=1;;
*)		OCF_RESKEY_lvs_support=0;;
esac

# Note: We had a version out there for a while which used
# netmask instead of cidr_netmask. So, don't remove this aliasing code!
if [ -n "$OCF_RESKEY_netmask" ] && [ -z "$OCF_RESKEY_cidr_netmask" ]; then
	OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask
	export OCF_RESKEY_cidr_netmask
fi

case $1 in
meta-data)	meta_data;;
start)		ip_validate_all && ip_start;;
stop)		ip_stop;;
status)		ip_status;;
monitor)	ip_monitor;;
validate-all)	ip_validate_all;;
usage)		usage $OCF_SUCCESS;;
*)		usage $OCF_ERR_UNIMPLEMENTED;;
esac

exit $?
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 33c5be62f..08fd8a623 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -1,503 +1,503 @@
#!/bin/sh
#
# Description: IPsrcaddr - Preferred source address modification
#
# Author: John Sutton
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: SCL Internet
#
# Based on the IPaddr script.
#
# This script manages the preferred source address associated with
# packets which originate on the localhost and are routed through the
# default route. By default, i.e. without the use of this script or
# similar, these packets will carry the IP of the primary i.e. the
# non-aliased interface. This can be a nuisance if you need to ensure
# that such packets carry the same IP irrespective of which host in
# a redundant cluster they actually originate from.
#
# It can add a preferred source address, or remove one.
#
# usage: IPsrcaddr {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg adds a preferred source address.
#
# Surprisingly, the "stop" arg removes it. :-)
#
# NOTES:
#
# 1) There must be one and not more than 1 default route! Mainly because
# I can't see why you should have more than one. And if there is more
# than one, we would have to box clever to find out which one is to be
# modified, or we would have to pass its identity as an argument.
#
# 2) The script depends on Alexey Kuznetsov's ip utility from the
# iproute aka iproute2 package.
#
# 3) No checking is done to see if the passed in IP address can
# reasonably be associated with the interface on which the default
# route exists. So unless you want to deliberately spoof your source IP,
# check it! Normally, I would expect that your haresources looks
# something like:
#
# nodename ip1 ip2 ... ipN IPsrcaddr::ipX
#
# where ipX is one of the ip1 to ipN.
#
# OCF parameters are as below:
# OCF_RESKEY_ipaddress
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Usage line and the ip(8) command prefixes used throughout the agent.
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"
CMDSHOW="${IP2UTIL} route show to exact 0.0.0.0/0"
CMDCHANGE="${IP2UTIL} route change to "
SYSTYPE=$(uname -s)
# Print the usage line to stderr.
usage() {
	{ echo $USAGE; } 1>&2
}
# meta_data: emit the agent description for the meta-data operation.
# NOTE(review): the here-document looks damaged in this copy -- as written,
# "cat <1.0" reads from a file named "1.0" and the lines up to END are not
# inside a here-document. Presumably this was "cat <<END" followed by the
# OCF metadata XML; confirm against the upstream agent before relying on it.
meta_data() {
cat <1.0
Resource script for IPsrcaddr. It manages the preferred source address
modification.
Manages the preferred source address for outgoing IP packets
The IP address.
IP address
The netmask for the interface in CIDR format. (ie, 24), or in
dotted quad notation 255.255.255.0).
Netmask
END
}
# Record all arguments (joined into one string) as the exit reason and
# terminate the agent with $OCF_ERR_GENERIC.
errorexit() {
	set -- "$*"
	ocf_exit_reason "$1"
	exit $OCF_ERR_GENERIC
}
#
# We can distinguish 3 cases: no preferred source address, a
# preferred source address exists which matches that specified, and one
# exists but doesn't match that specified. srca_read() returns 1,0,2
# respectively.
#
# The output of route show is something along the lines of:
#
# default via X.X.X.X dev eth1 src Y.Y.Y.Y
#
# where the src clause "src Y.Y.Y.Y" may or may not be present
# Basic regular expression building blocks for parsing "ip route show".
# WS is a bracket expression holding exactly a blank and a tab. It is
# built with printf because "echo -en" is not portable: a /bin/sh whose
# echo lacks -e/-n prints the option text into the value.
WS="[$(printf ' \t')]"
OCTET="[0-9]\{1,3\}"
IPADDR="\($OCTET\.\)\{3\}$OCTET"
SRCCLAUSE="src$WS$WS*\($IPADDR\)"
# Groups: \1 text before the src clause, \3 the source address,
# \5 whatever follows the src clause.
MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)"
FINDIF=$HA_BIN/findif

# findif needs that to be set
OCF_RESKEY_ip="$OCF_RESKEY_ipaddress"
export OCF_RESKEY_ip
# Classify the preferred source address of the default route against $1.
#
# Returns: 0  a source address is set and equals $1
#          1  the default route has no source address
#          2  a source address is set but differs from $1
# Sets:    DEFROUTE (raw route line), SRCIP (source address or empty),
#          ROUTE_WO_SRC (route line with the src clause removed).
# Calls errorexit when the route cannot be listed, when more than one
# default route is reported, or when there is none.
srca_read() {
  # Quoting keeps the line breaks so the routes can be counted below.
  DEFROUTE="$($CMDSHOW)" || errorexit "command '$CMDSHOW' failed"

  if ! [ 1 -eq $(echo "$DEFROUTE" | wc -l) ]; then
    errorexit "more than 1 default route exists"
  fi

  # A single empty line still counts as one line, so test for that too.
  if [ -z "$DEFROUTE" ]; then
    errorexit "no default route exists"
  fi

  # Group 3 of MATCHROUTE is the source address, if a src clause exists.
  SRCIP=$(echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p")
  # Groups 1 and 5 are everything around the src clause.
  ROUTE_WO_SRC=$(echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/")

  if [ -z "$SRCIP" ]; then
    return 1
  fi
  if [ $SRCIP = $1 ]; then
    return 0
  fi
  return 2
}
#
# Add (or change if it already exists) the preferred source address
# The exit code should conform to LSB exit codes.
#
# Make $1 the preferred source address.
# If srca_read reports that $1 is already in place, only log that fact.
# Otherwise rewrite the $NETWORK route on $INTERFACE and the default route
# so that both carry "src $1"; either command failing ends in errorexit.
# The result is left in the global rc and returned.
srca_start() {
  srca_read $1
  rc=$?

  if [ $rc != 0 ]; then
    ip route replace $NETWORK dev $INTERFACE src $1 || \
      errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed"

    $CMDCHANGE $ROUTE_WO_SRC src $1 || \
      errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed"
    rc=$?
    return $rc
  fi

  rc=$OCF_SUCCESS
  ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
  return $rc
}
#
# Remove (if it exists) the preferred source address.
# If one exists but it's not the same as the one specified, that's
# an error. Maybe that's the wrong behaviour because if this fails
# then when IPaddr releases the associated interface (if there is one)
# your default route will also get dropped ;-(
# The exit code should conform to LSB exit codes.
#
# Remove the preferred source address $1.
# Depending on what srca_read reports:
#   no source address set -> log it and exit the agent with $OCF_SUCCESS
#   a different address   -> errorexit
#   the expected address  -> rewrite the $NETWORK route and the default
#                            route without a src clause
srca_stop() {
  srca_read $1
  rc=$?

  case $rc in
    1)
      # We do not have a preferred source address for now
      ocf_log info "No preferred source address defined, nothing to stop"
      exit $OCF_SUCCESS
      ;;
    2)
      errorexit "The address you specified to stop does not match the preferred source address"
      ;;
  esac

  ip route replace $NETWORK dev $INTERFACE || \
    errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed"

  $CMDCHANGE $ROUTE_WO_SRC || \
    errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed"

  return $?
}
# Report whether $1 is the preferred source address of the default route.
# Prints a one-line state description and maps the srca_read result to an
# OCF code: 0 -> $OCF_SUCCESS, 1 -> $OCF_NOT_RUNNING, 2 -> $OCF_ERR_GENERIC.
srca_status() {
  local state

  srca_read $1
  state=$?

  if [ $state -eq 0 ]; then
    echo "OK"
    return $OCF_SUCCESS
  elif [ $state -eq 1 ]; then
    echo "No preferred source address defined"
    return $OCF_NOT_RUNNING
  elif [ $state -eq 2 ]; then
    echo "Preferred source address has incorrect value"
    return $OCF_ERR_GENERIC
  fi
}
# An unreliable IP address check that only catches _obvious_ violations...
#
# It accepts IPv4 address in dotted quad notation, for example "192.168.1.1"
#
# 100% confidence whenever it reports "negative",
# but may get false "positive" answer.
#
# Plausibility check for a dotted-quad IPv4 address.
#
# Arguments: $1 - the address to check (also stored in the global "ip")
# Returns:   0 when the address looks like a.b.c.d with every field in
#            0..255; non-zero for anything that is obviously malformed.
# Exits with $OCF_ERR_CONFIGURED when the address is in 127.0.0.0/8, since
# a loopback address cannot serve as a preferred source address.
#
# The numeric branch used to end in "true" for out-of-range fields, so an
# address such as 999.1.1.1 was accepted. It now fails. The bound is 255
# rather than 254 because 255 is legal in host addresses (e.g. 10.255.0.7).
CheckIP() {
  ip="$1"
  case $ip in
    *[!0-9.]*)  # contains a character other than digits and dots
      false;;
    .*|*.)      # starts or ends with a dot
      false;;
    *..*)       # empty field between two dots
      false;;
    *.*.*.*.*)  # four or more dots: too many fields
      false;;
    *.*.*.*)    # exactly three dots: check every field
      # Split the address on dots into $1..$4.
      local IFS=.
      set -- $ip
      if [ $1 -le 255 ] && [ $2 -le 255 ] && [ $3 -le 255 ] && [ $4 -le 255 ]; then
        if [ $1 -eq 127 ]; then
          ocf_exit_reason "IP address [$ip] is a loopback address, thus can not be preferred source address"
          exit $OCF_ERR_CONFIGURED
        fi
        true
      else
        # A field is out of range (or too large for test to parse).
        false
      fi
      ;;
    *)          # fewer than three dots
      false;;
  esac
  return $?
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
# Print the name of the interface whose address equals $BASEIP, parsing
# the output of "$IFCONFIG $IFCONFIG_A_OPT".
#
# Globals: BASEIP, IFCONFIG, IFCONFIG_A_OPT, AWK (read)
# Outputs: the interface name on stdout when a match is found
# Returns: $OCF_SUCCESS on a match, $OCF_ERR_GENERIC otherwise
#
# The scanning loop is part of a pipeline and therefore runs in a
# subshell, so a "return" inside it cannot set this function's status.
# Previously the function always fell through to $OCF_ERR_GENERIC, even
# after printing a match. The loop output is now captured and the status
# is derived from whether anything was found.
find_interface_solaris() {
  # The positional parameters hold the result, so no extra variable is needed.
  set -- "$(
    # awk inserts an empty line before every interface header except the
    # first, so each interface becomes one blank-line-terminated stanza.
    $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' |
    while read ifname linkstuff
    do
      : ifname = $ifname
      # The line after the header carries the address.
      read inet addr junk
      : inet = $inet addr = $addr
      # Skip the rest of the stanza.
      while
        read line && [ "X$line" != "X" ]
      do
        : Nothing
      done

      # This doesn't look right for a box with multiple NICs.
      # It looks like it always selects the first interface on
      # a machine. Yet, we appear to use the results for this case too...
      ifname=`echo "$ifname" | sed s'%:*$%%'`

      case $addr in
        addr:$BASEIP|$BASEIP) echo $ifname; break;;
      esac
    done
  )"

  if [ -z "$1" ]; then
    return $OCF_ERR_GENERIC
  fi
  echo $1
  return $OCF_SUCCESS
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
# Print the interface(s) that carry exactly the address $BASEIP, based on
# "$IP2UTIL -o -f inet addr show". Interfaces named ipsecN are ignored.
#
# Globals: BASEIP, IP2UTIL (read)
# Outputs: matching interface name(s) on stdout
# Returns: $OCF_SUCCESS when at least one interface matches,
#          $OCF_ERR_GENERIC otherwise
#
# The address field (4th column, with any "/prefix" removed) is compared
# for equality. The previous unanchored grep on " $BASEIP" also matched
# longer addresses with the same prefix (10.0.0.1 vs 10.0.0.10), broadcast
# and peer fields, and treated the dots as regex wildcards.
find_interface_generic() {
  local iface

  iface=`$IP2UTIL -o -f inet addr show \
    | awk -v ip="$BASEIP" '{ split($4, a, "/"); if (a[1] == ip) print $2 }' \
    | grep -v '^ipsec[0-9][0-9]*$'`

  if [ -z "$iface" ]; then
    return $OCF_ERR_GENERIC
  fi
  echo $iface
  return $OCF_SUCCESS
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
# Platform switch for the interface lookup: SunOS uses the ifconfig-based
# helper, everything else the ip-based one. The result is stored in the
# global IF and printed. The status is always $OCF_SUCCESS, so callers
# have to test the output for emptiness.
find_interface() {
  if [ "$SYSTYPE" = "SunOS" ]; then
    IF=$(find_interface_solaris $BASEIP)
  else
    IF=$(find_interface_generic $BASEIP)
  fi

  echo $IF
  return $OCF_SUCCESS;
}
# Check whether this host currently serves the address $1.
#
# Sets the global BASEIP to $1; on Darwin the global SYSTYPE is rewritten
# to "DarwinBSD" so that it takes the *BSD branch.
# Returns $OCF_SUCCESS when the address is found, $OCF_NOT_RUNNING when not.
# On Linux/SunOS the agent exits with $OCF_ERR_CONFIGURED if the address
# lives on a loopback interface (name starting with "lo").
ip_status() {
  BASEIP="$1"

  # Treat Darwin the same as the other BSD variants (matched as *BSD)
  if [ "$SYSTYPE" = "Darwin" ]; then
    SYSTYPE="${SYSTYPE}BSD"
  fi

  case "$SYSTYPE" in
    *BSD)
      if $IFCONFIG $IFCONFIG_A_OPT | grep "inet.*[: ]$BASEIP " >/dev/null 2>&1; then
        return $OCF_SUCCESS
      fi
      return $OCF_NOT_RUNNING
      ;;

    Linux|SunOS)
      IF=$(find_interface "$BASEIP")
      [ -n "$IF" ] || return $OCF_NOT_RUNNING

      case $IF in
        lo*)
          ocf_exit_reason "IP address [$BASEIP] is served by loopback, thus can not be preferred source address"
          exit $OCF_ERR_CONFIGURED
          ;;
      esac
      return $OCF_SUCCESS
      ;;

    *)
      # No lookup is done here; only a previously set IF is consulted.
      [ -n "$IF" ] || return $OCF_NOT_RUNNING
      return $OCF_SUCCESS
      ;;
  esac
}
# Validate the agent configuration.
#
# Checks, in order: OCF_RESKEY_ipaddress is set; (Linux only from here on)
# the awk and ifconfig binaries exist; $ipaddress passes CheckIP; and,
# unless this is a probe, the address is currently served by this host.
# Returns $OCF_SUCCESS, $OCF_ERR_CONFIGURED or $OCF_ERR_INSTALLED.
srca_validate_all() {
  if [ -z "$OCF_RESKEY_ipaddress" ]; then
    ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
    return $OCF_ERR_CONFIGURED
  fi

  # checks after this point are only relevant for linux.
  [ "x$SYSTYPE" = "xLinux" ] || return $OCF_SUCCESS

  check_binary $AWK
  check_binary $IFCONFIG

  # The IP address should be in good shape
  if ! CheckIP "$ipaddress"; then
    ocf_exit_reason "Invalid IP address [$ipaddress]"
    return $OCF_ERR_CONFIGURED
  fi

  if ocf_is_probe; then
    return $OCF_SUCCESS
  fi

  # We should serve this IP address of course
  if ! ip_status "$ipaddress"; then
    ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
    return $OCF_ERR_INSTALLED
  fi

  return $OCF_SUCCESS
}
# Main program: exactly one argument, the action name, is accepted.
if
( [ $# -ne 1 ] )
then
usage
exit $OCF_ERR_ARGS
fi
# These operations do not require the OCF instance parameters to be set
case $1 in
meta-data) meta_data
exit $OCF_SUCCESS
;;
usage) usage
exit $OCF_SUCCESS
;;
*)
;;
esac
# Every remaining action first validates the configuration.
ipaddress="$OCF_RESKEY_ipaddress"
srca_validate_all
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
case $1 in
# If we can't validate the configuration during a stop, the resource
# isn't configured correctly. There's no way to actually stop the
# resource in this situation because there's no way it could have even
# started. Return success here to indicate that the resource is not
# running; otherwise the stop action would fail and the node would be
# fenced just because of a misconfiguration.
stop) exit $OCF_SUCCESS;;
*) exit $rc;;
esac
fi
# Run findif; a non-zero status aborts the agent with that same status.
findif_out=`$FINDIF -C`
rc=$?
[ $rc -ne 0 ] && {
ocf_exit_reason "[$FINDIF -C] failed"
exit $rc
}
# The first field of findif's output is taken as the interface name.
INTERFACE=`echo $findif_out | awk '{print $1}'`
# First field (the destination) of the link-scope route on that interface
# that matches the address.
NETWORK=`ip route list dev $INTERFACE scope link match $ipaddress|grep -o '^[^ ]*'`
# Dispatch the action; its status becomes the exit status below.
case $1 in
start) srca_start $ipaddress
;;
stop) srca_stop $ipaddress
;;
status) srca_status $ipaddress
;;
monitor) srca_status $ipaddress
;;
validate-all) srca_validate_all
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
#
# Version 0.3 2002/11/04 17:00:00 John Sutton
# Name changed from IPsrcroute to IPsrcaddr and now reports errors
# using ha_log rather than on stderr.
#
# Version 0.2 2002/11/02 17:00:00 John Sutton
# Changed status output to "OK" to satisfy ResourceManager's
# we_own_resource() function.
#
# Version 0.1 2002/11/01 17:00:00 John Sutton
# First effort but does the job?
#
diff --git a/heartbeat/LVM b/heartbeat/LVM
index 75cd6fae8..79c279127 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -1,713 +1,713 @@
#!/bin/sh
#
#
# LVM
#
# Description: Manages an LVM volume as an HA resource
#
#
# Author: Alan Robertson
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 - 2005 International Business Machines, Inc.
#
# This code significantly inspired by the LVM resource
# in FailSafe by Lars Marowsky-Bree
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_volgrpname
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
usage() {
methods=`LVM_methods`
methods=`echo $methods | tr ' ' '|'`
cat <1.0
Resource script for LVM. It manages an Linux Volume Manager volume (LVM)
as an HA resource.
Controls the availability of an LVM Volume Group
The name of volume group.
Volume group name
If set, the volume group will be activated exclusively. This option works one of
two ways. If the volume group has the cluster attribute set, then the volume group
will be activated exclusively using clvmd across the cluster. If the cluster attribute
is not set, the volume group will be activated exclusively using a tag and the volume_list
filter. When the tag option is in use, the volume_list in lvm.con must be initialized. This
can be as simple as setting 'volume_list = []' depending on your setup.
Exclusive activation
If "exclusive" is set on a non clustered volume group, this overrides the tag to be used.
Exclusive activation tag
If set, the volume group will be activated partially even with some
physical volumes missing. It helps to set to true when using mirrored
logical volumes.
Activate VG partially when missing PVs
EOF
}
#
# methods: What methods/operations do we support?
#
LVM_methods() {
cat < /dev/null 2>&1
if [ $? -ne 0 ]; then
return
fi
##
# Now check to see if the initrd has been updated.
# If not, the machine could boot and activate the VG outside
# the control of pacemaker
##
if [ "$(find /boot -name *.img -newer /etc/lvm/lvm.conf)" = "" ]; then
ocf_log warn "LVM: Improper setup detected"
ocf_log warn "* initrd image needs to be newer than lvm.conf"
# While dangerous if not done the first time, there are many
# cases where we don't simply want to fail here. Instead,
# keep warning until the user remakes the initrd - or has
# it done for them by upgrading the kernel.
#
# initrd can be updated using this command.
# dracut -H -f /boot/initramfs-$(uname -r).img $(uname -r)
#
fi
}
##
# does this vg have our tag
##
# Compare the tags on $OCF_RESKEY_volgrpname with $OUR_TAG.
# Returns: 0  the tag string equals $OUR_TAG (the VG is ours)
#          1  the VG carries no tag at all
#          2  the VG carries some other tag string
check_tags()
{
  local owner

  owner=$(vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' ')

  # No-one owns this VG yet
  [ -n "$owner" ] || return 1

  # yep, this is ours
  [ "$OUR_TAG" != "$owner" ] || return 0

  # some other tag is set on this vg
  return 2
}
# Remove every tag from $OCF_RESKEY_volgrpname and verify none is left.
#
# Returns: $OCF_SUCCESS when the VG ends up without tags,
#          $OCF_ERR_GENERIC (after setting an exit reason) otherwise.
#
# The verification used to pass the unquoted vgs output straight to
# "[ ! -z ... ]". With more than one word left that is a test syntax
# error, and the error status was taken to mean "no tags left". The
# output is now captured and tested as a single quoted string.
strip_tags()
{
  local i remaining

  # Tags are reported comma-separated; turn them into separate words.
  for i in `vgs --noheadings -o tags $OCF_RESKEY_volgrpname | sed s/","/" "/g`; do
    ocf_log info "Stripping tag, $i"

    # LVM version 2.02.98 allows changing tags if PARTIAL
    vgchange --deltag $i $OCF_RESKEY_volgrpname
  done

  remaining=`vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' '`
  if [ -n "$remaining" ]; then
    ocf_exit_reason "Failed to remove ownership tags from $OCF_RESKEY_volgrpname"
    return $OCF_ERR_GENERIC
  fi

  return $OCF_SUCCESS
}
# Make sure $OCF_RESKEY_volgrpname carries $OUR_TAG.
# Nothing is done when check_tags says the VG is already ours; foreign
# tags are stripped first; then the tag is added with vgchange.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
set_tags()
{
  local state

  check_tags
  state=$?

  # we already own it.
  if [ $state -eq 0 ]; then
    return $OCF_SUCCESS
  fi

  # other tags are set, strip them before setting
  if [ $state -eq 2 ]; then
    strip_tags || return $OCF_ERR_GENERIC
  fi

  if ! vgchange --addtag $OUR_TAG $OCF_RESKEY_volgrpname; then
    ocf_exit_reason "Failed to add ownership tag to $OCF_RESKEY_volgrpname"
    return $OCF_ERR_GENERIC
  fi

  ocf_log info "New tag \"$OUR_TAG\" added to $OCF_RESKEY_volgrpname"
  return $OCF_SUCCESS
}
#
# Return LVM status (silently)
#
# Determine whether volume group $1 is active.
#
# Arguments: $1 - volume group name
#            $2 - optional; when non-empty the state is also printed to
#                 stdout and the "not available" message is logged at
#                 debug level
# Globals:   loglevel (written), OP_METHOD, OUR_TAG, OCF_RESKEY_volgrpname
# Returns:   0 when /dev/$1 exists and is non-empty (and, in tag mode,
#            the tag checks pass), $OCF_NOT_RUNNING when it is not
#            available, $OCF_ERR_GENERIC when a tag check fails.
LVM_status() {
  local rc=1
  loglevel="debug"

  # Without $2 the severity depends on the operation being run.
  if [ "X${2}" = "X" ]; then
    loglevel="err"
    if ocf_is_probe; then
      loglevel="warn"
    elif [ ${OP_METHOD} = "stop" ]; then
      loglevel="info"
    fi
  fi

  # An active VG shows up as a non-empty directory below /dev.
  if [ -d /dev/$1 ]; then
    test "`cd /dev/$1 && ls`" != ""
    rc=$?
    [ $rc -eq 0 ] || \
      ocf_exit_reason "VG $1 with no logical volumes is not supported by this RA!"
  fi

  if [ $rc -ne 0 ]; then
    ocf_log $loglevel "LVM Volume $1 is not available (stopped)"
    rc=$OCF_NOT_RUNNING
  elif [ "$(get_vg_mode)" = 1 ]; then
    # exclusive with tagging.
    # If vg is running, make sure the correct tag is present. Otherwise we
    # can not guarantee exclusive activation.
    if ! check_tags; then
      ocf_exit_reason "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\""
      rc=$OCF_ERR_GENERIC
    fi

    # make sure the environment for tags activation is still valid
    if ! verify_tags_environment; then
      rc=$OCF_ERR_GENERIC
    fi

    # let the user know if their initrd is older than lvm.conf.
    check_initrd_warning
  fi

  # status call return
  if [ "X${2}" = "X" ]; then
    return $rc
  fi

  # Report on LVM volume status to stdout...
  if [ $rc -eq 0 ]; then
    echo "Volume $1 is available (running)"
  else
    echo "Volume $1 is not available (stopped)"
  fi
  return $rc
}
# Print the vgchange activation options for the current VG mode.
# The flag depends on get_vg_mode: 0 -> -aly, 1 -> -ay restricted to
# volumes tagged @$OUR_TAG through --config, 2 -> -aey.
# "--partial" is appended when OCF_RESKEY_partial_activation is true and
# "--monitor y" when running as a clone.
get_activate_options()
{
  local options

  case $(get_vg_mode) in
    0) options="-aly";;
    1) options="-ay --config activation{volume_list=[\"@${OUR_TAG}\"]}";;
    2) options="-aey";;
    *) options="-a";;
  esac

  if ocf_is_true "$OCF_RESKEY_partial_activation" ; then
    options="${options} --partial"
  fi

  # for clones (clustered volume groups), we'll also have to force
  # monitoring, even if disabled in lvm.conf.
  if ocf_is_clone; then
    options="$options --monitor y"
  fi

  echo $options
}
##
# Attempt to deactivate vg cluster wide and then start the vg exclusively
##
# Second attempt at activation: deactivate every logical volume of
# $OCF_RESKEY_volgrpname with "lvchange -an", then run vgchange again with
# the regular activation options.
# Returns $OCF_ERR_GENERIC as soon as an LV is found open (attr matching
# ????ao*) or cannot be deactivated; otherwise the status of the final
# vgchange run.
retry_exclusive_start()
{
  local vgchange_options lv_name lv_attr

  vgchange_options="$(get_activate_options)"

  # lvs prints "name attr" pairs; walk them two words at a time.
  set -- $(lvs -o name,attr --noheadings $OCF_RESKEY_volgrpname 2> /dev/null)
  until [ $# -lt 2 ]; do
    lv_name=$1
    lv_attr=$2
    shift 2

    case $lv_attr in
      ????ao*)
        # open LVs cannot be deactivated.
        return $OCF_ERR_GENERIC;;
    esac

    if ! lvchange -an $OCF_RESKEY_volgrpname/$lv_name; then
      ocf_exit_reason "Unable to perform required deactivation of $OCF_RESKEY_volgrpname/$lv_name before starting"
      return $OCF_ERR_GENERIC
    fi
  done

  ocf_run vgchange $vgchange_options $OCF_RESKEY_volgrpname
}
#
# Enable LVM volume
#
# Activate volume group $1.
# Steps: rescan with vgscan, claim the ownership tag when running in tag
# mode, activate with vgchange and finally confirm through LVM_status.
# Returns $OCF_SUCCESS, $OCF_ERR_GENERIC (tagging or activation failed) or
# $OCF_NOT_RUNNING (activation reported success but the VG is not up).
LVM_start() {
local vgchange_options="$(get_activate_options)"
local vg=$1
# Set to 1 in VG mode 2; enables the deactivate-and-retry path below.
local clvmd=0
# TODO: This MUST run vgimport as well
ocf_log info "Activating volume group $vg"
# With LVM major version 1, vgscan is given the VG name.
if [ "$LVM_MAJOR" -eq "1" ]; then
ocf_run vgscan $vg
else
ocf_run vgscan
fi
case $(get_vg_mode) in
2)
clvmd=1
;;
1)
if ! set_tags; then
return $OCF_ERR_GENERIC
fi
;;
*)
: ;;
esac
if ! ocf_run vgchange $vgchange_options $vg; then
# Only VG mode 2 gets a second attempt.
if [ $clvmd -eq 0 ]; then
return $OCF_ERR_GENERIC
fi
# Failure to exclusively activate cluster vg.:
# This could be caused by a remotely active LV, Attempt
# to disable volume group cluster wide and try again.
# Allow for some settling
sleep 5
if ! retry_exclusive_start; then
return $OCF_ERR_GENERIC
fi
fi
# Activation is only trusted once the status check agrees.
if LVM_status $vg; then
: OK Volume $vg activated just fine!
return $OCF_SUCCESS
else
ocf_exit_reason "LVM: $vg did not activate correctly"
return $OCF_NOT_RUNNING
fi
}
#
# Disable the LVM volume
#
# Deactivate volume group $1.
# A VG unknown to vgs counts as already stopped. Otherwise deactivation is
# attempted up to 10 times, one second apart; in tag mode the ownership
# tags are removed after a successful stop.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC (or the strip_tags status).
LVM_stop() {
local res=$OCF_ERR_GENERIC
# -aln is the default; tag mode (VG mode 1) uses -an instead.
local vgchange_options="-aln"
local vg=$1
if ! vgs $vg > /dev/null 2>&1; then
ocf_log info "Volume group $vg not found"
return $OCF_SUCCESS
fi
ocf_log info "Deactivating volume group $vg"
case $(get_vg_mode) in
1) vgchange_options="-an" ;;
esac
for i in $(seq 10)
do
ocf_run vgchange $vgchange_options $vg
res=$?
# A VG that still reports as active overrides the vgchange status.
if LVM_status $vg; then
ocf_exit_reason "LVM: $vg did not stop correctly"
res=1
fi
if [ $res -eq 0 ]; then
break
fi
res=$OCF_ERR_GENERIC
ocf_log warn "$vg still Active"
ocf_log info "Retry deactivating volume group $vg"
sleep 1
# Wait for udev to settle before the next attempt, if udevadm is available.
which udevadm > /dev/null 2>&1 && udevadm settle --timeout=5
done
case $(get_vg_mode) in
1)
if [ $res -eq 0 ]; then
strip_tags
res=$?
fi
;;
esac
return $res
}
#
# Check whether the OCF instance parameters are valid
#
# Validate the environment and the configured volume group $VOLUME.
# Hard failures terminate the agent with exit; the function only returns
# $OCF_SUCCESS. Checks: awk present, lvmetad warning, vgck consistency
# (tolerating missing PVs under the conditions described below), vgdisplay
# and, for exclusive activation, the tag or clvmd prerequisites.
LVM_validate_all() {
check_binary $AWK
##
# lvmetad is a daemon that caches lvm metadata to improve the
# performance of LVM commands. This daemon should never be used when
# volume groups exist that are being managed by the cluster. The lvmetad
# daemon introduces a response lag, where certain LVM commands look like
# they have completed (like vg activation) when in fact the command
# is still in progress by the lvmetad. This can cause reliability issues
# when managing volume groups in the cluster. For example, if you have a
# volume group that is a dependency for another application, it is possible
# the cluster will think the volume group is activated and attempt to start
# the application before the volume group is really accessible... lvmetad is bad.
##
lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1
if [ $? -eq 0 ]; then
# for now warn users that lvmetad is enabled and that they should disable it. In the
# future we may want to consider refusing to start, or killing the lvmetad daemon.
ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process"
fi
##
# Off-the-shelf tests...
##
VGOUT=`vgck ${VOLUME} 2>&1`
if [ $? -ne 0 ]; then
# Inconsistency might be due to missing physical volumes, which doesn't
# automatically mean we should fail. If partial_activation=true then
# we should let start try to handle it, or if no PVs are listed as
# "unknown device" then another node may have marked a device missing
# where we have access to all of them and can start without issue.
if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then
if vgs -o pv_name --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep -E "unknown device|Couldn't find device|Device mismatch detected" > /dev/null 2>&1; then
if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
# We are missing devices and cannot activate partially
ocf_exit_reason "Volume group [$VOLUME] has devices missing. Consider partial_activation=true to attempt to activate partially"
exit $OCF_ERR_GENERIC
else
# We are missing devices but are allowed to activate partially.
# Assume that caused the vgck failure and carry on
ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
fi
fi
# else the vg is partial but all devices are accounted for, so another
# node must have marked the device missing. Proceed.
else
# vgck failure was for something other than missing devices
ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
exit $OCF_ERR_GENERIC
fi
fi
##
# Does the Volume Group exist?
##
# With LVM major version 1, vgdisplay is run without -v.
if [ "$LVM_MAJOR" = "1" ]; then
VGOUT=`vgdisplay ${VOLUME} 2>&1`
else
VGOUT=`vgdisplay -v ${VOLUME} 2>&1`
fi
if [ $? -ne 0 ]; then
ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
exit $OCF_ERR_GENERIC
fi
##
# If exclusive activation is not enabled, then
# further checking of proper setup is not necessary
##
if ! ocf_is_true "$OCF_RESKEY_exclusive"; then
return $OCF_SUCCESS;
fi
##
# Having cloned lvm resources with exclusive vg activation makes no sense at all.
##
if ocf_is_clone; then
ocf_exit_reason "cloned lvm resources can not be activated exclusively"
exit $OCF_ERR_CONFIGURED
fi
##
# Make sure the cluster attribute is set and clvmd is up when exclusive
# activation is enabled. Otherwise we can't exclusively activate the volume group.
##
case $(get_vg_mode) in
1) # exclusive activation using tags
if ! verify_tags_environment; then
exit $OCF_ERR_GENERIC
fi
;;
2) # exclusive activation with clvmd
##
# verify that clvmd is running
##
if ! ps -C clvmd > /dev/null 2>&1; then
ocf_exit_reason "$OCF_RESKEY_volgrpname has the cluster attribute set, but 'clvmd' is not running"
exit $OCF_ERR_GENERIC
fi
;;
*)
: ;;
esac
return $OCF_SUCCESS
}
#
# 'main' starts here...
#
# Exactly one argument, the action name, is accepted.
if
[ $# -ne 1 ]
then
usage
exit $OCF_ERR_ARGS
fi
# Actions that need neither the volume group name nor the LVM tools.
case $1 in
meta-data) meta_data
exit $OCF_SUCCESS;;
methods) LVM_methods
exit $?;;
usage) usage
exit $OCF_SUCCESS;;
*) ;;
esac
# Every other action needs the volume group name.
if
[ -z "$OCF_RESKEY_volgrpname" ]
then
ocf_exit_reason "You must identify the volume group name!"
exit $OCF_ERR_CONFIGURED
fi
# Get the LVM version number, for this to work we assume(thanks to panjiam):
#
# LVM1 outputs like this
#
# # vgchange --version
# vgchange: Logical Volume Manager 1.0.3
# Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10)
#
# LVM2 and higher versions output in this format
#
# # vgchange --version
# LVM version: 2.00.15 (2004-04-19)
# Library version: 1.00.09-ioctl (2004-03-31)
# Driver version: 4.1.0
#
# The awk program prints the version field of whichever line matches
# first and then stops reading.
LVM_VERSION=`vgchange --version 2>&1 | \
$AWK '/Logical Volume Manager/ {print $5"\n"; exit; }
/LVM version:/ {printf $3"\n"; exit;}'`
rc=$?
if
( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] )
then
ocf_exit_reason "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?"
exit $OCF_ERR_INSTALLED
fi
# Major version: everything before the first dot.
LVM_MAJOR="${LVM_VERSION%%.*}"
VOLUME=$OCF_RESKEY_volgrpname
# Remembered so that LVM_status can pick a log level for "stop".
OP_METHOD=$1
# An explicitly configured tag replaces whatever OUR_TAG held before.
if [ -n "$OCF_RESKEY_tag" ]; then
OUR_TAG=$OCF_RESKEY_tag
fi
# What kind of method was invoked?
case "$1" in
start)
LVM_validate_all
LVM_start $VOLUME
exit $?;;
stop) LVM_stop $VOLUME
exit $?;;
status) LVM_status $VOLUME $1
exit $?;;
monitor) LVM_status $VOLUME
exit $?;;
validate-all) LVM_validate_all
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/LinuxSCSI b/heartbeat/LinuxSCSI
index ce033c00b..89fed6b74 100755
--- a/heartbeat/LinuxSCSI
+++ b/heartbeat/LinuxSCSI
@@ -1,314 +1,314 @@
#!/bin/sh
#
#
# LinuxSCSI
#
# Description: Enables/Disables SCSI devices to protect them from being
# used by mistake
#
#
# Author: Alan Robertson
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 - 2005 IBM
#
# CAVEATS: See the usage message for some important warnings
#
# usage: ./LinuxSCSI (start|stop|status|monitor|meta-data|validate-all|methods)
#
# OCF parameters are as below:
# OCF_RESKEY_scsi
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 LinuxSCSI:0:0:11
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Pattern substituted by scsi_status for a channel/id/lun value of 0:
# it matches " 0" as well as "00".
zeropat="[ 0]0"
# Kernel interface file: scsi_status greps it for attached devices and
# scsi_stop appends commands to it.
PROCSCSI=/proc/scsi/scsi
usage() {
cat <1.0
Deprecation warning: This agent makes use of Linux SCSI hot-plug
functionality which has been superseded by SCSI reservations. It is
deprecated and may be removed from a future release. See the
scsi2reservation and sfex agents for alternatives. --
This is a resource agent for LinuxSCSI. It manages the availability of a
SCSI device from the point of view of the linux kernel. It make Linux
believe the device has gone away, and it can make it come back again.
Enables and disables SCSI devices through the
kernel SCSI hot-plug subsystem (deprecated)
The SCSI instance to be managed.
SCSI instance
If set to true, suppresses the deprecation warning for this agent.
Suppress deprecation warning
EOF
}
scsi_methods() {
cat <>$PROCSCSI
echo "scsi add-single-device $host $channel $target $lun" >>$PROCSCSI
if
scsi_status "$1"
then
return $OCF_SUCCESS
else
ocf_log err "SCSI device $1 not active!"
return $OCF_ERR_GENERIC
fi
}
#
# stop: Disable the given SCSI device in the kernel
#
# Detach the SCSI device described by $1.
# Appends a "remove-single-device" command to $PROCSCSI and then checks
# with scsi_status that the device is gone.
# Returns $OCF_SUCCESS when it is no longer listed, else $OCF_ERR_GENERIC.
scsi_stop() {
  parseinst "$1"
  # [ $target = error ] && exit 1

  echo "scsi remove-single-device $host $channel $target $lun" >>$PROCSCSI

  if ! scsi_status "$1"; then
    return $OCF_SUCCESS
  fi

  ocf_log err "SCSI device $1 still active!"
  return $OCF_ERR_GENERIC
}
#
# status: is the given device now available?
#
# Tell whether the SCSI device described by $1 is listed in $PROCSCSI.
# parseinst fills in host/channel/target/lun. A value of 0 is replaced by
# $zeropat before the search pattern (global greppat) is assembled.
# Returns $OCF_SUCCESS when a matching line exists, else $OCF_NOT_RUNNING.
scsi_status() {
  parseinst "$1"
  # [ $target = error ] && exit 1

  if [ $channel -eq 0 ]; then
    channel=$zeropat
  fi
  if [ $target -eq 0 ]; then
    target=$zeropat
  fi
  if [ $lun -eq 0 ]; then
    lun=$zeropat
  fi

  greppat="Host: *scsi$host *Channel: *$channel *Id: *$target *Lun: *$lun"

  if grep -i "$greppat" $PROCSCSI >/dev/null; then
    return $OCF_SUCCESS
  fi
  return $OCF_NOT_RUNNING
}
#
# validate_all: Check the OCF instance parameters
#
# Validate the configuration by running $instance through parseinst.
# Always returns $OCF_SUCCESS; any rejection has to come from parseinst.
scsi_validate_all() {
  parseinst $instance; return $OCF_SUCCESS
}
# Exactly one argument, the action name, is accepted.
if
( [ $# -ne 1 ] )
then
ocf_log err "Parameter number error."
usage
exit $OCF_ERR_GENERIC
fi
# Disabled legacy shortcut for "methods" without a configured SCSI id:
#if
# [ -z "$OCF_RESKEY_scsi" ] && [ "X$1" = "Xmethods" ]
#then
# scsi_methods
# exit #?
#fi
# Actions that work without OCF_RESKEY_scsi being set.
case $1 in
methods) scsi_methods
exit $OCF_SUCCESS
;;
meta-data) meta_data
exit $OCF_SUCCESS
;;
usage) usage
exit $OCF_SUCCESS
;;
*) ;;
esac
# Be obnoxious, log deprecation warning on every invocation (unless
# suppressed by resource configuration).
ocf_deprecated
# Every remaining action needs the SCSI instance.
if
[ -z "$OCF_RESKEY_scsi" ]
then
ocf_log err "You have to set a valid scsi id at least!"
# usage
exit $OCF_ERR_GENERIC
fi
instance=$OCF_RESKEY_scsi
# Dispatch the requested action; unless a branch exits by itself, the
# status of the action function becomes the exit status of the agent.
case $1 in
  start)
    scsi_start $instance
    ;;
  stop)
    scsi_stop $instance
    ;;
  status|monitor)
    if scsi_status $instance; then
      ocf_log info "SCSI device $instance is running"
      # This is top-level code, not a function body, so it has to exit.
      # "return" here is an error in bash and made a running device
      # report a failure.
      exit $OCF_SUCCESS
    else
      ocf_log info "SCSI device $instance is stopped"
      exit $OCF_NOT_RUNNING
    fi
    ;;
  validate-all)
    scsi_validate_all
    ;;
  *)
    usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
exit $?
diff --git a/heartbeat/Raid1 b/heartbeat/Raid1
index 7cf658b59..bef2606cf 100755
--- a/heartbeat/Raid1
+++ b/heartbeat/Raid1
@@ -1,556 +1,556 @@
#!/bin/sh
#
#
# License: GNU General Public License (GPL)
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# Raid1
# Description: Manages a Linux software RAID device on a shared storage medium.
# Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
# Original Release: 25 Oct 2000
# RAID patches: http://people.redhat.com/mingo/raid-patches/
# Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3
# Sympathetic Ear: mailto:linux-raid@vger.kernel.org
#
# usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
#
#
# EXAMPLE config file /etc/raidtab.md0
# This file must exist on both machines!
#
# raiddev /dev/md0
# raid-level 1
# nr-raid-disks 2
# chunk-size 64k
# persistent-superblock 1
# #nr-spare-disks 0
# device /dev/sda1
# raid-disk 0
# device /dev/sdb1
# raid-disk 1
#
# EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf)
#
# DEVICE /dev/sdb1 /dev/sdc1
# ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the list of supported actions to stdout.
usage() {
  echo "usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}"
}
meta_data() {
cat <1.0
This resource agent manages Linux software RAID (MD) devices on
a shared storage medium. It uses mdadm(8) to start, stop, and
monitor the MD devices. Raidtools are supported, but deprecated.
See https://raid.wiki.kernel.org/index.php/Linux_Raid for more
information.
Manages Linux software RAID (MD) devices on shared storage
The RAID configuration file, e.g. /etc/mdadm.conf.
RAID config file
One or more block devices to use, space separated. Alternatively,
set to "auto" to manage all devices specified in raidconf.
block device
The value for the homehost directive; this is an mdadm feature to
protect RAIDs against being activated by accident. It is recommended to
create RAIDs managed by the cluster with "homehost" set to a special
value, so they are not accidentially auto-assembled by nodes not
supposed to own them.
Homehost for mdadm
If processes or kernel threads are using the array, it cannot be
stopped. We will try to stop processes, first by sending TERM and
then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL.
The lsof(8) program is required to get the list of array users.
Of course, the kernel threads cannot be stopped this way.
If the processes are critical for data integrity, then set this
parameter to false. Note that in that case the stop operation
will fail and the node will be fenced.
force stop processes using the array
Wait until udevd creates a device in the start operation. On a
normally loaded host this should happen quickly, but you may be
unlucky. If you are not using udev set this to "no".
udev
Activating the same md RAID array on multiple nodes at the same time
will result in data corruption and thus is forbidden by default.
A safe example could be an array that is only named identically across
all nodes, but is in fact distinct.
Only set this to "true" if you know what you are doing!
force ability to run as a clone
END
}
# Run "udevadm settle" with the given arguments, but only when
# $WAIT_FOR_UDEV is true. Returns 0 when waiting is disabled, otherwise
# the udevadm status.
udev_settle() {
  ocf_is_true $WAIT_FOR_UDEV || return 0
  udevadm settle $*
}
# Print the second field of every line in $RAIDCONF that starts with
# "ARRAY", one per line. Exits the agent with $OCF_ERR_GENERIC when the
# configuration file does not exist.
list_conf_arrays() {
  if ! test -f $RAIDCONF; then
    ocf_exit_reason "$RAIDCONF gone missing!"
    exit $OCF_ERR_GENERIC
  fi

  grep ^ARRAY $RAIDCONF \
    | awk '{print $2}'
}
# Call function $1 once for every device in $RAIDDEVS.
# With $2 = "all" every device is visited regardless of failures;
# otherwise iteration stops at the first non-zero result.
# Returns the bitwise OR of all statuses collected so far.
forall() {
  local func=$1 checkall=$2
  local mddev
  local rc=0

  for mddev in $RAIDDEVS; do
    $func $mddev
    rc=$((rc | $?))
    if [ "$checkall" != all ] && [ $rc -ne 0 ]; then
      return $rc
    fi
  done

  return $rc
}
# Succeeds only when raid1_monitor_one reports $OCF_NOT_RUNNING for every
# device in $RAIDDEVS; checking stops at the first device in another state.
are_arrays_stopped() {
  local rc mddev

  for mddev in $RAIDDEVS; do
    raid1_monitor_one $mddev
    rc=$?
    if [ $rc -ne $OCF_NOT_RUNNING ]; then
      break
    fi
  done

  [ $rc -eq $OCF_NOT_RUNNING ]
}
# Assemble the array $1 from $RAIDCONF (passing $MDADM_HOMEHOST along),
# then wait through udev_settle until the device node exists.
# Returns the status of udev_settle.
md_assemble() {
  local array=$1

  $MDADM --assemble $array --config=$RAIDCONF $MDADM_HOMEHOST
  udev_settle --exit-if-exists=$array
}
#
# START: Start up the RAID device
#
# Bring the configured arrays online.
# An already running array is a success; any state other than "not
# running" is treated as broken. Otherwise the arrays are started, either
# with raidstart or by assembling each one, and the result is confirmed
# with a second raid1_monitor call.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
raid1_start() {
  local rc

  raid1_monitor
  rc=$?

  # md already online, nothing to do.
  if [ $rc -eq $OCF_SUCCESS ]; then
    return $OCF_SUCCESS
  fi

  # If the array is in a broken state, this agent doesn't
  # know how to repair that.
  if [ $rc -ne $OCF_NOT_RUNNING ]; then
    ocf_exit_reason "$RAIDDEVS in a broken state; cannot start (rc=$rc)"
    return $OCF_ERR_GENERIC
  fi

  if [ $HAVE_RAIDTOOLS = "true" ]; then
    # Run raidstart to start up the RAID array
    $RAIDSTART --configfile $RAIDCONF $MDDEV
  else
    forall md_assemble all
  fi

  raid1_monitor
  rc=$?
  if [ $rc -eq $OCF_SUCCESS ]; then
    return $OCF_SUCCESS
  fi

  ocf_exit_reason "Couldn't start RAID for $RAIDDEVS"
  return $OCF_ERR_GENERIC
}
#
# STOP: stop the RAID device
#
# Switch the array $1 to read-only with mdadm.
# A failure is recorded as the exit reason; the mdadm status is returned.
mark_readonly() {
  local array=$1
  local rc

  ocf_log info "Attempting to mark array $array readonly"
  $MDADM --readonly $array --config=$RAIDCONF
  rc=$?

  [ $rc -eq 0 ] || ocf_exit_reason "Failed to set $array readonly (rc=$rc)"
  return $rc
}
# Stop an array whose device file $1 is missing.
# A temporary block device node (major 9, minor taken from the digits in
# $1) is created under $HA_RSCTMP, handed to "mdadm --stop" and removed
# again. Returns 1 when no minor number can be derived, otherwise the
# mdadm status.
mknod_raid1_stop() {
  local rc n tmp_block_file

  # Drop the leading non-digit part; what remains should be the minor.
  n=$(echo $1 | sed 's/[^0-9]*//')
  if ! ocf_is_decimal "$n"; then
    ocf_log warn "could not get the minor device number from $1"
    return 1
  fi

  tmp_block_file="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-$(basename $1)"
  rm -f $tmp_block_file

  ocf_log info "block device file $1 missing, creating one in order to stop the array"
  mknod $tmp_block_file b 9 $n

  $MDADM --stop $tmp_block_file --config=$RAIDCONF --wait-clean -W
  rc=$?

  rm -f $tmp_block_file
  return $rc
}
# Stop the single array $1.
# If $1 is a block device it is stopped directly and the mdadm status is
# returned. Otherwise mdadm is tried with the base name and, should that
# fail, mknod_raid1_stop creates a temporary device node for the job.
raid1_stop_one() {
  ocf_log info "Stopping array $1"

  if [ -b "$1" ]; then
    $MDADM --stop $1 --config=$RAIDCONF --wait-clean -W
    return
  fi

  # newer mdadm releases can stop arrays when given the
  # basename; try that first
  if $MDADM --stop $(basename $1) --config=$RAIDCONF --wait-clean -W; then
    return
  fi

  # otherwise create a block device file
  mknod_raid1_stop $1
}
# Print the distinct PIDs (second lsof column) of processes that hold $1
# open, sorted, one per line. Every lsof line is also logged as a warning.
get_users_pids() {
  local array=$1
  local listing entry

  ocf_log debug "running lsof to list $array users..."

  # Skip the lsof header line.
  listing=$(lsof $array | tail -n +2)

  echo "$listing" | awk '{print $2}' | sort -u

  echo "$listing" | while read entry; do
    ocf_log warn "$entry"
  done
}
# Terminate the processes that hold any of the arrays open.
# Returns 2 when lsof found no such process, otherwise the status of
# ocf_stop_processes (called with TERM and $PROC_CLEANUP_TIME).
stop_raid_users() {
  local pids

  pids=$(forall get_users_pids all | sort -u)

  if [ -z "$pids" ]; then
    ocf_log warn "lsof reported no users holding arrays"
    return 2
  fi

  ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids
}
# Stop all arrays: through raidstop when raidtools are in use, otherwise
# one by one with raid1_stop_one. Returns the status of whichever ran.
stop_arrays() {
  if [ $HAVE_RAIDTOOLS = "true" ]; then
    $RAIDSTOP --configfile $RAIDCONF $MDDEV
    return
  fi

  forall raid1_stop_one all
}
# Log who is using each of the given devices: the lsof listing (when lsof
# is installed) and the kernel's holders directory below /sys/block.
#
# Arguments: device paths or bare device names
#
# The holders lookup uses only the last path component. The arguments are
# device paths such as /dev/md0, so the previous use of the whole path
# produced /sys/block//dev/md0/holders, which never exists.
showusers() {
  local disk sysname

  for disk; do
    if have_binary lsof; then
      ocf_log info "running lsof to list $disk users..."
      ocf_run -warn lsof $disk
    fi

    # /sys/block is indexed by device name, not by path.
    sysname=${disk##*/}
    if [ -d /sys/block/$sysname/holders ]; then
      ocf_log info "ls -l /sys/block/$sysname/holders"
      ocf_run -warn ls -l /sys/block/$sysname/holders
    fi
  done
}
raid1_stop() {
	# Stop action. Returns $OCF_SUCCESS once are_arrays_stopped reports
	# success, $OCF_ERR_GENERIC otherwise.
	local rc=0

	# Nothing to do when the arrays are already cleanly stopped.
	are_arrays_stopped && return $OCF_SUCCESS

	# Turn off raid. When the plain stop fails and force_stop is on,
	# terminate the processes holding the arrays and try once more.
	if ! stop_arrays; then
		rc=1
		if ! ocf_is_true $FORCESTOP; then
			:
		elif ! have_binary lsof; then
			ocf_log warn "install lsof(8) to list users holding the disk"
		else
			stop_raid_users
			# 2 means nobody was found to terminate, so a second
			# stop attempt would be pointless
			if [ $? -ne 2 ]; then
				stop_arrays
				rc=$?
			fi
		fi
	fi

	if [ $rc -ne 0 ]; then
		ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)"
		showusers $RAIDDEVS
		[ $HAVE_RAIDTOOLS != "true" ] && forall mark_readonly all
		return $OCF_ERR_GENERIC
	fi

	are_arrays_stopped && return $OCF_SUCCESS

	ocf_exit_reason "RAID $RAIDDEVS still active after stop command!"
	return $OCF_ERR_GENERIC
}
#
# monitor: a less noisy status
#
raid1_monitor_one() {
	# Health check for a single md array.
	#   $1 - array device (a /dev/... path)
	# Returns $OCF_NOT_RUNNING when the array is absent, $OCF_ERR_GENERIC
	# on an mdadm failure or a read error, $OCF_SUCCESS otherwise.
	local array=$1
	local mdname
	local detail_rc
	local readd=0
	local blksize

	mdname=$(echo $array | sed 's,/dev/,,')

	# The device node only has to exist for arrays that are supposed to
	# be running, so this check is skipped during the stop operation.
	if [ "$__OCF_ACTION" != "stop" ] && [ ! -b $array ]; then
		ocf_log info "$array is not a block device"
		return $OCF_NOT_RUNNING
	fi
	grep -e "^$mdname[ \t:]" /proc/mdstat >/dev/null || {
		ocf_log info "$mdname not found in /proc/mdstat"
		return $OCF_NOT_RUNNING
	}

	if [ $HAVE_RAIDTOOLS != "true" ]; then
		$MDADM --detail --test $array >/dev/null 2>&1
		detail_rc=$?
		case $detail_rc in
		0)
			;;
		1)
			ocf_log warn "$array has at least one failed device."
			readd=1
			;;
		2)
			ocf_exit_reason "$array has failed."
			return $OCF_ERR_GENERIC
			;;
		4)
			ocf_exit_reason "mdadm failed on $array."
			return $OCF_ERR_GENERIC
			;;
		*)
			ocf_exit_reason "mdadm returned an unknown result ($detail_rc)."
			return $OCF_ERR_GENERIC
			;;
		esac
	fi

	# Device recovery is attempted only from a recurring monitor with a
	# check level above 0, and only after mdadm reported a failed device.
	if [ "$__OCF_ACTION" = "monitor" ] &&
	   [ "$OCF_RESKEY_CRM_meta_interval" != 0 ] &&
	   [ $readd -eq 1 ] && [ $OCF_CHECK_LEVEL -gt 0 ]; then
		ocf_log info "Attempting recovery sequence to re-add devices on $array:"
		$MDADM $array --fail detached
		$MDADM $array --remove failed
		$MDADM $array --re-add missing
		# TODO: At this stage, there's nothing to actually do
		# here. Either this worked or it did not.
	fi

	# Read one block straight from the device to catch I/O errors.
	blksize=$( { blockdev --getpbsz $array || stat -c "%o" $array; } 2>/dev/null )
	if [ -z "$blksize" ]; then
		ocf_log warn "both blockdev and stat could not get the block size (will use 4k)"
		blksize=4096 # try with 4k
	fi
	if dd if=$array count=1 bs=$blksize of=/dev/null \
		iflag=direct >/dev/null 2>&1; then
		return $OCF_SUCCESS
	fi
	ocf_exit_reason "$array: I/O error on read"
	return $OCF_ERR_GENERIC
}
# Monitor action: hands raid1_monitor_one to forall and returns forall's
# status. (forall is defined outside this section; presumably it applies
# the function to every configured array -- confirm there.)
raid1_monitor() {
forall raid1_monitor_one
}
#
# STATUS: is the raid device online or offline?
#
raid1_status() {
	# Status action: print "running" or "stopped" according to the
	# result of raid1_monitor, and return that result unchanged.
	local state
	local word="running"

	raid1_monitor
	state=$?
	[ $state -ne $OCF_SUCCESS ] && word="stopped"
	echo "$word"
	return $state
}
# validate-all action: performs no checks of its own and always returns
# $OCF_SUCCESS.
raid1_validate_all() {
return $OCF_SUCCESS
}
# Handed to ocf_stop_processes by stop_raid_users.
PROC_CLEANUP_TIME=3

# Exactly one argument, the action name, is expected.
if [ $# -ne 1 ]; then
	usage
	exit $OCF_ERR_ARGS
fi

# meta-data and usage are answered before any instance parameter is read.
if [ "$1" = "meta-data" ]; then
	meta_data
	exit $OCF_SUCCESS
elif [ "$1" = "usage" ]; then
	usage
	exit $OCF_SUCCESS
fi
# Instance parameters; force_stop and udev fall back to 1 when unset or
# empty.
RAIDCONF="$OCF_RESKEY_raidconf"
MDDEV="$OCF_RESKEY_raiddev"
FORCESTOP="${OCF_RESKEY_force_stop:-1}"
WAIT_FOR_UDEV="${OCF_RESKEY_udev:-1}"

# A readable configuration file and a device are mandatory.
[ -n "$RAIDCONF" ] || {
	ocf_exit_reason "Please set OCF_RESKEY_raidconf!"
	exit $OCF_ERR_CONFIGURED
}
[ -r "$RAIDCONF" ] || {
	ocf_exit_reason "Configuration file [$RAIDCONF] does not exist, or can not be opend!"
	exit $OCF_ERR_INSTALLED
}
[ -n "$MDDEV" ] || {
	ocf_exit_reason "Please set OCF_RESKEY_raiddev to the Raid device you want to control!"
	exit $OCF_ERR_CONFIGURED
}

# Running as a clone is refused unless force_clones is set.
if ocf_is_clone; then
	if ! ocf_is_true "$OCF_RESKEY_force_clones"; then
		ocf_exit_reason "md RAID arrays are NOT safe to run as a clone!"
		ocf_log err "Please read the comment on the force_clones parameter."
		exit $OCF_ERR_CONFIGURED
	fi
fi

# Without udevadm the udev option is switched off; this is only
# announced during start.
if ocf_is_true $WAIT_FOR_UDEV && ! have_binary udevadm; then
	case "$__OCF_ACTION" in
	start)
		ocf_log warn "either install udevadm or set udev to false"
		ocf_log info "setting udev to false!"
		;;
	esac
	WAIT_FOR_UDEV=0
fi
ocf_is_true $WAIT_FOR_UDEV || export MDADM_NO_UDEV=1

if ocf_is_true $FORCESTOP && ! have_binary lsof; then
	ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..."
fi

# mdadm is used when present; otherwise raidtools must be installed.
HAVE_RAIDTOOLS=false
if have_binary $MDADM >/dev/null 2>&1; then
	MDADM_HOMEHOST=${OCF_RESKEY_homehost:+--homehost=${OCF_RESKEY_homehost}}
else
	check_binary $RAIDSTART
	HAVE_RAIDTOOLS=true
fi

# raidtools handles neither "auto" nor more than one device.
if [ $HAVE_RAIDTOOLS = true ]; then
	if [ "$MDDEV" = "auto" ]; then
		ocf_exit_reason "autoconf supported only with mdadm!"
		exit $OCF_ERR_INSTALLED
	fi
	if [ $(echo $MDDEV | wc -w) -gt 1 ]; then
		ocf_exit_reason "multiple devices supported only with mdadm!"
		exit $OCF_ERR_INSTALLED
	fi
fi

# "auto" takes the array list from list_conf_arrays.
case "$MDDEV" in
auto)
	RAIDDEVS=$(list_conf_arrays)
	;;
*)
	RAIDDEVS="$MDDEV"
	;;
esac
# At this stage,
# [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM,
# otherwise we have raidtools (raidstart and raidstop)

# Dispatch on the requested action; the script exits with the status of
# the selected handler.
case "$1" in
start|stop|status|monitor)
	raid1_$1
	;;
validate-all)
	raid1_validate_all
	;;
*)
	usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
exit $?
diff --git a/heartbeat/SendArp b/heartbeat/SendArp
index b67404f24..9d0b48726 100755
--- a/heartbeat/SendArp
+++ b/heartbeat/SendArp
@@ -1,267 +1,267 @@
#!/bin/sh
#
#
# Copyright (c) 2006, Huang Zhen
# Converting original heartbeat RA to OCF RA.
#
# Copyright (C) 2004 Horms
#
# Based on IPaddr2: Copyright (C) 2003 Tuomo Soini
#
# License: GNU General Public License (GPL)
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# This script send out gratuitous Arp for an IP address
#
# It can be used _instead_ of the IPaddr2 or IPaddr resource
# to send gratuitous arp for an IP address on a given interface,
# without adding the address to that interface. I.e. if for
# some reason you want to send gratuitous arp for addresses
# managed by IPaddr2 or IPaddr on an additional interface.
#
# OCF parameters are as below:
# OCF_RESKEY_ip
# OCF_RESKEY_nic
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
# Load the OCF shell function library; OCF_FUNCTIONS_DIR is only given
# its default when it is not already set.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Path of the send_arp helper and the directory for its pid file.
SENDARP=$HA_BIN/send_arp
SENDARPPIDDIR=${HA_RSCTMP}
# Instance parameters.
BASEIP="$OCF_RESKEY_ip"
INTERFACE="$OCF_RESKEY_nic"
RESIDUAL=""
# One pid file per IP address.
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP"
# The background parameter falls back to "yes" when unset or empty.
BACKGROUND=${OCF_RESKEY_background:-"yes"}
# Set default values
# (each is assigned only when the variable is not already set)
: ${ARP_INTERVAL_MS=200} # milliseconds between ARPs
: ${ARP_REPEAT=5} # repeat count
: ${ARP_BACKGROUND=$BACKGROUND} # no to run in foreground
: ${ARP_NETMASK=ffffffffffff} # netmask for ARP
#######################################################################
# sendarp_meta_data: emits the agent description text that ends at the
# END marker.
# NOTE(review): the redirection operator on the cat line and the markup
# of the text look damaged in this copy of the file -- confirm against
# the upstream agent before changing anything in here.
sendarp_meta_data() {
cat <1.0
This RA can be used _instead_ of the IPaddr2 or IPaddr RA to
send gratuitous ARP for an IP address on a given interface,
without adding the address to that interface. For example,
if for some resaon you wanted to send gratuitous ARP for
addresses managed by IPaddr2 or IPaddr on an additional
interface.
Broadcasts unsolicited ARP announcements
The IP address for sending ARP packet.
IP address
The NIC for sending ARP packet.
NIC
Send ARPs in background. Set to false if you want to test if
sending ARPs succeeded.
Send ARPs in background
END
}
#######################################################################
# sendarp_usage: emits the text that ends at the END marker.
# NOTE(review): the cat line looks damaged in this copy and the text
# below describes ServeRAID rather than SendArp -- it appears to come
# from a different agent; verify against the upstream files.
sendarp_usage() {
cat <1.0
Resource script for ServeRAID. It enables/disables shared ServeRAID merge groups.
Enables and disables shared ServeRAID merge groups
The adapter number of the ServeRAID adapter.
serveraid
The logical drive under consideration.
mergegroup
END
}
ServeRAID_methods() {
	# List the supported methods on stdout, one name per line.
	printf '%s\n' \
		start \
		stop \
		status \
		validate-all \
		methods \
		usage \
		meta-data
}
# Directory holding one entry per ServeRAID host adapter (see parseinst).
ServeRAIDSCSI=/proc/scsi/ips
# Name of the ServeRAID command line tool.
IPS="ipssend"
# SCSI device list; it also receives the add/remove-single-device commands.
proc_scsi="/proc/scsi/scsi"
parseinst() {
	# Validate an "adapter mergegroup" pair and publish the result in the
	# globals sr_adapter, sr_mergegroup and hostid.
	#   $1 - ServeRAID adapter number
	#   $2 - shared merge group number (1-8)
	# Exits with $OCF_ERR_ARGS on invalid input; otherwise returns the
	# status of CheckRaidLevel.

	# start from known-bad values
	sr_adapter=error
	sr_mergegroup=error
	hostid=error
	sr_logicaldrivenumber=error

	if [ $# -ne 2 ]; then
		ocf_log err "Invalid ServeRAID instance: $*"
		exit $OCF_ERR_ARGS
	fi

	PerlScript='next unless /^Host/; $_ .= <>.<>; print "$1 " if /SERVERAID/ and /Proces/ and /scsi(\d+)/'

	# host ids of all ServeRAID host adapters, then the one belonging to
	# the requested adapter number
	hostlist=$($PERL -ne "${PerlScript}" <$proc_scsi)
	hostid=$(echo $hostlist | cut -d' ' -f$1)
	[ -f "$ServeRAIDSCSI/$hostid" ] || {
		ocf_log err "No such ServeRAID adapter: $1"
		exit $OCF_ERR_ARGS
	}

	case $2 in
	[1-8])
		;;
	*)
		ocf_log err "Invalid Shared Merge Group Number: $2"
		exit $OCF_ERR_ARGS
		;;
	esac

	sr_adapter=$1
	sr_mergegroup=$2
	CheckRaidLevel
	return $?
}
# Runs "$IPS getconfig <adapter> ld" for the adapter stored in
# sr_adapter; the command's output is what the merge group helpers parse.
SRLogicalDriveConfig() {
$IPS getconfig $sr_adapter ld
}
MergeGroupToSCSI_ID() {
	# Print the SCSI target id (logical drive number minus one) of the
	# logical drive that belongs to merge group $sr_mergegroup.
	# Returns 1, printing nothing, when no id starting with a digit
	# could be extracted.
	PerlScript="while (<>) {
/logical drive number *([0-9]+)/i && (\$ld=\$1);
/part of merge group *: *$sr_mergegroup *\$/i && print \$ld - 1, \"\n\";
}"
	ID=$(SRLogicalDriveConfig | $PERL -e "$PerlScript")
	case $ID in
	[0-9]*)
		;;
	*)
		return 1
		;;
	esac
	echo "$ID"
	return 0
}
MergeGroupRaidLevel() {
	# Print the RAID level reported for the logical drive that belongs to
	# merge group $sr_mergegroup.
	# Returns 1, printing nothing, when no level could be extracted.
	PerlScript="while (<>) {
/RAID level *: *([0-9]+[A-Za-z]*)/i && (\$ld=\$1);
/part of merge group *: *$sr_mergegroup *\$/i && print \$ld, \"\n\";
}"
	Level=$(SRLogicalDriveConfig | $PERL -e "$PerlScript")
	[ -n "$Level" ] || return 1
	echo "$Level"
	return 0
}
CheckRaidLevel() {
	# Reject merge groups whose RAID level contains a "5": logs two
	# errors and exits with $OCF_ERR_GENERIC. Any other level returns
	# $OCF_SUCCESS. The level is also stored in the global RAIDlevel.
	RAIDlevel=$(MergeGroupRaidLevel)
	case $RAIDlevel in
	*5*)
		;;
	*)
		return $OCF_SUCCESS
		;;
	esac
	ocf_log err "ServeRAID device $sr_adapter $sr_mergegroup is RAID level $RAIDlevel"
	ocf_log err "This level of ServeRAID RAID is not supported for failover by the firmware."
	exit $OCF_ERR_GENERIC
}
ReleaseSCSI() {
	# Write a remove-single-device command for the merge group's target
	# id (host $hostid, channel 0, lun 0) to $proc_scsi.
	targetid=$(MergeGroupToSCSI_ID)
	echo "${SCSI}remove-single-device $hostid 0 $targetid 0" >$proc_scsi
}
AddSCSI() {
	# Write an add-single-device command for the merge group's target id
	# (host $hostid, channel 0, lun 0) to $proc_scsi.
	targetid=$(MergeGroupToSCSI_ID)
	echo "${SCSI}add-single-device $hostid 0 $targetid 0" >$proc_scsi
}
#
# start: Enable the given ServeRAID device
#
ServeRAID_start() {
	# Enable the merge group. Arguments are passed on to ServeRAID_status
	# for the final check.
	# Returns $OCF_SUCCESS when the group is (already) active and
	# $OCF_ERR_GENERIC when neither merge variant worked; exits with
	# $OCF_ERR_GENERIC when the group is still inactive after the merge.
	if ServeRAID_status $serveraid $mergegroup; then
		ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running."
		return $OCF_SUCCESS
	fi

	# Normally we do a MERGE PARTNER, but if we still own the drive for
	# some reason, then we'll need to do a MERGE OWN instead...
	out=$($IPS MERGE $sr_adapter $sr_mergegroup PARTNER 2>&1)
	if [ $? -eq $srsuccess ]; then
		ocf_log info "$out"
	else
		ocf_run $IPS MERGE $sr_adapter $sr_mergegroup OWN
	fi || return $OCF_ERR_GENERIC

	targetid=$(MergeGroupToSCSI_ID)
	sr_logicaldrivenumber=$(expr $targetid + 1)
	#run $IPS SYNCH $sr_adapter $sr_logicaldrivenumber &
	# This version of the SYNCH command requires the 6.10 or later
	# ServeRAID support CD.
	# To avoid issues when called by lrmd, redirect stdout->stderr.
	# Use () to create a subshell to make the redirection be synchronized.
	( ocf_run $IPS SYNCH $sr_adapter $sr_mergegroup & ) >&2
	AddSCSI

	if ServeRAID_status "$@"; then
		return $OCF_SUCCESS
	fi
	ocf_log err "ServeRAID device $1 not active!"
	exit $OCF_ERR_GENERIC
}
#
# stop: Disable the given ServeRAID device
#
ServeRAID_stop() {
	# Disable the merge group: release the SCSI device, unmerge, then
	# verify. Returns $OCF_ERR_GENERIC when the group is still active
	# afterwards, $OCF_SUCCESS otherwise.
	parseinst "$@"
	ReleaseSCSI
	# The UNMERGE status is not evaluated; the status check below
	# decides the outcome.
	ocf_run $IPS UNMERGE $sr_adapter $sr_mergegroup

	if ! ServeRAID_status "$@"; then
		return $OCF_SUCCESS
	fi
	ocf_log err "ServeRAID device $* is still active!"
	return $OCF_ERR_GENERIC
}
#
# status: is the given device now available?
#
ServeRAID_status() {
	# Succeeds when the logical drive configuration contains a line of
	# the form
	#     Part of merge group : <sr_mergegroup>
	# i.e. when the merge group is currently available.
	parseinst "$@"
	SRLogicalDriveConfig | grep -i "part of merge group[ ]*: *$sr_mergegroup *\$" >/dev/null
}
#
# validate_all: are the OCF instance parameters valid?
#
ServeRAID_validate_all() {
	# Check the instance parameters: $PERL must be available, the rest
	# is delegated to parseinst, whose status is returned.
	check_binary $PERL
	parseinst "$@"
}
# Exactly one argument, the action name, is expected.
if [ $# -ne 1 ]; then
	usage
	exit $OCF_ERR_ARGS
fi

# These operations don't require OCF instance parameters to be set
case "$1" in
meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
methods)
	# methods: What methods do we support?
	ServeRAID_methods
	exit $?
	;;
usage)
	usage
	exit $OCF_SUCCESS
	;;
esac

# Everything else needs both instance parameters.
if [ -z "$OCF_RESKEY_serveraid" ] || [ -z "$OCF_RESKEY_mergegroup" ]; then
	ocf_log err "You have to set the OCF_RESKEY_serveraid and OCF_RESKEY_mergegroup\n
enviroment virables before running $0 !"
	exit $OCF_ERR_GENERIC
fi

serveraid=$OCF_RESKEY_serveraid
mergegroup=$OCF_RESKEY_mergegroup

# Look for the start, stop, status, or methods calls...
case "$1" in
stop)
	ServeRAID_stop $serveraid $mergegroup
	exit $?
	;;
start)
	ServeRAID_start $serveraid $mergegroup
	exit $?
	;;
status|monitor)
	if ServeRAID_status $serveraid $mergegroup; then
		ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running."
		exit $OCF_SUCCESS
	fi
	ocf_log debug "ServeRAID merge group $serveraid $mergegroup is stopped."
	exit $OCF_NOT_RUNNING
	;;
validate-all)
	ServeRAID_validate_all $serveraid $mergegroup
	exit $?
	;;
*)
	usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
diff --git a/heartbeat/VIPArip b/heartbeat/VIPArip
index 01c6c994f..12804dffb 100755
--- a/heartbeat/VIPArip
+++ b/heartbeat/VIPArip
@@ -1,302 +1,302 @@
#!/bin/sh
#
# License: GNU General Public License (GPL)
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# Author: Huang Zhen
# Copyright (c) 2006 International Business Machines
#
# Virtual IP Address by RIP2 protocol.
# This script manages IP alias in different subnet with quagga/ripd.
# It can add an IP alias, or remove one.
#
# The quagga package should be installed to run this RA
#
# usage: $0 {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg adds an IP alias.
# Surprisingly, the "stop" arg removes one. :-)
#
# OCF parameters are as below
# OCF_RESKEY_ip The IP address in different subnet
# OCF_RESKEY_nic The nic for broadcast the route information
#
#######################################################################
# Initialization:
# Load the OCF shell function library; OCF_FUNCTIONS_DIR is only given
# its default when it is not already set.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# ripd configuration file maintained by this agent.
RIPDCONF=$HA_RSCTMP/VIPArip-ripd.conf
# Default daemon paths; the zebra_binary and ripd_binary parameters
# override them further down in the script.
ZEBRA=/usr/sbin/zebra
RIPD=/usr/sbin/ripd
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
#######################################################################
# meta_data: emits the agent description text that ends at the END
# marker, then exits with $OCF_SUCCESS.
# NOTE(review): the redirection operator on the cat line and the markup
# of the text look damaged in this copy of the file -- confirm against
# the upstream agent before changing anything in here.
meta_data() {
cat <1.0
Virtual IP Address by RIP2 protocol.
This script manages IP alias in different subnet with quagga/ripd.
It can add an IP alias, or remove one.
Manages a virtual IP address through RIP2
The IPv4 address in different subnet, for example "192.168.1.1".
The IP address in different subnet
The nic for broadcast the route information.
The ripd uses this nic to broadcast the route informaton to others
The nic for broadcast the route information
Absolute path to the zebra binary.
zebra binary
Absolute path to the ripd binary.
ripd binary
END
exit $OCF_SUCCESS
}
usage() {
	# Print the usage line on stderr.
	>&2 echo $USAGE
}
# new_config_file: called from start_rip_ip as "new_config_file <ip>
# <nic> 1" when no ripd configuration file exists yet; it echoes its
# arguments and writes $RIPDCONF.
# NOTE(review): the cat line looks damaged in this copy (a template
# here-document seems to be missing, and the ".tmp" redirection plus the
# cp may belong to a different function) -- confirm against the upstream
# agent before editing.
new_config_file() {
echo new_config_file $1 $2 $3
cat >$RIPDCONF < $RIPDCONF.tmp
cp $RIPDCONF.tmp $RIPDCONF
}
add_ip() {
	# Insert "access-list private permit <ip>/32" on a new line after
	# every occurrence of "ip_tag" in the ripd configuration file.
	#   $1 - IP address
	local staged="$RIPDCONF.tmp"

	echo add_ip $1
	sed -e "s/ip_tag/ip_tag\naccess-list private permit $1\/32/g" $RIPDCONF >$staged
	cp $staged $RIPDCONF
}
del_ip() {
	# Remove the configuration lines that mention the given address.
	# When other "access-list private permit" entries remain, ripd is
	# told to reload; otherwise quagga is stopped and the configuration
	# file is removed.
	#   $1 - IP address
	echo del_ip $1
	# -F takes the address literally (its dots are not wildcards) and -w
	# only accepts it as a complete token, so removing 10.0.0.1 leaves
	# an entry for 10.0.0.10 untouched.
	$GREP -v -F -w -e "$1" $RIPDCONF > $RIPDCONF.tmp
	cp $RIPDCONF.tmp $RIPDCONF
	if $GREP "access-list private permit" $RIPDCONF >/dev/null; then
		echo some other IP is running
		reload_config
	else
		stop_quagga
		echo remove $RIPDCONF
		rm $RIPDCONF
	fi
}
add_nic() {
	# Add the interface stanza after "nic_tag" in the ripd configuration
	# file unless a "network <nic>" entry is already present.
	#   $1 - interface name
	echo add_nic $1
	if ! $GREP "network $1" $RIPDCONF >/dev/null; then
		sed "s/nic_tag/nic_tag\n no passive-interface $1\n network $1\n distribute-list private out $1\n distribute-list private in $1/g" $RIPDCONF > $RIPDCONF.tmp
		cp $RIPDCONF.tmp $RIPDCONF
		return
	fi
	echo the nic is already in the config file
}
reload_config() {
	# Show the current ripd configuration, then signal ripd with SIGHUP.
	# The command is echoed before it is run.
	echo reload_config
	echo $RIPDCONF:
	cat $RIPDCONF
	set -- killall -SIGHUP ripd
	echo "$@"
	"$@"
}
start_quagga() {
	# Show the ripd configuration, then start zebra and ripd with -d
	# (ripd additionally gets -f $RIPDCONF). Each command is echoed
	# before it is run.
	echo start_quagga
	echo $RIPDCONF:
	cat $RIPDCONF

	set -- $ZEBRA -d
	echo "$@"
	"$@"

	set -- $RIPD -d -f $RIPDCONF
	echo "$@"
	"$@"
}
stop_quagga() {
	# Show the ripd configuration, then send SIGTERM to ripd and to
	# zebra, in that order. Each command is echoed before it is run.
	local daemon

	echo stop_quagga
	echo $RIPDCONF:
	cat $RIPDCONF
	for daemon in ripd zebra; do
		echo killall -SIGTERM $daemon
		killall -SIGTERM $daemon
	done
}
start_rip_ip() {
	# Start action: add the address to lo as a /32 and announce it via
	# ripd, first with metric 1 and three seconds later with metric 3.
	# Exits with $OCF_SUCCESS when the address is already running;
	# otherwise returns $OCF_SUCCESS.
	echo start_rip_ip
	check_params

	if [ -z "$OCF_RESKEY_nic" ]; then
		echo OCF_RESKEY_nic is null, set to eth0
		OCF_RESKEY_nic="eth0"
	fi

	status_rip_ip
	case $? in
	$OCF_NOT_RUNNING)
		;;
	$OCF_SUCCESS)
		ocf_log info "already running"
		exit $OCF_SUCCESS
		;;
	*)
		ocf_log info "state undefined, stopping first"
		stop_rip_ip
		;;
	esac

	$IP2UTIL addr add $OCF_RESKEY_ip/32 dev lo

	if [ -f "$RIPDCONF" ]; then
		# there is a config file, add new data(IP,nic,metric)
		# to the existing config file.
		add_ip $OCF_RESKEY_ip
		add_nic $OCF_RESKEY_nic
		set_metric 1
		reload_config
	else
		new_config_file $OCF_RESKEY_ip $OCF_RESKEY_nic 1
		start_quagga
	fi

	# Both paths finish the same way: wait, raise the metric, reload.
	echo sleep 3
	sleep 3
	set_metric 3
	reload_config
	return $OCF_SUCCESS
}
stop_rip_ip() {
	# Stop action: remove the address from lo and from the ripd
	# configuration. Exits with $OCF_SUCCESS when the address is not
	# running; otherwise returns $OCF_SUCCESS.
	echo stop_rip_ip
	check_params

	status_rip_ip
	case $? in
	$OCF_NOT_RUNNING)
		exit $OCF_SUCCESS
		;;
	esac

	$IP2UTIL addr del $OCF_RESKEY_ip dev lo
	echo sleep 2
	sleep 2
	del_ip $OCF_RESKEY_ip
	return $OCF_SUCCESS
}
status_rip_ip() {
	# Report the state of the virtual address:
	#   $OCF_NOT_RUNNING  the address is not configured on any interface
	#   $OCF_SUCCESS      the address is configured, listed in the ripd
	#                     configuration file, and ripd is running
	#   $OCF_ERR_GENERIC  the address is configured but one of the other
	#                     two conditions does not hold
	check_params

	# -F/-w: compare the address literally and as a complete token, so a
	# different address that merely contains it (10.0.0.10 for 10.0.0.1)
	# is not mistaken for it.
	if ! $IP2UTIL addr | $GREP -F -w -e "$OCF_RESKEY_ip" >/dev/null; then
		return $OCF_NOT_RUNNING
	fi
	if $GREP -F -w -e "$OCF_RESKEY_ip" $RIPDCONF >/dev/null &&
	   pidof ripd >/dev/null; then
		return $OCF_SUCCESS
	fi
	return $OCF_ERR_GENERIC
}
# Exactly one argument, the action name, is expected.
if [ $# -ne 1 ]; then
	usage
	exit $OCF_ERR_ARGS
fi

# Non-empty binary parameters replace the default daemon paths.
ZEBRA=${OCF_RESKEY_zebra_binary:-$ZEBRA}
RIPD=${OCF_RESKEY_ripd_binary:-$RIPD}

case $1 in
start)
	start_rip_ip
	;;
stop)
	stop_rip_ip
	;;
status|monitor)
	status_rip_ip
	;;
validate-all)
	check_binary $IP2UTIL
	exit $OCF_SUCCESS
	;;
meta-data)
	meta_data
	;;
usage)
	usage
	exit $OCF_SUCCESS
	;;
*)
	usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index 188b7c3ab..0b614f5f3 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -1,842 +1,842 @@
#!/bin/sh
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# Resource Agent for domains managed by the libvirt API.
# Requires a running libvirt daemon (libvirtd).
#
# (c) 2008-2010 Florian Haas, Dejan Muhamedagic,
# and Linux-HA contributors
#
# usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}
#
#######################################################################
# Initialization:
# Load the OCF shell function library; OCF_FUNCTIONS_DIR is only given
# its default when it is not already set.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
OCF_RESKEY_migration_downtime_default=0
OCF_RESKEY_migration_speed_default=0
OCF_RESKEY_force_stop_default=0
OCF_RESKEY_autoset_utilization_cpu_default="true"
OCF_RESKEY_autoset_utilization_hv_memory_default="true"
# 49152 plus the value of ocf_maybe_random modulo 64, i.e. a port in the
# range 49152..49215.
OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 ))
OCF_RESKEY_CRM_meta_timeout_default=90000
OCF_RESKEY_save_config_on_stop_default=false
OCF_RESKEY_sync_config_on_stop_default=false
# Apply the defaults: each parameter keeps its configured value and only
# receives the default when it is unset.
: ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}}
: ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}}
: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}}
: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}}
: ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}}
: ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}}
: ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}}
: ${OCF_RESKEY_sync_config_on_stop=${OCF_RESKEY_sync_config_on_stop_default}}
# Syncing the config on stop implies saving it on stop.
if ocf_is_true ${OCF_RESKEY_sync_config_on_stop}; then
OCF_RESKEY_save_config_on_stop="true"
fi
#######################################################################
## I'd very much suggest to make this RA use bash,
## and then use magic $SECONDS.
## But for now:
# Time at which the agent was invoked, in seconds since the epoch.
NOW=$(date +%s)
usage() {
	# Print the list of supported actions on stdout.
	printf '%s\n' "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}"
}
# VirtualDomain_meta_data: emits the agent and parameter description
# text that ends at the EOF marker.
# NOTE(review): the redirection operator on the cat line and the markup
# of the text look damaged in this copy of the file -- confirm against
# the upstream agent before changing anything in here.
VirtualDomain_meta_data() {
cat <1.1
Resource agent for a virtual domain (a.k.a. domU, virtual machine,
virtual environment etc., depending on context) managed by libvirtd.
Manages virtual domains through the libvirt virtualization framework
Absolute path to the libvirt configuration file,
for this virtual domain.
Virtual domain configuration file
Hypervisor URI to connect to. See the libvirt documentation for
details on supported URI formats. The default is system dependent.
Determine the system's default uri by running 'virsh --quiet uri'.
Hypervisor URI
Always forcefully shut down ("destroy") the domain on stop. The default
behavior is to resort to a forceful shutdown only after a graceful
shutdown attempt has failed. You should only set this to true if
your virtual domain (or your virtualization backend) does not support
graceful shutdown.
Always force shutdown on stop
Transport used to connect to the remote hypervisor while
migrating. Please refer to the libvirt documentation for details on
transports available. If this parameter is omitted, the resource will
use libvirt's default transport to connect to the remote hypervisor.
Remote hypervisor transport
Define max downtime during live migration in milliseconds
Live migration downtime
Define live migration speed per resource in MiB/s
Live migration speed
Use a dedicated migration network. The migration URI is composed by
adding this parameters value to the end of the node name. If the node
name happens to be an FQDN (as opposed to an unqualified host name),
insert the suffix immediately prior to the first period (.) in the FQDN.
At the moment Qemu/KVM and Xen migration via a dedicated network is supported.
Note: Be sure this composed host name is locally resolveable and the
associated IP is reachable through the favored network.
See also the migrate_options parameter below.
Migration network host name suffix
Extra virsh options for the guest live migration. You can also specify
here --migrateuri if the calculated migrate URI is unsuitable for your
environment. If --migrateuri is set then migration_network_suffix
and migrateport are effectively ignored. Use "%n" as the placeholder
for the target node name.
Please refer to the libvirt documentation for details on guest
migration.
live migrate options
To additionally monitor services within the virtual domain, add this
parameter with a list of scripts to monitor.
Note: when monitor scripts are used, the start and migrate_from operations
will complete only when all monitor scripts have completed successfully.
Be sure to set the timeout of these operations to accommodate this delay.
space-separated list of monitor scripts
If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it
into the CPU utilization of the resource when the monitor is executed.
Enable auto-setting the CPU utilization of the resource
If set true, the agent will detect the number of *Max memory* from virsh, and put it
into the hv_memory utilization of the resource when the monitor is executed.
Enable auto-setting the hv_memory utilization of the resource
This port will be used in the qemu migrateuri. If unset, the port will be a random highport.
Port for migrateuri
Changes to a running VM's config are normally lost on stop.
This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter.
Save running VM's config back to its config file
Setting this automatically enables save_config_on_stop.
When enabled this parameter instructs the RA to
call csync2 -x to synchronize the file to all nodes.
csync2 must be properly set up for this to work.
Save running VM's config back to its config file
Path to the snapshot directory where the virtual machine image will be stored. When this
parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot
directory when stopped. If on start a state file is present for the domain, the domain
will be restored to the same state it was in right before it stopped last. This option
is incompatible with the 'force_stop' option.
Restore state on start/stop
EOF
}
set_util_attr() {
	# Set a utilization attribute of this resource through crm_resource
	# unless it already holds the wanted value.
	#   $1 - attribute name
	#   $2 - value
	local attr=$1 val=$2
	local current reply

	current=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null)
	if [ $? -ne 0 ] && [ -z "$current" ]; then
		# The query failed without output: ask again to see whether
		# the reason is a missing connection, in which case setting
		# the attribute cannot work either.
		if crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 |
			grep -e "not connected" > /dev/null 2>&1; then
			ocf_log debug "Unable to set utilization attribute, cib is not available"
			return
		fi
	fi

	[ "$current" = "$val" ] && return

	reply=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) ||
		ocf_log warn "crm_resource failed to set utilization attribute $attr: $reply"
}
update_utilization() {
	# Copy the domain's vCPU count and maximum memory, as reported by
	# "virsh dominfo", into the resource's cpu and hv_memory utilization
	# attributes. Each part has its own autoset_utilization_* switch.
	local vcpus mem_mib

	if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then
		vcpus=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null |
			awk '/CPU\(s\)/{print $2}')
		[ -n "$vcpus" ] && set_util_attr cpu $vcpus
	fi
	if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then
		# dominfo's "Max memory" figure divided by 1024
		mem_mib=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null |
			awk '/Max memory/{printf("%d", $3/1024)}')
		[ -n "$mem_mib" ] && set_util_attr hv_memory "$mem_mib"
	fi
}
get_emulator()
{
	# Print the basename of the domain's emulator binary, or nothing
	# when it cannot be determined. Sources, in order of preference:
	#   1. the <emulator> element of the running domain (virsh dumpxml)
	#   2. the value cached in $EMULATOR_STATE
	#   3. the <emulator> element of the domain's configuration file
	local emulator=""
	# The pattern is anchored on the opening tag: without it the leading
	# greedy ".*" swallows the whole path and the captured group is
	# always empty.
	local extract='s/^.*<emulator>\(.*\)<\/emulator>.*$/\1/p'

	emulator=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e "$extract")
	if [ -z "$emulator" ] && [ -e "$EMULATOR_STATE" ]; then
		emulator=$(cat "$EMULATOR_STATE")
	fi
	if [ -z "$emulator" ]; then
		emulator=$(sed -n -e "$extract" "${OCF_RESKEY_config}")
	fi
	if [ -n "$emulator" ]; then
		basename "$emulator"
	fi
}
update_emulator_cache()
{
	# Store the emulator name reported by get_emulator in
	# $EMULATOR_STATE; an empty result leaves the file untouched.
	local name

	name=$(get_emulator)
	[ -z "$name" ] || echo $name > $EMULATOR_STATE
}
# attempt to check domain status outside of libvirt using the emulator process
pid_status()
{
	# Determine the domain state without libvirt, from the emulator
	# process (or xl / xen-list when no known emulator is set).
	# Returns $OCF_SUCCESS, $OCF_NOT_RUNNING, or $OCF_ERR_GENERIC when
	# the state cannot be determined this way.
	local rc=$OCF_ERR_GENERIC
	local emulator
	local loglevel="debug"
	local proc_pattern=""

	emulator=$(get_emulator)

	# An emulator is not required, so only report message in debug mode
	# (notice during a probe).
	if ocf_is_probe; then
		loglevel="notice"
	fi

	# Process list pattern for the emulators we know about.
	# This can be expanded to check for additional emulators.
	case "$emulator" in
	qemu-kvm|qemu-dm|qemu-system-*)
		proc_pattern="[q]emu-(kvm|dm|system).*-name $DOMAIN_NAME "
		;;
	libvirt_lxc)
		proc_pattern="[l]ibvirt_lxc.*-name $DOMAIN_NAME "
		;;
	esac

	if [ -n "$proc_pattern" ]; then
		if ps awx | grep -E "$proc_pattern" > /dev/null 2>&1; then
			rc=$OCF_SUCCESS
		else
			rc=$OCF_NOT_RUNNING
		fi
	elif have_binary xl; then
		# We may be running xen with PV domains, they don't
		# have an emulator set. try xl list or xen-lists
		if xl list $DOMAIN_NAME >/dev/null 2>&1; then
			rc=$OCF_SUCCESS
		else
			rc=$OCF_NOT_RUNNING
		fi
	elif have_binary xen-list; then
		if xen-list $DOMAIN_NAME 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null; then
			rc=$OCF_SUCCESS
		else
			rc=$OCF_NOT_RUNNING
		fi
	else
		ocf_log $loglevel "Unable to determine emulator for $DOMAIN_NAME"
	fi

	if [ $rc -eq $OCF_SUCCESS ]; then
		ocf_log debug "Virtual domain $DOMAIN_NAME is currently running."
	elif [ $rc -eq $OCF_NOT_RUNNING ]; then
		ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running."
	fi
	return $rc
}
# VirtualDomain_status: query the domain state with "virsh domstate"
# and map it to an OCF return code. The query is repeated, one second
# apart, for as long as the state comes back as "no state".
# Returns $OCF_SUCCESS, $OCF_NOT_RUNNING or $OCF_ERR_GENERIC.
# Note: rc and status are not declared local here, so the function
# writes to whatever variables of these names are in scope.
VirtualDomain_status() {
local try=0
rc=$OCF_ERR_GENERIC
status="no state"
while [ "$status" = "no state" ]; do
try=$(($try + 1 ))
# virsh output, stderr included, folded to lower case for matching
status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z')
case "$status" in
*"error:"*"domain not found"|*"error:"*"failed to get domain"*|"shut off")
# shut off: domain is defined, but not started, will not happen if
# domain is created but not defined
# "Domain not found" or "failed to get domain": domain is not defined
# and thus not started
ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)"
rc=$OCF_NOT_RUNNING
;;
running|paused|idle|blocked|"in shutdown")
# running: domain is currently actively consuming cycles
# paused: domain is paused (suspended)
# idle: domain is running but idle
# blocked: synonym for idle used by legacy Xen versions
# in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed.
ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
rc=$OCF_SUCCESS
;;
""|*"failed to "*"connect to the hypervisor"*|"no state")
# Empty string may be returned when virsh does not
# receive a reply from libvirtd.
# "no state" may occur when the domain is currently
# being migrated (on the migration target only), or
# whenever virsh can't reliably obtain the domain
# state.
# Normalise all of these to "no state" so that the loop retries.
status="no state"
if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then
# During the stop operation, we want to bail out
# quickly, so as to be able to force-stop (destroy)
# the domain if necessary.
ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out."
return $OCF_ERR_GENERIC;
elif [ "$__OCF_ACTION" = "monitor" ]; then
pid_status
rc=$?
if [ $rc -ne $OCF_ERR_GENERIC ]; then
# we've successfully determined the domains status outside of libvirt
return $rc
fi
else
# During all other actions, we just wait and try
# again, relying on the CRM/LRM to time us out if
# this takes too long.
ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying."
fi
sleep 1
;;
*)
# any other output is unexpected.
# status is no longer "no state" here, so the loop ends and rc is
# returned as it stands.
ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!"
sleep 1
;;
esac
done
return $rc
}
# virsh undefine removes configuration files if they are in
# directories which are managed by libvirt. such directories
# include also subdirectories of /etc (for instance
# /etc/libvirt/*) which may be surprising. VirtualDomain didn't
# include the undefine call before, hence this wasn't an issue
# before.
#
# There seems to be no way to find out which directories are
# managed by libvirt.
#
verify_undefined() {
	# Undefine the domain if libvirt lists it. Because "virsh undefine"
	# may delete the configuration file, the file is copied aside first
	# and put back when it is gone afterwards.
	local backup

	virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null |
		grep -wqs "$DOMAIN_NAME" || return 0

	backup=$(mktemp -t vmcfgsave.XXXXXX)
	if [ ! -r "$backup" ]; then
		ocf_log warn "unable to create temp file, disk full?"
		# we must undefine the domain
		virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
		return
	fi

	cp -p $OCF_RESKEY_config $backup
	virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
	[ -f $OCF_RESKEY_config ] || cp -f $backup $OCF_RESKEY_config
	rm -f $backup
}
VirtualDomain_start() {
	# Start action. A domain that is already running is left alone; a
	# saved state file in the snapshot directory is restored; otherwise
	# the domain is created from its configuration file and the function
	# waits until VirtualDomain_monitor succeeds.
	# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
	local statefile

	if VirtualDomain_status; then
		ocf_log info "Virtual domain $DOMAIN_NAME already running."
		return $OCF_SUCCESS
	fi

	statefile="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state"
	if [ -n "$OCF_RESKEY_snapshot" ] && [ -f "$statefile" ]; then
		if virsh restore $statefile; then
			rm -f $statefile
			return $OCF_SUCCESS
		fi
		ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory."
		return $OCF_ERR_GENERIC
	fi

	# Make sure domain is undefined before creating.
	# The 'create' command guarantees that the domain will be
	# undefined on shutdown, but requires the domain to be undefined.
	# if a user defines the domain
	# outside of this agent, we have to ensure that the domain
	# is restored to an 'undefined' state before creating.
	verify_undefined

	virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config}
	rc=$?
	if [ $rc -ne 0 ]; then
		ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
		return $OCF_ERR_GENERIC
	fi

	until VirtualDomain_monitor; do
		sleep 1
	done
	return $OCF_SUCCESS
}
force_stop()
{
  # Destroy the domain via "virsh destroy" and wait until
  # VirtualDomain_status reports $OCF_NOT_RUNNING.
  # Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when destroy fails for a
  # reason other than the domain already being gone.
  local destroy_output destroy_rc lowered
  local status=0

  ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
  destroy_output=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1)
  destroy_rc=$?
  # Lower-case the message so the matching below is case-insensitive.
  lowered=$(echo $destroy_output | tr 'A-Z' 'a-z')
  echo "$lowered" >&2

  case $lowered in
    *"error:"*"domain is not running"*) ;;
    *"error:"*"domain not found"*) ;;
    *"error:"*"failed to get domain"*) ;;
      # the three cases above: unexpected path to the intended outcome
    *)
      if [ "$destroy_rc" -ne 0 ]; then
        ocf_exit_reason "forced stop failed"
        return $OCF_ERR_GENERIC
      fi
      # destroy succeeded: poll until the domain is reported stopped
      while [ $status != $OCF_NOT_RUNNING ]; do
        VirtualDomain_status
        status=$?
      done
      ;;
  esac
  return $OCF_SUCCESS
}
sync_config(){
  # Push $OCF_RESKEY_config with "csync2 -x"; a failure is only logged
  # as a warning.
  ocf_log info "Syncing $DOMAIN_NAME config file with csync2 -x ${OCF_RESKEY_config}"
  csync2 -x ${OCF_RESKEY_config} && return
  ocf_log warn "Syncing ${OCF_RESKEY_config} failed."
}
save_config(){
  # Dump the inactive XML of $DOMAIN_NAME and, if it is non-empty,
  # differs from $OCF_RESKEY_config and passes virt-xml-validate,
  # overwrite $OCF_RESKEY_config with it (optionally syncing via
  # sync_config). Every failure is logged as a warning only.
  local CFGTMP

  CFGTMP=$(mktemp -t vmcfgsave.XXXXXX)
  if [ -z "$CFGTMP" ] || [ ! -w "$CFGTMP" ]; then
    # Without a temp file the redirect below would fail confusingly.
    ocf_log warn "Unable to create temp file for domain $DOMAIN_NAME config. Skipping config update."
    return 0
  fi

  virsh $VIRSH_OPTIONS dumpxml --inactive --security-info "${DOMAIN_NAME}" > "${CFGTMP}"
  if [ -s "${CFGTMP}" ]; then
    if ! cmp -s "${CFGTMP}" "${OCF_RESKEY_config}"; then
      if virt-xml-validate "${CFGTMP}" domain 2>/dev/null ; then
        ocf_log info "Saving domain $DOMAIN_NAME to ${OCF_RESKEY_config}. Please make sure it's present on all nodes or sync_config_on_stop is on."
        # cat + redirect (rather than mv) rewrites the existing file
        # in place instead of replacing it with the temp file.
        if cat "${CFGTMP}" > "${OCF_RESKEY_config}" ; then
          ocf_log info "Saved $DOMAIN_NAME domain's configuration to ${OCF_RESKEY_config}."
          if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then
            sync_config
          fi
        else
          ocf_log warn "Moving ${CFGTMP} to ${OCF_RESKEY_config} failed."
        fi
      else
        ocf_log warn "Domain $DOMAIN_NAME config failed to validate after dump. Skipping config update."
      fi
    fi
  else
    ocf_log warn "Domain $DOMAIN_NAME config has 0 size. Skipping config update."
  fi
  rm -f "${CFGTMP}"
}
VirtualDomain_stop() {
  # Stop the domain. Order of attempts:
  #   1. already stopped          -> $OCF_SUCCESS
  #   2. force_stop configured    -> force_stop
  #   3. snapshot configured      -> "virsh save" (which stops the domain)
  #   4. otherwise                -> "virsh shutdown", then poll
  # If the domain is still not stopped when the deadline passes, or the
  # status is unexpected, fall back to force_stop.
  #
  # Globals read: NOW (epoch seconds, set outside this function),
  #               OCF_RESKEY_CRM_meta_timeout (milliseconds)
  local status
  local shutdown_timeout
  local needshutdown=1

  VirtualDomain_status
  status=$?

  case $status in
    $OCF_SUCCESS)
      if ocf_is_true "$OCF_RESKEY_force_stop"; then
        # if force stop, don't bother attempting graceful shutdown.
        force_stop
        return $?
      fi

      ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}."

      if [ -n "$OCF_RESKEY_snapshot" ]; then
        # Use $VIRSH_OPTIONS like every other libvirt call here, so
        # the save goes to the configured hypervisor connection.
        virsh $VIRSH_OPTIONS save "$DOMAIN_NAME" "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state"
        if [ $? -eq 0 ]; then
          needshutdown=0
        else
          ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop"
        fi
      fi

      # save config if needed
      if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
        save_config
      fi

      # issue the shutdown if save state didn't shutdown for us
      if [ $needshutdown -eq 1 ]; then
        # Issue a graceful shutdown request
        virsh $VIRSH_OPTIONS shutdown "${DOMAIN_NAME}"
      fi

      # The "shutdown_timeout" we use here is the operation
      # timeout specified in the CIB, minus 5 seconds
      shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 ))

      # Loop on status until we reach $shutdown_timeout
      while [ $NOW -lt $shutdown_timeout ]; do
        VirtualDomain_status
        status=$?
        case $status in
          $OCF_NOT_RUNNING)
            # This was a graceful shutdown.
            return $OCF_SUCCESS
            ;;
          $OCF_SUCCESS)
            # Domain is still running, keep waiting (until
            # shutdown_timeout expires)
            sleep 1
            ;;
          *)
            # Something went wrong. Bail out and resort to
            # forced stop (destroy).
            break
            ;;
        esac
        NOW=$(date +%s)
      done
      ;;
    $OCF_NOT_RUNNING)
      ocf_log info "Domain $DOMAIN_NAME already stopped."
      return $OCF_SUCCESS
      ;;
  esac

  # OK. Now if the above graceful shutdown hasn't worked, kill
  # off the domain with destroy. If that too does not work,
  # have the LRM time us out.
  force_stop
}
mk_migrateuri() {
  # Print a migration URI for a dedicated migration network, built from
  # the migration target node name plus
  # $OCF_RESKEY_migration_network_suffix. Prints nothing when no suffix
  # is configured or the hypervisor type is not handled.
  local target_node
  local migrate_target
  local hypervisor

  target_node="$OCF_RESKEY_CRM_meta_migrate_target"

  # A typical migration URI via a special migration network looks
  # like "tcp://bar-mig:49152". The port would be randomly chosen
  # by libvirt from the range 49152-49215 if omitted, at least since
  # version 0.7.4 ...
  [ -n "${OCF_RESKEY_migration_network_suffix}" ] || return 0

  # Driver part of the connection URI, e.g. "qemu" from "qemu+ssh://...".
  hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}"

  # The node name might be a FQDN: append the suffix to the first label.
  migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")

  case $hypervisor in
    qemu)
      # For quite ancient libvirt versions a migration port is needed
      # and the URI must not contain the "//". Newer versions can
      # handle the "bad" URI.
      echo "tcp:${migrate_target}:${OCF_RESKEY_migrateport}"
      ;;
    xen)
      echo "xenmigr://${migrate_target}"
      ;;
    *)
      ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}."
      ;;
  esac
}
VirtualDomain_migrate_to() {
  # Live-migrate the locally running domain to the node named in
  # $OCF_RESKEY_CRM_meta_migrate_target.
  # Returns $OCF_SUCCESS when "virsh migrate" exits 0, otherwise
  # $OCF_ERR_GENERIC (also when the domain is not active locally).
  local rc
  local target_node
  local remoteuri
  local transport_suffix
  local migrateuri
  local migrate_opts
  local migrate_pid

  target_node="$OCF_RESKEY_CRM_meta_migrate_target"

  if ! VirtualDomain_status; then
    ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!"
    return $OCF_ERR_GENERIC
  fi

  # Find out the remote hypervisor to connect to. That is, turn
  # something like "qemu://foo:9999/system" into
  # "qemu+tcp://bar:9999/system"
  if [ -n "${OCF_RESKEY_migration_transport}" ]; then
    transport_suffix="+${OCF_RESKEY_migration_transport}"
  fi

  # A user-supplied --migrateuri= is pulled out of the options (with %n
  # replaced by the target node); otherwise mk_migrateuri builds one.
  migrate_opts="$OCF_RESKEY_migrate_options"
  if echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then
    migrateuri=$(echo "$migrate_opts" |
      sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g")
    migrate_opts=$(echo "$migrate_opts" |
      sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\2/")
  else
    migrateuri=$(mk_migrateuri)
  fi

  # Rewrite the local connection URI: keep scheme, port and path, add
  # the transport suffix to the scheme and swap in the target node.
  remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,")

  # save config if needed
  if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
    save_config
  fi

  # Live migration speed limit
  if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then
    ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})."
    virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed}
  fi

  # The migration runs in the background so that the downtime limit can
  # be set while it is in progress.
  ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)."
  virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri &
  migrate_pid=${!}

  # Live migration downtime interval
  # Note: You can set downtime only while live migration is in progress
  if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then
    sleep 2
    ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})."
    virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime}
  fi

  wait ${migrate_pid}
  rc=$?
  if [ $rc -eq 0 ]; then
    ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded."
    return $OCF_SUCCESS
  fi
  ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc"
  return $OCF_ERR_GENERIC
}
VirtualDomain_migrate_from() {
  # Wait until the migrated-in domain passes monitor, log the result and
  # optionally save its configuration. Always returns $OCF_SUCCESS once
  # the wait loop has finished.
  until VirtualDomain_monitor; do
    sleep 1
  done

  ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."

  # save config if needed
  ocf_is_true "$OCF_RESKEY_save_config_on_stop" && save_config

  return $OCF_SUCCESS
}
VirtualDomain_monitor() {
  # Report the domain state. Starts from VirtualDomain_status; if that is
  # $OCF_SUCCESS, each command in $OCF_RESKEY_monitor_scripts is run in
  # turn and the first failure turns the result into $OCF_ERR_GENERIC.
  # The emulator cache, utilization and (optionally) the saved config
  # are refreshed regardless of the result.
  VirtualDomain_status
  rc=$?

  if [ ${rc} -eq ${OCF_SUCCESS} ]; then
    for script in ${OCF_RESKEY_monitor_scripts}; do
      script_output="$($script 2>&1)"
      script_rc=$?
      if [ ${script_rc} -eq ${OCF_SUCCESS} ]; then
        ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}"
        continue
      fi
      # First failing script: record the reason and stop iterating.
      ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
      rc=$OCF_ERR_GENERIC
      break
    done
  fi

  update_emulator_cache
  update_utilization

  # Save configuration on monitor as well, so we will have a better
  # chance of having fresh and up to date config files on all nodes.
  if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
    save_config
  fi

  return ${rc}
}
VirtualDomain_validate_all() {
  # Validate the resource configuration.
  # Returns:
  #   $OCF_ERR_CONFIGURED - force_stop combined with snapshot, or a
  #                         non-decimal migration_speed/migration_downtime
  #   $OCF_ERR_INSTALLED  - config file not readable, or no domain name
  #   $OCF_SUCCESS        - otherwise
  if ocf_is_true "$OCF_RESKEY_force_stop" && [ -n "$OCF_RESKEY_snapshot" ]; then
    ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together."
    return $OCF_ERR_CONFIGURED
  fi

  # check if we can read the config file (otherwise we're unable to
  # deduce $DOMAIN_NAME from it, see below).
  # The path is quoted: unquoted, an empty value or a path containing
  # whitespace made the test itself misbehave and the check was skipped.
  if [ ! -r "$OCF_RESKEY_config" ]; then
    if ocf_is_probe; then
      ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe."
    elif [ "$__OCF_ACTION" = "stop" ]; then
      ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped."
    else
      ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or not readable."
    fi
    return $OCF_ERR_INSTALLED
  fi

  if [ -z "$DOMAIN_NAME" ]; then
    ocf_exit_reason "Unable to determine domain name."
    return $OCF_ERR_INSTALLED
  fi

  # Check if csync2 is available when config tells us we might need it.
  if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then
    check_binary csync2
  fi

  # Check if migration_speed is a decimal value
  if ! ocf_is_decimal "${OCF_RESKEY_migration_speed}"; then
    ocf_exit_reason "migration_speed has to be a decimal value"
    return $OCF_ERR_CONFIGURED
  fi

  # Check if migration_downtime is a decimal value
  if ! ocf_is_decimal "${OCF_RESKEY_migration_downtime}"; then
    ocf_exit_reason "migration_downtime has to be a decimal value"
    return $OCF_ERR_CONFIGURED
  fi

  return $OCF_SUCCESS
}
VirtualDomain_getconfig() {
  # Derive the globals used by the other actions:
  #   OCF_RESKEY_hypervisor - defaulted from "virsh uri" only when unset
  #   VIRSH_OPTIONS         - connection options passed to every virsh call
  #   DOMAIN_NAME           - text of the <name> element in the config file
  #   EMULATOR_STATE        - per-domain state file under $HA_RSCTMP

  # Grab the virsh uri default, but only if hypervisor isn't set
  : ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)}

  # Set options to be passed to virsh:
  VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"

  # Retrieve the domain name from the xml file: select lines that hold a
  # complete <name>...</name> element, then strip the tags and the
  # surrounding whitespace. Without the <name> anchors the pattern would
  # match every line of the file.
  DOMAIN_NAME=$(grep -E '[[:space:]]*<name>.*</name>[[:space:]]*$' "${OCF_RESKEY_config}" 2>/dev/null \
    | sed -e 's/[[:space:]]*<name>\(.*\)<\/name>[[:space:]]*$/\1/')

  EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state"
}
# Parameters and binaries this agent declares as required, followed by the
# hand-off to ocf_rarun. "$@" forwards the command-line arguments exactly as
# received instead of re-splitting them.
OCF_REQUIRED_PARAMS="config"
OCF_REQUIRED_BINARIES="virsh sed"
ocf_rarun "$@"
diff --git a/heartbeat/WAS b/heartbeat/WAS
index a46cdd9be..3c7469328 100755
--- a/heartbeat/WAS
+++ b/heartbeat/WAS
@@ -1,572 +1,572 @@
#!/bin/sh
#
#
# WAS
#
# Description: Manages a Websphere Application Server as an HA resource
#
#
# Author: Alan Robertson
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 - 2005 International Business Machines, Inc.
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_config
# (WAS-configuration file, used for the single server edition of WAS)
# OCF_RESKEY_port
# (WAS--port-number, used for the advanced edition of WAS)
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Installation directory: /opt/WebSphere/AppServer, or the /usr location
# when that directory does not exist.
WASDIR=/opt/WebSphere/AppServer
[ -d $WASDIR ] || WASDIR=/usr/WebSphere/AppServer
STARTTIME=300 # 5 minutes
DEFAULT_WASPORTS="9080"
#
# Paths derived from the installation directory.
#
WASBIN=$WASDIR/bin
DEFAULT=$WASDIR/config/server-cfg.xml
#
# Print usage message
#
usage() {
# Print the help text. The method list comes from WAS_methods (minus
# "methods" itself), joined with "|".
methods=$(echo $(WAS_methods | grep -v methods) | tr ' ' '|')
cat <<-END
usage: $0 ($methods)
For the single server edition of WAS, you have to set the following
enviroment virable:
OCF_RESKEY_config
(WAS-configuration file)
For the advanced edition of WAS, you have to set the following
enviroment virable:
OCF_RESKEY_port
(WAS--port-number)
$0 manages a Websphere Application Server (WAS) as an HA resource
The 'start' operation starts WAS.
The 'stop' operation stops WAS.
The 'status' operation reports whether WAS is running
The 'monitor' operation reports whether the WAS seems to be working
(httpd also needs to be working for this case)
The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_config or OCF_RESKEY_port) is valid
The 'methods' operation reports on the methods $0 supports
This is known to work with the Single Server edition of Websphere,
and is believed to work with the Advanced edition too.
Since the Advanced Edition has no configuration file (it's in a the
database) you need to give a port number instead of a
configuration file for this config parameter.
The default configuration file for the single server edition is:
$DEFAULT
The default snoop-port for the advanced edition is: $DEFAULT_WASPORTS
The start and stop operations must be run as root.
The status operation will report a pid of "-" for the
WAS root process using unless it is run as root.
If you don't have xmllint on your system, parsing of WAS
configuration files is very primitive.
In this case, the port specification we need from the XML
config file has to be on the same line as the
first part of the tag.
We run servlet/snoop on the first transport port listed in
the config file for the "monitor" operation.
END
}
meta_data() {
# Emits the agent description for the "meta-data" action.
# NOTE(review): the text between "cat" and "END" looks like the body of a
# here-document whose markup is missing in this view; confirm against the
# original file before editing anything in this function.
cat <1.0
Resource script for WAS. It manages a Websphere Application Server (WAS) as
an HA resource.
Manages a WebSphere Application Server instance
The WAS-configuration file.
configration file
The WAS-(snoop)-port-number.
port
END
}
#
# Reformat the XML document in a sort of canonical form
# if we can. If we don't have xmllint, we just cat it out
# and hope for the best ;-)
#
xmlcat() {
  # Print every file given as argument through the command cached in
  # $XMLcat. On first use $XMLcat becomes "<xmllint> --recover --format"
  # when an executable xmllint is found, and plain "cat" otherwise.
  if [ -z "$XMLcat" ]; then
    XMLcat=$(which xmllint 2>/dev/null)
    if [ -n "${XMLcat}" ] && [ -x "${XMLcat}" ]; then
      XMLcat="$XMLcat --recover --format"
    else
      XMLcat=cat
    fi
  fi

  for j; do
    ${XMLcat} "$j"
  done
}
#
#This is a bit skanky, but it works anyway...
#
#
#
#
#
# It's not really skanky if we can find xmllint on the system, because it
# reformats tags so they are all on one line, which is all we need...
#
#
# Get the numbers of the ports WAS should be listening on...
#
# If we don't have xmllint around, then the applicationserver and the
# port= specification have to be on the same line in the XML config file.
#
GetWASPorts() {
  # Print the WAS port numbers, one per line.
  # $1 is either a comma-separated port list (anything starting with a
  # digit) or the path of a WAS configuration file.
  case $1 in
    [0-9]*)
      echo "$1" | tr ',' '\n'
      return
      ;;
  esac

  # Configuration file: keep the HTTPTransport lines that carry a port=
  # attribute, then
  #   1. delete everything up to port=, an optional quote and optional
  #      white space;
  #   2. throw away everything from the first non-digit onwards.
  # This should leave the port number all by itself.
  xmlcat $1 \
    | grep -i 'transports.*applicationserver:HTTPTransport' \
    | grep port= \
    | sed 's%.*port= *"* *%%; s%[^0-9][^0-9]*.*$%%'
}
#
# We assume that the first transport port listed in the config file
# is the one we should run servlet/snoop on.
#
GetWASSnoopPort() {
  # The snoop port is the first port reported by GetWASPorts.
  GetWASPorts "$@" | sed -n '1p'
}
#
# Return information on the processname/id for the WAS ports
#
# pid/java is the expected output. Several lines, one per port...
#
#
WASPortInfo() {
  # Print the "pid/program" column of "netstat -ltnp" for every listening
  # TCP socket bound to one of the given ports (one line per socket).
  # Arguments: port numbers (whitespace-separated words are accepted).
  # Prints nothing when no port is given.
  pat=""
  once=yes
  PortCount=0
  for j in $*
  do
    case $pat in
      "") pat="$j";;
      *) pat="$pat|$j";;
    esac
    PortCount=$((PortCount + 1))
  done

  # Without ports the alternation would be empty and match every
  # listening socket, so callers would see unrelated processes.
  [ -n "$pat" ] || return 0

  # Anchor the port on the ":" that precedes it in the local address
  # column, so that e.g. port 80 does not match a listener on 8080.
  netstat -ltnp 2>/dev/null | grep -E -i ":($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
}
#
# Return the number of WAS ports which are open
#
CheckWASPortsInUse() {
  # Print how many lines WASPortInfo reports for the given ports.
  count=$(WASPortInfo "$@" | wc -l)
  # Unquoted on purpose: drops any padding wc puts around the number.
  echo $count
}
#
# Return the pid(s) of the processes that have WAS ports open
#
WASPIDs() {
  # De-duplicate the "pid/program" entries from WASPortInfo and print
  # only the part before the first "/".
  WASPortInfo "$@" \
    | sort -u \
    | cut -d/ -f1
}
#
# The version of ps that returns all processes and their (long) args
# It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
# Thin wrapper so the ps invocation lives in one place: runs "ps axww".
ps axww
}
#
# The total set of WAS processes (single server only)
#
WAS_procs() {
  # From the ps_long listing keep the lines mentioning "config=$1" and
  # "java" (both case-insensitive) and print their first
  # space-separated field.
  ps_long \
    | grep -i "config=$1" \
    | grep -i java \
    | cut -f1 -d' '
}
#
# methods: What methods/operations do we support?
#
WAS_methods() {
  # List the supported operations, one per line. "monitor" is listed
  # only when the binary named by $WGET is available.
  printf '%s\n' \
    start \
    stop \
    status \
    methods \
    validate-all \
    meta-data \
    usage
  if have_binary $WGET; then
    echo monitor
  fi
}
#
# Return WAS status (silently)
#
WAS_status() {
  # Succeeds unless CheckWASPortsInUse reports exactly "0" for the ports
  # of $1. Produces no output.
  WASPorts=$(GetWASPorts $1)
  PortsInUse=$(CheckWASPortsInUse $WASPorts)
  [ "$PortsInUse" != 0 ]
}
#
# Report on WAS status to stdout...
#
WAS_report_status() {
  # Log (at debug level) whether the server $1 is stopped, running, or
  # running without listening on all of its ports.
  # Returns $OCF_NOT_RUNNING when no port is in use, else $OCF_SUCCESS.
  WASPorts=$(GetWASPorts $1)
  PortCount=$(echo $WASPorts | wc -w)
  # Re-echo unquoted to drop any padding around the number.
  PortCount=$(echo $PortCount)
  PortsInUse=$(CheckWASPortsInUse $WASPorts)

  if [ "$PortsInUse" = 0 ]; then
    ocf_log debug "WAS: server $1 is stopped."
    return $OCF_NOT_RUNNING
  fi

  pids=$(WASPIDs $WASPorts)
  if [ $PortsInUse -ge $PortCount ]; then
    ocf_log debug "WAS: server $1 is running (pid" $pids "et al)."
  else
    ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports."
  fi
  return $OCF_SUCCESS
}
#
# Monitor WAS - does it really seem to be working?
#
# For this we invoke the snoop applet via wget.
#
# This is actually faster than WAS_status above...
#
WAS_monitor() {
  # Fetch /servlet/snoop from the snoop port on localhost with $WGET and
  # check that the response echoes a Wget user-agent.
  # Returns 0 on success, 1 if no temp file could be created, otherwise
  # $OCF_ERR_GENERIC.

  # Remove the temp file when the script exits.
  trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0
  tmpfile=$(maketempfile) || return 1

  SnoopPort=$(GetWASSnoopPort $1)
  output=$($WGET -nv -O$tmpfile http://localhost:$SnoopPort/servlet/snoop 2>&1)
  rc=$?

  if [ $rc -ne 0 ]; then
    ocf_log "err" "WAS: $1: wget failure: $output"
    rc=$OCF_ERR_GENERIC
  elif ! grep -i 'user-agent.*Wget' $tmpfile >/dev/null; then
    ocf_log "err" "WAS: $1: no user-agent from snoop application"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}
#
# Start WAS instance
#
WAS_start() {
  # Launch WAS and wait (up to $STARTTIME iterations) for it to come up.
  # Uses "startServer.sh -configFile $1" when that script is executable
  # in $WASBIN, otherwise "startupServer.sh" without arguments.
  # Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when the launch command
  # fails or the server does not come up.
  cmd="$WASBIN/startupServer.sh"
  if [ -x $WASBIN/startServer.sh ]; then
    cmd="$WASBIN/startServer.sh -configFile $1"
  fi

  ocf_run $cmd || return $OCF_ERR_GENERIC

  if WAS_wait_4_start $STARTTIME "$@"; then
    return $OCF_SUCCESS
  fi
  ocf_log "err" "WAS server $1 did not start correctly"
  return $OCF_ERR_GENERIC
}
#
# Wait for WAS to actually start up.
#
# It seems to take between 30 and 60 seconds for it to
# start up on a trivial WAS instance.
#
WAS_wait_4_start() {
  # Poll WAS_status once per second, at most $1 times; the remaining
  # arguments are passed on to WAS_status.
  # Returns $OCF_SUCCESS as soon as WAS_status succeeds, otherwise the
  # result of one final WAS_status call.
  max=$1
  shift
  retries=0
  while [ $retries -lt $max ]; do
    WAS_status "$@" && return $OCF_SUCCESS
    sleep 1
    retries=$((retries + 1))
  done
  WAS_status "$@"
}
#
# Shut down WAS
#
WAS_stop() {
  # Stop WAS: run "stopServer.sh -configFile $1" when that script is
  # executable in $WASBIN, otherwise kill the processes that hold the
  # WAS ports. The result is decided by WAS_status afterwards, because
  # the stop scripts don't return good return codes.
  # Returns $OCF_SUCCESS when WAS_status no longer succeeds, else
  # $OCF_ERR_GENERIC.
  if [ -x $WASBIN/stopServer.sh ]; then
    ocf_run $WASBIN/stopServer.sh -configFile $1
  else
    WASPorts=$(GetWASPorts $1)
    kill $(WASPIDs $WASPorts)
  fi

  if ! WAS_status $1; then
    return $OCF_SUCCESS
  fi
  ocf_log "err" "WAS: $1 did not stop correctly"
  return $OCF_ERR_GENERIC
}
#
# Check if the port is valid
#
CheckPort() {
  # Succeeds when $1 passes ocf_is_decimal and is greater than zero.
  ocf_is_decimal "$1" || return
  [ $1 -gt 0 ]
}
WAS_validate_all() {
  # Validate the configured instance parameter, read from the global $arg.
  #   startServer.sh present   -> $arg must be an existing config file
  #                               that yields at least one valid port
  #   startupServer.sh present -> $arg must itself be a valid port number
  #   neither present          -> nothing can be checked; assume OK
  # Exits with $OCF_ERR_ARGS / $OCF_ERR_CONFIGURED on failure, returns
  # $OCF_SUCCESS otherwise.
  local WASPorts
  local port
  local have_valid_port=false

  if [ -x $WASBIN/startServer.sh ]; then
    # $arg should be config file
    if [ ! -f "$arg" ]; then
      ocf_log err "Configuration file [$arg] does not exist"
      exit $OCF_ERR_ARGS
    fi

    # $arg should specify a valid port number at the very least
    WASPorts=$(GetWASPorts $arg)
    if [ -z "$WASPorts" ]; then
      ocf_log err "No port number specified in configuration file [$arg]"
      exit $OCF_ERR_CONFIGURED
    fi

    for port in $WASPorts; do
      if CheckPort $port; then
        have_valid_port=true
        break
      fi
    done
    if [ "false" = "$have_valid_port" ]; then
      ocf_log err "No valid port number specified in configuration file [$arg]"
      exit $OCF_ERR_CONFIGURED
    fi
  elif [ -x $WASBIN/startupServer.sh ]; then
    # $arg should be port number; reject it only when it is NOT valid
    # (the test used to be inverted and rejected valid ports).
    if ! CheckPort "$arg"; then
      ocf_log err "Port number is required but [$arg] is not valid port number"
      exit $OCF_ERR_ARGS
    fi
  else
    # Do not know how to validate_all
    ocf_log warn "Do not know how to validate-all, assuming validation OK"
    return $OCF_SUCCESS
  fi

  return $OCF_SUCCESS
}
#
# 'main' starts here...
#
# Exactly one argument (the operation) is expected.
if [ $# -ne 1 ]; then
  usage
  exit $OCF_ERR_ARGS
fi

#
# Pick the instance argument: OCF_RESKEY_config wins over
# OCF_RESKEY_port; when neither is set, fall back to the default config
# file if it exists, else to the default port.
#
if [ -z $OCF_RESKEY_config ] && [ -z $OCF_RESKEY_port ]; then
  if [ -f $DEFAULT ]; then
    arg=$DEFAULT
  else
    arg=$DEFAULT_WASPORTS
  fi
elif [ ! -z $OCF_RESKEY_config ]; then
  arg=$OCF_RESKEY_config
else
  arg=$OCF_RESKEY_port
fi

# Anything that is not an existing file must at least look like a port.
if [ ! -f $arg ]; then
  case $arg in
    [0-9]*)
      ;; # ignore port numbers...
    *)
      ocf_log "err" "WAS configuration file $arg does not exist!"
      usage
      exit $OCF_ERR_ARGS
      ;;
  esac
fi

# What kind of method was invoked?
case "$1" in
  meta-data)    meta_data;              exit $OCF_SUCCESS ;;
  start)        WAS_start $arg;         exit $? ;;
  stop)         WAS_stop $arg;          exit $? ;;
  status)       WAS_report_status $arg; exit $? ;;
  monitor)      WAS_monitor $arg;       exit $? ;;
  validate-all) WAS_validate_all $arg;  exit $? ;;
  methods)      WAS_methods;            exit $? ;;
  usage)        usage;                  exit $OCF_SUCCESS ;;
  *)            usage;                  exit $OCF_ERR_UNIMPLEMENTED ;;
esac
diff --git a/heartbeat/WAS6 b/heartbeat/WAS6
index 560ae602f..b3e7e2245 100755
--- a/heartbeat/WAS6
+++ b/heartbeat/WAS6
@@ -1,546 +1,546 @@
#!/bin/sh
# WAS6
#
# Description: Manages a Websphere Application Server as an HA resource
#
#
# Author: Ru Xiang Min
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2006 International Business Machines China, Ltd., Inc.
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 WAS::/opt/IBM/WebSphere/AppServer/profiles/default/config/cells/Node01Cell/nodes/Node01/serverindex.xml
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_profile
# (WAS profile name, used for the single server edition of WAS6)
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Installation directory: /opt/IBM/WebSphere/AppServer, or the /usr
# location when that directory does not exist.
WAS_DIR=/opt/IBM/WebSphere/AppServer
[ -d $WAS_DIR ] || WAS_DIR=/usr/IBM/WebSphere/AppServer
STARTTIME=300 # 5 minutes
DEFAULT_WASPORTS="9080"
#
# Derived path and the default profile name.
#
WAS_BIN=$WAS_DIR/bin
DEFAULT=default
#
# Print usage message
#
usage() {
# Print the help text. The method list comes from WAS_methods (minus
# "methods" itself), joined with "|".
methods=$(echo $(WAS_methods | grep -v methods) | tr ' ' '|')
cat <<-END
usage: $0 ($methods)
For the single server edition of WAS6, you have to set the following
enviroment virable:
OCF_RESKEY_profile
(WAS profile name)
$0 manages a Websphere Application Server 6(WAS6) as an HA resource
The 'start' operation starts WAS6.
The 'stop' operation stops WAS6.
The 'status' operation reports whether WAS6 is running
The 'monitor' operation reports whether the WAS6 seems to be working
(httpd also needs to be working for this case)
The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_profileName ) is valid
The 'methods' operation reports on the methods $0 supports
This is known to work with the Single Server edition of Websphere.
The default profile name for the single server edition is:
$DEFAULT
The start and stop operations must be run as root.
The status operation will report a pid of "-" for the
WAS root process using unless it is run as root.
If you don't have xmllint on your system, parsing of WAS
configuration files is very primitive.
We run servlet/snoop on the seventh transport port listed in
the config file for the "monitor" operation.
END
}
meta_data() {
# Emits the agent description for the "meta-data" action.
# NOTE(review): the text between "cat" and "END" looks like the body of a
# here-document whose markup is missing in this view; confirm against the
# original file before editing anything in this function.
cat <1.0
Resource script for WAS6. It manages a Websphere Application Server (WAS6) as
an HA resource.
Manages a WebSphere Application Server 6 instance
The WAS profile name.
profile name
END
}
#
# Reformat the XML document in a sort of canonical form
# if we can. If we don't have xmllint, we just cat it out
# and hope for the best ;-)
#
xmlcat() {
  # Print every file given as argument through the command cached in
  # $XMLcat. On first use $XMLcat becomes "<xmllint> --recover --format"
  # when an executable xmllint is found, and plain "cat" otherwise.
  if [ -z "$XMLcat" ]; then
    XMLcat=$(which xmllint 2>/dev/null)
    if [ -n "${XMLcat}" ] && [ -x "${XMLcat}" ]; then
      XMLcat="$XMLcat --recover --format"
    else
      XMLcat=cat
    fi
  fi

  for j; do
    ${XMLcat} "$j"
  done
}
#
#This is a bit skanky, but it works anyway...
#
# It's not really skanky if we can find xmllint on the system, because it
# reformats tags so they are all on one line, which is all we need...
#
#
# Get the numbers of the ports WAS should be listening on...
#
# If we don't have xmllint around, then the applicationserver and the
# port= specification have to be on the same line in the XML config file.
#
GetWASPorts() {
  # Print the WAS port numbers, one per line.
  # When $1 starts with a digit it is taken as a comma-separated port
  # list; otherwise the ports are read from the profile's
  # serverindex.xml (located via WAS_DIR, WAS_PROFILE_NAME, WAS_CELL and
  # WAS_NODE).
  case $1 in
    [0-9]*)
      echo "$1" | tr ',' '\n'
      return
      ;;
  esac

  # Keep the lines carrying a port= attribute, then
  #   1. delete everything up to port=, an optional quote and optional
  #      white space;
  #   2. throw away everything from the first non-digit onwards.
  # This should leave the port number all by itself.
  xmlcat ${WAS_DIR}/profiles/${WAS_PROFILE_NAME}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml \
    | grep port= \
    | sed 's%.*port= *"* *%%; s%[^0-9][^0-9]*.*$%%'
}
#
# We assume that the seventh port listed in the serverindex.xml
# is the one we should run servlet/snoop on.
#
GetWASSnoopPort() {
  # The snoop port is the seventh port reported by GetWASPorts.
  GetWASPorts "$@" | awk 'NR == 7'
}
#
# Return information on the processname/id for the WAS ports
#
# pid/java is the expected output. Several lines, one per port...
#
#
WASPortInfo() {
  # Print the "pid/program" column of "netstat -ltnp" for every listening
  # TCP socket bound to one of the given ports (one line per socket).
  # Arguments: port numbers (whitespace-separated words are accepted).
  # Prints nothing when no port is given.
  pat=""
  once=yes
  PortCount=0
  for j in $*
  do
    case $pat in
      "") pat="$j";;
      *) pat="$pat|$j";;
    esac
    PortCount=$((PortCount + 1))
  done

  # Without ports the alternation would be empty and match every
  # listening socket, so callers would see unrelated processes.
  [ -n "$pat" ] || return 0

  # Anchor the port on the ":" that precedes it in the local address
  # column, so that e.g. port 80 does not match a listener on 8080.
  netstat -ltnp 2>/dev/null | grep -E -i ":($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
}
#
# Return the number of WAS ports which are open
#
CheckWASPortsInUse() {
  # Print how many lines WASPortInfo reports for the given ports.
  count=$(WASPortInfo "$@" | wc -l)
  # Unquoted on purpose: drops any padding wc puts around the number.
  echo $count
}
#
# Return the pid(s) of the processes that have WAS ports open
#
WASPIDs() {
  # De-duplicate the "pid/program" entries from WASPortInfo and print
  # only the part before the first "/".
  WASPortInfo "$@" \
    | sort -u \
    | cut -d/ -f1
}
#
# The version of ps that returns all processes and their (long) args
# It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
# Thin wrapper so the ps invocation lives in one place: runs "ps axww".
ps axww
}
#
# The total set of WAS processes (single server only)
#
WAS_procs() {
  # From the ps_long listing keep the lines mentioning "config=$1" and
  # "java" (both case-insensitive) and print their first
  # space-separated field.
  ps_long \
    | grep -i "config=$1" \
    | grep -i java \
    | cut -f1 -d' '
}
#
# methods: What methods/operations do we support?
#
WAS_methods() {
  # List the supported operations, one per line. " monitor" is added
  # only when the binary named by $WGET is available.
  printf '%s\n' \
    start \
    stop \
    status \
    methods \
    validate-all \
    meta-data \
    usage
  if have_binary $WGET; then
    echo " monitor"
  fi
}
#
# Return WAS status (silently)
#
WAS_status() {
  # Succeeds unless CheckWASPortsInUse reports exactly "0" for the ports
  # of $1. Produces no output.
  WASPorts=$(GetWASPorts $1)
  PortsInUse=$(CheckWASPortsInUse $WASPorts)
  [ "$PortsInUse" != 0 ]
}
#
# Report on WAS status to stdout...
#
WAS_report_status() {
  # Log (at debug level) whether the server $1 is stopped, running, or
  # running without listening on all of its ports.
  # Returns $OCF_NOT_RUNNING when no port is in use, else $OCF_SUCCESS.
  WASPorts=$(GetWASPorts $1)
  PortCount=$(echo $WASPorts | wc -w)
  # Re-echo unquoted to drop any padding around the number.
  PortCount=$(echo $PortCount)
  PortsInUse=$(CheckWASPortsInUse $WASPorts)

  if [ "$PortsInUse" = 0 ]; then
    ocf_log debug "WAS: server $1 is stopped."
    return $OCF_NOT_RUNNING
  fi

  pids=$(WASPIDs $WASPorts)
  if [ $PortsInUse -ge $PortCount ]; then
    ocf_log debug "WAS: server $1 is running (pid" $pids "et al)."
  else
    ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports."
  fi
  return $OCF_SUCCESS
}
#
# Monitor WAS - does it really seem to be working?
#
# For this we invoke the snoop applet via wget.
#
# This is actually faster than WAS_status above...
#
WAS_monitor() {
  # Fetch /snoop from the snoop port on localhost with $WGET and check
  # that the response echoes a Wget user-agent.
  # Returns 0 on success and $OCF_ERR_GENERIC on failure; exits with 1
  # when no temp file can be created.

  # Remove the temp file when the shell exits. The closing "]" of the
  # test was missing, which made the trap print a "[" syntax error on
  # every exit and call rmtempfile even with an empty name.
  trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0
  tmpfile=`maketempfile` || exit 1

  SnoopPort=`GetWASSnoopPort $1`
  output=`$WGET -nv -O"$tmpfile" http://localhost:$SnoopPort/snoop 2>&1`
  rc=$?
  if
    [ $rc -eq 0 ]
  then
    if
      grep -i 'user-agent.*Wget' "$tmpfile" >/dev/null
    then
      : OK
    else
      ocf_log "err" "WAS: $1: no user-agent from snoop application"
      rc=$OCF_ERR_GENERIC
    fi
  else
    ocf_log "err" "WAS: $1: wget failure: $output"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}
#
# Start WAS instance
#
WAS_start() {
# Launch Arguments:
# -nowait
# -quiet
# -logfile
# -replacelog
# -trace
# -script [