Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
This document is not UTF8. It was detected as Shift JIS and converted to UTF8 for display.
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 61b191064..6d6aad91f 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -1,1131 +1,1131 @@
#!/bin/sh
#
# Support: linux-ha@lists.linux-ha.org
# License: GNU General Public License (GPL)
#
# Filesystem
# Description: Manages a Filesystem on a shared storage medium.
# Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
# Original Release: 25 Oct 2000
#
# usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_device
# OCF_RESKEY_directory
# OCF_RESKEY_fstype
# OCF_RESKEY_options
# OCF_RESKEY_statusfile_prefix
# OCF_RESKEY_run_fsck
# OCF_RESKEY_fast_stop
# OCF_RESKEY_force_clones
#
#OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0
# Or a -U or -L option for mount, or an NFS mount specification
#OCF_RESKEY_directory : the mount point for the filesystem
#OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2
#OCF_RESKEY_options : options to be given to the mount command via -o
#OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring
#OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no
#OCF_RESKEY_fast_stop : fast stop: yes(default)/no
#OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts
# for each brick in a glusterfs setup
#
#
# This assumes you want to manage a filesystem on a shared (SCSI) bus,
# on a replicated device (such as DRBD), or a network filesystem (such
# as NFS or Samba).
#
# Do not put this filesystem in /etc/fstab. This script manages all of
# that for you.
#
# NOTE: If 2 or more nodes mount the same file system read-write, and
# that file system is not designed for that specific purpose
# (such as GFS or OCFS2), and is not a network file system like
# NFS or Samba, then the filesystem is going to become
# corrupted.
#
# As a result, you should use this together with the stonith
# option and redundant, independent communications paths.
#
# If you don't do this, don't blame us when you scramble your
# disk.
#######################################################################
# Initialization:
# Load the shared OCF shell helpers (ocf_log, check_binary, OCF_* codes...).
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# Defaults
DFLT_STATUSDIR=".Filesystem_status/"
# meta_data() prints this value as the default of the "force_clones"
# parameter; when it is undefined the XML carries an empty default="".
OCF_RESKEY_force_clones_default="false"

# Variables used by multiple methods
HOSTOS=$(uname)

# The status file lives in an extra directory below the mount point unless
# the user supplied their own prefix.
prefix=${OCF_RESKEY_statusfile_prefix}
: "${prefix:=$DFLT_STATUSDIR}"

# The file name is <instance>[_<clone number>]_<node name>.
suffix="${OCF_RESOURCE_INSTANCE}"
if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then
  suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone"
fi
suffix="${suffix}_$(uname -n)"
STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix
#######################################################################
# Print the one-line synopsis of the supported actions on stdout.
usage() {
  printf 'usage: %s {start|stop|status|monitor|validate-all|meta-data}\n' "$0"
}
# Print the OCF resource-agent metadata (an XML document) on stdout.
# The here-document delimiter is unquoted, so $DFLT_STATUSDIR and
# $OCF_RESKEY_force_clones_default are expanded when the function runs.
# NOTE(review): the "force_clones" longdesc below contains line pairs
# starting with "-" and "+" that look like leftover diff markers -- confirm
# which wording is intended before shipping this text.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Filesystem">
<version>1.1</version>
<longdesc lang="en">
Resource script for Filesystem. It manages a Filesystem on a
shared storage medium.
The standard monitor operation of depth 0 (also known as probe)
checks if the filesystem is mounted. If you want deeper tests,
set OCF_CHECK_LEVEL to one of the following values:
10: read first 16 blocks of the device (raw read)
This doesn't exercise the filesystem at all, but the device on
which the filesystem lives. This is noop for non-block devices
such as NFS, SMBFS, or bind mounts.
20: test if a status file can be written and read
The status file must be writable by root. This is not always the
case with an NFS mount, as NFS exports usually have the
"root_squash" option set. In such a setup, you must either use
read-only monitoring (depth=10), export with "no_root_squash" on
your NFS server, or grant world write permissions on the
directory where the status file is to be placed.
</longdesc>
<shortdesc lang="en">Manages filesystem mounts</shortdesc>
<parameters>
<parameter name="device" required="1">
<longdesc lang="en">
The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="directory" required="1">
<longdesc lang="en">
The mount point for the filesystem.
</longdesc>
<shortdesc lang="en">mount point</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="fstype" required="1">
<longdesc lang="en">
The type of filesystem to be mounted.
</longdesc>
<shortdesc lang="en">filesystem type</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="options">
<longdesc lang="en">
Any extra options to be given as -o options to mount.
For bind mounts, add "bind" here and set fstype to "none".
We will do the right thing for options such as "bind,ro".
</longdesc>
<shortdesc lang="en">options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="statusfile_prefix">
<longdesc lang="en">
The prefix to be used for a status file for resource monitoring
with depth 20. If you don't specify this parameter, all status
files will be created in a separate directory.
</longdesc>
<shortdesc lang="en">status file prefix</shortdesc>
<content type="string" default="$DFLT_STATUSDIR" />
</parameter>
<parameter name="run_fsck">
<longdesc lang="en">
Specify how to decide whether to run fsck or not.
"auto" : decide to run fsck depending on the fstype(default)
"force" : always run fsck regardless of the fstype
"no" : do not run fsck ever.
</longdesc>
<shortdesc lang="en">run_fsck</shortdesc>
<content type="string" default="auto" />
</parameter>
<parameter name="fast_stop">
<longdesc lang="en">
Normally, we expect no users of the filesystem and the stop
operation to finish quickly. If you cannot control the filesystem
users easily and want to prevent the stop action from failing,
then set this parameter to "no" and add an appropriate timeout
for the stop operation.
</longdesc>
<shortdesc lang="en">fast stop</shortdesc>
<content type="boolean" default="yes" />
</parameter>
<parameter name="force_clones">
<longdesc lang="en">
-The usage of a clone setup for local filesystems is forbidden
+The use of a clone setup for local filesystems is forbidden
by default. For special setups like glusterfs, cloning a mount
-of a local device with a filesystem like ext4 or xfs, independently
-on several nodes is a valid use-case.
+of a local device with a filesystem like ext4 or xfs independently
+on several nodes is a valid use case.
Only set this to "true" if you know what you are doing!
</longdesc>
<shortdesc lang="en">allow running as a clone, regardless of filesystem type</shortdesc>
<content type="boolean" default="$OCF_RESKEY_force_clones_default" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="notify" timeout="60" />
<action name="monitor" depth="0" timeout="40" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#
# Make sure the kernel does the right thing with the FS buffers
# This function should be called after unmounting and before mounting
# It may not be necessary in 2.4 and later kernels, but it shouldn't hurt
# anything either...
#
# It's really a bug that you have to do this at all...
#
# Ask the kernel to flush the buffers of device $1.
# Does nothing (and returns 0) unless the $BLOCKDEV utility is available
# and the global $blockdevice is "yes"; otherwise the exit status is the
# one reported by "$BLOCKDEV --flushbufs".
flushbufs() {
  have_binary $BLOCKDEV || return 0
  [ "$blockdevice" = "yes" ] || return 0
  $BLOCKDEV --flushbufs $1
}
# Take advantage of /etc/mtab if present, use portable mount command
# otherwise. Normalize format to "dev mountpoint fstype".
# Succeeds when the global mount option string $options contains the
# whole word "bind".
is_bind_mount() {
  printf '%s\n' "$options" |
    grep -w bind >/dev/null 2>&1
}
# Print the current mounts, one per line, as "device mountpoint fstype".
# /proc/mounts is used when it exists and this is not a bind mount, then a
# readable /etc/mtab, and finally the output of the $MOUNT command.
list_mounts() {
  local mount_table=""

  if [ -e /proc/mounts ] && ! is_bind_mount; then
    mount_table=/proc/mounts
  elif [ -f /etc/mtab ] && [ -r /etc/mtab ]; then
    mount_table=/etc/mtab
  fi

  if [ -z "$mount_table" ]; then
    # No table file available: parse "dev on dir type fstype ..." lines.
    $MOUNT | cut -d' ' -f1,3,5
    return
  fi
  cut -d' ' -f1,2,3 < $mount_table
}
# Try to find out which real device is mounted on $MOUNTPOINT and whether
# it is a block device.
# Globals: blockdevice (read/written), FSTYPE, MOUNTPOINT (read),
#          DEVICE (overwritten with the device found in the mount list).
# Nothing is done when $blockdevice is already "yes" or when the
# filesystem type is one of the listed non-block types.
determine_blockdevice() {
  # Quoted so that an unset/empty value is a plain "no" instead of a
  # syntax error from test(1).
  if [ "$blockdevice" = "yes" ]; then
    return
  fi

  # Get the current real device name, if possible.
  # (the configured device name could be a -L or -U mount option...)
  case "$FSTYPE" in
    nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|none) ;;
    *)
      # -F: the mount point is a literal path, not a regular expression
      # (otherwise "/mnt/a.b" would also select "/mnt/axb").
      DEVICE=$(list_mounts | grep -F -- " $MOUNTPOINT " | cut -d' ' -f1)
      if [ -b "$DEVICE" ]; then
        blockdevice=yes
      fi
      ;;
  esac
}
# Lists all filesystems potentially mounted under a given path,
# excluding the path itself.
# Print the mount points located below directory $1 (the directory itself
# is not listed), in reverse sort order so that deeper paths come first.
list_submounts() {
  local parent=$1

  list_mounts |
    grep " $parent/" |
    cut -d' ' -f2 |
    sort -r
}
# Remove the cached OCFS2 UUID file named by $_OCFS2_uuid_cache, if any.
# The path is quoted because it is derived from $DEVICE, which may contain
# blanks (e.g. a "-L label" mount specification).
ocfs2_del_cache() {
  if [ -e "$_OCFS2_uuid_cache" ]; then
    rm -f -- "$_OCFS2_uuid_cache"
  fi
}
# Remove this filesystem's membership directory ($OCFS2_FS_ROOT) and the
# cached UUID after an unmount.
#
# We will never see the post-stop notification: we are gone by then, so the
# membership has to be removed here. This is done regardless of whether the
# filesystem was still mounted, because it may have been unmounted by hand
# without the membership directory having been cleaned up.
ocfs2_cleanup() {
  if [ ! -d "$OCFS2_FS_ROOT" ]; then
    ocf_log info "$OCFS2_FS_ROOT: Filesystem membership already gone."
  else
    ocf_log info "$OCFS2_FS_ROOT: Removing membership directory."
    # Quoted, and guarded with :? so that an empty value can never turn
    # this into "rm -rf /".
    rm -rf -- "${OCFS2_FS_ROOT:?}/"
  fi
  ocfs2_del_cache
}
# Print the UUID that "mounted.ocfs2 -d" reports for $DEVICE: third column
# of the last output line, with dashes removed and letters upper-cased.
ocfs2_fetch_uuid() {
  mounted.ocfs2 -d $DEVICE |
    tail -1 |
    awk '{print $3}' |
    tr -d -- - |
    tr '[a-z]' '[A-Z]'
}
# Determine the OCFS2 UUID of $DEVICE and store it in $OCFS2_UUID.
# Globals: HA_RSCTMP, DEVICE, OP (read); _OCFS2_uuid_cache, OCFS2_UUID
#          (written).
# For every action except "start" an existing cache file is trusted.
# Otherwise the UUID is fetched from the device and cached. A missing UUID
# is tolerated for "stop" (OCFS2_UUID becomes "UUID_NOT_SET"); for any
# other action the script exits with $OCF_ERR_GENERIC.
ocfs2_set_uuid() {
  # The cache name embeds $DEVICE and may therefore contain blanks; every
  # later use of the path has to be quoted.
  _OCFS2_uuid_cache="$HA_RSCTMP/Filesystem.ocfs2_uuid.$(echo $DEVICE|tr / .)"

  if [ "$OP" != "start" ] && [ -e "$_OCFS2_uuid_cache" ]; then
    # Trust the cache.
    OCFS2_UUID=$(cat "$_OCFS2_uuid_cache" 2>/dev/null)
    return 0
  fi

  OCFS2_UUID=$(ocfs2_fetch_uuid)
  # "UUID" is the column header, i.e. no device line was printed.
  if [ -n "$OCFS2_UUID" ] && [ "$OCFS2_UUID" != "UUID" ]; then
    # UUID valid:
    echo "$OCFS2_UUID" > "$_OCFS2_uuid_cache"
    return 0
  fi

  # Still no UUID. That is alright for stop, because it very likely means
  # we never got started.
  if [ "$OP" = "stop" ]; then
    ocf_log warn "$DEVICE: No UUID; assuming never started!"
    OCFS2_UUID="UUID_NOT_SET"
    return 0
  fi

  # Everything else - wrong:
  ocf_log err "$DEVICE: Could not determine ocfs2 UUID for device."
  exit $OCF_ERR_GENERIC
}
# Check and initialize the OCFS2 specific variables.
#
# Detects whether the special/legacy hooks that integrate OCFS2 with
# user-space clustering on SLES10 have to be activated; newer kernels
# (>= 2.6.28) with OCFS2+openAIS+Pacemaker do not need them.
#
# Globals written: OCFS2_SLES10, OCFS2_CONFIGFS, OCFS2_CLUSTER,
#                  OCFS2_CLUSTER_ROOT, OCFS2_FS_ROOT (plus whatever
#                  ocfs2_set_uuid sets).
# Returns 0 right away for the "cman" and "openais" cluster types.
# Exits the script with an OCF error code when the environment is unusable.
ocfs2_init()
{
  local candidate n

  OCFS2_SLES10=""
  if [ "X$HA_cluster_type" = "Xcman" ]; then
    return 0
  elif [ "X$HA_cluster_type" != "Xopenais" ]; then
    if grep -q "SUSE Linux Enterprise Server 10" /etc/SuSE-release >/dev/null 2>&1 ; then
      OCFS2_SLES10="yes"
      ocf_log info "$DEVICE: Enabling SLES10 compatibility mode for OCFS2."
    else
      ocf_log err "$DEVICE: ocfs2 is not compatible with your environment."
      exit $OCF_ERR_CONFIGURED
    fi
  else
    # Explicit 0: a bare "return" here would hand back the status of the
    # failed "elif" test above.
    return 0
  fi

  if [ "$OP" != "stop" ]; then
    if [ -z "$OCF_RESKEY_CRM_meta_clone" ]; then
      ocf_log err "ocfs2 must be run as a clone."
      exit $OCF_ERR_GENERIC
    fi
  fi

  if [ "$blockdevice" = "no" ]; then
    ocf_log err "$DEVICE: ocfs2 needs a block device instead."
    exit $OCF_ERR_GENERIC
  fi

  # First existing directory wins: user setting, then the usual locations.
  for candidate in "$OCF_RESKEY_ocfs2_configfs" /sys/kernel/config/cluster /configfs/cluster ; do
    if [ -n "$candidate" ] && [ -d "$candidate" ]; then
      OCFS2_CONFIGFS="$candidate"
      break
    fi
  done
  if [ ! -d "$OCFS2_CONFIGFS" ]; then
    ocf_log err "ocfs2 needs configfs mounted."
    exit $OCF_ERR_GENERIC
  fi

  ocfs2_set_uuid

  if [ -n "$OCF_RESKEY_ocfs2_cluster" ]; then
    OCFS2_CLUSTER=$(echo $OCF_RESKEY_ocfs2_cluster)
  else
    # One name per line: without the "\n" several cluster names would be
    # glued into a single word and the "several clusters" check below
    # could never trigger.
    OCFS2_CLUSTER=$(find "$OCFS2_CONFIGFS" -maxdepth 1 -mindepth 1 -type d -printf '%f\n' 2>/dev/null)
    # Count the names via the function's positional parameters.
    set -- $OCFS2_CLUSTER
    n="$#"
    if [ "$n" -gt 1 ]; then
      ocf_log err "$OCFS2_CLUSTER: several clusters found."
      exit $OCF_ERR_GENERIC
    fi
    if [ "$n" -eq 0 ]; then
      ocf_log err "$OCFS2_CONFIGFS: no clusters found."
      exit $OCF_ERR_GENERIC
    fi
  fi

  OCFS2_CLUSTER_ROOT="$OCFS2_CONFIGFS/$OCFS2_CLUSTER"
  if [ ! -d "$OCFS2_CLUSTER_ROOT" ]; then
    ocf_log err "$OCFS2_CLUSTER: Cluster doesn't exist. Maybe o2cb hasn't been run?"
    exit $OCF_ERR_GENERIC
  fi

  OCFS2_FS_ROOT=$OCFS2_CLUSTER_ROOT/heartbeat/$OCFS2_UUID
}
# kernels < 2.6.26 can't handle bind remounts
# Warn when a read-only bind mount is requested on a 2.6 kernel older than
# 2.6.26 (those cannot handle bind remounts).
# Returns non-zero when "ro" is not among $options or when the kernel is
# fine; otherwise returns the status of the ocf_log call.
bind_kernel_check() {
  if ! echo "$options" | grep -w ro >/dev/null 2>&1; then
    return 1
  fi

  # awk exits 1 only for release strings of the form 2.6.<n> with n < 26.
  if uname -r | awk -F. '
    $1==2 && $2==6 {
      sub("[^0-9].*","",$3);
      if ($3<26)
        exit(1);
    }'
  then
    return 1
  fi

  ocf_log warn "kernel `uname -r` cannot handle read only bind mounts"
}
# Second step of a bind mount that carries extra options: remount
# $MOUNTPOINT with "bind" replaced by "remount" in $options.
# Plain "-o bind" mounts and non-bind mounts need nothing and return 0.
bind_mount() {
  is_bind_mount || return 0
  [ "$options" = "-o bind" ] && return 0

  bind_kernel_check
  bind_opts=$(echo $options | sed 's/bind/remount/')
  $MOUNT $bind_opts $MOUNTPOINT
}
# Succeeds when $1 occurs as a whole word in $OCF_RESKEY_options.
is_option() {
  local wanted=$1

  echo $OCF_RESKEY_options |
    grep -w "$wanted" >/dev/null 2>&1
}
# Decide whether fsck has to run before mounting.
# $OCF_RESKEY_run_fsck: "force" -> always, "no" -> never, "auto" or empty
# -> depends on $FSTYPE. Any other value is reported, replaced by "auto"
# and then handled like "auto".
# Returns 0 when fsck is needed, 1 otherwise.
is_fsck_needed() {
  case $OCF_RESKEY_run_fsck in
    force) return 0 ;;
    no) return 1 ;;
    ""|auto) ;;
    *)
      ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'"
      OCF_RESKEY_run_fsck="auto"
      ;;
  esac

  # Automatic mode: the types listed here are never checked.
  case $FSTYPE in
    ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs)
      return 1
      ;;
  esac
  return 0
}
#
# START: Start up the filesystem
#
# Mount $DEVICE on $MOUNTPOINT.
# Globals read: OCFS2_SLES10, OCFS2_UUID, HOSTOS, FSTYPE, blockdevice,
#               DEVICE, MOUNTPOINT, options, MODPROBE, FSCK, MOUNT.
# Returns $OCF_SUCCESS when the filesystem is (already) mounted,
# $OCF_ERR_INSTALLED when the kernel lacks support for the filesystem type
# and $OCF_ERR_GENERIC when fsck or mount fail. Exits the script with
# $OCF_ERR_INSTALLED when the block device or the mount point is missing.
Filesystem_start()
{
  local fsck_rc

  if [ -n "$OCFS2_SLES10" ]; then
    # "start" now has the notification data available; that we are being
    # started means we did not get the pre-notification, because we were
    # not running, so process the information now first.
    ocf_log info "$OCFS2_UUID: Faking pre-notification on start."
    OCF_RESKEY_CRM_meta_notify_type="pre"
    OCF_RESKEY_CRM_meta_notify_operation="start"
    Filesystem_notify
  fi

  # See if the device is already mounted.
  if Filesystem_status >/dev/null 2>&1 ; then
    ocf_log info "Filesystem $MOUNTPOINT is already mounted."
    return $OCF_SUCCESS
  fi

  if [ "X${HOSTOS}" != "XOpenBSD" ]; then
    if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then
      : No FSTYPE specified, rely on the system has the right file-system support already
    else
      local support="$FSTYPE"
      # support fuse-filesystems (e.g. GlusterFS)
      case $FSTYPE in
        glusterfs) support="fuse";;
      esac
      # Load the module only when the type is not registered yet, then
      # check again.
      grep -w "$support"'$' /proc/filesystems >/dev/null ||
        $MODPROBE $support >/dev/null
      grep -w "$support"'$' /proc/filesystems >/dev/null
      if [ $? -ne 0 ] ; then
        ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems"
        return $OCF_ERR_INSTALLED
      fi
    fi
  fi

  # Check the filesystem & auto repair.
  # NOTE: Some filesystem types don't need this step... Please modify
  # accordingly
  if [ "$blockdevice" = "yes" ]; then
    if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then
      ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
      exit $OCF_ERR_INSTALLED
    fi

    if is_fsck_needed; then
      ocf_log info "Starting filesystem check on $DEVICE"
      # $DEVICE stays unquoted on purpose: it may be a mount option plus
      # argument such as "-U <uuid>" or "-L <label>".
      if [ -z "$FSTYPE" ]; then
        $FSCK -p $DEVICE
        fsck_rc=$?
      else
        $FSCK -t $FSTYPE -p $DEVICE
        fsck_rc=$?
      fi

      # NOTE: if any errors at all are detected, it returns non-zero
      # if the error is >= 4 then there is a big problem
      if [ $fsck_rc -ge 4 ]; then
        ocf_log err "Couldn't successfully fsck filesystem for $DEVICE"
        return $OCF_ERR_GENERIC
      fi
    fi
  fi

  # Quoted so that a mount point containing blanks is created (and later
  # mounted) as one path.
  [ -d "$MOUNTPOINT" ] ||
    ocf_run mkdir -p "$MOUNTPOINT"
  if [ ! -d "$MOUNTPOINT" ] ; then
    ocf_log err "Couldn't find directory [$MOUNTPOINT] to use as a mount point"
    exit $OCF_ERR_INSTALLED
  fi

  flushbufs $DEVICE

  # Mount the filesystem. $options is "-o <list>" and has to be split.
  case "$FSTYPE" in
    none)
      $MOUNT $options $DEVICE "$MOUNTPOINT" &&
        bind_mount
      ;;
    "") $MOUNT $options $DEVICE "$MOUNTPOINT" ;;
    *) $MOUNT -t $FSTYPE $options $DEVICE "$MOUNTPOINT" ;;
  esac

  if [ $? -ne 0 ]; then
    ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT"
    if [ -n "$OCFS2_SLES10" ]; then
      ocfs2_cleanup
    fi
    return $OCF_ERR_GENERIC
  fi

  return $OCF_SUCCESS
}
# end of Filesystem_start
# Print, space separated, the words of list $1 that are not equal to any
# word of list $2 (whole-word comparison, order preserved).
_Filesystem_list_without() {
  local word drop skip kept=""

  for word in $1; do
    skip=no
    for drop in $2; do
      if [ "$word" = "$drop" ]; then
        skip=yes
        break
      fi
    done
    if [ "$skip" = "no" ]; then
      kept="$kept $word"
    fi
  done
  echo $kept
}

# Process clone notifications: the glue that feeds user-space membership
# events to a cluster-aware filesystem. Only OCFS2 in SLES10 compatibility
# mode is supported.
#
# pre-start : set up all nodes which will be active in our membership.
#             (For the resource being started this happens during the
#             actual "start" operation.)
# post-start: ignored.
# pre-stop  : ignored; a node cannot be removed before it has unmounted.
# post-stop : the stopped nodes are removed from the membership.
#
# The base cluster is expected to be active already, i.e. o2cb has been
# started and has populated $OCFS2_CLUSTER_ROOT/node/.
#
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when the local node is not on
# the resulting active list.
Filesystem_notify() {
  if [ -z "$OCFS2_SLES10" ]; then
    # One of the cases which shouldn't occur; it should have been caught
    # much earlier. In theory this is a configuration error, but simply
    # discarding the notification lets users switch from the SLES10 stack
    # to the new one without downtime.
    ocf_log err "$DEVICE: Please only enable notifications for SLES10 OCFS2 mounts."
    return $OCF_SUCCESS
  fi

  local n_type; n_type="$OCF_RESKEY_CRM_meta_notify_type"
  local n_op; n_op="$OCF_RESKEY_CRM_meta_notify_operation"
  local n_active; n_active="$OCF_RESKEY_CRM_meta_notify_active_uname"
  local n_stop; n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname"
  local n_start; n_start="$OCF_RESKEY_CRM_meta_notify_start_uname"
  local n_exists; n_exists=""
  local n_fs entry

  ocf_log info "$OCFS2_UUID: notify: $n_type for $n_op"
  ocf_log info "$OCFS2_UUID: notify active: $n_active"
  ocf_log info "$OCFS2_UUID: notify stop: $n_stop"
  ocf_log info "$OCFS2_UUID: notify start: $n_start"

  case "$n_type" in
    pre)
      case "$n_op" in
        stop)
          ocf_log info "$OCFS2_UUID: ignoring pre-notify for stop."
          return $OCF_SUCCESS
          ;;
        start)
          # These nodes are about to become active. The start list can
          # contain nodes that are already active; drop those before
          # merging. Names are compared as whole words so that removing
          # "node1" does not mangle "node10".
          n_start=$(_Filesystem_list_without "$n_start" "$n_active")
          n_active="$n_active $n_start"
          ;;
      esac
      ;;
    post)
      case "$n_op" in
        stop)
          # Stopped nodes can no longer be considered active.
          n_active=$(_Filesystem_list_without "$n_active" "$n_stop")
          ;;
        start)
          ocf_log info "$OCFS2_UUID: ignoring post-notify for start."
          return $OCF_SUCCESS
          ;;
      esac
      ;;
  esac

  ocf_log info "$OCFS2_UUID: post-processed active: $n_active"

  local n_myself; n_myself=${HA_CURHOST:-$(uname -n | tr '[A-Z]' '[a-z]')}
  ocf_log info "$OCFS2_UUID: I am node $n_myself."

  case " $n_active " in
    *" $n_myself "*) ;;
    *)
      ocf_log err "$OCFS2_UUID: $n_myself (local) not on active list!"
      return $OCF_ERR_GENERIC
      ;;
  esac

  # (1) Drop members that are no longer active, remember the ones we keep.
  if [ -d "$OCFS2_FS_ROOT" ]; then
    for entry in "$OCFS2_FS_ROOT"/* ; do
      # An empty directory leaves the pattern unexpanded; skip it.
      [ -e "$entry" ] || [ -L "$entry" ] || continue
      n_fs=${entry##*/}
      case " $n_active " in
        *" $n_fs "*)
          # Node is present in the membership already.
          n_exists="$n_exists $n_fs"
          ocf_log info "$OCFS2_UUID: Keeping node: $n_fs"
          ;;
        *)
          # Node is in the membership currently, but not on our active
          # list. Must be removed.
          if [ "$n_op" = "start" ]; then
            ocf_log warn "$OCFS2_UUID: Removing nodes on start"
          fi
          ocf_log info "$OCFS2_UUID: Removing dead node: $n_fs"
          if ! rm -f "$entry" ; then
            ocf_log err "$OCFS2_UUID: Removal of $n_fs failed!"
          fi
          ;;
      esac
    done
  else
    ocf_log info "$OCFS2_UUID: heartbeat directory doesn't exist yet, creating."
    mkdir -p "$OCFS2_FS_ROOT"
  fi

  ocf_log info "$OCFS2_UUID: Existing node list: $n_exists"

  # (2) Add the active nodes that are not members yet.
  for entry in $n_active ; do
    case " $n_exists " in
      *" $entry "*)
        ocf_log info "$OCFS2_UUID: Already active: $entry"
        ;;
      *)
        if [ "$n_op" = "stop" ]; then
          ocf_log warn "$OCFS2_UUID: Adding nodes on stop"
        fi
        ocf_log info "$OCFS2_UUID: Activating node: $entry"
        if ! ln -s "$OCFS2_CLUSTER_ROOT/node/$entry" "$OCFS2_FS_ROOT/$entry" ; then
          ocf_log err "$OCFS2_CLUSTER_ROOT/node/$entry: failed to link"
        fi
        ;;
    esac
  done

  return $OCF_SUCCESS
}
# Send signal $2 to every process that is using directory $1.
# The process list comes from fstat on OpenBSD and from "$FUSER -m"
# elsewhere; an empty list is only logged.
signal_processes() {
  local dir=$1
  local sig=$2
  local pids pid

  # fuser returns a non-zero return code if none of the specified files
  # is accessed or in case of a fatal error; only its output is used.
  case "X${HOSTOS}" in
    XOpenBSD) pids=$(fstat | grep $dir | awk '{print $3}') ;;
    *) pids=$($FUSER -m $dir 2>/dev/null) ;;
  esac

  if [ -z "$pids" ]; then
    ocf_log info "No processes on $dir were signalled"
    return
  fi

  for pid in $pids; do
    ocf_log info "sending signal $sig to: `ps -f $pid | tail -1`"
    kill -s $sig $pid
  done
}
# Run "$UMOUNT $umount_force" on mount point $1 once and then consult the
# mount list. Returns $OCF_SUCCESS when $1 is no longer listed and
# $OCF_ERR_GENERIC when it still is.
try_umount() {
  local target=$1

  $UMOUNT $umount_force $target
  if list_mounts | grep -q " $target " >/dev/null 2>&1; then
    return $OCF_ERR_GENERIC
  fi
  ocf_log info "unmounted $target successfully"
  return $OCF_SUCCESS
}
# Unmount $1, signalling the processes that keep it busy.
# $2 is the time budget: half of it is spent retrying with SIGTERM, the
# other half with SIGKILL, one attempt followed by a one second pause per
# round.
# Returns $OCF_SUCCESS as soon as an unmount attempt works and
# $OCF_ERR_GENERIC when all rounds are used up.
fs_stop() {
  local SUB=$1 timeout=$2 sig cnt

  for sig in TERM KILL; do
    cnt=$((timeout/2))
    until [ $cnt -le 0 ]; do
      if try_umount $SUB; then
        return $OCF_SUCCESS
      fi
      ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig"
      signal_processes $SUB $sig
      cnt=$((cnt-1))
      sleep 1
    done
  done
  return $OCF_ERR_GENERIC
}
#
# STOP: Unmount the filesystem
#
# Unmount $MOUNTPOINT together with everything mounted below it.
# Sets the global rc and returns it: $OCF_SUCCESS when the filesystem was
# not mounted, otherwise the status of the last fs_stop call. After a
# successful stop of an ocfs2 filesystem in SLES10 mode the membership is
# cleaned up as well.
Filesystem_stop()
{
  local timeout

  # See if the device is currently mounted
  Filesystem_status >/dev/null 2>&1
  if ! [ $? -eq $OCF_NOT_RUNNING ]; then
    # Wipe the status file, but continue with a warning if removal
    # fails -- the file system might be read only
    if [ $OCF_CHECK_LEVEL -eq 20 ]; then
      rm -f ${STATUSFILE} ||
        ocf_log warn "Failed to remove status file ${STATUSFILE}."
    fi

    # Determine the real blockdevice this is mounted on (if possible)
    # prior to unmounting.
    determine_blockdevice

    # For networked filesystems, there's merit in trying -f:
    case "$FSTYPE" in
      nfs4|nfs|cifs|smbfs) umount_force="-f" ;;
    esac

    # Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
    for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do
      ocf_log info "Trying to unmount $SUB"
      if ocf_is_true "$FAST_STOP"; then
        timeout=6
      else
        # The operation timeout is given in milliseconds.
        timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"}
        timeout=$((timeout/1000))
      fi
      fs_stop $SUB $timeout
      rc=$?
      [ $rc -eq $OCF_SUCCESS ] ||
        ocf_log err "Couldn't unmount $SUB, giving up!"
    done
  else
    # Already unmounted, wonderful.
    rc=$OCF_SUCCESS
  fi

  flushbufs $DEVICE

  if [ $rc -eq $OCF_SUCCESS ] && [ "$FSTYPE" = "ocfs2" ]; then
    ocfs2_init
    if [ -n "$OCFS2_SLES10" ]; then
      ocfs2_cleanup
    fi
  fi

  return $rc
}
# end of Filesystem_stop
#
# STATUS: is the filesystem mounted or not?
#
# Report whether $MOUNTPOINT appears in the mount list.
# Sets the globals rc and msg and returns $OCF_SUCCESS (mounted) or
# $OCF_NOT_RUNNING (not mounted). The message is logged only when the
# requested action ($OP) is "status".
#
# TODO: For ocfs2, or other cluster filesystems, should connectivity to the
# other nodes or the IO path to the storage be checked here? Should
# "monitor" verify that the cached and the on-disk UUID still match?
Filesystem_status()
{
  rc=$OCF_NOT_RUNNING
  msg="$MOUNTPOINT is unmounted (stopped)"
  if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then
    rc=$OCF_SUCCESS
    msg="$MOUNTPOINT is mounted (running)"
  fi

  if [ "$OP" = "status" ]; then
    ocf_log info "$msg"
  fi
  return $rc
}
# end of Filesystem_status
# Note: the read/write tests below will stall in case the
# underlying block device (or in the case of a NAS mount, the
# NAS server) has gone away. In that case, if I/O does not
# return to normal in time, the operation hits its timeout
# and it is up to the CRM to initiate appropriate recovery
# actions (such as fencing the node).
#
# MONITOR 10: read the device
#
# Monitor depth 10: read one 4k block from $DEVICE with O_DIRECT.
# A no-op returning $OCF_SUCCESS when $blockdevice is "no"; returns
# $OCF_ERR_GENERIC and logs what dd printed when the read fails.
Filesystem_monitor_10()
{
  if [ "$blockdevice" = "no" ] ; then
    ocf_log warn "$DEVICE is not a block device, monitor 10 is noop"
    return $OCF_SUCCESS
  fi

  dd_opts="iflag=direct bs=4k count=1"
  # Keep dd's stderr for the log, throw the data itself away.
  if ! err_output=$(dd if=$DEVICE $dd_opts 2>&1 >/dev/null); then
    ocf_log err "Failed to read device $DEVICE"
    ocf_log err "dd said: $err_output"
    return $OCF_ERR_GENERIC
  fi
  return $OCF_SUCCESS
}
#
# MONITOR 20: write and read a status file
#
# Monitor depth 20: write $OCF_RESOURCE_INSTANCE to $STATUSFILE with dd and
# check that the file exists and can be read back.
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC after logging which step
# failed. $STATUSFILE is quoted throughout so that a mount point containing
# blanks does not break the check.
Filesystem_monitor_20()
{
  if [ "$blockdevice" = "no" ] ; then
    # O_DIRECT not supported on cifs/smbfs
    dd_opts="oflag=sync bs=4k conv=fsync,sync"
  else
    # Writing to the device in O_DIRECT mode is imperative
    # to bypass caches.
    dd_opts="oflag=direct,sync bs=4k conv=fsync,sync"
  fi

  status_dir=$(dirname "$STATUSFILE")
  [ -d "$status_dir" ] ||
    mkdir -p "$status_dir"

  # $dd_opts is a list of operands and must be split.
  if ! err_output=$(echo "${OCF_RESOURCE_INSTANCE}" | dd of="${STATUSFILE}" $dd_opts 2>&1); then
    ocf_log err "Failed to write status file ${STATUSFILE}"
    ocf_log err "dd said: $err_output"
    return $OCF_ERR_GENERIC
  fi

  if ! test -f "${STATUSFILE}"; then
    ocf_log err "Cannot stat the status file ${STATUSFILE}"
    return $OCF_ERR_GENERIC
  fi

  if ! cat "${STATUSFILE}" > /dev/null; then
    ocf_log err "Cannot read the status file ${STATUSFILE}"
    return $OCF_ERR_GENERIC
  fi

  return $OCF_SUCCESS
}
# Monitor entry point: check that the filesystem is mounted and, for
# $OCF_CHECK_LEVEL greater than 0, run the matching deeper test (10 or 20).
# Returns the status of Filesystem_status when that is not $OCF_SUCCESS,
# the status of the deeper test when one runs, and $OCF_ERR_CONFIGURED for
# an unsupported level.
Filesystem_monitor()
{
  Filesystem_status
  rc=$?
  [ $rc -eq $OCF_SUCCESS ] || return $rc
  [ $OCF_CHECK_LEVEL -gt 0 ] || return $rc

  case "$OCF_CHECK_LEVEL" in
    10)
      Filesystem_monitor_10
      rc=$?
      ;;
    20)
      Filesystem_monitor_20
      rc=$?
      ;;
    *)
      ocf_log err "unsupported monitor level $OCF_CHECK_LEVEL"
      rc=$OCF_ERR_CONFIGURED
      ;;
  esac
  return $rc
}
# end of Filesystem_monitor
#
# VALIDATE_ALL: Are the instance parameters valid?
# FIXME!! The only part that's useful is the return code.
# This code always returns $OCF_SUCCESS (!)
#
# Sanity-check the instance parameters.
# Only emits log messages: a warning for a missing mount point, an info
# line when neither /proc/filesystems nor modules.dep know $FSTYPE. For
# $OCF_CHECK_LEVEL greater than 0 the presence of dd is verified through
# check_binary. Always returns $OCF_SUCCESS.
Filesystem_validate_all()
{
  # Quoted: with an unquoted path containing blanks the test errors out
  # and the warning is silently skipped.
  if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then
    ocf_log warn "Mountpoint $MOUNTPOINT does not exist"
  fi

  # Check if the $FSTYPE is workable
  # NOTE: Without inserting the $FSTYPE module, this step may be imprecise
  # TODO: This check is Linux specific.
  if [ -n "$FSTYPE" ] && [ "$FSTYPE" != none ]; then
    if ! cut -f2 /proc/filesystems | grep -q "^${FSTYPE}\$"; then
      modpath=/lib/modules/$(uname -r)
      moddep=$modpath/modules.dep
      # Do we have $FSTYPE in modules.dep?
      if ! cut -d' ' -f1 "$moddep" | grep -q "^$modpath.*$FSTYPE\.k\?o:$"; then
        ocf_log info "It seems we do not have $FSTYPE support"
      fi
    fi
  fi

  # If we are supposed to do monitoring with status files, then
  # we need a utility to write in O_DIRECT mode.
  if [ "$OCF_CHECK_LEVEL" -gt 0 ]; then
    check_binary dd
    # Note: really old coreutils versions do not support the "oflag"
    # option for dd. That is not checked here; on such systems the
    # monitor is bound to fail, with dd spewing an error message to the
    # logs, and status file monitoring cannot be used.
  fi

  #TODO: How to check the $options ?

  return $OCF_SUCCESS
}
#
# set the blockdevice variable to "no" or "yes"
#
# Set the global blockdevice to "yes" or "no" according to $FSTYPE, the
# mount options and $DEVICE.
set_blockdevice_var() {
  blockdevice=no

  # these are definitely not block devices
  case $FSTYPE in
    nfs4|nfs|smbfs|cifs|none|glusterfs|ceph) return;;
  esac

  # a loop mount takes a regular file
  if is_option "loop"; then
    return
  fi

  case $DEVICE in
    -*)
      # Oh... An option to mount instead... Typically -U or -L
      ;;
    /dev/null)
      # Special case for BSC
      blockdevice=yes
      ;;
    *)
      # A missing device is worth a warning, except during start.
      if [ ! -b "$DEVICE" ] && [ ! -d "$DEVICE" ] && [ "X$OP" != Xstart ]; then
        ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
      fi
      # Anything that is not a directory is treated as a block device.
      [ -d "$DEVICE" ] || blockdevice=yes
      ;;
  esac
}
# Exactly one argument, the action name, is expected.
case $# in
  1) ;;
  *)
    usage
    exit $OCF_ERR_ARGS
    ;;
esac
OP=$1

# Pick up the OCF_RESKEY_ instance parameters.
DEVICE=$OCF_RESKEY_device
FSTYPE=$OCF_RESKEY_fstype
if [ -n "$OCF_RESKEY_options" ]; then
  options="-o $OCF_RESKEY_options"
fi
# fast_stop defaults to "yes"; the default is written back to the
# OCF_RESKEY_ variable as well.
: ${OCF_RESKEY_fast_stop:="yes"}
FAST_STOP=$OCF_RESKEY_fast_stop

# These operations do not require instance parameters
case $OP in
  meta-data|usage)
    if [ "$OP" = "usage" ]; then
      usage
    else
      meta_data
    fi
    exit $OCF_SUCCESS
    ;;
esac

# Everything else needs a device.
[ -n "$DEVICE" ] || {
  ocf_log err "Please set OCF_RESKEY_device to the device to be managed"
  exit $OCF_ERR_CONFIGURED
}
set_blockdevice_var

# Normalize instance parameters:
# OCF_RESKEY_directory may carry one or several trailing "/", but the
# output of `mount` and /proc/mounts does not.
if [ -z "$OCF_RESKEY_directory" ]; then
  # Operands are quoted so that an empty value cannot break the test.
  if [ "X$OP" = "Xstart" ] || [ "$blockdevice" = "no" ]; then
    ocf_log err "Please specify the directory"
    exit $OCF_ERR_CONFIGURED
  fi
else
  # printf with a quoted argument: the configured path is neither word
  # split nor glob expanded before the trailing slashes are stripped.
  MOUNTPOINT=$(printf '%s\n' "$OCF_RESKEY_directory" | sed 's/\/*$//')
  : "${MOUNTPOINT:=/}"
  # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
  # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
  # kill the whole system. Is that a good idea?
fi

# Check to make sure the utilities are found
if [ "X${HOSTOS}" != "XOpenBSD" ]; then
  check_binary $MODPROBE
  check_binary $FUSER
fi
check_binary $FSCK
check_binary $MOUNT
check_binary $UMOUNT

if [ "$OP" != "monitor" ]; then
  ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT"
fi
# These operations need neither the clone check nor the OCFS2
# initialization: run the handler and exit with its status.
early_action=""
case $OP in
  status) early_action=Filesystem_status ;;
  monitor) early_action=Filesystem_monitor ;;
  validate-all) early_action=Filesystem_validate_all ;;
  stop) early_action=Filesystem_stop ;;
esac
if [ -n "$early_action" ]; then
  $early_action
  exit $?
fi
# Classify how safe it is to run this filesystem as a clone:
#   0 - not cluster aware, refuse
#   1 - cluster aware (or close enough), no complaint
#   2 - not cluster aware, but tolerated with a warning
if is_option "ro"; then
  CLUSTERSAFE=2
else
  CLUSTERSAFE=0
fi

case $FSTYPE in
  ocfs2)
    ocfs2_init
    CLUSTERSAFE=1
    ;;
  nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph)
    # this is kind of safe too
    CLUSTERSAFE=1
    ;;
  # Filesystems which are not cluster aware and which, even when mounted
  # read-only, could still modify parts of themselves such as journal or
  # metadata: only allowed when explicitly forced.
  ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs)
    CLUSTERSAFE=0
    if ocf_is_true "$OCF_RESKEY_force_clones"; then
      CLUSTERSAFE=2
    fi
    ;;
esac

if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then
  if [ "$CLUSTERSAFE" = "0" ]; then
    ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
    ocf_log err "DO NOT RUN IT AS A CLONE!"
    ocf_log err "Politely refusing to proceed to avoid data corruption."
    exit $OCF_ERR_CONFIGURED
  elif [ "$CLUSTERSAFE" = "2" ]; then
    ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!"
    if ocf_is_true "$OCF_RESKEY_force_clones"; then
      ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so."
    else
      ocf_log warn "But we'll let it run because it is mounted read-only."
      ocf_log warn "Please make sure that it's meta data is read-only too!"
    fi
  fi
fi
# Remaining actions: start and notify exit with the status of their
# handler, anything else is unimplemented.
if [ "$OP" = "start" ]; then
  Filesystem_start
  exit $?
fi
if [ "$OP" = "notify" ]; then
  Filesystem_notify
  exit $?
fi
usage
exit $OCF_ERR_UNIMPLEMENTED
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 782a4df08..b645288ed 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -1,1068 +1,1068 @@
#!/bin/sh
#
# $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $
#
# OCF Resource Agent compliant IPaddr2 script.
#
# Based on work by Tuomo Soini, ported to the OCF RA API by Lars
# Marowsky-Brée. Implements Cluster Alias IP functionality too.
#
# Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff
#
#
# Copyright (c) 2003 Tuomo Soini
# Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
# TODO:
# - There ought to be an ocf_run_cmd function which does all logging,
# timeout handling etc for us
# - Make this the standard IP address agent on Linux; the other
# platforms simply should ignore the additional parameters OR can use
# the legacy heartbeat resource script...
# - Check LVS <-> clusterip incompatibilities.
#
# OCF parameters are as below
# OCF_RESKEY_ip
# OCF_RESKEY_broadcast
# OCF_RESKEY_nic
# OCF_RESKEY_cidr_netmask
# OCF_RESKEY_iflabel
# OCF_RESKEY_mac
# OCF_RESKEY_clusterip_hash
# OCF_RESKEY_arp_interval
# OCF_RESKEY_arp_count
# OCF_RESKEY_arp_bg
# OCF_RESKEY_arp_mac
#
# OCF_RESKEY_CRM_meta_clone
# OCF_RESKEY_CRM_meta_clone_max
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/findif.sh
# Defaults
# Each *_default value below is applied to the matching OCF_RESKEY_*
# parameter only when that parameter is unset (":" with ${var=default}).
# The same *_default variables are also expanded into the meta-data XML.
OCF_RESKEY_lvs_support_default=false
OCF_RESKEY_lvs_ipv6_addrlabel_default=false
OCF_RESKEY_lvs_ipv6_addrlabel_value_default=99
OCF_RESKEY_clusterip_hash_default="sourceip-sourceport"
OCF_RESKEY_unique_clone_address_default=false
OCF_RESKEY_arp_interval_default=200
OCF_RESKEY_arp_count_default=5
OCF_RESKEY_arp_bg_default=true
OCF_RESKEY_arp_mac_default="ffffffffffff"
: ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}}
: ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}}
: ${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}}
: ${OCF_RESKEY_clusterip_hash=${OCF_RESKEY_clusterip_hash_default}}
: ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}}
: ${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}}
: ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}}
: ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}}
: ${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}}
#######################################################################
# Helper programs and state locations used by the functions below.
SENDARP=$HA_BIN/send_arp
SENDUA=$HA_BIN/send_ua
FINDIF=findif
# Directory where remove_conflicting_loopback saves loopback settings.
VLDIR=$HA_RSCTMP
# Directory holding the send_arp pid file (see SENDARPPIDFILE in ip_init).
SENDARPPIDDIR=$HA_RSCTMP
# Lock file taken by ip_start/ip_stop when running as a Cluster IP.
CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip}
#######################################################################
# meta_data: print the OCF resource-agent meta-data XML for IPaddr2 on
# stdout, then exit with $OCF_SUCCESS.
# The heredoc delimiter is unquoted, so the ${OCF_RESKEY_*_default}
# references inside the XML are expanded when the text is printed.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="IPaddr2">
<version>1.0</version>
<longdesc lang="en">
This Linux-specific resource manages IP alias IP addresses.
It can add an IP alias, or remove one.
In addition, it can implement Cluster Alias IP functionality
if invoked as a clone resource.
If used as a clone, you should explicitly set clone-node-max &gt;= 2,
and/or clone-max &lt; number of nodes. In case of node failure,
clone instances need to be re-allocated on surviving nodes.
-Which would not be possible, if there is already an instance on those nodes,
+This would not be possible if there is already an instance on those nodes,
and clone-node-max=1 (which is the default).
</longdesc>
<shortdesc lang="en">Manages virtual IPv4 and IPv6 addresses (Linux specific version)</shortdesc>
<parameters>
<parameter name="ip" unique="1" required="1">
<longdesc lang="en">
The IPv4 (dotted quad notation) or IPv6 address (colon hexadecimal notation)
example IPv4 "192.168.1.1".
example IPv6 "2001:db8:DC28:0:0:FC57:D4C8:1FFF".
</longdesc>
<shortdesc lang="en">IPv4 or IPv6 address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="nic" unique="0">
<longdesc lang="en">
The base network interface on which the IP address will be brought
online.
If left empty, the script will try and determine this from the
routing table.
Do NOT specify an alias interface in the form eth0:1 or anything here;
rather, specify the base interface only.
If you want a label, see the iflabel parameter.
Prerequisite:
There must be at least one static IP address, which is not managed by
the cluster, assigned to the network interface.
If you can not assign any static IP address on the interface,
modify this kernel parameter:
sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device)
</longdesc>
<shortdesc lang="en">Network interface</shortdesc>
<content type="string"/>
</parameter>
<parameter name="cidr_netmask">
<longdesc lang="en">
The netmask for the interface in CIDR format
(e.g., 24 and not 255.255.255.0)
If unspecified, the script will also try to determine this from the
routing table.
</longdesc>
<shortdesc lang="en">CIDR netmask</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="broadcast">
<longdesc lang="en">
Broadcast address associated with the IP. If left empty, the script will
determine this from the netmask.
</longdesc>
<shortdesc lang="en">Broadcast address</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="iflabel">
<longdesc lang="en">
You can specify an additional label for your IP address here.
This label is appended to your interface name.
A label can be specified in nic parameter but it is deprecated.
If a label is specified in nic name, this parameter has no effect.
</longdesc>
<shortdesc lang="en">Interface label</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="lvs_support">
<longdesc lang="en">
Enable support for LVS Direct Routing configurations. In case a IP
address is stopped, only move it to the loopback device to allow the
local node to continue to service requests, but no longer advertise it
on the network.
Notes for IPv6:
It is not necessary to enable this option on IPv6.
Instead, enable 'lvs_ipv6_addrlabel' option for LVS-DR usage on IPv6.
</longdesc>
<shortdesc lang="en">Enable support for LVS DR</shortdesc>
<content type="boolean" default="${OCF_RESKEY_lvs_support_default}"/>
</parameter>
<parameter name="lvs_ipv6_addrlabel">
<longdesc lang="en">
Enable adding IPv6 address label so IPv6 traffic originating from
-the address' interface does not use this address as the source.
+the address's interface does not use this address as the source.
This is necessary for LVS-DR health checks to realservers to work. Without it,
the most recently added IPv6 address (probably the address added by IPaddr2)
will be used as the source address for IPv6 traffic from that interface and
since that address exists on loopback on the realservers, the realserver
-response to pings/connections will not never leave its loopback.
+response to pings/connections will never leave its loopback.
See RFC3484 for the detail of the source address selection.
See also 'lvs_ipv6_addrlabel_value' parameter.
</longdesc>
-<shortdesc lang="en">Enables adding IPv6 address label.</shortdesc>
+<shortdesc lang="en">Enable adding IPv6 address label.</shortdesc>
<content type="boolean" default="${OCF_RESKEY_lvs_ipv6_addrlabel_default}"/>
</parameter>
<parameter name="lvs_ipv6_addrlabel_value">
<longdesc lang="en">
Specify IPv6 address label value used when 'lvs_ipv6_addrlabel' is enabled.
The value should be an unused label in the policy table
which is shown by 'ip addrlabel list' command.
You would rarely need to change this parameter.
</longdesc>
<shortdesc lang="en">IPv6 address label value.</shortdesc>
<content type="integer" default="${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}"/>
</parameter>
<parameter name="mac">
<longdesc lang="en">
Set the interface MAC address explicitly. Currently only used in case of
the Cluster IP Alias. Leave empty to chose automatically.
</longdesc>
<shortdesc lang="en">Cluster IP MAC address</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="clusterip_hash">
<longdesc lang="en">
Specify the hashing algorithm used for the Cluster IP functionality.
</longdesc>
<shortdesc lang="en">Cluster IP hashing function</shortdesc>
<content type="string" default="${OCF_RESKEY_clusterip_hash_default}"/>
</parameter>
<parameter name="unique_clone_address">
<longdesc lang="en">
-If true, add the clone ID to the supplied value of ip to create
+If true, add the clone ID to the supplied value of IP to create
a unique address to manage
</longdesc>
<shortdesc lang="en">Create a unique address for cloned instances</shortdesc>
<content type="boolean" default="${OCF_RESKEY_unique_clone_address_default}"/>
</parameter>
<parameter name="arp_interval">
<longdesc lang="en">
Specify the interval between unsolicited ARP packets in milliseconds.
</longdesc>
<shortdesc lang="en">ARP packet interval in ms</shortdesc>
<content type="integer" default="${OCF_RESKEY_arp_interval_default}"/>
</parameter>
<parameter name="arp_count">
<longdesc lang="en">
Number of unsolicited ARP packets to send.
</longdesc>
<shortdesc lang="en">ARP packet count</shortdesc>
<content type="integer" default="${OCF_RESKEY_arp_count_default}"/>
</parameter>
<parameter name="arp_bg">
<longdesc lang="en">
-Whether or not to send the arp packets in the background.
+Whether or not to send the ARP packets in the background.
</longdesc>
<shortdesc lang="en">ARP from background</shortdesc>
<content type="string" default="${OCF_RESKEY_arp_bg_default}"/>
</parameter>
<parameter name="arp_mac">
<longdesc lang="en">
MAC address to send the ARP packets to.
You really shouldn't be touching this.
</longdesc>
<shortdesc lang="en">ARP MAC</shortdesc>
<content type="string" default="${OCF_RESKEY_arp_mac_default}"/>
</parameter>
<parameter name="arp_sender">
<longdesc lang="en">
The program to send ARP packets with on start. For infiniband
interfaces, default is ipoibarping. If ipoibarping is not
available, set this to send_arp.
</longdesc>
<shortdesc lang="en">ARP sender</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="flush_routes">
<longdesc lang="en">
Flush the routing table on stop. This is for
applications which use the cluster IP address
and which run on the same physical host that the
IP address lives on. The Linux kernel may force that
application to take a shortcut to the local loopback
interface, instead of the interface the address
is really bound to. Under those circumstances, an
application may, somewhat unexpectedly, continue
to use connections for some time even after the
IP address is deconfigured. Set this parameter in
order to immediately disable said shortcut when the
IP address goes away.
</longdesc>
<shortdesc lang="en">Flush kernel routing table on stop</shortdesc>
<content type="boolean" default="false"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="status" depth="0" timeout="20s" interval="10s" />
<action name="monitor" depth="0" timeout="20s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
exit $OCF_SUCCESS
}
# ip_init: validate the environment and the resource parameters, and derive
# the globals used by the rest of the agent.
# Globals written: BASEIP, BRDCAST, NIC, NETMASK, IFLABEL, IF_MAC,
#   IP_INC_GLOBAL, IP_INC_NO, FAMILY, NICINFO, SENDARPPIDFILE and, for a
#   Cluster IP, IP_CIP, IP_CIP_HASH and IP_CIP_FILE.
# Exits the script with an OCF error code when a check fails.
ip_init() {
local rc
if [ X`uname -s` != "XLinux" ]; then
ocf_log err "IPaddr2 only supported Linux."
exit $OCF_ERR_INSTALLED
fi
if [ X"$OCF_RESKEY_ip" = "X" ]; then
ocf_log err "IP address (the ip parameter) is mandatory"
exit $OCF_ERR_CONFIGURED
fi
# Only start and stop require root; every other action passes this check.
if
case $__OCF_ACTION in
start|stop) ocf_is_root;;
*) true;;
esac
then
: YAY!
else
ocf_log err "You must be root for $__OCF_ACTION operation."
exit $OCF_ERR_PERM
fi
BASEIP="$OCF_RESKEY_ip"
BRDCAST="$OCF_RESKEY_broadcast"
NIC="$OCF_RESKEY_nic"
# Note: We had a version out there for a while which used
# netmask instead of cidr_netmask. Don't remove this aliasing code!
if
[ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ]
then
OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask
export OCF_RESKEY_cidr_netmask
fi
NETMASK="$OCF_RESKEY_cidr_netmask"
IFLABEL="$OCF_RESKEY_iflabel"
IF_MAC="$OCF_RESKEY_mac"
# IP_INC_GLOBAL: clone-max (1 when not set).
# IP_INC_NO: clone number plus one (1 when not set).
IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1}
IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1`
# NOTE(review): this numeric test runs before IP_INC_GLOBAL is validated
# as a decimal just below, so a non-numeric value makes "[" complain first.
if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then
ocf_log err "LVS and load sharing do not go together well"
exit $OCF_ERR_CONFIGURED
fi
# NOTE(review): the message names OCF_RESKEY_incarnations_max_global, but
# the value checked here comes from OCF_RESKEY_CRM_meta_clone_max.
if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then
:
else
ocf_log err "Invalid OCF_RESKEY_incarnations_max_global [$IP_INC_GLOBAL], should be positive integer"
exit $OCF_ERR_CONFIGURED
fi
# An address containing ":" is treated as IPv6, anything else as IPv4.
echo $OCF_RESKEY_ip | grep -qs ":"
if [ $? -ne 0 ];then
FAMILY=inet
if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
ocf_log err "IPv4 does not support lvs_ipv6_addrlabel"
exit $OCF_ERR_CONFIGURED
fi
else
FAMILY=inet6
if ocf_is_true $OCF_RESKEY_lvs_support ;then
ocf_log err "The IPv6 does not support lvs_support"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
# NOTE(review): the test accepts 0 (-ge 0) although the message below
# says "positive integer".
if ocf_is_decimal "$OCF_RESKEY_lvs_ipv6_addrlabel_value" && [ $OCF_RESKEY_lvs_ipv6_addrlabel_value -ge 0 ]; then
:
else
ocf_log err "Invalid lvs_ipv6_addrlabel_value [$OCF_RESKEY_lvs_ipv6_addrlabel_value], should be positive integer"
exit $OCF_ERR_CONFIGURED
fi
fi
fi
# support nic:iflabel format in nic parameter
case $NIC in
*:*)
# Text after the first ":" becomes the label, text before it the NIC.
IFLABEL=`echo $NIC | sed 's/[^:]*://'`
NIC=`echo $NIC | sed 's/:.*//'`
# only the base name should be passed to findif
OCF_RESKEY_nic=$NIC
;;
esac
# $FINDIF takes its parameters from the environment
#
NICINFO=`$FINDIF`
rc=$?
if
[ $rc -eq 0 ]
then
# Drop the "netmask " and "broadcast " keywords, then take the first
# three space-separated fields as NIC, NETMASK and BRDCAST.
NICINFO=`echo "$NICINFO" | sed -e 's/netmask\ //;s/broadcast\ //'`
NIC=`echo "$NICINFO" | cut -d" " -f1`
NETMASK=`echo "$NICINFO" | cut -d" " -f2`
BRDCAST=`echo "$NICINFO" | cut -d" " -f3`
else
# findif couldn't find the interface
# A probe reports "not running", a stop reports success, and any other
# action fails with findif's own exit status.
if ocf_is_probe; then
ocf_log info "[$FINDIF] failed"
exit $OCF_NOT_RUNNING
elif [ "$__OCF_ACTION" = stop ]; then
ocf_log warn "[$FINDIF] failed"
exit $OCF_SUCCESS
else
ocf_log err "[$FINDIF] failed"
exit $rc
fi
fi
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
# A label is always stored in the "<nic>:<label>" form.
if [ -n "$IFLABEL" ]; then
IFLABEL=${NIC}:${IFLABEL}
fi
# Cluster IP mode: more than one clone instance and unique_clone_address
# not enabled.
if [ "$IP_INC_GLOBAL" -gt 1 ] && ! ocf_is_true "$OCF_RESKEY_unique_clone_address"; then
IP_CIP="yes"
IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}"
if [ -z "$IF_MAC" ]; then
# Choose a MAC
# 1. Concatenate some input together
# 2. This doesn't need to be a cryptographically
# secure hash.
# 3. Drop everything after the first 6 octets (12 chars)
# 4. Delimit the octets with ':'
# 5. Make sure the first octet is odd,
# so the result is a multicast MAC
IF_MAC=`echo $OCF_RESKEY_ip $NETMASK $BRDCAST | \
md5sum | \
sed -e 's#\(............\).*#\1#' \
-e 's#..#&:#g; s#:$##' \
-e 's#^\(.\)[02468aAcCeE]#\11#'`
fi
IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$OCF_RESKEY_ip"
fi
}
#
# find_interface: print the interfaces that carry a given address.
# Arguments: $1 - IP address, $2 - netmask (prefix length).
# Globals read: IP2UTIL, FAMILY.
# Outputs: the matching interface names, one per line, on stdout.
# Returns: always 0.
#
find_interface() {
    local addr="$1"
    local mask="$2"
    local matches

    # Take the interface column of every "ip -o addr show" line that holds
    # " <addr>/<mask>", but leave out FreeS/WAN ipsecN virtual interfaces.
    matches="$($IP2UTIL -o -f $FAMILY addr show \
        | grep "\ $addr/$mask" \
        | cut -d ' ' -f2 \
        | grep -v '^ipsec[0-9][0-9]*$')"

    echo "$matches"
    return 0
}
#
# delete_interface: remove an address from an interface.
# Arguments: $1 - IP address, $2 - interface name, $3 - netmask.
# Globals read: IP2UTIL, FAMILY, OCF_RESKEY_flush_routes,
#   OCF_RESKEY_lvs_ipv6_addrlabel.
# Returns: $OCF_ERR_GENERIC when the address cannot be deleted,
#   $OCF_SUCCESS otherwise.
#
delete_interface () {
    # Keep the working variables local, as add_interface does, so they do
    # not overwrite same-named globals of the callers.
    local ipaddr iface netmask cmd
    ipaddr="$1"
    iface="$2"
    netmask="$3"

    # $cmd is expanded unquoted on purpose: it is a command line that has
    # to be split into words for ocf_run.
    cmd="$IP2UTIL -f $FAMILY addr delete $ipaddr/$netmask dev $iface"
    ocf_run $cmd || return $OCF_ERR_GENERIC

    if ocf_is_true $OCF_RESKEY_flush_routes; then
        ocf_run $IP2UTIL route flush cache
    fi

    # Drop the IPv6 address label when lvs_ipv6_addrlabel is enabled.
    if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
        delete_ipv6_addrlabel $ipaddr
    fi

    return $OCF_SUCCESS
}
#
# add_interface: add an address to an interface and bring the link up.
# Arguments: $1 - IP address, $2 - netmask, $3 - broadcast address or the
#   word "none", $4 - interface name, $5 - optional label.
# Globals read: IP2UTIL, FAMILY, OCF_RESKEY_lvs_ipv6_addrlabel.
# Returns: $OCF_ERR_GENERIC when either ip command fails, else $OCF_SUCCESS.
#
add_interface () {
    local addr mask bcast dev lbl add_cmd note
    addr="$1"
    mask="$2"
    bcast="$3"
    dev="$4"
    lbl="$5"

    # Install the IPv6 address label first when that option is enabled.
    if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
        add_ipv6_addrlabel $addr
    fi

    # The broadcast address is only passed on when one was given.
    if [ "$bcast" = "none" ]; then
        add_cmd="$IP2UTIL -f $FAMILY addr add $addr/$mask dev $dev"
        note="Adding $FAMILY address $addr/$mask to device $dev"
    else
        add_cmd="$IP2UTIL -f $FAMILY addr add $addr/$mask brd $bcast dev $dev"
        note="Adding $FAMILY address $addr/$mask with broadcast address $bcast to device $dev"
    fi

    if [ -n "$lbl" ]; then
        add_cmd="$add_cmd label $lbl"
        note="${note} (with label $lbl)"
    fi

    ocf_log info "$note"
    ocf_run $add_cmd || return $OCF_ERR_GENERIC

    ocf_log info "Bringing device $dev up"
    ocf_run $IP2UTIL link set $dev up || return $OCF_ERR_GENERIC

    return $OCF_SUCCESS
}
#
# delete_route: remove the route for a prefix from an interface.
# Arguments: $1 - route prefix, $2 - interface name.
# Globals read: IP2UTIL. Globals written: prefix, iface, CMD.
# Returns: the exit status of the "ip route delete" command.
#
delete_route () {
    prefix=$1
    iface=$2
    CMD="$IP2UTIL route delete $prefix dev $iface"

    ocf_log info "$CMD"
    # Last command of the function, so its status is the return value.
    $CMD
}
# remove_conflicting_loopback: take a conflicting address off the (hidden)
# loopback interface, after saving its settings in $VLDIR/<ipaddr> so that
# restore_loopback can add it back when the IPaddr is released.
# Arguments: $1 - IP address, $2 - netmask, $3 - broadcast, $4 - interface.
# Globals written: ipaddr, netmask, broadcast, ifname.
# Returns: the status of the final delete_route call.
#
# TODO: This is very ugly and should be controlled by an additional
# instance parameter. Or even: multi-state, with the IP only being
# "active" on the master!?
#
remove_conflicting_loopback() {
    ipaddr="$1"
    netmask="$2"
    broadcast="$3"
    ifname="$4"

    ocf_log info "Removing conflicting loopback $ifname."

    # Failing to save the settings is only reported; the removal goes ahead.
    echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr" ||
        ocf_log err "Could not save conflicting loopback $ifname." \
            "it will not be restored."

    delete_interface "$ipaddr" "$ifname" "$netmask"
    # Forcibly remove the route (if it exists) to the loopback.
    delete_route "$ipaddr" "$ifname"
}
#
# restore_loopback: re-add a loopback address that was taken down earlier
# by remove_conflicting_loopback().
# Arguments: $1 - IP address whose saved settings should be restored.
# Globals read: VLDIR. Globals written: ipaddr, ifinfo.
# Does nothing when $VLDIR/<ipaddr> is missing or empty.
#
restore_loopback() {
    ipaddr="$1"

    [ -s "$VLDIR/$ipaddr" ] || return 0

    ifinfo=$(cat "$VLDIR/$ipaddr")
    ocf_log info "Restoring loopback IP Address " \
        "$ifinfo."
    # The saved line holds "ipaddr netmask broadcast ifname"; it is split
    # into the positional arguments of add_interface on purpose.
    add_interface $ifinfo
    rm -f "$VLDIR/$ipaddr"
}
# add_ipv6_addrlabel: add an IPv6 address label for a prefix.
# Arguments: $1 - address used as the prefix.
# Globals read: IP2UTIL, OCF_RESKEY_lvs_ipv6_addrlabel_value.
# A failing command is only logged as a warning.
add_ipv6_addrlabel() {
    local prefix_addr label_no label_cmd
    prefix_addr="$1"
    label_no="$OCF_RESKEY_lvs_ipv6_addrlabel_value"
    label_cmd="$IP2UTIL addrlabel add prefix $prefix_addr label $label_no"

    ocf_log info "Adding IPv6 address label prefix $prefix_addr label $label_no"
    if ! ocf_run $label_cmd; then
        ocf_log warn "$label_cmd failed."
    fi
}
# delete_ipv6_addrlabel: remove the IPv6 address label of a prefix.
# Arguments: $1 - address used as the prefix.
# Globals read: IP2UTIL, OCF_RESKEY_lvs_ipv6_addrlabel_value.
# Returns: the status of ocf_run; an error can be ignored by the caller.
delete_ipv6_addrlabel() {
    local prefix_addr label_no
    prefix_addr="$1"
    label_no="$OCF_RESKEY_lvs_ipv6_addrlabel_value"

    ocf_run $IP2UTIL addrlabel del prefix $prefix_addr label $label_no
}
# is_infiniband: succeed when "ip link show $NIC" reports link/infiniband.
# Globals read: IP2UTIL, NIC.
is_infiniband() {
    case "$($IP2UTIL link show $NIC)" in
    *link/infiniband*) return 0 ;;
    esac
    return 1
}
#
# run_send_arp: run send_arp to tell peers about the new MAC address.
# Globals read: SENDARP, SENDARPPIDFILE, NIC, IP_CIP, IF_MAC,
#   OCF_RESKEY_ip, OCF_RESKEY_arp_interval, OCF_RESKEY_arp_count,
#   OCF_RESKEY_arp_bg.
# Globals written: ARGS, and MY_MAC in Cluster IP mode.
#
run_send_arp() {
    local mac_field=auto

    # In Cluster IP mode the configured MAC (colons removed) is sent;
    # without one, and in every other mode, "auto" is used.
    if [ "x$IP_CIP" = "xyes" ] ; then
        if [ x = "x$IF_MAC" ] ; then
            MY_MAC=auto
        else
            MY_MAC=$(echo ${IF_MAC} | sed -e 's/://g')
        fi
        mac_field=$MY_MAC
    fi

    ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $mac_field not_used not_used"
    ocf_log info "$SENDARP $ARGS"

    if ! ocf_is_true $OCF_RESKEY_arp_bg; then
        $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps"
        return
    fi
    # NOTE(review): the ">&2" after "&" is a separate empty command; it
    # does not redirect the output of the backgrounded subshell.
    ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2
}
#
# run_send_ua: run send_ua to send ICMPv6 Unsolicited Neighbor
# Advertisements for the address.
# Globals read: IP2UTIL, FAMILY, NIC, NETMASK, SENDUA, OCF_RESKEY_ip,
#   OCF_RESKEY_arp_interval, OCF_RESKEY_arp_count.
# Globals written: ARGS.
#
run_send_ua() {
    local attempt

    # Wait until the allocated IPv6 address is ready, i.e. until its
    # "tentative" flag has disappeared; otherwise send_ua can not send the
    # unsolicited advertisement requests. Up to five checks are made, one
    # second apart, and the address is used anyway after the last one.
    for attempt in 1 2 3 4 5; do
        if ! $IP2UTIL -o -f $FAMILY addr show dev $NIC \
            | grep -q -e "$OCF_RESKEY_ip/$NETMASK .* tentative"
        then
            break
        fi
        if [ $attempt -eq 5 ]; then
            ocf_log warn "$OCF_RESKEY_ip still has 'tentative' status. (ignored)"
            break
        fi
        sleep 1
    done

    ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC"
    ocf_log info "$SENDUA $ARGS"
    $SENDUA $ARGS || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements."
}
#
# run_send_ib_arp: run ipoibarping to tell peers about the new Infiniband
# address.
# Globals read: NIC, OCF_RESKEY_ip, OCF_RESKEY_arp_count, OCF_RESKEY_arp_bg.
# Globals written: ARGS.
#
run_send_ib_arp() {
    ARGS="-q -c $OCF_RESKEY_arp_count -U -I $NIC $OCF_RESKEY_ip"
    ocf_log info "ipoibarping $ARGS"

    if ! ocf_is_true $OCF_RESKEY_arp_bg; then
        ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps"
        return
    fi
    # NOTE(review): the ">&2" after "&" is a separate empty command; it
    # does not redirect the output of the backgrounded subshell.
    (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2
}
# Do we already serve this IP address on the given $NIC?
#
# Globals read: NIC, NETMASK, OCF_RESKEY_ip, IP_CIP, IP_CIP_FILE, IP_INC_NO.
# Outputs one of these words on stdout:
# ok = served (for CIP: + hash bucket)
# partial = served and no hash bucket (CIP only)
# partial2 = served and no CIP iptables rule
# no = nothing
# Returns: always 0.
#
ip_served() {
    local cur_nic i

    if [ -z "$NIC" ]; then # no nic found or specified
        echo "no"
        return 0
    fi

    cur_nic="$(find_interface $OCF_RESKEY_ip $NETMASK)"
    if [ -z "$cur_nic" ]; then
        echo "no"
        return 0
    fi

    if [ -z "$IP_CIP" ]; then
        for i in $cur_nic; do
            # only mark as served when on the same interfaces as $NIC
            [ "$i" = "$NIC" ] || continue
            echo "ok"
            return 0
        done
        # There used to be logic here to pretend "not served",
        # if ${OCF_RESKEY_lvs_support} was enabled, and the IP was
        # found active on "lo*" only. With lvs_support on, you should
        # have NIC != lo, so thats already filtered
        # by the continue above.
        echo "no"
        return 0
    fi

    # Special handling for the CIP:
    # The file name is quoted so that an empty or unusual value can never
    # turn the test into a different expression or leave grep reading stdin.
    if [ ! -e "$IP_CIP_FILE" ]; then
        echo "partial2"
        return 0
    fi

    # The file holds a comma-separated list of node numbers; look for ours
    # as a complete list element.
    if grep -E -q "(^|,)${IP_INC_NO}(,|$)" "$IP_CIP_FILE"; then
        echo "ok"
    else
        echo "partial"
    fi
    return 0
}
#######################################################################
# ip_usage: print the list of supported actions on stdout.
ip_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|status|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
# ip_start: bring the address up on $NIC and announce it.
# Globals read: NIC, NETMASK, BRDCAST, IFLABEL, FAMILY, IP_CIP, IF_MAC,
#   IP_INC_GLOBAL, IP_INC_NO, IP_CIP_HASH, IP_CIP_FILE, CIP_lockfile,
#   MODPROBE, IPTABLES, SENDUA, ARP_SEND_FUN, OCF_RESKEY_ip,
#   OCF_RESKEY_lvs_support.
# Never returns: exits with $OCF_SUCCESS, $OCF_ERR_CONFIGURED (no NIC) or
# $OCF_ERR_GENERIC (iptables or address setup failed).
ip_start() {
    local ip_status i

    if [ -z "$NIC" ]; then # no nic found or specified
        exit $OCF_ERR_CONFIGURED
    fi

    if [ -n "$IP_CIP" ]; then
        # Cluster IPs need special processing when the first bucket
        # is added to the node... take a lock to make sure only one
        # process executes that code
        ocf_take_lock $CIP_lockfile
        ocf_release_lock_on_exit $CIP_lockfile
    fi

    #
    # Do we already service this IP address on $NIC?
    #
    ip_status=$(ip_served)
    if [ "$ip_status" = "ok" ]; then
        exit $OCF_SUCCESS
    fi

    # The CLUSTERIP rule is only ever needed in Cluster IP mode. The group
    # makes that explicit: "A && B || C" would run this branch for
    # "partial2" even with IP_CIP unset.
    if [ -n "$IP_CIP" ] &&
        { [ "$ip_status" = "no" ] || [ "$ip_status" = "partial2" ]; }
    then
        $MODPROBE ip_conntrack
        $IPTABLES -I INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
            --new \
            --clustermac $IF_MAC \
            --total-nodes $IP_INC_GLOBAL \
            --local-node $IP_INC_NO \
            --hashmode $IP_CIP_HASH
        if [ $? -ne 0 ]; then
            ocf_log err "iptables failed"
            exit $OCF_ERR_GENERIC
        fi
    fi

    # The address is served but our hash bucket is missing: add the bucket.
    if [ -n "$IP_CIP" ] && [ "$ip_status" = "partial" ]; then
        echo "+$IP_INC_NO" >$IP_CIP_FILE
    fi

    if [ "$ip_status" = "no" ]; then
        if ocf_is_true ${OCF_RESKEY_lvs_support}; then
            # With LVS support, a copy of the address on a loopback
            # interface has to go before the address is added to $NIC.
            for i in `find_interface $OCF_RESKEY_ip 32`; do
                case $i in
                lo*)
                    remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo
                    ;;
                esac
            done
        fi

        add_interface $OCF_RESKEY_ip $NETMASK ${BRDCAST:-none} $NIC $IFLABEL
        if [ $? -ne 0 ]; then
            # add_interface keeps its command line in a local variable, so
            # the address and interface are reported here instead.
            ocf_log err "Failed to add $OCF_RESKEY_ip/$NETMASK to $NIC."
            exit $OCF_ERR_GENERIC
        fi
    fi

    case $NIC in
    lo*)
        : no need to run send_arp on loopback
        ;;
    *)
        if [ $FAMILY = "inet" ];then
            $ARP_SEND_FUN
        else
            if [ -x $SENDUA ]; then
                run_send_ua
            fi
        fi
        ;;
    esac
    exit $OCF_SUCCESS
}
# ip_stop: stop announcing the address and take it off $NIC.
# Globals read: IP_CIP, CIP_lockfile, SENDARPPIDFILE, IP_CIP_FILE,
#   IP_INC_NO, IP_INC_GLOBAL, IP_CIP_HASH, IF_MAC, IPTABLES, NIC, NETMASK,
#   OCF_RESKEY_ip, OCF_RESKEY_lvs_support.
# Never returns: exits with $OCF_SUCCESS, or with $OCF_ERR_GENERIC when the
# address cannot be deleted.
ip_stop() {
# Set to "no" below when other Cluster IP buckets remain, in which case
# the address itself is kept.
local ip_del_if="yes"
if [ -n "$IP_CIP" ]; then
# Cluster IPs need special processing when the last bucket
# is removed from the node... take a lock to make sure only one
# process executes that code
ocf_take_lock $CIP_lockfile
ocf_release_lock_on_exit $CIP_lockfile
fi
# Stop a send_arp whose pid file is still around; the pid file is only
# removed when the kill succeeded.
if [ -f "$SENDARPPIDFILE" ] ; then
kill `cat "$SENDARPPIDFILE"`
if [ $? -ne 0 ]; then
ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip"
else
ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip"
rm -f "$SENDARPPIDFILE"
fi
fi
local ip_status=`ip_served`
ocf_log info "IP status = $ip_status, IP_CIP=$IP_CIP"
if [ $ip_status = "no" ]; then
: Requested interface not in use
exit $OCF_SUCCESS
fi
if [ -n "$IP_CIP" ] && [ $ip_status != "partial2" ]; then
# "partial": the address is served but our bucket is not there, so
# there is nothing for this instance to remove.
if [ $ip_status = "partial" ]; then
exit $OCF_SUCCESS
fi
# Remove our bucket by writing "-<node number>" to the CLUSTERIP file.
echo "-$IP_INC_NO" >$IP_CIP_FILE
# When the file reads back empty, delete the iptables rule for every
# local-node value from 1 to IP_INC_GLOBAL; otherwise keep the address.
if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then
ocf_log info $OCF_RESKEY_ip, $IP_CIP_HASH
i=1
while [ $i -le $IP_INC_GLOBAL ]; do
ocf_log info $i
$IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
--new \
--clustermac $IF_MAC \
--total-nodes $IP_INC_GLOBAL \
--local-node $i \
--hashmode $IP_CIP_HASH
i=`expr $i + 1`
done
else
ip_del_if="no"
fi
fi
if [ "$ip_del_if" = "yes" ]; then
delete_interface $OCF_RESKEY_ip $NIC $NETMASK
if [ $? -ne 0 ]; then
exit $OCF_ERR_GENERIC
fi
# With LVS support, put back a loopback address saved at start time.
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
restore_loopback "$OCF_RESKEY_ip"
fi
fi
exit $OCF_SUCCESS
}
# ip_monitor: report whether the address is currently served.
# Returns $OCF_SUCCESS when ip_served prints "ok", exits the script with
# $OCF_NOT_RUNNING for "partial", "no" or "partial2", and returns
# $OCF_ERR_GENERIC for any other answer.
ip_monitor() {
    # TODO: Implement more elaborate monitoring like checking for
    # interface health maybe via a daemon like FailSafe etc...
    local served
    served=$(ip_served)

    if [ "$served" = "ok" ]; then
        return $OCF_SUCCESS
    fi

    case $served in
    partial|no|partial2)
        exit $OCF_NOT_RUNNING
        ;;
    esac

    # Errors on this interface?
    return $OCF_ERR_GENERIC
}
# set_send_arp_program: make sure that we have something to send ARPs with
# and record the function to call in ARP_SEND_FUN.
# Globals read: OCF_RESKEY_arp_sender, SENDARP, __OCF_ACTION.
# Globals written: ARP_SEND_FUN (run_send_arp or run_send_ib_arp).
# Exits with $OCF_ERR_CONFIGURED for an unknown arp_sender value.
set_send_arp_program() {
    ARP_SEND_FUN=run_send_arp

    case "$OCF_RESKEY_arp_sender" in
    "")
        # No explicit sender: only infiniband interfaces need a change.
        is_infiniband || return 0
        ARP_SEND_FUN=run_send_ib_arp
        have_binary ipoibarping && return 0

        # ipoibarping is missing, fall back to send_arp.
        if [ "$__OCF_ACTION" = start ]; then
            ocf_log warn "using send_arp for infiniband because ipoibarping is not available (set arp_sender to \"send_arp\" to suppress this message)"
        fi
        check_binary $SENDARP
        ARP_SEND_FUN=run_send_arp
        ;;
    send_arp)
        check_binary $SENDARP
        ;;
    ipoibarping)
        check_binary ipoibarping
        ARP_SEND_FUN=run_send_ib_arp
        ;;
    *)
        ocf_log err "unrecognized arp_sender value: $OCF_RESKEY_arp_sender"
        exit $OCF_ERR_CONFIGURED
        ;;
    esac
}
# ip_validate: check the required binaries and the resource parameters.
# Calls ip_init (which derives NIC, NETMASK, IP_CIP, ... and exits on its
# own errors) and set_send_arp_program, then validates the remaining
# parameters. Exits with $OCF_ERR_CONFIGURED on the first invalid value.
ip_validate() {
    check_binary $IP2UTIL
    IP_CIP=

    ip_init

    set_send_arp_program

    # Cluster IP mode needs iptables and modprobe (see ip_start).
    if [ -n "$IP_CIP" ]; then
        check_binary $IPTABLES
        check_binary $MODPROBE
    fi

    # $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init,
    # do not bother here.

    if ocf_is_true "$OCF_RESKEY_unique_clone_address" &&
        ! ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
        ocf_log err "unique_clone_address makes sense only with meta globally_unique set"
        exit $OCF_ERR_CONFIGURED
    fi

    if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then
        :
    else
        ocf_log err "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]"
        exit $OCF_ERR_CONFIGURED
    fi

    if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then
        :
    else
        ocf_log err "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]"
        exit $OCF_ERR_CONFIGURED
    fi

    if [ -n "$IP_CIP" ]; then
        case $IP_CIP_HASH in
        sourceip|sourceip-sourceport|sourceip-sourceport-destport)
            ;;
        *)
            ocf_log err "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]"
            exit $OCF_ERR_CONFIGURED
            ;;
        esac

        if ocf_is_true ${OCF_RESKEY_lvs_support}; then
            # This call used to be spelled "ecf_log", which is not a
            # defined command, so the message was never logged.
            ocf_log err "LVS and load sharing not advised to try"
            exit $OCF_ERR_CONFIGURED
        fi

        # The MAC must be six two-character octets divided by a
        # non-alphanumeric character, and the second character of the
        # first octet must be odd.
        case $IF_MAC in
        [0-9a-zA-Z][13579bBdDfF][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z])
            ;;
        *)
            ocf_log err "Invalid IF_MAC [$IF_MAC]"
            exit $OCF_ERR_CONFIGURED
            ;;
        esac
    fi
}
# With unique_clone_address enabled, every clone instance manages its own
# address: the clone number is added to the fourth dot-separated field of
# the configured ip.
# NOTE(review): the address is split on "." only, so this looks like it
# expects a dotted-quad IPv4 address - confirm before using it with IPv6.
if ocf_is_true "$OCF_RESKEY_unique_clone_address"; then
prefix=`echo $OCF_RESKEY_ip | awk -F. '{print $1"."$2"."$3}'`
suffix=`echo $OCF_RESKEY_ip | awk -F. '{print $4}'`
suffix=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + $suffix`
OCF_RESKEY_ip="$prefix.$suffix"
fi
# meta-data and usage/help are answered before any validation is done
# (meta_data exits on its own).
case $__OCF_ACTION in
meta-data) meta_data
;;
usage|help) ip_usage
exit $OCF_SUCCESS
;;
esac
# Every remaining action needs a validated configuration; ip_validate exits
# the script when a check fails.
ip_validate
case $__OCF_ACTION in
start) ip_start
;;
stop) ip_stop
;;
# status prints "running" or "stopped" in addition to the exit code.
status) ip_status=`ip_served`
if [ $ip_status = "ok" ]; then
echo "running"
exit $OCF_SUCCESS
else
echo "stopped"
exit $OCF_NOT_RUNNING
fi
;;
monitor) ip_monitor
;;
# validate-all: nothing more to do, ip_validate has already passed.
validate-all) ;;
*) ip_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
# vi:sw=4:ts=8:
diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index d74126a91..40865001d 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -1,570 +1,570 @@
#!/bin/sh
#
# Support: linux-ha@lists.linux-ha.org
# License: GNU General Public License (GPL)
#
# Resource Agent for domains managed by the libvirt API.
# Requires a running libvirt daemon (libvirtd).
#
# (c) 2008-2010 Florian Haas, Dejan Muhamedagic,
# and Linux-HA contributors
#
# usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
# Each *_default value below is applied to the matching OCF_RESKEY_*
# parameter only when that parameter is unset (":" with ${var=default}).
OCF_RESKEY_force_stop_default=0
# The default hypervisor URI is whatever "virsh --quiet uri" prints.
OCF_RESKEY_hypervisor_default="$(virsh --quiet uri)"
OCF_RESKEY_autoset_utilization_cpu_default="true"
OCF_RESKEY_autoset_utilization_hv_memory_default="true"
# Default migration port: 49152 plus (ocf_maybe_random modulo 64).
# NOTE(review): presumably ocf_maybe_random prints a non-negative integer,
# which would keep the port within 49152-49215 - confirm.
OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 ))
: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_hypervisor=${OCF_RESKEY_hypervisor_default}}
: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}}
: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}}
: ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}}
#######################################################################
## I'd very much suggest to make this RA use bash,
## and then use magic $SECONDS.
## But for now:
# NOW holds the agent's start time in seconds since the epoch.
NOW=$(date +%s)
# usage: print the list of supported actions on stdout.
usage() {
    local actions="start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all"
    echo "usage: $0 {${actions}}"
}
meta_data() {
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="VirtualDomain">
<version>1.1</version>
<longdesc lang="en">
Resource agent for a virtual domain (a.k.a. domU, virtual machine,
virtual environment etc., depending on context) managed by libvirtd.
</longdesc>
<shortdesc lang="en">Manages virtual domains through the libvirt virtualization framework</shortdesc>
<parameters>
<parameter name="config" unique="1" required="1">
<longdesc lang="en">
Absolute path to the libvirt configuration file,
for this virtual domain.
</longdesc>
<shortdesc lang="en">Virtual domain configuration file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="hypervisor" unique="0" required="0">
<longdesc lang="en">
Hypervisor URI to connect to. See the libvirt documentation for
details on supported URI formats. The default is system dependent.
</longdesc>
<shortdesc lang="en">Hypervisor URI</shortdesc>
<content type="string" default="${OCF_RESKEY_hypervisor_default}"/>
</parameter>
<parameter name="force_stop" unique="0" required="0">
<longdesc lang="en">
Always forcefully shut down ("destroy") the domain on stop. The default
behavior is to resort to a forceful shutdown only after a graceful
shutdown attempt has failed. You should only set this to true if
your virtual domain (or your virtualization backend) does not support
graceful shutdown.
</longdesc>
<shortdesc lang="en">Always force shutdown on stop</shortdesc>
<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
</parameter>
<parameter name="migration_transport" unique="0" required="0">
<longdesc lang="en">
Transport used to connect to the remote hypervisor while
migrating. Please refer to the libvirt documentation for details on
transports available. If this parameter is omitted, the resource will
use libvirt's default transport to connect to the remote hypervisor.
</longdesc>
<shortdesc lang="en">Remote hypervisor transport</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="migration_network_suffix" unique="0" required="0">
<longdesc lang="en">
Use a dedicated migration network. The migration URI is composed by
adding this parameters value to the end of the node name. If the node
name happens to be an FQDN (as opposed to an unqualified host name),
insert the suffix immediately prior to the first period (.) in the FQDN.
At the moment Qemu/KVM and Xen migration via a dedicated network is supported.
Note: Be sure this composed host name is locally resolveable and the
associated IP is reachable through the favored network.
</longdesc>
<shortdesc lang="en">Migration network host name suffix</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="monitor_scripts" unique="0" required="0">
<longdesc lang="en">
To additionally monitor services within the virtual domain, add this
parameter with a list of scripts to monitor.
Note: when monitor scripts are used, the start and migrate_from operations
will complete only when all monitor scripts have completed successfully.
Be sure to set the timeout of these operations to accommodate this delay.
</longdesc>
<shortdesc lang="en">space-separated list of monitor scripts</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="autoset_utilization_cpu" unique="0" required="0">
<longdesc lang="en">
If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it
-into the cpu utilization of the resource when the monitor is executed.
+into the CPU utilization of the resource when the monitor is executed.
</longdesc>
-<shortdesc lang="en">Enable auto setting the cpu utilization of the resource</shortdesc>
+<shortdesc lang="en">Enable auto-setting the CPU utilization of the resource</shortdesc>
<content type="boolean" default="true" />
</parameter>
<parameter name="autoset_utilization_hv_memory" unique="0" required="0">
<longdesc lang="en">
If set true, the agent will detect the number of *Max memory* from virsh, and put it
into the hv_memory utilization of the resource when the monitor is executed.
</longdesc>
-<shortdesc lang="en">Enable auto setting the hv_memory utilization of the resource</shortdesc>
+<shortdesc lang="en">Enable auto-setting the hv_memory utilization of the resource</shortdesc>
<content type="boolean" default="true" />
</parameter>
<parameter name="migrateport" unique="0" required="0">
<longdesc lang="en">
This port will be used in the qemu migrateuri. If unset, the port will be a random highport.
</longdesc>
<shortdesc lang="en">Port for migrateuri</shortdesc>
<content type="integer" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="90" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="migrate_from" timeout="60" />
<action name="migrate_to" timeout="120" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
EOF
}
# Set a utilization attribute of this resource, but only when the stored
# value differs from the requested one.
#
# Arguments: $1 - attribute name
#            $2 - desired value
# Globals:   OCF_RESOURCE_INSTANCE (read)
#
# A failure to write the attribute is logged as a warning and is not
# propagated as an error to the caller's control flow.
set_util_attr() {
	local attr=$1 val=$2
	local cval outp
	# Read the current value first so unchanged values cause no write.
	cval=$(crm_resource -Q -r "$OCF_RESOURCE_INSTANCE" -z -g "$attr" 2>/dev/null)
	if [ "$cval" != "$val" ]; then
		# All expansions are quoted so that names or values containing
		# whitespace or glob characters reach crm_resource intact.
		outp=$(crm_resource -r "$OCF_RESOURCE_INSTANCE" -z -p "$attr" -v "$val" 2>&1) ||
			ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp"
	fi
}
# Refresh the resource's utilization attributes from "virsh dominfo".
#
# Globals: OCF_RESKEY_autoset_utilization_cpu,
#          OCF_RESKEY_autoset_utilization_hv_memory,
#          VIRSH_OPTIONS, DOMAIN_NAME (all read)
#
# Each attribute is only touched when its autoset parameter is true and
# the corresponding value could be extracted from the dominfo output.
update_utilization() {
	local vcpus mem_mb

	if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then
		# Second field of the "CPU(s):" line.
		vcpus=$(LANG=C virsh $VIRSH_OPTIONS dominfo $DOMAIN_NAME | awk '/CPU\(s\)/{print $2}')
		[ -n "$vcpus" ] && set_util_attr cpu $vcpus
	fi

	if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then
		# Third field of the "Max memory" line, divided by 1024.
		mem_mb=$(LANG=C virsh $VIRSH_OPTIONS dominfo $DOMAIN_NAME | awk '/Max memory/{printf("%d", $3/1024)}')
		[ -n "$mem_mb" ] && set_util_attr hv_memory "$mem_mb"
	fi
}
# Set options to be passed to virsh:
# This is one space-separated string; the functions in this script expand
# it unquoted so that it splits into separate virsh arguments.
VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"
# A state file where we record the domain name:
# (one file per resource instance, located under ${HA_RSCTMP})
STATEFILE="${HA_RSCTMP}/VirtualDomain-${OCF_RESOURCE_INSTANCE}.state"
# Define the domain from the configuration file and record its name in
# the state file.
#
# Globals: DOMAIN_NAME, VIRSH_OPTIONS, OCF_RESKEY_config, STATEFILE (read)
#
# The domain name is parsed from the output of "virsh define"; both the
# "defined from" message and the "already exists" error carry it. The
# function retries once per second until one of the two is seen.
VirtualDomain_Define() {
	local virsh_output
	local domain_name
	# Note: passing in the domain name from outside the script is
	# intended for testing and debugging purposes only. Don't do this
	# in production, instead let the script figure out the domain name
	# from the config file. You have been warned.
	if [ -z "$DOMAIN_NAME" ]; then
		# Spin until we have a domain name
		while true; do
			# Plain $( ... 2>&1) instead of "$((cmd) 2>&1)": the latter
			# starts with "$((" and may be parsed as arithmetic
			# expansion by some shells.
			virsh_output=$(virsh ${VIRSH_OPTIONS} define "${OCF_RESKEY_config}" 2>&1)
			domain_name=$(echo "$virsh_output" | sed -n -e 's/Domain \(.*\) defined from .*$/\1/p')
			if [ -n "$domain_name" ]; then
				break
			fi
			# NOTE(review): $virsh_output is left unquoted here, which joins
			# multi-line output into a single line before matching;
			# presumably intentional - confirm before quoting it.
			domain_name=$(echo $virsh_output | sed -n -e "s/.* '\(.*\)' already exists .*/\1/p")
			if [ -n "$domain_name" ]; then
				break
			fi
			ocf_log debug "Domain not defined yet, probably unable to connect to hypervisor. Retrying."
			sleep 1
		done
		echo "$domain_name" > "$STATEFILE"
		ocf_log info "Domain name \"$domain_name\" saved to $STATEFILE."
	else
		ocf_log warn "Domain name ${DOMAIN_NAME} already defined, overriding configuration file ${OCF_RESKEY_config}. You should do this for testing only."
	fi
}
# Remove the state file that records the domain name; a failure is only
# logged as a warning.
#
# Globals: STATEFILE, __OCF_ACTION (read)
VirtualDomain_Cleanup_Statefile() {
	# Quoted, and protected with "--", so a path containing whitespace or a
	# leading dash is removed as a single file.
	rm -f -- "$STATEFILE" || ocf_log warn "Failed to remove $STATEFILE during $__OCF_ACTION."
}
# Query the domain state via "virsh domstate" and map it to an OCF code.
#
# Globals: VIRSH_OPTIONS, DOMAIN_NAME, __OCF_ACTION (read);
#          rc, status (written - neither is declared local here)
# Returns: $OCF_SUCCESS      for running/paused/idle/blocked/in shutdown
#          $OCF_NOT_RUNNING  for "shut off"
#          $OCF_ERR_GENERIC  for unknown states, or when no state could
#                            be obtained after three tries during stop
VirtualDomain_Status() {
	local try=0
	rc=$OCF_ERR_GENERIC
	status="no state"
	until [ "$status" != "no state" ]; do
		try=$((try + 1))
		status=$(virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME)
		# An empty reply (virsh got no answer from libvirtd) is handled
		# exactly like an explicit "no state".
		[ -n "$status" ] || status="no state"
		case "$status" in
			"shut off")
				# The domain is defined, but not started.
				ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
				rc=$OCF_NOT_RUNNING
				;;
			running|paused|idle|blocked|"in shutdown")
				# running:     actively consuming cycles
				# paused:      suspended
				# idle:        running but idle
				# blocked:     synonym for idle used by legacy Xen versions
				# in shutdown: shutting down, but not yet completely
				#              shut down or crashed
				ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
				rc=$OCF_SUCCESS
				;;
			"no state")
				# May occur on the migration target while the domain is
				# being migrated, or whenever virsh cannot reliably
				# obtain the domain state.
				if [ "$__OCF_ACTION" != "stop" ] || [ $try -lt 3 ]; then
					# Keep trying; outside of stop we rely on the
					# CRM/LRM to time us out if this takes too long.
					ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying."
					sleep 1
				else
					# During stop, give up quickly so the caller can
					# resort to a forced stop (destroy).
					ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out."
					return $OCF_ERR_GENERIC
				fi
				;;
			*)
				# Anything else is unexpected; rc stays $OCF_ERR_GENERIC.
				ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!"
				;;
		esac
	done
	return $rc
}
# Start the domain and wait until the monitor reports it healthy.
#
# Globals: VIRSH_OPTIONS, DOMAIN_NAME (read); rc (written)
# Returns: $OCF_SUCCESS if the domain was already active or came up,
#          $OCF_ERR_GENERIC if "virsh start" failed.
# The wait loop has no upper bound of its own.
VirtualDomain_Start() {
	if VirtualDomain_Status; then
		ocf_log info "Virtual domain $DOMAIN_NAME already running."
		return $OCF_SUCCESS
	fi

	virsh $VIRSH_OPTIONS start $DOMAIN_NAME
	rc=$?
	[ $rc -eq 0 ] || {
		ocf_log error "Failed to start virtual domain ${DOMAIN_NAME}."
		return $OCF_ERR_GENERIC
	}

	# Poll once per second until the monitor succeeds.
	until VirtualDomain_Monitor; do
		sleep 1
	done
	return $OCF_SUCCESS
}
# Stop the domain.
#
# Unless force_stop is true, a graceful "virsh shutdown" is requested and
# the domain state is polled until the operation timeout (minus 5 seconds)
# is reached. If the domain is then still active, or its state cannot be
# determined, the domain is destroyed with "virsh destroy".
#
# Globals: VIRSH_OPTIONS, DOMAIN_NAME, OCF_RESKEY_force_stop,
#          OCF_RESKEY_CRM_meta_timeout (read); NOW (read and refreshed)
# Returns: $OCF_SUCCESS if the domain is (or already was) stopped,
#          $OCF_ERR_GENERIC if "virsh destroy" failed for a reason other
#          than the domain not running.
VirtualDomain_Stop() {
# NOTE: "i" is declared but not used in this function.
local i
# "status" is local here; VirtualDomain_Status assigns a variable of the
# same name without declaring it local, so its assignments land in this
# local until it is overwritten with the return code below.
local status
local shutdown_timeout
local out ex
VirtualDomain_Status
status=$?
case $status in
$OCF_SUCCESS)
if ! ocf_is_true $OCF_RESKEY_force_stop; then
# Issue a graceful shutdown request
ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}."
virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME}
# The "shutdown_timeout" we use here is the operation
# timeout specified in the CIB, minus 5 seconds
# (OCF_RESKEY_CRM_meta_timeout is divided by 1000 before being added to
# $NOW, which holds seconds since the epoch from "date +%s")
shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 ))
# Loop on status until we reach $shutdown_timeout
while [ $NOW -lt $shutdown_timeout ]; do
VirtualDomain_Status
status=$?
case $status in
$OCF_NOT_RUNNING)
# This was a graceful shutdown. Clean
# up and return.
VirtualDomain_Cleanup_Statefile
return $OCF_SUCCESS
;;
$OCF_SUCCESS)
# Domain is still running, keep
# waiting (until shutdown_timeout
# expires)
sleep 1
;;
*)
# Something went wrong. Bail out and
# resort to forced stop (destroy).
break;
esac
# Refresh the clock for the loop condition.
NOW=$(date +%s)
done
fi
;;
$OCF_NOT_RUNNING)
ocf_log info "Domain $DOMAIN_NAME already stopped."
return $OCF_SUCCESS
esac
# Any other initial status matches neither arm above and falls through
# to the forced stop below.
# OK. Now if the above graceful shutdown hasn't worked, kill
# off the domain with destroy. If that too does not work,
# have the LRM time us out.
ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1)
ex=$?
# Pass virsh's captured output on to stderr.
echo >&2 "$out"
# unconditionally clean up.
VirtualDomain_Cleanup_Statefile
# The exit code and the output are concatenated so that one case
# statement can match on both: the first character tells zero from
# non-zero, the rest is the message text.
case $ex$out in
*"error:"*"domain is not running"*)
: ;; # unexpected path to the intended outcome, all is well
[!0]*)
return $OCF_ERR_GENERIC ;;
0*)
# destroy succeeded: wait until the domain is reported as shut off.
while [ $status != $OCF_NOT_RUNNING ]; do
VirtualDomain_Status
status=$?
done ;;
esac
return $OCF_SUCCESS
}
# Live-migrate the domain to the node named in
# $OCF_RESKEY_CRM_meta_migrate_target.
#
# Globals: DOMAIN_NAME, VIRSH_OPTIONS, OCF_RESKEY_hypervisor,
#          OCF_RESKEY_migration_transport,
#          OCF_RESKEY_migration_network_suffix,
#          OCF_RESKEY_migrateport (read); rc (written)
# Returns: $OCF_SUCCESS when "virsh migrate" succeeded (the state file is
#          removed in that case), $OCF_ERR_GENERIC when the domain is not
#          active locally or the migration failed.
VirtualDomain_Migrate_To() {
	local dest_node
	local dest_uri
	local transport
	local mig_uri
	local migrateport
	local mig_host
	local hv_scheme
	dest_node="$OCF_RESKEY_CRM_meta_migrate_target"

	if ! VirtualDomain_Status; then
		ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!"
		return $OCF_ERR_GENERIC
	fi

	# Work out the remote hypervisor to connect to, i.e. turn something
	# like "qemu://foo:9999/system" into "qemu+tcp://bar:9999/system".
	[ -z "${OCF_RESKEY_migration_transport}" ] ||
		transport="+${OCF_RESKEY_migration_transport}"

	# A typical migration URI via a dedicated migration network looks
	# like "tcp://bar-mig:49152". If the port were omitted, libvirt
	# (at least since 0.7.4) would pick one from 49152-49215.
	if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then
		hv_scheme="${OCF_RESKEY_hypervisor%%[+:]*}"
		# The node name may be an FQDN: the suffix goes right before
		# the first dot.
		mig_host=$(echo ${dest_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")
		case $hv_scheme in
			qemu)
				# Quite ancient libvirt versions need a migration port
				# and a URI without the "//". Newer versions cope
				# with this "bad" URI as well.
				mig_uri="tcp:${mig_host}:${OCF_RESKEY_migrateport}"
				;;
			xen)
				mig_uri="xenmigr://${mig_host}"
				;;
			*)
				ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hv_scheme}."
				;;
		esac
	fi

	# Swap the host part of the hypervisor URI for the target node and
	# append the transport to the scheme; port and path are kept.
	dest_uri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport}://${dest_node}\2/\3,")

	# We know where to connect to; perform the actual migration.
	# ${mig_uri} is expanded unquoted so it disappears when empty.
	ocf_log info "$DOMAIN_NAME: Starting live migration to ${dest_node} (using remote hypervisor URI ${dest_uri} ${mig_uri})."
	virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${dest_uri} ${mig_uri}
	rc=$?
	if [ $rc -eq 0 ]; then
		ocf_log info "$DOMAIN_NAME: live migration to ${dest_node} succeeded."
		VirtualDomain_Cleanup_Statefile
		return $OCF_SUCCESS
	fi
	ocf_log err "$DOMAIN_NAME: live migration to ${dest_uri} ${mig_uri} failed: $rc"
	return $OCF_ERR_GENERIC
}
# Complete an incoming live migration: poll the monitor once per second
# until it succeeds, then report success. The loop has no upper bound of
# its own.
#
# Globals: DOMAIN_NAME, OCF_RESKEY_CRM_meta_migrate_source (read)
# Returns: $OCF_SUCCESS
VirtualDomain_Migrate_From() {
	until VirtualDomain_Monitor; do
		sleep 1
	done
	ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."
	return $OCF_SUCCESS
}
# Monitor the domain: check its libvirt state and, if it is active, run
# the configured monitor scripts in order.
#
# Globals: OCF_RESKEY_monitor_scripts, DOMAIN_NAME (read);
#          rc, script, script_output, script_rc (written)
# Returns: the code from VirtualDomain_Status, or $OCF_ERR_GENERIC as
#          soon as one monitor script exits with a non-success code.
# Utilization attributes are refreshed on every call, whatever the result.
VirtualDomain_Monitor() {
	# Anything other than $OCF_SUCCESS from the state check means
	# something is definitely wrong; the scripts are then skipped.
	VirtualDomain_Status
	rc=$?
	if [ ${rc} -eq ${OCF_SUCCESS} ]; then
		for script in ${OCF_RESKEY_monitor_scripts}; do
			script_output="$($script 2>&1)"
			script_rc=$?
			if [ ${script_rc} -eq ${OCF_SUCCESS} ]; then
				ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}"
				continue
			fi
			# First failing script: warn, report a generic error and
			# do not run the remaining scripts.
			ocf_log warn "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
			rc=$OCF_ERR_GENERIC
			break
		done
	fi
	update_utilization
	return ${rc}
}
# Validate the environment and the "config" parameter.
#
# Globals: OCF_RESKEY_config, __OCF_ACTION (read); binary (written)
# Returns: $OCF_ERR_CONFIGURED if "config" is empty,
#          $OCF_ERR_INSTALLED  if the config file is unreadable outside
#                              of a probe or a stop,
#          $OCF_SUCCESS        otherwise.
VirtualDomain_Validate_All() {
	# Required binaries:
	for binary in virsh sed; do
		check_binary $binary
	done
	# The parameter is quoted in every test below: unquoted, a path with
	# whitespace makes "[" fail with a syntax error, which skipped the
	# readability check entirely.
	if [ -z "$OCF_RESKEY_config" ]; then
		ocf_log error "Missing configuration parameter \"config\"."
		return $OCF_ERR_CONFIGURED
	fi
	# check if we can read the config file (otherwise we're unable to
	# deduce $DOMAIN_NAME from it, see below)
	if [ ! -r "$OCF_RESKEY_config" ]; then
		if ocf_is_probe; then
			ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe."
		elif [ "$__OCF_ACTION" = "stop" ]; then
			ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped."
		else
			ocf_log error "Configuration file $OCF_RESKEY_config does not exist or is not readable."
			return $OCF_ERR_INSTALLED
		fi
	fi
	# Explicit success so the result does not depend on the exit status
	# of the last logging call.
	return $OCF_SUCCESS
}
# Main: exactly one argument, the action to perform.
if [ $# -ne 1 ]; then
	usage
	exit $OCF_ERR_ARGS
fi

# meta-data and usage need neither validation nor a domain name.
case $1 in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage)
		usage
		exit $OCF_SUCCESS
		;;
esac

# Everything except usage and meta-data must pass the validate test
VirtualDomain_Validate_All || exit $?

# During a probe, it is permissible for the config file to not be
# readable (it might be on shared storage not available during the
# probe). In that case, VirtualDomain_Define can't work and we're
# unable to get the domain name. Thus, we also can't check whether the
# domain is running. The only thing we can do here is to assume that
# it is not running.
#
# File names are quoted in all tests below so that paths containing
# whitespace do not turn into "[" syntax errors.
if [ ! -r "$OCF_RESKEY_config" ]; then
	ocf_is_probe && exit $OCF_NOT_RUNNING
	[ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS
fi

# Define the domain on startup, and re-define whenever someone deleted
# the state file, or touched the config.
if [ ! -e "$STATEFILE" ] || [ "$OCF_RESKEY_config" -nt "$STATEFILE" ]; then
	VirtualDomain_Define
fi

# By now, we should definitely be able to read from the state file.
# If not, something went wrong.
if [ ! -r "$STATEFILE" ]; then
	ocf_log err "$STATEFILE not found or unreadable. This is unexpected. Cannot determine domain name."
	exit $OCF_ERR_GENERIC
fi

# Finally, retrieve the domain name from the state file.
DOMAIN_NAME=$(cat "$STATEFILE" 2>/dev/null)
if [ -z "$DOMAIN_NAME" ]; then
	ocf_log err "$STATEFILE is empty. This is unexpected. Cannot determine domain name."
	exit $OCF_ERR_GENERIC
fi

# Dispatch the action; the script exits with the handler's status.
case $1 in
	start)
		VirtualDomain_Start
		;;
	stop)
		VirtualDomain_Stop
		;;
	migrate_to)
		VirtualDomain_Migrate_To
		;;
	migrate_from)
		VirtualDomain_Migrate_From
		;;
	status)
		VirtualDomain_Status
		;;
	monitor)
		VirtualDomain_Monitor
		;;
	validate-all)
		# Validation already ran above; nothing more to do.
		;;
	*)
		usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
exit $?
diff --git a/heartbeat/apache b/heartbeat/apache
index a313372b7..26b3223f8 100755
--- a/heartbeat/apache
+++ b/heartbeat/apache
@@ -1,552 +1,552 @@
#!/bin/sh
#
# High-Availability Apache/IBMhttp control script
#
# apache (aka IBMhttpd)
#
# Description: starts/stops apache web servers.
#
# Author: Alan Robertson
# Sun Jiang Dong
#
# Support: linux-ha@lists.linux-ha.org
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2002-2005 International Business Machines
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf
# node1 10.0.0.170 IBMhttpd
#
# Our parsing of the Apache config files is very rudimentary.
# It'll work with lots of different configurations - but not every
# possible configuration.
#
# Patches are being accepted ;-)
#
# OCF parameters:
# OCF_RESKEY_configfile
# OCF_RESKEY_httpd
# OCF_RESKEY_port
# OCF_RESKEY_statusurl
# OCF_RESKEY_options
# OCF_RESKEY_testregex
# OCF_RESKEY_client
# OCF_RESKEY_testurl
# OCF_RESKEY_testregex10
# OCF_RESKEY_testconffile
# OCF_RESKEY_testname
# OCF_RESKEY_envfiles
# Load the common OCF helpers plus the apache-specific helper files that
# live in the same directory.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/apache-conf.sh
. ${OCF_FUNCTIONS_DIR}/http-mon.sh
HA_VARRUNDIR=${HA_VARRUN}

#######################################################################
#
#	Configuration options - usually you don't need to change these
#
#######################################################################
#
IBMHTTPD=/opt/IBMHTTPServer/bin/httpd
# Candidate httpd binaries, in the order in which they are tried.
HTTPDLIST="/sbin/httpd2 /usr/sbin/httpd2 /usr/sbin/apache2 /sbin/httpd /usr/sbin/httpd /usr/sbin/apache $IBMHTTPD"
MPM=/usr/share/apache2/find_mpm
# If the find_mpm helper is executable, whatever it prints is appended
# to the candidate list.
if [ -x $MPM ]; then
	HTTPDLIST="$HTTPDLIST $($MPM 2>/dev/null)"
fi

LOCALHOST="http://localhost"
HTTPDOPTS="-DSTATUS"
DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf
DEFAULT_NORMCONFIG="/etc/apache2/httpd.conf"
#
# You can also set
#	HTTPD
#	PORT
#	STATUSURL
#	CONFIGFILE
# in this section if what we're doing doesn't work for you...
#
#	End of Configuration options
#######################################################################

# Name under which this script was invoked; used in log messages.
CMD=$(basename $0)
# The config-file-pathname is the pathname to the configuration
# file for this web server. Various appropriate defaults are
# assumed if no config file is specified. If this command is
# invoked as *IBM*, then the default config file name is
# $DEFAULT_IBMCONFIG, otherwise the default config file
# will be $DEFAULT_NORMCONFIG.
# Print the list of supported actions, with a short description of each,
# to stdout. "$0" is expanded inside the here-document.
usage() {
cat <<-!
usage: $0 action
action:
start start the web server
stop stop the web server
status return the status of web server, run or down
monitor return TRUE if the web server appears to be working.
For this to be supported you must configure mod_status
and give it a server-status URL. You have to have
installed either curl or wget for this to work.
meta-data show meta data message
validate-all validate the instance parameters
!
}
#
# return TRUE if a process with given PID is running
#
# Arguments: $1 - PID to check
# Globals:   ApachePID (written) - set to $1; other functions in this
#            script read it afterwards for signalling and log messages
# Returns:   0 if the process exists, non-zero otherwise (including
#            when $1 is empty or not a decimal number)
ProcessRunning() {
	ApachePID=$1
	# An empty or non-numeric value can never name a live process.
	# Without this guard an empty PID tested "/proc/", which always
	# exists, and so reported the server as running.
	case "$ApachePID" in
		''|*[!0-9]*) return 1 ;;
	esac
	# Use /proc if it looks like it's here...
	if [ -d /proc ] && [ -d /proc/1 ]; then
		[ -d "/proc/$ApachePID" ]
	else
		# This assumes we're running as root...
		kill -s 0 "$ApachePID" >/dev/null 2>&1
	fi
}
# Report, without logging anything, whether the server recorded in the
# pid file is running.
#
# Globals: PidFile (read)
# Returns: 0 if the pid file exists, contains a PID and ProcessRunning
#          confirms that process; non-zero otherwise.
silent_status() {
	# No pid file (or no pid file name at all): not running. Quoting
	# matters here: an unquoted empty $PidFile made "[ -f ]" true and
	# let cat read from stdin.
	[ -f "$PidFile" ] || return 1
	# Left unquoted on purpose: word-splitting strips the surrounding
	# whitespace and newlines from the file content.
	set -- $(cat "$PidFile")
	# An empty pid file carries no PID to check.
	[ -n "$1" ] || return 1
	ProcessRunning "$1"
}
# Distribution-specific checks of the default configuration. Only SUSE
# (detected through /etc/SuSE-release) is handled so far; other
# distributions may be added in the future.
#
# Returns: 0 when not on SUSE, otherwise the status of
#          validate_default_suse_config.
validate_default_config() {
	[ -e /etc/SuSE-release ] || return 0
	validate_default_suse_config
}
# When using the default /etc/apache2/httpd.conf on SUSE, the file
# /etc/apache2/sysconfig.d/include.conf is required to be present,
# but this is only generated if you run the apache init script
# (with contents derived from /etc/sysconfig/apache2). So, if the
# default system config file is in use and it pulls in that include,
# "/etc/init.d/apache2 configtest" is run to make sure the relevant
# config is generated and valid. mod_status is enabled on the way,
# provided a2enmod is available.
#
# Globals: CONFIGFILE, DEFAULT_NORMCONFIG (read)
# Returns: 0 when the check does not apply, otherwise the status of the
#          configtest run.
validate_default_suse_config() {
	# Only the distribution default config file is of interest ...
	[ "$CONFIGFILE" = "$DEFAULT_NORMCONFIG" ] || return 0
	# ... and only if it includes the generated sysconfig file.
	grep -Eq '^Include[[:space:]]+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE" ||
		return 0

	if [ -x "/usr/sbin/a2enmod" ]; then
		ocf_run -q /usr/sbin/a2enmod status
	fi
	ocf_run -q /etc/init.d/apache2 configtest
}
# Start the web server and wait until the monitor stops reporting
# "not running".
#
# Globals: CMD, HTTPD, HTTPDOPTS, OPTIONS, CONFIGFILE, PidFile,
#          PIDFILE_DIRECTIVE (read); tries, ec (written)
# Returns: $OCF_SUCCESS if the server was already running,
#          $OCF_ERR_CONFIGURED if the default config validation failed,
#          otherwise the first monitor result other than
#          $OCF_NOT_RUNNING.
apache_start() {
	if silent_status; then
		ocf_log info "$CMD already running (pid $ApachePID)"
		return $OCF_SUCCESS
	fi

	validate_default_config || return $OCF_ERR_CONFIGURED

	# https://bugs.launchpad.net/ubuntu/+source/apache2/+bug/603211
	if [ ! -d /var/run/apache2 ]; then
		mkdir /var/run/apache2
	fi

	# The PidFile is only passed on the command line when
	# PIDFILE_DIRECTIVE is set.
	if [ -z $PIDFILE_DIRECTIVE ]; then
		ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE
	else
		ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE -c "PidFile $PidFile"
	fi

	tries=0
	# wait until the user set timeout
	while :; do
		apache_monitor
		ec=$?
		[ $ec -eq $OCF_NOT_RUNNING ] || break
		tries=$((tries + 1))
		ocf_log info "waiting for apache $CONFIGFILE to come up"
		sleep 1
	done

	# The server came up but the monitor failed: stop it again.
	if [ $ec -ne 0 ] && silent_status; then
		apache_stop
	fi
	return $ec
}
# Stop the web server.
#
# The PID recorded by silent_status/ProcessRunning (in $ApachePID) is
# signalled with the default kill signal up to ten more times, one second
# apart. Afterwards any process whose command line still matches
# "$HTTPD.*$CONFIGFILE" is signalled with TERM, HUP and finally KILL.
#
# Globals: CMD, HTTPD, CONFIGFILE, ApachePID (read); tries, sig (written)
# NOTE(review): the exit status is that of the final signalling loop, not
# of the "still running" check - confirm that callers expect this.
apache_stop() {
	if silent_status; then
		if kill "$ApachePID"; then
			tries=0
			while ProcessRunning "$ApachePID" && [ $tries -lt 10 ]; do
				sleep 1
				kill "$ApachePID" >/dev/null
				ocf_log info "Killing apache PID $ApachePID"
				tries=$((tries + 1))
			done
		else
			ocf_log warn "Killing apache PID $ApachePID FAILED."
		fi
		if ProcessRunning "$ApachePID"; then
			ocf_log info "$CMD still running ($ApachePID)."
		else
			ocf_log info "$CMD stopped."
		fi
	else
		ocf_log info "$CMD is not running."
	fi

	# The pattern is quoted so that it reaches pgrep/pkill as one
	# argument: unquoted it was subject to word-splitting (paths with
	# whitespace) and to pathname expansion of the ".*".
	for sig in SIGTERM SIGHUP SIGKILL; do
		if pgrep -f "$HTTPD.*$CONFIGFILE" >/dev/null; then
			pkill -$sig -f "$HTTPD.*$CONFIGFILE" >/dev/null
			ocf_log info "apache children were signalled ($sig)"
			sleep 1
		else
			break
		fi
	done
}
# Depth-10 monitor: fetch the test URL and match the test regex against
# the response, case-insensitively.
#
# Globals: TESTCONFFILE, TESTURL, TESTREGEX10 (read);
#          test_url, test_regex, whattorun (written)
# Returns: $OCF_ERR_CONFIGURED if is_testconf_sane fails,
#          $OCF_SUCCESS if the regex matches, $OCF_ERR_GENERIC otherwise.
apache_monitor_10() {
	# Without a test configuration file, the URL and the regex come from
	# the resource parameters.
	if [ -z "$TESTCONFFILE" ]; then
		test_url="$TESTURL"
		test_regex="$TESTREGEX10"
	else
		readtestconf < $TESTCONFFILE
	fi

	whattorun=$(gethttpclient)
	fixtesturl
	is_testconf_sane || return $OCF_ERR_CONFIGURED

	$whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null &&
		return $OCF_SUCCESS
	return $OCF_ERR_GENERIC
}
# Basic monitor: fetch $STATUSURL with the selected client's "_func"
# helper and look for $TESTREGEX in the response, case-insensitively.
#
# Globals: ourhttpclient, STATUSURL, TESTREGEX (read)
# Returns: $OCF_SUCCESS on a match, $OCF_ERR_GENERIC otherwise.
apache_monitor_basic() {
	${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null ||
		return $OCF_ERR_GENERIC
	return $OCF_SUCCESS
}
# Monitor the web server: check the process first, then run the HTTP
# check that belongs to the level reported by "ocf_check_level 10".
#
# Globals: CMD (read); ourhttpclient (written)
# Returns: $OCF_NOT_RUNNING if the process is not running,
#          $OCF_ERR_INSTALLED if no HTTP client could be found,
#          otherwise the result of the selected HTTP check.
apache_monitor() {
	if ! silent_status; then
		ocf_log info "$CMD not running"
		return $OCF_NOT_RUNNING
	fi

	ourhttpclient=$(findhttpclient) # we'll need one
	[ -n "$ourhttpclient" ] || {
		ocf_log err "could not find a http client; make sure that either wget or curl is available"
		return $OCF_ERR_INSTALLED
	}

	case $(ocf_check_level 10) in
		0) apache_monitor_basic ;;
		10) apache_monitor_10 ;;
	esac
}
apache_meta_data(){
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="apache">
<version>1.0</version>
<longdesc lang="en">
-This is the resource agent for the Apache web server.
+This is the resource agent for the Apache Web server.
This resource agent operates both version 1.x and version 2.x Apache
servers.
The start operation ends with a loop in which monitor is
repeatedly called to make sure that the server started and that
it is operational. Hence, if the monitor operation does not
succeed within the start operation timeout, the apache resource
will end with an error status.
The monitor operation by default loads the server status page
which depends on the mod_status module and the corresponding
configuration file (usually /etc/apache2/mod_status.conf).
Make sure that the server status page works and that the access
is allowed *only* from localhost (address 127.0.0.1).
See the statusurl and testregex attributes for more details.
See also http://httpd.apache.org/
</longdesc>
-<shortdesc lang="en">Manages an Apache web server instance</shortdesc>
+<shortdesc lang="en">Manages an Apache Web server instance</shortdesc>
<parameters>
<parameter name="configfile" required="0" unique="1">
<longdesc lang="en">
The full pathname of the Apache configuration file.
This file is parsed to provide defaults for various other
resource agent parameters.
</longdesc>
<shortdesc lang="en">configuration file path</shortdesc>
<content type="string" default="/etc/apache2/httpd.conf" />
</parameter>
<parameter name="httpd">
<longdesc lang="en">
The full pathname of the httpd binary (optional).
</longdesc>
<shortdesc lang="en">httpd binary path</shortdesc>
<content type="string" default="/usr/sbin/httpd" />
</parameter>
<parameter name="port" >
<longdesc lang="en">
A port number that we can probe for status information
using the statusurl.
This will default to the port number found in the
configuration file, or 80, if none can be found
in the configuration file.
</longdesc>
<shortdesc lang="en">httpd port</shortdesc>
<content type="integer" />
</parameter>
<parameter name="statusurl">
<longdesc lang="en">
The URL to monitor (the apache server status page by default).
If left unspecified, it will be inferred from
the apache configuration file.
If you set this, make sure that it succeeds *only* from the
localhost (127.0.0.1). Otherwise, it may happen that the cluster
complains about the resource being active on multiple nodes.
</longdesc>
<shortdesc lang="en">url name</shortdesc>
<content type="string" />
</parameter>
<parameter name="testregex">
<longdesc lang="en">
Regular expression to match in the output of statusurl.
Case insensitive.
</longdesc>
<shortdesc lang="en">monitor regular expression</shortdesc>
<content type="string" default="exists, but impossible to show in a human readable format (try grep testregex)"/>
</parameter>
<parameter name="client">
<longdesc lang="en">
Client to use to query to Apache. If not specified, the RA will
try to find one on the system. Currently, wget and curl are
supported. For example, you can set this parameter to "curl" if
you prefer that to wget.
</longdesc>
<shortdesc lang="en">http client</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="testurl">
<longdesc lang="en">
URL to test. If it does not start with "http", then it's
considered to be relative to the Listen address.
</longdesc>
<shortdesc lang="en">test url</shortdesc>
<content type="string" />
</parameter>
<parameter name="testregex10">
<longdesc lang="en">
Regular expression to match in the output of testurl.
Case insensitive.
</longdesc>
<shortdesc lang="en">extended monitor regular expression</shortdesc>
<content type="string" />
</parameter>
<parameter name="testconffile">
<longdesc lang="en">
A file which contains test configuration. Could be useful if
you have to check more than one web application or in case sensitive
info should be passed as arguments (passwords). Furthermore,
using a config file is the only way to specify certain
parameters.
Please see README.webapps for examples and file description.
</longdesc>
<shortdesc lang="en">test configuration file</shortdesc>
<content type="string" />
</parameter>
<parameter name="testname">
<longdesc lang="en">
Name of the test within the test configuration file.
</longdesc>
<shortdesc lang="en">test name</shortdesc>
<content type="string" />
</parameter>
<parameter name="options">
<longdesc lang="en">
Extra options to apply when starting apache. See man httpd(8).
</longdesc>
<shortdesc lang="en">command line options</shortdesc>
<content type="string" />
</parameter>
<parameter name="envfiles">
<longdesc lang="en">
Files (one or more) which contain extra environment variables.
If you want to prevent script from reading the default file, set
this parameter to empty string.
</longdesc>
<shortdesc lang="en">environment settings files</shortdesc>
<content type="string" default="/etc/apache2/envvars"/>
</parameter>
<parameter name="use_ipv6">
<longdesc lang="en">
We will try to detect if the URL (for monitor) is IPv6, but if
that doesn't work set this to true to enforce IPv6.
</longdesc>
<shortdesc lang="en">use ipv6 with http clients</shortdesc>
<content type="boolean" default="false"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="40s" />
<action name="stop" timeout="60s" />
<action name="status" timeout="30s" />
<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
return $OCF_SUCCESS
}
# Validate the effective instance parameters.
#
# Globals: PORT, STATUSURL, HTTPD, CONFIGFILE (read)
# Returns: $OCF_ERR_INSTALLED  if the port is invalid, the httpd binary is
#                              missing or not executable, or the
#                              configuration file does not exist,
#          $OCF_ERR_CONFIGURED if STATUSURL does not start with http://,
#          $OCF_SUCCESS        otherwise.
apache_validate_all() {
	if ! CheckPort "$PORT"; then
		ocf_log err "Port number $PORT is invalid!"
		return $OCF_ERR_INSTALLED
	fi

	case $STATUSURL in
		http://*) ;;
		*)
			ocf_log err "Invalid STATUSURL $STATUSURL"
			return $OCF_ERR_CONFIGURED
			;;
	esac

	# Both paths are quoted: with an empty, unquoted value "[ ! -x ]"
	# merely tests the string "-x", so a missing binary or config file
	# name passed validation.
	if [ ! -x "$HTTPD" ]; then
		ocf_log err "HTTPD $HTTPD not found or is not an executable!"
		return $OCF_ERR_INSTALLED
	fi
	if [ ! -f "$CONFIGFILE" ]; then
		ocf_log err "Configuration file $CONFIGFILE not found!"
		return $OCF_ERR_INSTALLED
	fi
	return $OCF_SUCCESS
}
# Pick the httpd binary and the matching default configuration file.
#
# Globals: IBMHTTPD, HTTPDLIST, DEFAULT_IBMCONFIG, DEFAULT_NORMCONFIG,
#          OCF_RESKEY_httpd (read); HTTPD, DefaultConfig, h (written)
#
# When the script is invoked under a name containing "IBM", the IBM
# binary and config are used. Otherwise HTTPD becomes the first entry of
# $HTTPDLIST that is an executable regular file, or stays empty if there
# is none.
find_httpd_prog() {
	case $0 in
		*IBM*)
			HTTPD=$IBMHTTPD
			DefaultConfig=$DEFAULT_IBMCONFIG
			;;
		*)
			HTTPD=
			for h in $HTTPDLIST; do
				[ -f $h ] && [ -x $h ] || continue
				HTTPD=$h
				break
			done
			# Let the user know that the $HTTPD used is not the one
			# (s)he specified via $OCF_RESKEY_httpd
			if [ "X$OCF_RESKEY_httpd" != X ] && [ "X$HTTPD" != X ]; then
				ocf_log info "Using $HTTPD as HTTPD"
			fi
			DefaultConfig=$DEFAULT_NORMCONFIG
			;;
	esac
}
# Copy the resource parameters into the script's global settings, choose
# the httpd binary and configuration file, and parse the configuration.
#
# Globals written: HTTPD, PORT, STATUSURL, CONFIGFILE, OPTIONS, CLIENT,
#                  TESTREGEX, TESTURL, TESTREGEX10, TESTCONFFILE,
#                  TESTNAME, httpd_basename, OCF_RESKEY_envfiles
#                  (defaulted only when unset)
apache_getconfig() {
	# these variables are global
	HTTPD="$OCF_RESKEY_httpd"
	PORT="$OCF_RESKEY_port"
	STATUSURL="$OCF_RESKEY_statusurl"
	CONFIGFILE="$OCF_RESKEY_configfile"
	OPTIONS="$OCF_RESKEY_options"
	CLIENT=${OCF_RESKEY_client}
	TESTREGEX=${OCF_RESKEY_testregex:-'</ *html *>'}
	TESTURL="$OCF_RESKEY_testurl"
	TESTREGEX10=${OCF_RESKEY_testregex10}
	TESTCONFFILE="$OCF_RESKEY_testconffile"
	TESTNAME="$OCF_RESKEY_testname"

	# An envfiles parameter explicitly set to the empty string is kept
	# as is; the default only applies when it is unset.
	: ${OCF_RESKEY_envfiles="/etc/apache2/envvars"}
	source_envfiles $OCF_RESKEY_envfiles

	# Search for a binary if none was given, or if the given one is not
	# an executable regular file.
	if [ "X$HTTPD" = X ] || [ ! -f "$HTTPD" ] || [ ! -x "$HTTPD" ]; then
		find_httpd_prog
	fi

	CONFIGFILE=${CONFIGFILE:-$DefaultConfig}

	# Base name of the binary, cut at the first "-" if there is one.
	httpd_basename=$(basename $HTTPD)
	httpd_basename=${httpd_basename%%-*}

	GetParams $CONFIGFILE
}
# No parameters or binaries are declared as required for this agent.
OCF_REQUIRED_PARAMS=""
OCF_REQUIRED_BINARIES=""
# Hand the command line to the ocf_rarun helper (not defined in this
# script). "$@" passes every argument through unchanged, whereas an
# unquoted $* would re-split and glob-expand them.
ocf_rarun "$@"
# vim:sw=2:ts=8:
diff --git a/heartbeat/dhcpd b/heartbeat/dhcpd
index c272b119e..835a78835 100755
--- a/heartbeat/dhcpd
+++ b/heartbeat/dhcpd
@@ -1,545 +1,545 @@
#!/bin/sh
#
# Resource Agent for managing dhcpd resources.
#
# License: GNU General Public License (GPL)
# (c) 2011-2012 Chris Bowlby,
#
# A fair amount of this script has been pulled from the official dhcpd
# init script. Those portions have been integrated into this script to
# ensure consistent behavior between the resource agent and the
# original script. The copyrights and original authors are credited
# as follows:
#
# Copyright (c) 1996, 1997, 1998 S.u.S.E. GmbH
# Copyright (c) 1998, 1999, 2000, 2001 SuSE GmbH
# Copyright (c) 2002, 2003 SuSE Linux AG
# Copyright (c) 2004-2008 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# Author(s) : Rolf Haberrecker <rolf@suse.de>, 1997-1999
# Peter Poeml <poeml@suse.de>, 2000-2006
# Marius Tomaschewski <mt@suse.de>, 2006-2010
#
# and Linux-HA contributors
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
OCF_RESKEY_binary_default="dhcpd"
OCF_RESKEY_pid_default="/var/run/dhcpd.pid"
OCF_RESKEY_user_default=dhcpd
OCF_RESKEY_group_default=nogroup
OCF_RESKEY_config_default=""
OCF_RESKEY_chrooted_default="true"
OCF_RESKEY_chrooted_path_default="/var/lib/dhcp"
OCF_RESKEY_leases_default="/db/dhcpd.leases"
OCF_RESKEY_interface_default=""
OCF_RESKEY_includes_default=""

# Apply a default to every parameter that is unset (a parameter explicitly
# set to the empty string is left alone).
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}
: ${OCF_RESKEY_chrooted=${OCF_RESKEY_chrooted_default}}
: ${OCF_RESKEY_chrooted_path=${OCF_RESKEY_chrooted_path_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_leases=${OCF_RESKEY_leases_default}}
: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}}
: ${OCF_RESKEY_includes=${OCF_RESKEY_includes_default}}

# To enable support for different versions of dhcp, we need
# to know what version we are being run against.
# The pipeline takes the third "-"-separated field of the version banner,
# keeps what precedes the first ".", and strips one leading letter.
# The sed script is quoted so the shell cannot glob-expand "[a-zA-Z]".
DHCP_VERSION_MAJOR=$($OCF_RESKEY_binary --version 2>&1 | awk -F- '{print $3}' | awk -F. '{print $1}' | sed 's/^[a-zA-Z]//g')

# The value is used in numeric tests later on. If the binary is missing or
# the banner could not be parsed, fall back to 0 so those tests stay
# well-formed and take the "older than 4" branch.
case "$DHCP_VERSION_MAJOR" in
''|*[!0-9]*) DHCP_VERSION_MAJOR=0 ;;
esac

# These files are always copied by default to ensure the chroot environment works.
DEFAULT_FILE_LIST="/etc/gai.conf /etc/nsswitch.conf /etc/resolv.conf /etc/host.conf /etc/hosts /etc/localtime /dev/urandom"
# Print a short usage summary on stdout and return $OCF_SUCCESS.
# The action dispatcher at the end of this agent invokes the function as
# dhcpd_usage, so that is the primary name.
dhcpd_usage() {
cat <<EOF
usage: $0 start|stop|monitor|meta-data|validate-all
$0 manages the dhcp (dhcpd) server as an HA resource.
The 'start' operation starts the dhcpd server.
The 'stop' operation stops the dhcpd server.
The 'monitor' operation reports whether the dhcpd service is running.
The 'validate-all' operation reports whether the parameters are valid.
EOF
return $OCF_SUCCESS
}

# Backward-compatible alias for the original function name.
usage() {
    dhcpd_usage "$@"
}
dhcpd_meta_data() {
# Print the agent's meta-data XML (parameters and supported actions) on
# stdout. The here-document is unquoted, so every
# $OCF_RESKEY_<name>_default reference below is expanded by the shell to
# the current value of that variable when the function runs.
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="dhcpd" version="0.1">
<version>0.1</version>
<longdesc lang="en">
Manage an ISC DHCP server service in a chroot environment.
</longdesc>
- <shortdesc lang="en">Chrooted ISC DHCP Server resource agent.</shortdesc>
+ <shortdesc lang="en">Chrooted ISC DHCP server resource agent.</shortdesc>
<parameters>
<parameter name="config" unique="1" required="1">
<longdesc lang="en">
The absolute path to the DHCP server configuration file.
</longdesc>
<shortdesc lang="en">Configuration file</shortdesc>
<content type="string" default="$OCF_RESKEY_config_default"/>
</parameter>
<parameter name="chrooted" unique="1" required="0">
<longdesc lang="en">
Configure the dhcpd service to run in a chrooted or non-chrooted
mode.
</longdesc>
<shortdesc lang="en">Enable chroot mode</shortdesc>
<content type="boolean" default="$OCF_RESKEY_chrooted_default"/>
</parameter>
<parameter name="chrooted_path" unique="1" required="0">
<longdesc lang="en">
The absolute path of the chrooted DHCP environment.
</longdesc>
<shortdesc lang="en">The chrooted path</shortdesc>
<content type="string" default="$OCF_RESKEY_chrooted_path_default"/>
</parameter>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
The binary for the DHCP server process. An absolute path
definition is not required, but can be used to override
environment path.
</longdesc>
<shortdesc lang="en">dhcpd binary</shortdesc>
<content type="string" default="$OCF_RESKEY_binary_default"/>
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
The system user the DHCP server process will run as when
it is chrooted.
</longdesc>
<shortdesc lang="en">dhcpd owner</shortdesc>
<content type="string" default="$OCF_RESKEY_user_default"/>
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
The system group the DHCP server process will run as when
it is chrooted.
</longdesc>
<shortdesc lang="en">dhcpd group owner</shortdesc>
<content type="string" default="$OCF_RESKEY_group_default"/>
</parameter>
<parameter name="interface" unique="0" required="0">
<longdesc lang="en">
The network interface(s) the DHCP server process will
bind to. A blank value will bind the process to all
interfaces.
</longdesc>
<shortdesc lang="en">Network Interface</shortdesc>
<content type="string" default="$OCF_RESKEY_interface_default"/>
</parameter>
<parameter name="includes" unique="0" required="0">
<longdesc lang="en">
This parameter provides a means to copy include files
into the chrooted environment. If a dhcpd.conf file
contains a line similar to this:
include "/etc/named.keys";
Then an admin also has to tell the dhcpd RA that this
file should be pulled into the chrooted environment. This
is a space delimited list.
</longdesc>
- <shortdesc lang="en">Include Files</shortdesc>
+ <shortdesc lang="en">Include files</shortdesc>
<content type="string" default="$OCF_RESKEY_includes_default"/>
</parameter>
<parameter name="leases" unique="0" required="0">
<longdesc lang="en">
The leases database file, relative to chrooted_path.
</longdesc>
<shortdesc lang="en">Leases file</shortdesc>
<content type="string" default="$OCF_RESKEY_leases_default"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The path and filename of the PID file. It is relative
to chrooted_path.
</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="$OCF_RESKEY_pid_default"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
EOF
}
# Validate the most critical parameters.
# Checks that the dhcpd binary is available, that (outside of a probe) the
# chroot directory and the configuration file are present and readable, and
# that the configured user exists.
# Returns: $OCF_SUCCESS, or $OCF_ERR_INSTALLED on the first failed check.
dhcpd_validate_all() {
    check_binary "$OCF_RESKEY_binary"

    if ! ocf_is_probe; then
        # Test for the appropriate configuration files depending on if
        # chroot mode is enabled.
        if ocf_is_true "$OCF_RESKEY_chrooted"; then
            if ! test -e "$OCF_RESKEY_chrooted_path"; then
                ocf_log err "Path $OCF_RESKEY_chrooted_path does not exist."
                return $OCF_ERR_INSTALLED
            fi
            # Test the config parameter itself for emptiness: the joined
            # "<chroot>/<config>" string always contains the "/" and is
            # therefore never empty.
            if test -n "$OCF_RESKEY_config" &&
                ! test -r "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config"; then
                ocf_log err "Configuration file $OCF_RESKEY_chrooted_path/$OCF_RESKEY_config doesn't exist"
                return $OCF_ERR_INSTALLED
            fi
        else
            if test -n "$OCF_RESKEY_config" && ! test -r "$OCF_RESKEY_config"; then
                ocf_log err "Configuration file $OCF_RESKEY_config doesn't exist"
                return $OCF_ERR_INSTALLED
            fi
        fi
    fi

    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}
# dhcpd_monitor: report whether dhcpd is running according to its PID file.
# Sets the global PIDF to the chrooted PID file, or to
# <dirname of pid>/dhcpd/<basename of pid> when chroot mode is off, and
# passes it to ocf_pidfile_status.
# Returns: $OCF_SUCCESS when that check succeeds, $OCF_NOT_RUNNING otherwise.
dhcpd_monitor() {
    if ocf_is_true $OCF_RESKEY_chrooted; then
        PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid"
    else
        PIDF=$(dirname $OCF_RESKEY_pid)/dhcpd/$(basename $OCF_RESKEY_pid)
    fi

    if ocf_pidfile_status $PIDF >/dev/null 2>&1; then
        return $OCF_SUCCESS
    fi
    return $OCF_NOT_RUNNING
}
# Initialize Chroot
# Build (or repair) the chroot jail under $OCF_RESKEY_chrooted_path: create
# the directory skeleton, mount /proc for dhcpd 4 and later, fix ownership
# of the lease and PID directories, copy the configuration file, the files
# in $DEFAULT_FILE_LIST and the resolver/NSS libraries into the jail.
# Returns: $OCF_SUCCESS, $OCF_ERR_CONFIGURED when there is no configuration
# file, $OCF_ERR_INSTALLED when an include file is missing,
# $OCF_ERR_GENERIC when a copy fails.
dhcpd_initialize_chroot() {
    # If we are running the initialization for the first time, we need to make
    # the new chrooted folder, in case we are not using the same default.
    if ! [ -d "$OCF_RESKEY_chrooted_path" ]; then
        ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use."
    fi

    # Make sure all sub-paths are created if something went wrong during
    # a partial run.
    for i in db dev etc lib64 var/run; do
        mkdir -p "$OCF_RESKEY_chrooted_path/$i"
    done

    # If we are running version 4 of the dhcp server, we need to mount a proc partition.
    if [ "$DHCP_VERSION_MAJOR" -ge 4 ]; then
        mkdir -p "$OCF_RESKEY_chrooted_path/proc"
        if ! [ -e "$OCF_RESKEY_chrooted_path/proc/net/dev" ]; then
            mount -t proc -o ro proc "$OCF_RESKEY_chrooted_path/proc" > /dev/null 2>&1
        fi
    fi

    # If the folder to store the PID file does not exist, make it.
    if ! [ -d "$OCF_RESKEY_chrooted_path$(dirname "$OCF_RESKEY_pid")" ]; then
        mkdir -p "$OCF_RESKEY_chrooted_path$(dirname "$OCF_RESKEY_pid")"
    fi

    # Ensure all permissions are in place if the folder was re-created.
    chown -R "$OCF_RESKEY_user:$OCF_RESKEY_group" "$OCF_RESKEY_chrooted_path/$(dirname "$OCF_RESKEY_leases")"
    chown -R "$OCF_RESKEY_user:$OCF_RESKEY_group" "$OCF_RESKEY_chrooted_path/$(dirname "$OCF_RESKEY_pid")"

    ## If there is no conf file, we can't initialize the chrooted
    ## environment, return with "program not configured"
    # The path is quoted: with an empty parameter an unquoted "[ -f ]"
    # collapses to a one-argument test, which is always true.
    if ! [ -f "$OCF_RESKEY_config" ]; then
        ocf_log err "dhcpd has not been configured."
        return $OCF_ERR_CONFIGURED
    fi

    # If the leases file does not exist, create it, as this is a fresh install.
    if [ ! -e "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases" ]; then
        touch "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases"
    fi

    # Remove the random device.
    test -e "$OCF_RESKEY_chrooted_path/dev/urandom" &&
        rm -f "$OCF_RESKEY_chrooted_path/dev/urandom"

    # Test for the existence of the defined include files, and append
    # them to the list of files to be copied.
    for i in $OCF_RESKEY_includes; do
        if [ -e "$i" ]; then
            DEFAULT_FILE_LIST="$DEFAULT_FILE_LIST $i"
        else
            ocf_log err "include file $i does not exist"
            return $OCF_ERR_INSTALLED
        fi
    done

    # Ensure all "modified" non-chrooted configuration files are copied into the chrooted environment.
    for i in $OCF_RESKEY_config $DEFAULT_FILE_LIST; do
        # First, lets make sure the directory exists within the chrooted environment.
        if test -d "$i"; then
            mkdir -p "$OCF_RESKEY_chrooted_path/$i"
        elif test -e "$i"; then
            mkdir -p "$(dirname "$OCF_RESKEY_chrooted_path/$i")"
        fi

        # Next, we copy the configuration file into place.
        cp -aL "$i" "$OCF_RESKEY_chrooted_path/${i%/*}/" > /dev/null 2>&1 ||
            { ocf_log err "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
    done

    # Name of the library directory inside the jail: the first lib*
    # directory found under the configured chroot path (previously the
    # default path /var/lib/dhcp was hard-coded here). Falls back to lib64,
    # which is the directory created above.
    libdir=
    for i in "$OCF_RESKEY_chrooted_path"/lib*; do
        if [ -d "$i" ]; then
            libdir=$(basename "$i")
            break
        fi
    done
    libdir=${libdir:-lib64}

    if test -x /usr/bin/ldd; then
        # Print the resolved paths of the libraries that $1 depends on and
        # that live under /$libdir, leaving out libc itself. Uses "case"
        # pattern matching so the code also runs under a plain POSIX sh.
        get_ldd_deps()
        {
            /usr/bin/ldd "$1" | while read -r a b c d; do
                [ -n "$c" ] || continue
                case "$c" in
                */"$libdir"/libc.*) continue ;;
                */"$libdir"/lib*) echo "$c" ;;
                esac
            done
        }
    else
        get_ldd_deps() { :; }
    fi

    cplibs=$(for i in /$libdir/libresolv.so.* /$libdir/libnss_*.so.* /$libdir/libpthread.so.0 /$libdir/libdl.so.2
    do
        if [ -s "$i" ]; then
            echo "$i"
            get_ldd_deps "$i"
        fi
    done | sort -u)

    for i in $cplibs; do
        if [ -s "$i" ]; then
            cp -pL "$i" "$OCF_RESKEY_chrooted_path/$libdir/" ||
                { ocf_log err "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
        fi
    done

    return $OCF_SUCCESS
}
# Initialize a non-chroot environment
# Checks that a configuration file exists, creates the leases file if it is
# missing, and creates the "dhcpd" sub-directory that holds the PID file.
# Returns: $OCF_SUCCESS, or $OCF_ERR_CONFIGURED when there is no
# configuration file.
dhcpd_initialize() {
    local pid_dir

    ## If there is no conf file, we can't start a dhcp service.
    # The path is quoted: with an empty parameter an unquoted "[ -f ]"
    # collapses to a one-argument test, which is always true.
    if ! [ -f "$OCF_RESKEY_config" ]; then
        ocf_log err "dhcpd has not been configured."
        return $OCF_ERR_CONFIGURED
    fi

    # As with the standard DHCP init script, we can still use the
    # chrooted default path for storing the leases file. This behavior
    # is consistent with the existing /etc/init.d/dhcpd script.
    if ! [ -d "$OCF_RESKEY_chrooted_path" ]; then
        ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use."
    fi

    # If the leases file does not exist, create it, as this is a fresh install.
    if [ ! -e "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases" ]; then
        touch "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases"
    fi

    # If the PID storage path does not exist, make it, and set up the
    # permissions.
    # NOTE: "dhcpd" is appended to the directory of the PID file because the
    #       ownership of a shared directory such as /var/run must not be
    #       changed; that would affect more than just this application.
    #       The original author flagged the ownership change below as a
    #       potential security concern.
    pid_dir="$(dirname "$OCF_RESKEY_pid")/dhcpd"
    if ! [ -d "$pid_dir" ]; then
        mkdir -p "$pid_dir"

        if [ -n "$OCF_RESKEY_user" -a "x$OCF_RESKEY_user" != "xroot" ]; then
            chown "$OCF_RESKEY_user" "$pid_dir"
        fi

        if [ -n "$OCF_RESKEY_group" -a "x$OCF_RESKEY_group" != "xwheel" ]; then
            chgrp "$OCF_RESKEY_group" "$pid_dir"
        fi
    fi

    return $OCF_SUCCESS
}
# Start
# Prepare the runtime environment (chrooted or not), validate the
# parameters, launch dhcpd and wait until the monitor reports it running.
# Returns: $OCF_SUCCESS once dhcpd is running, $OCF_ERR_INSTALLED when the
# environment could not be initialized or the binary failed to launch.
# Exits with the status of dhcpd_validate_all when validation fails.
dhcpd_start() {
    # Lets make sure we are not already running.
    if dhcpd_monitor; then
        ocf_log info "dhcpd already running"
        return $OCF_SUCCESS
    fi

    # Only initialize the chrooted path(s) if chroot mode is enabled.
    if ocf_is_true $OCF_RESKEY_chrooted ; then
        dhcpd_initialize_chroot ||
            { ocf_log err "Could not fully initialize the chroot environment." ; return $OCF_ERR_INSTALLED; }
    else
        dhcpd_initialize ||
            { ocf_log err "Could not fully initialize the runtime environment." ; return $OCF_ERR_INSTALLED; }
    fi

    dhcpd_validate_all || exit

    # Define an empty string variable, to ensure it exists when needed.
    DHCPD_ARGS=""

    # To ensure consistent behavior with the standard DHCPD init script,
    # use the chrooted default path for storing a leases file, when not in
    # a chrooted environment.
    if ocf_is_true $OCF_RESKEY_chrooted ; then
        DHCPD_ARGS="$DHCPD_ARGS -chroot $OCF_RESKEY_chrooted_path -lf $OCF_RESKEY_leases"
    else
        DHCPD_ARGS="$DHCPD_ARGS -lf $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases"
    fi

    if [ -n "$OCF_RESKEY_user" ]; then
        DHCPD_ARGS="$DHCPD_ARGS -user $OCF_RESKEY_user"
    fi

    if [ -n "$OCF_RESKEY_group" ]; then
        DHCPD_ARGS="$DHCPD_ARGS -group $OCF_RESKEY_group"
    fi

    # If there is a pid file containing a pid, the machine might have crashed. pid files in
    # /var/run are always cleaned up at boot time, but this is not the case for the pid file in
    # the chroot jail. Therefore, an old pid file may exist. This is only a problem if it
    # incidentally contains the pid of a running process. The stale file is removed below
    # before dhcpd is launched. (dhcpd itself only checks whether the pid is alive or not.)
    PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid"

    if ocf_is_true $OCF_RESKEY_chrooted ; then
        ocf_log info "Starting dhcpd [chroot] service."
        DHCPD_ARGS="$DHCPD_ARGS -pf $OCF_RESKEY_pid"
    else
        ocf_log info "Starting dhcpd [non-chroot] service."
        PIDF=$(dirname $OCF_RESKEY_pid)/dhcpd/$(basename $OCF_RESKEY_pid)
        DHCPD_ARGS="$DHCPD_ARGS -pf $PIDF"
    fi

    test -e "$PIDF" && rm -f "$PIDF"

    # DHCPD_ARGS and the interface list are deliberately unquoted: each
    # holds several whitespace-separated words.
    ocf_run $OCF_RESKEY_binary -cf $OCF_RESKEY_config $DHCPD_ARGS $OCF_RESKEY_interface ||
        return $OCF_ERR_INSTALLED

    # Wait until the monitor confirms that dhcpd is up. The loop has no
    # limit of its own; the operation timeout enforced by the cluster
    # manager is what ends a start that never comes up. (The loop used to
    # return success after its first pass, without dhcpd being confirmed.)
    while ! dhcpd_monitor; do
        sleep .1
        ocf_log info "waiting for dhcpd to start"
    done

    if ocf_is_true $OCF_RESKEY_chrooted ; then
        ocf_log info "dhcpd [chrooted] has started."
    else
        ocf_log info "dhcpd [non-chrooted] has started."
    fi

    return $OCF_SUCCESS
}
# Stop
# Send dhcpd the default termination signal, wait for it to go away for up
# to two thirds of the action timeout, send KILL if it is still running,
# then unmount the chroot's /proc (dhcpd 4 and later, chroot mode only) and
# remove the PID file.
# Returns: $OCF_SUCCESS.
dhcpd_stop () {
    local timeout
    local timewait
    local rc

    dhcpd_monitor
    rc=$?

    case "$rc" in
    "$OCF_SUCCESS")
        # Currently running, and is expected behaviour.
        ;;
    "$OCF_NOT_RUNNING")
        # Currently not running, therefore nothing to do.
        ocf_log info "dhcpd already stopped"
        return $OCF_SUCCESS
        ;;
    esac

    PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid"

    if ! ocf_is_true $OCF_RESKEY_chrooted ; then
        PIDF=$(dirname "$OCF_RESKEY_pid")/dhcpd/$(basename "$OCF_RESKEY_pid")
    fi

    kill $(cat "$PIDF")

    # Allow 2/3 of the action timeout for the orderly shutdown
    # (The origin unit is ms, hence the conversion)
    timewait=$((OCF_RESKEY_CRM_meta_timeout/1500))

    sleep 0.1; timeout=0 # Sleep here for .1 sec to let dhcpd finish.
    while dhcpd_monitor ; do
        if [ $timeout -ge $timewait ]; then
            break
        else
            sleep 1
            timeout=$((timeout + 1))
        fi
    done

    #If still up
    if dhcpd_monitor 2>&1; then
        ocf_log err "dhcpd is still up! Trying kill -s KILL"
        # Signal names are given without the "SIG" prefix: that is the form
        # POSIX specifies for "kill -s", and dash rejects "SIGKILL".
        kill -s KILL $(cat "$PIDF")
    fi

    # If we are running a dhcp server v4 or higher, unmount the proc partition.
    if [ "$DHCP_VERSION_MAJOR" -ge 4 ] ; then
        # We only want to unmount proc in a chrooted environment, else we could
        # cause other issues.
        if ocf_is_true $OCF_RESKEY_chrooted ; then
            umount "$OCF_RESKEY_chrooted_path/proc" > /dev/null 2>&1
        fi
    fi

    rm -f "$PIDF"

    ocf_log info "dhcpd stopped"
    return $OCF_SUCCESS
}
# Actions that are answered without starting, stopping or probing dhcpd.
# meta-data and usage always succeed; validate-all exits with the result of
# the validation (it used to exit with $OCF_SUCCESS unconditionally).
case $__OCF_ACTION in
meta-data)
    dhcpd_meta_data
    exit $OCF_SUCCESS
    ;;
validate-all)
    dhcpd_validate_all
    exit $?
    ;;
usage|help)
    # The usage helper is defined under the name "usage".
    usage
    exit $OCF_SUCCESS
    ;;
esac

# Translate each action into the appropriate function call.
# The status of the called function becomes the exit status of the script.
case $__OCF_ACTION in
start)
    dhcpd_start
    ;;
stop)
    dhcpd_stop
    ;;
monitor)
    dhcpd_monitor
    ;;
*)
    usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor
index 77775c264..b85d7fc2e 100755
--- a/heartbeat/ethmonitor
+++ b/heartbeat/ethmonitor
@@ -1,454 +1,454 @@
#!/bin/sh
#
# OCF Resource Agent compliant script.
# Monitor the vitality of a local network interface.
#
# Based on the work by Robert Euhus and Lars Marowsky-Brée.
#
# Transferred from IPaddr2 into ethmonitor by Alexander Krauth
#
# Copyright (c) 2011 Robert Euhus, Alexander Krauth, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
# OCF parameters are as below
#
# OCF_RESKEY_interface
# OCF_RESKEY_multiplier
# OCF_RESKEY_name
# OCF_RESKEY_repeat_count
# OCF_RESKEY_repeat_interval
# OCF_RESKEY_pktcnt_timeout
# OCF_RESKEY_arping_count
# OCF_RESKEY_arping_timeout
# OCF_RESKEY_arping_cache_entries
#
# TODO: Check against IPv6
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
meta_data() {
# Print the agent's meta-data XML on stdout, then exit with $OCF_SUCCESS.
# The here-document is unquoted, so the shell expands "$" sequences in it:
# the literal "$id" of the example constraint is written as "\$id",
# otherwise it would expand to an empty string and corrupt the example.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ethmonitor">
<version>1.2</version>
<longdesc lang="en">
Monitor the vitality of a local network interface.
-You may setup this RA as a clone resource to monitor the network interfaces on different nodes, with the same interface name.
-This is not related to the IP adress or the network on which a interface is configured.
+You may set up this RA as a clone resource to monitor the network interfaces on different nodes, with the same interface name.
+This is not related to the IP address or the network on which a interface is configured.
You may use this RA to move resources away from a node, which has a faulty interface or prevent moving resources to such a node.
This gives you independend control of the resources, without involving cluster intercommunication. But it requires your nodes to have more than one network interface.
The resource configuration requires a monitor operation, because the monitor does the main part of the work.
-In addition to the resource configuration, you need to configure some location contraints, based on a CIB attribute value.
+In addition to the resource configuration, you need to configure some location constraints, based on a CIB attribute value.
The name of the attribute value is configured in the 'name' option of this RA.
Example constraint configuration:
location loc_connected_node my_resource_grp \
rule \$id="rule_loc_connected_node" -INF: ethmonitor eq 0
The ethmonitor works in 3 different modes to test the interface vitality.
1. call ip to see if the link status is up (if link is down -> error)
-2. call ip an watch the RX counter (if packages come around in a certain time -> success)
-3. call arping to check wether any of the IPs found in the lokal ARP cache answers an ARP REQUEST (one answer -> success)
+2. call ip and watch the RX counter (if packages come around in a certain time -> success)
+3. call arping to check whether any of the IPs found in the local ARP cache answers an ARP REQUEST (one answer -> success)
4. return error
</longdesc>
<shortdesc lang="en">Monitors network interfaces</shortdesc>
<parameters>
<parameter name="interface" unique="1" required="1">
<longdesc lang="en">
The name of the network interface which should be monitored (e.g. eth0).
</longdesc>
<shortdesc lang="en">Network interface name</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="name" unique="1">
<longdesc lang="en">
The name of the CIB attribute to set. This is the name to be used in the constraints. Defaults to "ethmonitor-'interface_name'".
</longdesc>
<shortdesc lang="en">Attribute name</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="multiplier" unique="0" >
<longdesc lang="en">
Multiplier for the value of the CIB attriobute specified in parameter name.
</longdesc>
<shortdesc lang="en">Multiplier for result variable</shortdesc>
<content type="integer" default="1"/>
</parameter>
<parameter name="repeat_count">
<longdesc lang="en">
Specify how often the interface will be monitored, before the status is set to failed. You need to set the timeout of the monitoring operation to at least repeat_count * repeat_interval
</longdesc>
<shortdesc lang="en">Monitor repeat count</shortdesc>
<content type="integer" default="5"/>
</parameter>
<parameter name="repeat_interval">
<longdesc lang="en">
Specify how long to wait in seconds between the repeat_counts.
</longdesc>
<shortdesc lang="en">Monitor repeat interval in seconds</shortdesc>
<content type="integer" default="10"/>
</parameter>
<parameter name="pktcnt_timeout">
<longdesc lang="en">
Timeout for the RX packet counter. Stop listening for packet counter changes after the given number of seconds.
</longdesc>
<shortdesc lang="en">packet counter timeout</shortdesc>
<content type="integer" default="5"/>
</parameter>
<parameter name="arping_count">
<longdesc lang="en">
Number of ARP REQUEST packets to send for every IP.
Usually one ARP REQUEST (arping) is send
</longdesc>
<shortdesc lang="en">Number of arpings per IP</shortdesc>
<content type="integer" default="1"/>
</parameter>
<parameter name="arping_timeout">
<longdesc lang="en">
Time in seconds to wait for ARP REQUESTs (all packets of arping_count).
This is to limit the time for arp requests, to be able to send requests to more than one node, without running in the monitor operation timeout.
</longdesc>
<shortdesc lang="en">Timeout for arpings per IP</shortdesc>
<content type="integer" default="1"/>
</parameter>
<parameter name="arping_cache_entries">
<longdesc lang="en">
Maximum number of IPs from ARP cache list to check for ARP REQUEST (arping) answers. Newest entries are tried first.
</longdesc>
<shortdesc lang="en">Number of ARP cache entries to try</shortdesc>
<content type="integer" default="5"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="status" depth="0" timeout="20s" interval="10s" />
<action name="monitor" depth="0" timeout="20s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
exit $OCF_SUCCESS
}
#
# is_interface NAME
# Succeeds when the "inet addr show" listing produced by $IP2UTIL contains
# an entry matching " NAME " whose interface field is not a FreeS/WAN
# ipsecN virtual interface.
#
is_interface() {
    local matched
    matched=$($IP2UTIL -o -f inet addr show \
        | grep " $1 " \
        | cut -d ' ' -f2 \
        | sort -u \
        | grep -v '^ipsec[0-9][0-9]*$')
    test -n "$matched"
}
# Validate the resource parameters and derive the globals used by the
# monitor: NIC, ATTRNAME, REP_COUNT and REP_INTERVAL_S. Numeric parameters
# that are unset or empty receive their defaults first.
# Exits with $OCF_ERR_CONFIGURED on the first invalid parameter; returns
# $OCF_SUCCESS otherwise.
if_init() {
    if [ X"$OCF_RESKEY_interface" = "X" ]; then
        ocf_log err "Interface name (the interface parameter) is mandatory"
        exit $OCF_ERR_CONFIGURED
    fi

    NIC="$OCF_RESKEY_interface"

    if is_interface "$NIC"
    then
        case "$NIC" in
        *:*)
            ocf_log err "Do not specify a virtual interface : $OCF_RESKEY_interface"
            exit $OCF_ERR_CONFIGURED
            ;;
        *) ;;
        esac
    else
        case $__OCF_ACTION in
        validate-all)
            ocf_log err "Interface $NIC does not exist"
            exit $OCF_ERR_CONFIGURED
            ;;
        *)
            ocf_log warn "Interface $NIC does not exist"
            ## It might be a bond interface which is temporarily not available, therefore we want to continue here
            ;;
        esac
    fi

    : ${OCF_RESKEY_multiplier:="1"}
    if ! ocf_is_decimal "$OCF_RESKEY_multiplier"; then
        ocf_log err "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]"
        exit $OCF_ERR_CONFIGURED
    fi

    ATTRNAME=${OCF_RESKEY_name:-"ethmonitor-$NIC"}

    # The count must be a decimal number and at least 1. The two conditions
    # are joined with the shell's "||": written as "-o [ ... ]" the range
    # check was merely passed to ocf_is_decimal as extra arguments.
    REP_COUNT=${OCF_RESKEY_repeat_count:-5}
    if ! ocf_is_decimal "$REP_COUNT" || [ "$REP_COUNT" -lt 1 ]; then
        ocf_log err "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]"
        exit $OCF_ERR_CONFIGURED
    fi

    REP_INTERVAL_S=${OCF_RESKEY_repeat_interval:-10}
    if ! ocf_is_decimal "$REP_INTERVAL_S"; then
        ocf_log err "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]"
        exit $OCF_ERR_CONFIGURED
    fi

    : ${OCF_RESKEY_pktcnt_timeout:="5"}
    if ! ocf_is_decimal "$OCF_RESKEY_pktcnt_timeout"; then
        ocf_log err "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]"
        exit $OCF_ERR_CONFIGURED
    fi

    : ${OCF_RESKEY_arping_count:="1"}
    if ! ocf_is_decimal "$OCF_RESKEY_arping_count"; then
        ocf_log err "Invalid OCF_RESKEY_arping_count [$OCF_RESKEY_arping_count]"
        exit $OCF_ERR_CONFIGURED
    fi

    : ${OCF_RESKEY_arping_timeout:="1"}
    if ! ocf_is_decimal "$OCF_RESKEY_arping_timeout"; then
        # Report the offending value of arping_timeout itself.
        ocf_log err "Invalid OCF_RESKEY_arping_timeout [$OCF_RESKEY_arping_timeout]"
        exit $OCF_ERR_CONFIGURED
    fi

    : ${OCF_RESKEY_arping_cache_entries:="5"}
    if ! ocf_is_decimal "$OCF_RESKEY_arping_cache_entries"; then
        ocf_log err "Invalid OCF_RESKEY_arping_cache_entries [$OCF_RESKEY_arping_cache_entries]"
        exit $OCF_ERR_CONFIGURED
    fi

    return $OCF_SUCCESS
}
# Print the link status of $NIC as a count: the number of lines in the
# "link show up" listing for $NIC that do not contain NO-CARRIER and that
# mention the interface name (0 when the link is down).
get_link_status () {
    $IP2UTIL -o link show up dev "$NIC" \
        | grep -v 'NO-CARRIER' \
        | grep -c "$NIC"
}
# Print the number of packets received (RX) on $NIC.
# In the one-line statistics output the first number after "RX:" is the
# byte count and the second one is the packet count; the sed expression
# keeps only the second.
get_rx_packets () {
    ocf_log debug "$IP2UTIL -o -s link show dev $NIC"
    $IP2UTIL -o -s link show dev "$NIC" |
        sed 's/.* RX: [^0-9]*[0-9]* *\([0-9]*\) .*/\1/'
}
# Poll the RX packet counter every 0.1 seconds, for at most
# OCF_RESKEY_pktcnt_timeout seconds.
# Returns 0 immediately once the counter differs from its initial value,
# 1 when it never changed.
watch_pkt_counter () {
    local rx_before
    local rx_now

    rx_before="$(get_rx_packets)"
    for n in $(seq $(( $OCF_RESKEY_pktcnt_timeout * 10 ))); do
        sleep 0.1
        rx_now="$(get_rx_packets)"
        ocf_log debug "RX_PACKETS_OLD: $rx_before RX_PACKETS_NEW: $rx_now"
        # Counter unchanged: keep polling.
        [ "$rx_before" -ne "$rx_now" ] || continue
        ocf_log debug "we received some packets."
        return 0
    done
    return 1
}
# Print the addresses of the cached neighbour (ARP) entries for $NIC, one
# per line, at most OCF_RESKEY_arping_cache_entries of them.
# The "used" column of the statistics listing has the form
# "last used"/"last confirmed"/"last updated"; entries are sorted
# numerically on the second ("last confirmed") value.
get_arp_list () {
    $IP2UTIL -s neighbour show dev $NIC |
        sort -t/ -k2,2n |
        cut -d' ' -f1 |
        head -n $OCF_RESKEY_arping_cache_entries
}
# Run arping against the IP address given as $1 through interface $NIC,
# with OCF_RESKEY_arping_count as the packet count and
# OCF_RESKEY_arping_timeout as the deadline.
# The exit status of arping is the return status of the function.
do_arping () {
    # TODO: add the source IP
    # TODO: check for different arping versions out there
    arping -q -c $OCF_RESKEY_arping_count -w $OCF_RESKEY_arping_timeout -I $NIC $1
}
#
# Check the vitality of the interface in three steps:
#
#  1. link status: a down link fails the check at once
#  2. RX packet counter: any change within the timeout is a success
#  3. arping the cached ARP entries: the first answer is a success
#
# Returns $OCF_SUCCESS as soon as step 2 or 3 succeeds and
# $OCF_NOT_RUNNING otherwise.
#
if_check () {
    # always check link status first
    link_status="$(get_link_status)"
    ocf_log debug "link_status: $link_status (1=up, 0=down)"
    if [ $link_status -eq 0 ]; then
        return $OCF_NOT_RUNNING
    fi

    # watch for packet counter changes
    ocf_log debug "watch for packet counter changes"
    if watch_pkt_counter; then
        return $OCF_SUCCESS
    fi

    # check arping ARP cache entries
    ocf_log debug "check arping ARP cache entries"
    for ip in $(get_arp_list); do
        if do_arping $ip; then
            return $OCF_SUCCESS
        fi
    done

    # Disabled step: watch for packet counter changes in promiscuous mode.
    # ocf_log debug "watch for packet counter changes in promiscios mode"
    # be sure to switch off promiscuous mode in any case
    # TODO: check first, whether promisc is already on and leave it untouched.
    # trap "$IP2UTIL link set dev $NIC promisc off; exit" INT TERM EXIT
    # $IP2UTIL link set dev $NIC promisc on
    # watch_pkt_counter && return $OCF_SUCCESS
    # $IP2UTIL link set dev $NIC promisc off
    # trap - INT TERM EXIT

    # looks like it's not working (for whatever reason)
    return $OCF_NOT_RUNNING
}
#######################################################################
# Print the list of supported actions on stdout.
if_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|status|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
# Publish the monitoring result through attrd_updater.
# $1 is multiplied by OCF_RESKEY_multiplier and the product is stored under
# the attribute $ATTRNAME.
# Returns the exit status of attrd_updater.
set_cib_value() {
    local score
    local rc

    score=$(expr $1 \* $OCF_RESKEY_multiplier)
    attrd_updater -n $ATTRNAME -v $score -q
    rc=$?

    if [ $rc -eq 0 ]; then
        ocf_log debug "attrd_updater: Updated $ATTRNAME = $score"
    else
        ocf_log warn "attrd_updater: Could not update $ATTRNAME = $score: rc=$rc"
    fi
    return $rc
}
# Monitor action.
# Exits with the pseudo-resource status when that is not $OCF_SUCCESS.
# Otherwise runs if_check up to $REP_COUNT times, pausing so that attempts
# start roughly $REP_INTERVAL_S seconds apart, publishes 1 (check passed)
# or 0 (all attempts failed) through set_cib_value, and exits with the
# status of that update.
if_monitor() {
    local pseudo_status
    local mon_rc=$OCF_NOT_RUNNING
    local attr_rc=$OCF_NOT_RUNNING
    local runs=0
    local start_time
    local end_time
    local sleep_time

    ha_pseudo_resource $OCF_RESOURCE_INSTANCE monitor
    pseudo_status=$?
    [ $pseudo_status -ne $OCF_SUCCESS ] && exit $pseudo_status

    while [ $mon_rc -ne $OCF_SUCCESS ] && [ $REP_COUNT -gt 0 ]; do
        start_time=$(date +%s%N)
        if_check
        mon_rc=$?
        REP_COUNT=$(( $REP_COUNT - 1 ))

        if [ $mon_rc -ne $OCF_SUCCESS ] && [ $REP_COUNT -gt 0 ]; then
            ocf_log warn "Monitoring of $OCF_RESOURCE_INSTANCE failed, $REP_COUNT retries left."
            end_time=$(date +%s%N)
            # Remaining part of the repeat interval, in seconds.
            sleep_time=$(echo "scale=9; ( $start_time + ( $REP_INTERVAL_S * 1000000000 ) - $end_time ) / 1000000000" | bc -q 2> /dev/null)
            sleep $sleep_time 2> /dev/null
            runs=$(($runs + 1))
        fi

        if [ $mon_rc -eq $OCF_SUCCESS ] && [ $runs -ne 0 ]; then
            ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error"
        fi
    done

    ocf_log debug "Monitoring return code: $mon_rc"
    if [ $mon_rc -eq $OCF_SUCCESS ]; then
        set_cib_value 1
    else
        ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed."
        set_cib_value 0
    fi
    attr_rc=$?

    ## The resource should not fail, if the interface is down. It should fail, if the update of the CIB variable has errors.
    ## To react on the interface failure you must use constraints based on the CIB variable value, not on the resource itself.
    exit $attr_rc
}
# Check that the required tools are available, then validate the resource
# parameters. The return status is that of if_init, the last command.
if_validate() {
# Both $IP2UTIL and arping are invoked by the monitoring helpers.
check_binary $IP2UTIL
check_binary arping
if_init
}
# Actions that must work without a validated configuration are handled
# before if_validate runs.
case $__OCF_ACTION in
# meta_data ends the script itself with "exit $OCF_SUCCESS".
meta-data) meta_data
;;
usage|help) if_usage
exit $OCF_SUCCESS
;;
esac
# Every remaining action needs valid parameters.
if_validate
# Dispatch the remaining actions; each branch exits with the status of the
# command that ran last.
case $__OCF_ACTION in
start) ha_pseudo_resource $OCF_RESOURCE_INSTANCE start
exit $?
;;
# Remove the attribute (attrd_updater -D) before the pseudo resource is stopped.
stop) attrd_updater -D -n $ATTRNAME
ha_pseudo_resource $OCF_RESOURCE_INSTANCE stop
exit $?
;;
monitor|status) if_monitor
exit $?
;;
# No command has run since if_validate, so $? here is its status.
validate-all) exit $?
;;
*) if_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit
index a75d888ba..7128c2544 100755
--- a/heartbeat/iSCSILogicalUnit
+++ b/heartbeat/iSCSILogicalUnit
@@ -1,522 +1,522 @@
#!/bin/bash
#
#
# iSCSILogicalUnit OCF RA. Exports and manages iSCSI Logical Units.
#
# (c) 2009-2010 Florian Haas, Dejan Muhamedagic,
# and Linux-HA contributors
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
# Set a default implementation based on software installed
if have_binary ietadm; then
	OCF_RESKEY_implementation_default="iet"
elif have_binary tgtadm; then
	OCF_RESKEY_implementation_default="tgt"
elif have_binary lio_node; then
	OCF_RESKEY_implementation_default="lio"
fi
: ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}}
# Use a default SCSI ID and SCSI SN that is unique across the cluster,
# and persistent in the event of resource migration.
# SCSI IDs are limited to 24 bytes, but only 16 bytes are known to be
# supported by all iSCSI implementations this RA cares about. Thus,
# for a default, use the first 16 characters of
# $OCF_RESOURCE_INSTANCE.
OCF_RESKEY_scsi_id_default="${OCF_RESOURCE_INSTANCE:0:16}"
: ${OCF_RESKEY_scsi_id=${OCF_RESKEY_scsi_id_default}}
# To have a reasonably unique default SCSI SN, use the first 8 bytes
# of an MD5 hash of $OCF_RESOURCE_INSTANCE.
# Depending on the OpenSSL version, "openssl md5" prints the bare hash,
# "(stdin)= <hash>" or "MD5(stdin)= <hash>". Strip everything up to and
# including the "= " separator so that only the hex digest remains;
# otherwise the "MD5" prefix would end up in the serial number.
sn=`echo -n "${OCF_RESOURCE_INSTANCE}" | openssl md5 | sed -e 's/^.*= *//'`
OCF_RESKEY_scsi_sn_default=${sn:0:8}
: ${OCF_RESKEY_scsi_sn=${OCF_RESKEY_scsi_sn_default}}
# set 0 as a default value for lio iblock device number
OCF_RESKEY_lio_iblock_default=0
OCF_RESKEY_lio_iblock=${OCF_RESKEY_lio_iblock:-$OCF_RESKEY_lio_iblock_default}
#######################################################################
# Print the resource agent meta-data (XML) to stdout. The here-document is
# unquoted, so the ${OCF_RESKEY_*_default} references are expanded with the
# defaults computed at the top of this file.
# The scsi_id description states a truncation to 16 bytes, matching the
# 16-character default computed for OCF_RESKEY_scsi_id_default.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="iSCSILogicalUnit" version="0.9">
<version>0.9</version>
<longdesc lang="en">
Manages iSCSI Logical Unit. An iSCSI Logical unit is a subdivision of
an SCSI Target, exported via a daemon that speaks the iSCSI protocol.
</longdesc>
<shortdesc lang="en">Manages iSCSI Logical Units (LUs)</shortdesc>
<parameters>
<parameter name="implementation" required="0" unique="0">
<longdesc lang="en">
The iSCSI target daemon implementation. Must be one of "iet", "tgt",
or "lio". If unspecified, an implementation is selected based on the
availability of management utilities, with "iet" being tried first,
then "tgt", then "lio".
</longdesc>
<shortdesc lang="en">iSCSI target daemon implementation</shortdesc>
<content type="string" default="${OCF_RESKEY_implementation_default}"/>
</parameter>
<parameter name="target_iqn" required="1" unique="0">
<longdesc lang="en">
The iSCSI Qualified Name (IQN) that this Logical Unit belongs to.
</longdesc>
<shortdesc lang="en">iSCSI target IQN</shortdesc>
<content type="string" />
</parameter>
<parameter name="lun" required="1" unique="0">
<longdesc lang="en">
The Logical Unit number (LUN) exposed to initiators.
</longdesc>
<shortdesc lang="en">Logical Unit number (LUN)</shortdesc>
<content type="integer" />
</parameter>
<parameter name="path" required="1" unique="0">
<longdesc lang="en">
The path to the block device exposed. Some implementations allow this
to be a regular file, too.
</longdesc>
<shortdesc lang="en">Block device (or file) path</shortdesc>
<content type="string" />
</parameter>
<parameter name="scsi_id" required="0" unique="1">
<longdesc lang="en">
The SCSI ID to be configured for this Logical Unit. The default
is the resource name, truncated to 16 bytes.
</longdesc>
<shortdesc lang="en">SCSI ID</shortdesc>
<content type="string" default="${OCF_RESKEY_scsi_id_default}"/>
</parameter>
<parameter name="scsi_sn" required="0" unique="1">
<longdesc lang="en">
The SCSI serial number to be configured for this Logical Unit.
The default is a hash of the resource name, truncated to 8 bytes.
</longdesc>
<shortdesc lang="en">SCSI serial number</shortdesc>
<content type="string" default="${OCF_RESKEY_scsi_sn_default}"/>
</parameter>
<parameter name="vendor_id" required="0" unique="0">
<longdesc lang="en">
The SCSI vendor ID to be configured for this Logical Unit.
</longdesc>
<shortdesc lang="en">SCSI vendor ID</shortdesc>
<content type="string" />
</parameter>
<parameter name="product_id" required="0" unique="0">
<longdesc lang="en">
The SCSI product ID to be configured for this Logical Unit.
</longdesc>
<shortdesc lang="en">SCSI product ID</shortdesc>
<content type="string" />
</parameter>
<parameter name="additional_parameters" required="0" unique="0">
<longdesc lang="en">
Additional LU parameters. A space-separated list of "name=value" pairs
which will be passed through to the iSCSI daemon's management
interface. The supported parameters are implementation
dependent. Neither the name nor the value may contain whitespace.
</longdesc>
<shortdesc lang="en">List of iSCSI LU parameters</shortdesc>
<content type="string" />
</parameter>
<parameter name="allowed_initiators" required="0" unique="0">
<longdesc lang="en">
Allowed initiators. A space-separated list of initiators allowed to
connect to this lun. Initiators may be listed in any syntax
the target implementation allows. If this parameter is empty or
not set, access to this lun will not be allowed from any initiator,
if target is not in demo mode.
-This parameter is only necessary, when using LIO.
+This parameter is only necessary when using LIO.
</longdesc>
<shortdesc lang="en">List of iSCSI initiators allowed to connect
to this lun.</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="lio_iblock" required="0" unique="0">
<longdesc lang="en">
LIO iblock device name, a number starting from 0.
Using distinct values here avoids a warning in LIO "LEGACY: SHARED HBA";
and it is necessary when using multiple LUNs started at the same time
(eg. on node failover) to prevent a race condition in tcm_core on mkdir()
in /sys/kernel/config/target/core/.
</longdesc>
<shortdesc lang="en">LIO iblock device number</shortdesc>
<content type="integer" default="0"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="10" />
<action name="stop" timeout="10" />
<action name="status" timeout="10" interval="10" depth="0" />
<action name="monitor" timeout="10" interval="10" depth="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="10" />
</actions>
</resource-agent>
END
}
#######################################################################
# Print a short usage message to stdout; $0 is expanded to the name the
# script was invoked with.
iSCSILogicalUnit_usage() {
cat <<END
usage: $0 {start|stop|status|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
# Start action: export ${OCF_RESKEY_path} as logical unit ${OCF_RESKEY_lun}
# using the configured implementation (iet, tgt or lio).
#
# Globals read: OCF_RESKEY_* parameters, OCF_RESOURCE_INSTANCE, and TID
#               (set by iSCSILogicalUnit_monitor for iet and tgt).
# Returns:      $OCF_SUCCESS when the LU is already exported or was created.
# Exits:        with $OCF_ERR_GENERIC as soon as a management command fails.
iSCSILogicalUnit_start() {
	local params
	local var
	local envar
	local initiator

	# Nothing to do if the LU is already there. For iet and tgt the
	# monitor call also sets TID, which is needed below.
	iSCSILogicalUnit_monitor
	if [ $? = $OCF_SUCCESS ]; then
		return $OCF_SUCCESS
	fi

	case $OCF_RESKEY_implementation in
	iet)
		params="Path=${OCF_RESKEY_path}"
		# use blockio if path points to a block device, fileio
		# otherwise.
		if [ -b "${OCF_RESKEY_path}" ]; then
			params="${params} Type=blockio"
		else
			params="${params} Type=fileio"
		fi
		# in IET, we have to set LU parameters on creation
		if [ -n "${OCF_RESKEY_scsi_id}" ]; then
			params="${params} ScsiId=${OCF_RESKEY_scsi_id}"
		fi
		if [ -n "${OCF_RESKEY_scsi_sn}" ]; then
			params="${params} ScsiSN=${OCF_RESKEY_scsi_sn}"
		fi
		params="${params} ${OCF_RESKEY_additional_parameters}"
		# Strip leading and trailing blanks; they would otherwise be
		# turned into stray commas in the --params argument.
		params="${params#"${params%%[! ]*}"}"
		params="${params%"${params##*[! ]}"}"
		ocf_run ietadm --op new \
			--tid=${TID} \
			--lun=${OCF_RESKEY_lun} \
			--params "${params// /,}" || exit $OCF_ERR_GENERIC
		;;
	tgt)
		# tgt requires that we create the LU first, then set LU
		# parameters
		params=""
		for var in scsi_id scsi_sn vendor_id product_id; do
			envar="OCF_RESKEY_${var}"
			if [ -n "${!envar}" ]; then
				params="${params} ${var}=${!envar}"
			fi
		done
		params="${params} ${OCF_RESKEY_additional_parameters}"
		# Strip leading and trailing blanks. Without this, params is
		# never empty (it always holds at least the separating blank),
		# so the "nothing to update" shortcut below could never trigger
		# and --params would start and end with a comma.
		params="${params#"${params%%[! ]*}"}"
		params="${params%"${params##*[! ]}"}"
		ocf_run tgtadm --lld iscsi --op new --mode logicalunit \
			--tid=${TID} \
			--lun=${OCF_RESKEY_lun} \
			--backing-store ${OCF_RESKEY_path} || exit $OCF_ERR_GENERIC
		if [ -z "$params" ]; then
			return $OCF_SUCCESS
		else
			ocf_run tgtadm --lld iscsi --op update --mode logicalunit \
				--tid=${TID} \
				--lun=${OCF_RESKEY_lun} \
				--params "${params// /,}" || exit $OCF_ERR_GENERIC
		fi
		;;
	lio)
		# For lio, we first have to create a target device, then
		# add it to the Target Portal Group as an LU.
		ocf_run tcm_node --createdev=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} \
			${OCF_RESKEY_path} || exit $OCF_ERR_GENERIC
		if [ -n "${OCF_RESKEY_scsi_sn}" ]; then
			ocf_run tcm_node --setunitserial=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} \
				${OCF_RESKEY_scsi_sn} || exit $OCF_ERR_GENERIC
		fi
		ocf_run lio_node --addlun=${OCF_RESKEY_target_iqn} 1 ${OCF_RESKEY_lun} \
			${OCF_RESOURCE_INSTANCE} iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC
		# Grant each listed initiator access to the new LUN.
		if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
			for initiator in ${OCF_RESKEY_allowed_initiators}; do
				ocf_run lio_node --addlunacl=${OCF_RESKEY_target_iqn} 1 \
					${initiator} ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
			done
		fi
		;;
	esac
	return $OCF_SUCCESS
}
# Stop action: withdraw the logical unit if the monitor reports it as
# exported. Always returns $OCF_SUCCESS unless a management command fails,
# in which case the script exits with $OCF_ERR_GENERIC.
iSCSILogicalUnit_stop() {
	iSCSILogicalUnit_monitor
	# An LU that is not exported is already stopped.
	[ $? = $OCF_SUCCESS ] || return $OCF_SUCCESS

	case $OCF_RESKEY_implementation in
	iet)
		# IET lets us delete an LU even while it is in use.
		ocf_run ietadm --op delete --tid=${TID} --lun=${OCF_RESKEY_lun} \
			|| exit $OCF_ERR_GENERIC
		;;
	tgt)
		# tgt refuses to delete an LU that is in use and offers no way
		# to drop only the connections using that LU. Keep retrying
		# once per second until tgtd lets go of it, or until the LRM
		# times this operation out.
		until ocf_run -warn tgtadm --lld iscsi --op delete --mode logicalunit \
			--tid ${TID} \
			--lun=${OCF_RESKEY_lun}
		do
			sleep 1
		done
		;;
	lio)
		# Remove the per-initiator ACLs first, then the LUN, then the
		# backing device.
		if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
			for initiator in ${OCF_RESKEY_allowed_initiators}; do
				ocf_run lio_node --dellunacl=${OCF_RESKEY_target_iqn} 1 \
					${initiator} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
			done
		fi
		ocf_run lio_node --dellun=${OCF_RESKEY_target_iqn} 1 ${OCF_RESKEY_lun} \
			|| exit $OCF_ERR_GENERIC
		ocf_run tcm_node --freedev=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} \
			|| exit $OCF_ERR_GENERIC
		;;
	esac

	return $OCF_SUCCESS
}
# Monitor action: report whether the logical unit is currently exported.
# For iet and tgt this also sets the global TID to the ID of the target
# named ${OCF_RESKEY_target_iqn}; start and stop rely on that.
# Returns $OCF_SUCCESS if the LU was found, $OCF_NOT_RUNNING otherwise.
iSCSILogicalUnit_monitor() {
	case $OCF_RESKEY_implementation in
	iet)
		# Look up the target ID in the IET volume table.
		TID=$(sed -ne "s/tid:\([[:digit:]]\+\) name:${OCF_RESKEY_target_iqn}$/\1/p" < /proc/net/iet/volume)
		if [ -z "${TID}" ]; then
			# No such target, hence no LU either.
			return $OCF_NOT_RUNNING
		fi
		# FIXME: this matches on LUN and path only; it does not
		# verify that the entry belongs to the target found above.
		if grep -E -q "[[:space:]]+lun:${OCF_RESKEY_lun}.*path:${OCF_RESKEY_path}$" /proc/net/iet/volume; then
			return $OCF_SUCCESS
		fi
		;;
	tgt)
		# Look up the target ID in tgtadm's target listing.
		TID=$(tgtadm --lld iscsi --op show --mode target \
			| sed -ne "s/^Target \([[:digit:]]\+\): ${OCF_RESKEY_target_iqn}$/\1/p")
		if [ -z "$TID" ]; then
			# No such target, hence no LU either.
			return $OCF_NOT_RUNNING
		fi
		# Only the backing store is matched here; neither the target
		# ID nor the LUN is taken into account.
		if tgtadm --lld iscsi --op show --mode target \
			| grep -E -q "[[:space:]]+Backing store.*: ${OCF_RESKEY_path}$"; then
			return $OCF_SUCCESS
		fi
		;;
	lio)
		# The LU counts as exported when its configfs udev_path entry
		# exists and holds the configured path.
		configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/${OCF_RESOURCE_INSTANCE}/udev_path"
		if [ -e ${configfs_path} ] && [ $(cat ${configfs_path}) = "${OCF_RESKEY_path}" ]; then
			return $OCF_SUCCESS
		fi
		;;
	esac
	return $OCF_NOT_RUNNING
}
# Validate the configuration before any action other than meta-data and
# usage/help. Checks, in order: required parameters, the selected
# implementation, the LUN range (iet and tgt only), parameters that the
# implementation ignores (warning only) and, unless this is a probe, the
# required binaries and kernel support.
# Returns $OCF_SUCCESS; on a fatal problem the script exits with
# $OCF_ERR_CONFIGURED or $OCF_ERR_INSTALLED instead.
iSCSILogicalUnit_validate() {
# Do we have all required variables?
for var in target_iqn lun path; do
param="OCF_RESKEY_${var}"
# ${!param} is bash indirect expansion: the value of the variable whose
# name is stored in param.
if [ -z "${!param}" ]; then
ocf_log error "Missing resource parameter \"$var\"!"
exit $OCF_ERR_CONFIGURED
fi
done
# Is the configured implementation supported?
case "$OCF_RESKEY_implementation" in
"iet"|"tgt"|"lio")
;;
"")
# The user didn't specify an implementation, and we were
# unable to determine one from installed binaries (in
# other words: no binaries for any supported
# implementation could be found)
ocf_log error "Undefined iSCSI target implementation"
exit $OCF_ERR_INSTALLED
;;
*)
ocf_log error "Unsupported iSCSI target implementation \"$OCF_RESKEY_implementation\"!"
exit $OCF_ERR_CONFIGURED
;;
esac
# Do we have a valid LUN?
# The exit status of "[" is inspected directly below: 0 means the
# comparison holds, 1 that it does not, and anything else is treated as
# "not an integer". No LUN check is made for lio.
case $OCF_RESKEY_implementation in
iet)
# IET allows LUN 0 and up
[ $OCF_RESKEY_lun -ge 0 ]
case $? in
0)
# OK
;;
1)
ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be a non-negative integer)."
exit $OCF_ERR_CONFIGURED
;;
*)
ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be an integer)."
exit $OCF_ERR_CONFIGURED
;;
esac
;;
tgt)
# tgt reserves LUN 0 for its own purposes
[ $OCF_RESKEY_lun -ge 1 ]
case $? in
0)
# OK
;;
1)
ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be greater than 0)."
exit $OCF_ERR_CONFIGURED
;;
*)
ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be an integer)."
exit $OCF_ERR_CONFIGURED
;;
esac
;;
esac
# Do we have any configuration parameters that the current
# implementation does not support?
local unsupported_params
local var
local envar
case $OCF_RESKEY_implementation in
iet)
# IET does not support setting the vendor and product ID
# (it always uses "IET" and "VIRTUAL-DISK")
unsupported_params="vendor_id product_id allowed_initiators lio_iblock"
;;
tgt)
unsupported_params="allowed_initiators lio_iblock"
;;
lio)
unsupported_params="scsi_id vendor_id product_id"
;;
esac
# Unsupported parameters that are set only produce a warning.
for var in ${unsupported_params}; do
envar=OCF_RESKEY_${var}
if [ -n "${!envar}" ]; then
ocf_log warn "Configuration parameter \"${var}\"" \
"is not supported by the iSCSI implementation" \
"and will be ignored."
fi
done
# The binary and kernel checks are skipped when ocf_is_probe succeeds.
if ! ocf_is_probe; then
# Do we have all required binaries?
case $OCF_RESKEY_implementation in
iet)
check_binary ietadm
;;
tgt)
check_binary tgtadm
;;
lio)
check_binary tcm_node
check_binary lio_node
;;
esac
# Is the required kernel functionality available?
case $OCF_RESKEY_implementation in
iet)
[ -d /proc/net/iet ]
if [ $? -ne 0 ]; then
ocf_log err "/proc/net/iet does not exist or is not a directory -- check if required modules are loaded."
exit $OCF_ERR_INSTALLED
fi
;;
tgt)
# tgt is userland only
;;
esac
fi
return $OCF_SUCCESS
}
# Main dispatch, part 1: meta-data and usage/help are served without
# validating the environment. This case statement switches on $1, while
# the one below switches on $__OCF_ACTION.
case $1 in
meta-data)
meta_data
exit $OCF_SUCCESS
;;
usage|help)
iSCSILogicalUnit_usage
exit $OCF_SUCCESS
;;
esac
# Everything except usage and meta-data must pass the validate test
iSCSILogicalUnit_validate
# Main dispatch, part 2. The status of the selected action is picked up
# in rc after the case statement.
case $__OCF_ACTION in
start) iSCSILogicalUnit_start;;
stop) iSCSILogicalUnit_stop;;
monitor|status) iSCSILogicalUnit_monitor;;
# NOTE(review): reload simply runs the start action and announces itself
# at "err" log level; no reload action is listed in meta_data -- confirm
# that this is intended.
reload) ocf_log err "Reloading..."
iSCSILogicalUnit_start
;;
# Validation already ran above, so there is nothing left to do here.
validate-all) ;;
*) iSCSILogicalUnit_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/iscsi b/heartbeat/iscsi
index 02bc3301a..de3f4c03d 100755
--- a/heartbeat/iscsi
+++ b/heartbeat/iscsi
@@ -1,503 +1,503 @@
#!/bin/sh
#
# iSCSI OCF resource agent
# Description: manage iSCSI disks (add/remove) using open-iscsi
#
# Copyright Dejan Muhamedagic <dejan@suse.de>
# (C) 2007 Novell Inc. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
# See usage() and meta_data() below for more details...
#
# OCF instance parameters:
# OCF_RESKEY_portal: the iSCSI portal address or host name (required)
# OCF_RESKEY_target: the iSCSI target (required)
# OCF_RESKEY_iscsiadm: iscsiadm program path (optional)
# OCF_RESKEY_discovery_type: discovery type (optional; default: sendtargets)
# OCF_RESKEY_try_recovery: wait for iSCSI recovery in monitor (optional; default: false)
#
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
OCF_RESKEY_udev_default="yes"
OCF_RESKEY_iscsiadm_default="iscsiadm"
OCF_RESKEY_discovery_type_default="sendtargets"
OCF_RESKEY_try_recovery_default="false"
# Apply each default only when the parameter is not set in the environment.
: ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}}
: ${OCF_RESKEY_iscsiadm=${OCF_RESKEY_iscsiadm_default}}
: ${OCF_RESKEY_discovery_type=${OCF_RESKEY_discovery_type_default}}
# try_recovery has a declared default as well; apply it like the others so
# that the value advertised in the meta-data is the one actually in effect.
: ${OCF_RESKEY_try_recovery=${OCF_RESKEY_try_recovery_default}}
# Print the usage text to stdout. The list of supported methods is taken
# from iscsi_methods and joined with "|" (the unquoted echo turns the
# newline-separated list into a space-separated one first).
# Note: the variable "methods" is not local.
usage() {
methods=`iscsi_methods`
methods=`echo $methods | tr ' ' '|'`
cat <<EOF
usage: $0 {$methods}
$0 manages an iSCSI target
The 'start' operation starts (adds) the iSCSI target.
The 'stop' operation stops (removes) the iSCSI target.
The 'status' operation reports whether the iSCSI target is connected
The 'monitor' operation reports whether the iSCSI target is connected
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation reports on the methods $0 supports
EOF
}
# Print the resource agent meta-data (XML) to stdout. The here-document is
# unquoted, so the ${OCF_RESKEY_*_default} references are expanded with the
# defaults defined near the top of this file.
meta_data() {
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="iscsi">
<version>1.0</version>
<longdesc lang="en">
OCF Resource Agent for iSCSI. Add (start) or remove (stop) iSCSI
targets.
</longdesc>
<shortdesc lang="en">Manages a local iSCSI initiator and its connections to iSCSI targets</shortdesc>
<parameters>
<parameter name="portal" unique="0" required="1">
<longdesc lang="en">
The iSCSI portal address in the form: {ip_address|hostname}[":"port]
</longdesc>
<shortdesc lang="en">Portal address</shortdesc>
<content type="string" />
</parameter>
<parameter name="target" unique="1" required="1">
<longdesc lang="en">
The iSCSI target IQN.
</longdesc>
<shortdesc lang="en">Target IQN</shortdesc>
<content type="string" />
</parameter>
<parameter name="discovery_type" unique="0" required="0">
<longdesc lang="en">
Target discovery type. Check the open-iscsi documentation for
supported discovery types.
</longdesc>
<shortdesc lang="en">Target discovery type</shortdesc>
<content type="string" default="${OCF_RESKEY_discovery_type_default}" />
</parameter>
<parameter name="iscsiadm" unique="0" required="0">
<longdesc lang="en">
open-iscsi administration utility binary.
</longdesc>
<shortdesc lang="en">iscsiadm binary</shortdesc>
<content type="string" default="${OCF_RESKEY_iscsiadm_default}" />
</parameter>
<parameter name="udev" unique="0" required="0">
<longdesc lang="en">
If the next resource depends on the udev creating a device then
we wait until it is finished. On a normally loaded host this
should be done quickly, but you may be unlucky. If you are not
using udev set this to "no", otherwise we will spin in a loop
until a timeout occurs.
</longdesc>
<shortdesc lang="en">udev</shortdesc>
<content type="string" default="${OCF_RESKEY_udev_default}" />
</parameter>
<parameter name="try_recovery" unique="0" required="0">
<longdesc lang="en">
If the iSCSI session exists but is currently inactive/broken,
which is most probably due to network problems, the iSCSI layer
will try to recover. If this parameter is set to true, we'll wait
for the recovery to succeed. In that case the monitor operation
can only time out so you should set the monitor op timeout
attribute appropriately.
</longdesc>
-<shortdesc lang="en">on error wait for iSCSI recovery in monitor</shortdesc>
+<shortdesc lang="en">On error wait for iSCSI recovery in monitor</shortdesc>
<content type="boolean" default="${OCF_RESKEY_try_recovery_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="30" />
<action name="monitor" depth="0" timeout="30" interval="120" />
<action name="validate-all" timeout="5" />
<action name="methods" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
EOF
}
# Print the names of the supported methods to stdout, one per line.
# usage() turns this list into the "{a|b|...}" part of its message.
iscsi_methods() {
cat <<EOF
start
stop
status
monitor
validate-all
methods
meta-data
usage
EOF
}
#
# open-iscsi interface
#
# Succeeds if the process list contains a command line matching "iscsid".
# The bracketed first letter in the pattern keeps the grep process itself
# from matching.
is_iscsid_running() {
ps -e -o cmd | grep -qs '[i]scsid'
}
# Select the open-iscsi implementation of the generic operations and check
# that it is usable.
# Sets the globals discovery, add_disk, remove_disk, disk_status, iscsiadm.
# Returns:
#   0  iscsid is running
#   1  iscsid is not running, but iscsid.conf has an iscsid.startup hook
#   2  iscsid is not running and there is no startup hook
#   3  the iscsiadm binary was not found
open_iscsi_setup() {
	iscsiadm=${OCF_RESKEY_iscsiadm}
	disk_status=open_iscsi_status
	remove_disk=open_iscsi_remove
	add_disk=open_iscsi_add
	discovery=open_iscsi_discovery

	if ! have_binary ${iscsiadm}; then
		return 3
	fi

	is_iscsid_running && return 0

	# apparently on RedHat (perhaps elsewhere?), there is a kind of
	# iscsid autostart once root invokes some open_iscsi command; the
	# iscsid.startup hook should take care of it; reported by
	# m.richardson@ed.ac.uk (see also the discussion at the
	# linux-ha-dev ML)
	if grep -qs '^iscsid.startup' /etc/iscsi/iscsid.conf; then
		return 1
	fi

	ocf_log err "iscsid not running; please start open-iscsi utilities"
	return 2
}
#
# discovery return codes:
# 0: ok (variable portal set)
# 1: target not found
# 2: target found but can't connect to it unambiguously
# 3: iscsiadm returned error
#
# open-iscsi >= "2.0-872" changed discovery semantics
# see http://www.mail-archive.com/open-iscsi@googlegroups.com/msg04883.html
# there's a new discoverydb command which should be used instead of discovery
# Run target discovery against $OCF_RESKEY_portal and, on success, set the
# global PORTAL to the portal address under which $OCF_RESKEY_target was
# reported. Uses the globals iscsiadm and discovery_type.
# Return codes: 0 ok, 1 target not found, 2 several portals reported and
# none equals $OCF_RESKEY_portal, 3 iscsiadm failed or printed nothing.
open_iscsi_discovery() {
local output
local discovery_variant="discovery"
local options=""
local cmd
# The third word of "iscsiadm --version" is taken as the version string.
local version=`$iscsiadm --version | awk '{print $3}'`
ocf_version_cmp "$version" "2.0-871"
if [ $? -eq 2 ]; then # newer than 2.0-871?
discovery_variant="discoverydb"
[ "$discovery_type" = "sendtargets" ] &&
options="-D"
fi
cmd="$iscsiadm -m $discovery_variant -p $OCF_RESKEY_portal -t $discovery_type $options"
output=`$cmd`
# Both a non-zero status and empty output count as failure; the error is
# only logged when there is some output to show.
if [ $? -ne 0 -o x = "x$output" ]; then
[ x != "x$output" ] && {
ocf_log err "$cmd FAILED"
echo "$output"
}
return 3
fi
# Pick the portal column of every output line whose last field is the
# target, dropping everything from the first comma on.
PORTAL=`echo "$output" |
awk -v target="$OCF_RESKEY_target" '
$NF==target{
if( NF==3 ) portal=$2; # sles compat mode
else portal=$1;
sub(",.*","",portal);
print portal;
}'`
# Decide by the number of portals found for the target.
case `echo "$PORTAL" | wc -w` in
0) #target not found
echo "$output"
ocf_log err "target $OCF_RESKEY_target not found at portal $OCF_RESKEY_portal"
return 1
;;
1) #we're ok
return 0
;;
*) # handle multihome hosts reporting multiple portals
# Accept only a portal that is literally equal to the configured one.
for p in $PORTAL; do
if [ "$OCF_RESKEY_portal" = "$p" ]; then
PORTAL="$OCF_RESKEY_portal"
return 0
fi
done
echo "$output"
ocf_log err "sorry, can't handle multihomed hosts unless you specify the portal exactly"
return 2
;;
esac
}
# Log in to an iSCSI target.
# Arguments: $1 - portal address, $2 - target IQN
# Returns:   the exit status of iscsiadm
open_iscsi_add() {
	# Quote both arguments: a bracketed IPv6 portal such as
	# "[fe80::1]:3260" is a valid glob pattern and must not be subject
	# to pathname expansion or word splitting.
	$iscsiadm -m node -p "$1" -T "$2" -l
}
# Print the session id(s) of the active session(s) to the given target,
# one per line; prints nothing if there is no such session.
# Arguments: $1 - target IQN
open_iscsi_get_session_id() {
	local target="$1"
	# A session line looks like
	#   tcp: [1] 10.0.0.1:3260,1 iqn.2001-04.com.example:disk1
	# and may carry a trailing annotation such as " (non-flash)".
	# Compare the fourth field literally instead of anchoring a regular
	# expression at the end of the line, so that the annotation does
	# not hide the session and the dots in the IQN are not treated as
	# wildcards.
	$iscsiadm -m session 2>/dev/null |
		awk -v target="$target" '$4 == target { print $2 }' |
		tr -d '[]'
}
# Log out of the session to the given target.
# Arguments: $1 - target IQN
# Returns:   the exit status of iscsiadm, or 1 if no session id was found
open_iscsi_remove() {
	local target="$1"
	local session_id

	session_id=$(open_iscsi_get_session_id "$target")
	if [ -z "$session_id" ]; then
		ocf_log err "cannot find session id for target $target"
		return 1
	fi

	$iscsiadm -m session -r $session_id -u
}
# Report the state of the session to the given target.
# Arguments: $1 - target IQN
#            $2 - optional; overrides $OCF_RESKEY_try_recovery
# Returns:   0 the session is logged in (or reports no/unknown state)
#            1 there is no session for the target
#            2 iscsiadm failed, or the session is in a failed state and
#              we are not waiting for recovery
# When recovery is enabled (and this is neither a stop nor a probe) the
# function polls once per second until the session is usable again; it can
# then only end through success or the operation timeout.
open_iscsi_status() {
	local target="$1"
	local session_id conn_state outp
	# Last failed state that was reported. Non-empty exactly when a
	# "waiting for recovery" warning has been logged, which is what
	# decides whether "Session restored" is announced later on.
	local prev_state=""
	local recov

	recov=${2:-$OCF_RESKEY_try_recovery}
	session_id=$(open_iscsi_get_session_id "$target")
	[ -z "$session_id" ] &&
		return 1
	while :; do
		outp=$($iscsiadm -m session -r $session_id -P 1) ||
			return 2
		conn_state=$(echo "$outp" | sed -n '/Connection State/s/.*: //p')
		case "$conn_state" in
		"LOGGED IN"|"Unknown"|"")
			# some drivers don't return connection state, in
			# that case we'll assume that we're still connected
			[ -n "$prev_state" ] &&
				ocf_log info "connection state $conn_state. Session restored."
			return 0
			;;
		*) # failed
			if [ "$__OCF_ACTION" != stop ] && ! ocf_is_probe && ocf_is_true $recov; then
				# Log every state change once, not every poll.
				if [ "$conn_state" != "$prev_state" ]; then
					ocf_log warning "connection state $conn_state, waiting for recovery..."
					prev_state="$conn_state"
				fi
				sleep 1
			else
				ocf_log err "iscsiadm output: $outp"
				return 2
			fi
			;;
		esac
	done
}
# Run the discovery hook selected by the setup function and turn its
# failures into an exit from the script:
#   discovery status 1 or 2      -> exit $OCF_ERR_GENERIC
#   discovery status 3           -> exit $OCF_ERR_INSTALLED if iscsid is
#                                   not running, else $OCF_ERR_GENERIC
# Any other status lets the function return normally.
disk_discovery() {
	local rc

	$discovery # discover and setup the real portal string (address)
	rc=$?

	if [ $rc -eq 1 ] || [ $rc -eq 2 ]; then
		exit $OCF_ERR_GENERIC
	fi

	if [ $rc -eq 3 ]; then
		if ! is_iscsid_running; then
			# setup_rc 1 means the setup step relied on the
			# iscsid.startup hook to bring up iscsid.
			if [ $setup_rc -eq 1 ]; then
				ocf_log warning "iscsid.startup probably not correctly set in /etc/iscsi/iscsid.conf"
			fi
			exit $OCF_ERR_INSTALLED
		fi
		exit $OCF_ERR_GENERIC
	fi

	return 0
}
#
# NB: this is udev specific!
#
# Block until at least one entry whose name starts with
# /dev/disk/by-path/ip-$PORTAL-iscsi-$OCF_RESKEY_target exists, logging a
# warning and sleeping one second between checks. There is no limit on the
# number of checks. Sets the global "dev" to that path prefix.
wait_for_udev() {
	dev=/dev/disk/by-path/ip-$PORTAL-iscsi-$OCF_RESKEY_target
	until ls $dev* >/dev/null 2>&1; do
		ocf_log warning "waiting for udev to create $dev"
		sleep 1
	done
	return 0
}
# Run the status hook for $OCF_RESKEY_target, passing any extra arguments
# through, and translate its result into an OCF return code:
#   0 -> $OCF_SUCCESS, 1 -> $OCF_NOT_RUNNING, 2 -> $OCF_ERR_GENERIC.
# Any other hook status results in a return value of 0.
iscsi_status() {
	local rc

	$disk_status $OCF_RESKEY_target $*
	rc=$?

	if [ $rc -eq 0 ]; then
		return $OCF_SUCCESS
	elif [ $rc -eq 1 ]; then
		return $OCF_NOT_RUNNING
	elif [ $rc -eq 2 ]; then
		return $OCF_ERR_GENERIC
	fi
}
# Start action: log in to the target unless a working session already
# exists, optionally wait for udev, then verify the session (waiting for
# recovery is enforced for this final check).
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
iscsi_start() {
	local rc

	iscsi_status
	rc=$?

	if [ $rc -eq $OCF_SUCCESS ]; then
		ocf_log info "iscsi $PORTAL $OCF_RESKEY_target already running"
		return $OCF_SUCCESS
	elif [ $rc -eq $OCF_NOT_RUNNING ]; then
		if ! $add_disk $PORTAL $OCF_RESKEY_target; then
			return $OCF_ERR_GENERIC
		fi
		# Only "yes"/"Yes" style values enable the udev wait.
		case "$OCF_RESKEY_udev" in
		[Yy]es)
			if ! wait_for_udev; then
				return $OCF_ERR_GENERIC
			fi
			;;
		esac
	else
		# the session exists, but it's broken
		ocf_log warning "iscsi $PORTAL $OCF_RESKEY_target in failed state"
	fi

	iscsi_status 1 # enforce wait
	[ $? -eq $OCF_SUCCESS ] && return $OCF_SUCCESS
	return $OCF_ERR_GENERIC
}
# Stop action: log out of the target unless it is already disconnected,
# then verify that the session is gone.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
iscsi_stop() {
	iscsi_status
	if [ $? -eq $OCF_NOT_RUNNING ]; then
		ocf_log info "iscsi $OCF_RESKEY_target already stopped"
		return $OCF_SUCCESS
	fi

	if ! $remove_disk $OCF_RESKEY_target; then
		return $OCF_ERR_GENERIC
	fi

	# Anything but "not running" after the logout counts as a failure.
	iscsi_status
	if [ $? -eq $OCF_NOT_RUNNING ]; then
		return $OCF_SUCCESS
	fi
	return $OCF_ERR_GENERIC
}
# Two-state check: $OCF_SUCCESS if the status hook succeeds for
# $OCF_RESKEY_target, $OCF_NOT_RUNNING for any failure.
iscsi_monitor() {
	$disk_status $OCF_RESKEY_target && return $OCF_SUCCESS
	return $OCF_NOT_RUNNING
}
#
# 'main' starts here...
#
# Exactly one argument (the action) is expected.
if [ $# -ne 1 ]; then
usage
exit $OCF_ERR_ARGS
fi
# These operations don't require OCF instance parameters to be set
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage) usage
exit $OCF_SUCCESS;;
methods) iscsi_methods
exit $OCF_SUCCESS;;
esac
# Both required parameters must be present for every other action.
if [ x = "x$OCF_RESKEY_target" ]; then
ocf_log err "target parameter not set"
exit $OCF_ERR_CONFIGURED
fi
if [ x = "x$OCF_RESKEY_portal" ]; then
ocf_log err "portal parameter not set"
exit $OCF_ERR_CONFIGURED
fi
# The same setup function is used on every platform; on anything but
# Linux a notice is logged first.
case `uname` in
Linux) setup=open_iscsi_setup
;;
*) ocf_log info "platform `uname` may not be supported"
setup=open_iscsi_setup
;;
esac
# Exit code used by the "status" action when the initiator is unusable.
LSB_STATUS_STOPPED=3
$setup
setup_rc=$?
# open_iscsi_setup returns 2 when iscsid is not running and 3 when the
# iscsiadm binary is missing. In both cases nothing can be connected, so
# stop and the status queries are answered right away.
if [ $setup_rc -gt 1 ]; then
ocf_log info "iscsi initiator utilities not installed or not setup"
case "$1" in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
*) exit $OCF_ERR_INSTALLED;;
esac
fi
if [ `id -u` != 0 ]; then
ocf_log err "$0 must be run as root"
exit $OCF_ERR_PERM
fi
# which method was invoked?
# The start, stop and monitor arms do not exit explicitly: the script then
# ends with the status of the last command run in the selected arm.
case "$1" in
start)
# discovery_type is read by the discovery function.
discovery_type=${OCF_RESKEY_discovery_type}
disk_discovery
iscsi_start
;;
stop) iscsi_stop
;;
status) iscsi_status
rc=$?
case $rc in
$OCF_SUCCESS)
echo iscsi target $OCF_RESKEY_target running
;;
$OCF_NOT_RUNNING)
echo iscsi target $OCF_RESKEY_target stopped
;;
*)
echo iscsi target $OCF_RESKEY_target failed
;;
esac
exit $rc
;;
monitor) iscsi_status
;;
validate-all) # everything already validated
# just exit successfully here.
exit $OCF_SUCCESS;;
*) iscsi_methods
exit $OCF_ERR_UNIMPLEMENTED;;
esac
#
# vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0
diff --git a/heartbeat/jboss b/heartbeat/jboss
index 247924ca6..8cc5c86e9 100755
--- a/heartbeat/jboss
+++ b/heartbeat/jboss
@@ -1,500 +1,500 @@
#!/bin/sh
#
# Description: Manages a Jboss Server as an OCF High-Availability
# resource under Heartbeat/LinuxHA control
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
# Copyright (c) 2009 Bauer Systems KG / Stefan Schluppeck
#
#######################################################################
# OCF parameters:
# OCF_RESKEY_resource_name - The name of the resource. Default is ${OCF_RESOURCE_INSTANCE}
# why not let the RA log through lrmd?
# 2009/09/09 Nakahira:
# jboss_console is used to record output of the "run.sh".
# The log of "Run.sh" should not be output to ha-log because it is so annoying.
# OCF_RESKEY_console - A destination of the log of jboss run and shutdown script. Default is /var/log/${OCF_RESKEY_resource_name}.log
# OCF_RESKEY_shutdown_timeout - Time-out at the time of the stop. Default is 5
# OCF_RESKEY_kill_timeout - The re-try number of times awaiting a stop. Default is 10
# OCF_RESKEY_user - A user name to start a JBoss. Default is root
# OCF_RESKEY_statusurl - URL for state confirmation. Default is http://127.0.0.1:8080
# OCF_RESKEY_java_home - Home directory of the Java. Default is ${JAVA_HOME}
# OCF_RESKEY_java_opts - Options for Java.
# OCF_RESKEY_jboss_home - Home directory of Jboss. Default is None
# is it possible to devise this string from options? I'm afraid
# that allowing users to set this could be error prone.
# 2009/09/09 Nakahira:
# It is difficult to set it automatically because jboss_pstring
# greatly depends on the environment. At any rate, system architect
# should note that pstring doesn't influence other processes.
# OCF_RESKEY_pstring - String by which JBoss is found in the process list. Default is "java -Dprogram.name=run.sh"
# OCF_RESKEY_run_opts - Options for jboss to run. Default is "-c default -l lpg4j"
# OCF_RESKEY_shutdown_opts - Options for jboss to shutdown. Default is "-s 127.0.0.1:1099"
# OCF_RESKEY_rotate_consolelog - Control console log logrotation flag. Default is false.
# OCF_RESKEY_rotate_value - console log logrotation value. Default is 86400 span(seconds).
# OCF_RESKEY_rotate_logsuffix - Control console log logrotation suffix. Default is .%F.
###############################################################################
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Print the usage text to stdout ($0 and $WGETNAME are expanded inside the
# here-document) and return $OCF_ERR_ARGS.
usage()
{
cat <<-!
usage: $0 action
action:
start start jboss
stop stop the jboss
status return the status of jboss, run or down
monitor return TRUE if the jboss appears to be working.
You have to have installed $WGETNAME for this to work.
meta-data show meta data message
validate-all validate the instance parameters
!
return $OCF_ERR_ARGS
}
# Probe $STATUSURL with a single wget attempt.
# Arguments: $1 - optional; when non-empty the probe is run quietly
#                 (wget's stderr is discarded and ocf_run is bypassed),
#                 which is used while polling during start.
# Returns:   $OCF_SUCCESS if wget succeeded, $OCF_ERR_GENERIC otherwise.
isrunning_jboss()
{
	if [ -n "$1" ]; then
		# Retry message for restraint
		wget -t 1 -O /dev/null $STATUSURL 2>/dev/null
	else
		ocf_run -q -err wget -t 1 -O /dev/null $STATUSURL
	fi
	[ $? -eq 0 ] && return $OCF_SUCCESS

	# JBoss service error
	return $OCF_ERR_GENERIC
}
# Monitor: $OCF_NOT_RUNNING if no process matches $PSTRING, otherwise the
# result of the HTTP probe done by isrunning_jboss ($1 is passed through).
monitor_jboss()
{
	pgrep -f "$PSTRING" > /dev/null || return $OCF_NOT_RUNNING
	isrunning_jboss $1
}
# Replace the console log by a FIFO that is read by a background
# rotatelogs process running as $JBOSS_USER.
# Sets the global ROTATELOGS to the rotatelogs binary that was found.
# Returns 1 (after logging a warning) if neither /usr/sbin/rotatelogs nor
# /usr/sbin/rotatelogs2 is executable.
rotate_console()
{
	local candidate
	local found=""

	# Look for rotatelogs/rotatelogs2
	for candidate in /usr/sbin/rotatelogs /usr/sbin/rotatelogs2; do
		if [ -x "$candidate" ]; then
			ROTATELOGS=$candidate
			found=yes
			break
		fi
	done
	if [ -z "$found" ]; then
		ocf_log warn "rotatelogs command not found."
		return 1
	fi

	# Clean up and set permissions on required files
	rm -rf "$CONSOLE"
	mkfifo -m700 "$CONSOLE"
	chown --dereference "$JBOSS_USER" "$CONSOLE" || true

	# The reader runs in the background, taking its input from the FIFO.
	su - -s /bin/sh $JBOSS_USER \
		-c "$ROTATELOGS -l \"$CONSOLE$ROTATELOG_SUFFIX\" $ROTATEVALUE" \
		< "$CONSOLE" > /dev/null 2>&1 &
}
# Start action: launch run.sh in the background (directly as root, or via
# su for any other $JBOSS_USER) with its output appended to $CONSOLE, then
# poll monitor_jboss every 3 seconds until it reports success.
# Returns $OCF_SUCCESS. The polling loop has no limit of its own.
start_jboss()
{
# The "start" argument makes the HTTP probe quiet (see isrunning_jboss).
monitor_jboss start
if [ $? = $OCF_SUCCESS ]; then
ocf_log info "JBoss already running."
return $OCF_SUCCESS
fi
# Console log rotation is optional; a failure to set it up only produces
# a warning and the start carries on.
if ocf_is_true $ROTATELOG_FLG; then
rotate_console
if [ $? = 0 ]; then
ocf_log debug "Rotate console log succeeded."
else
ocf_log warn "Rotate console log failed. Starting jboss without console log rotation."
fi
fi
ocf_log info "Starting JBoss[$RESOURCE_NAME]"
if [ "$JBOSS_USER" = root ]; then
"$JBOSS_HOME/bin/run.sh" $RUN_OPTS \
>> "$CONSOLE" 2>&1 &
else
# For a non-root user the Java/JBoss environment is exported explicitly
# inside the su command line.
su - -s /bin/bash "$JBOSS_USER" \
-c "export JAVA_HOME=${JAVA_HOME}; \
export JAVA_OPTS=\"${JAVA_OPTS}\"; \
export JBOSS_HOME=${JBOSS_HOME}; \
$JBOSS_HOME/bin/run.sh $RUN_OPTS" \
>> "$CONSOLE" 2>&1 &
fi
# Wait until the monitor succeeds.
while true; do
monitor_jboss start
if [ $? = $OCF_SUCCESS ]; then
break
fi
ocf_log info "start_jboss[$RESOURCE_NAME]: retry monitor_jboss"
sleep 3
done
return $OCF_SUCCESS
}
# Stop action, escalating in three phases:
#   1. run shutdown.sh in the background and wait up to $SHUTDOWN_TIMEOUT s;
#   2. if still alive, send QUIT once (JVM thread dump), then TERM once a
#      second for up to $KILL_TIMEOUT s;
#   3. send KILL once a second until no process matches $PSTRING.
# The stop timeout of the resource should therefore be longer than
# $SHUTDOWN_TIMEOUT + $KILL_TIMEOUT.
# Globals:  lapse_sec (written)
# Returns:  $OCF_SUCCESS
stop_jboss()
{
	local shutdown_cmd

	ocf_log info "Stopping JBoss[$RESOURCE_NAME]"
	case "$JBOSS_USER" in
	root)
		"$JBOSS_HOME/bin/shutdown.sh" $SHUTDOWN_OPTS -S >> "$CONSOLE" 2>&1 &
		;;
	*)
		shutdown_cmd="export JAVA_HOME=${JAVA_HOME}; export JBOSS_HOME=${JBOSS_HOME}; $JBOSS_HOME/bin/shutdown.sh $SHUTDOWN_OPTS -S"
		su - -s /bin/bash "$JBOSS_USER" -c "$shutdown_cmd" >> "$CONSOLE" 2>&1 &
		;;
	esac

	# Phase 1: give the regular shutdown a chance.
	lapse_sec=0
	while pgrep -f "$PSTRING" > /dev/null; do
		sleep 1
		lapse_sec=$((lapse_sec + 1))
		ocf_log info "stop_jboss[$RESOURCE_NAME]: stop NORM $lapse_sec/$SHUTDOWN_TIMEOUT"
		[ $lapse_sec -ge $SHUTDOWN_TIMEOUT ] && break
	done

	# Phase 2: thread dump, then repeated SIGTERM.
	if pgrep -f "$PSTRING" > /dev/null; then
		ocf_log info "stop_jboss[$RESOURCE_NAME]: output a JVM thread dump to $CONSOLE"
		pkill -QUIT -f "$PSTRING"
		lapse_sec=0
		while :; do
			sleep 1
			lapse_sec=$((lapse_sec + 1))
			ocf_log info "stop_jboss[$RESOURCE_NAME]: kill jboss by SIGTERM ($lapse_sec/$KILL_TIMEOUT)"
			pkill -TERM -f "$PSTRING"
			pgrep -f "$PSTRING" > /dev/null || break
			[ $lapse_sec -ge $KILL_TIMEOUT ] && break
		done
	fi

	# Phase 3: SIGKILL until the process is gone (no upper bound here).
	lapse_sec=0
	while pgrep -f "$PSTRING" > /dev/null; do
		sleep 1
		lapse_sec=$((lapse_sec + 1))
		ocf_log info "stop_jboss[$RESOURCE_NAME]: kill jboss by SIGKILL ($lapse_sec/@@@)"
		pkill -KILL -f "$PSTRING"
	done

	# With rotation enabled $CONSOLE is removed again on stop.
	if ocf_is_true $ROTATELOG_FLG; then
		rm -f "${CONSOLE}"
	fi
	return $OCF_SUCCESS
}
# Status action: print a human-readable state on stdout.
# Returns: $OCF_SUCCESS when a process matches $PSTRING and the web probe
#          succeeds; $OCF_NOT_RUNNING when there is no process, or when the
#          process exists but the web service cannot be reached.
status_jboss()
{
	pgrep -f "$PSTRING" > /dev/null || {
		echo "JBoss process[$RESOURCE_NAME] is not running."
		return $OCF_NOT_RUNNING
	}

	isrunning_jboss && {
		echo "JBoss[$RESOURCE_NAME] is running."
		return $OCF_SUCCESS
	}

	echo "JBoss process[$RESOURCE_NAME] is running."
	echo "But, we can not access JBoss web service."
	return $OCF_NOT_RUNNING
}
# Print the OCF resource-agent metadata (XML) for this agent on stdout.
# The heredoc is unquoted, so ${OCF_RESOURCE_INSTANCE} and $JAVA_HOME are
# expanded into the advertised defaults at call time.
# Returns: $OCF_SUCCESS
metadata_jboss()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="jboss">
<version>1.0</version>
<longdesc lang="en">
Resource script for Jboss. It manages a Jboss instance as an HA resource.
</longdesc>
<shortdesc lang="en">Manages a JBoss application server instance</shortdesc>
<parameters>
<parameter name="resource_name" unique="1" required="0">
<longdesc lang="en">
The name of the resource. Defaults to the name of the resource
instance.
</longdesc>
<shortdesc>The name of the resource</shortdesc>
<content type="string" default="${OCF_RESOURCE_INSTANCE}" />
</parameter>
<parameter name="console" unique="1" required="0">
<longdesc lang="en">
A destination of the log of jboss run and shutdown script.
</longdesc>
<shortdesc>jboss log path</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="shutdown_timeout" unique="0" required="0">
<longdesc lang="en">
Timeout for jboss bin/shutdown.sh. We wait for this timeout to
expire, then send the TERM and QUIT signals. Finally, the KILL
signal is used to terminate the jboss process. You should set the
timeout for the stop operation to a value bigger than the sum of
the timeout parameters. See also kill_timeout.
</longdesc>
<shortdesc>shutdown timeout</shortdesc>
<content type="integer" default="5" />
</parameter>
<parameter name="kill_timeout" unique="0" required="0">
<longdesc lang="en">
If bin/shutdown.sh doesn't stop the jboss process, then we send
it TERM and QUIT signals, intermittently and once a second. After
this timeout expires, if the process is still live, we use the
KILL signal. See also shutdown_timeout.
</longdesc>
<shortdesc>stop by signal timeout</shortdesc>
<content type="integer" default="10" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
A user name to start a JBoss.
</longdesc>
<shortdesc>A user name to start a resource.</shortdesc>
<content type="string" default="root"/>
</parameter>
<parameter name="statusurl" unique="0" required="0">
<longdesc lang="en">
URL to test in the monitor operation.
</longdesc>
<shortdesc>URL to test in the monitor operation.</shortdesc>
<content type="string" default="http://127.0.0.1:8080" />
</parameter>
<parameter name="java_home" unique="0" required="0">
<longdesc lang="en">
Home directory of Java. Defaults to the environment variable
JAVA_HOME. If it is not set, then define this parameter.
</longdesc>
<shortdesc>Home directory of Java.</shortdesc>
<content type="string" default="$JAVA_HOME"/>
</parameter>
<parameter name="java_opts" unique="0" required="0">
<longdesc lang="en">
Java options.
</longdesc>
<shortdesc>Java options.</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="jboss_home" unique="1" required="1">
<longdesc lang="en">
Home directory of Jboss.
</longdesc>
<shortdesc>Home directory of Jboss.</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="pstring" unique="0" required="0">
<longdesc lang="en">
With this string heartbeat matches for the right process to kill.
</longdesc>
<shortdesc>pkill/pgrep search string</shortdesc>
<content type="string" default="java -Dprogram.name=run.sh" />
</parameter>
<parameter name="run_opts" unique="0" required="0">
<longdesc lang="en">
Start options to start Jboss with, defaults are from the Jboss-Doku.
</longdesc>
<shortdesc>options for jboss run.sh</shortdesc>
<content type="string" default="-c default -l lpg4j" />
</parameter>
<parameter name="shutdown_opts" unique="0" required="0">
<longdesc lang="en">
Stop options to stop Jboss with.
</longdesc>
<shortdesc>options for jboss shutdown.sh</shortdesc>
<content type="string" default="-s 127.0.0.1:1099" />
</parameter>
<parameter name="rotate_consolelog" unique="0">
<longdesc lang="en">
Rotate console log flag.
</longdesc>
<shortdesc>Rotate console log flag</shortdesc>
<content type="string" default="false" />
</parameter>
<parameter name="rotate_value" unique="0">
<longdesc lang="en">
-console log rotation value (default is 86400 seconds).
+Console log rotation value (default is 86400 seconds).
</longdesc>
-<shortdesc>console log rotation value (default is 86400 seconds)</shortdesc>
+<shortdesc>Console log rotation value (default is 86400 seconds)</shortdesc>
<content type="integer" default="86400" />
</parameter>
<parameter name="rotate_logsuffix" unique="0">
<longdesc lang="en">
Rotate console log suffix.
</longdesc>
<shortdesc>Rotate console log suffix</shortdesc>
<content type="string" default=".%F" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="30s" />
<action name="monitor" depth="0" timeout="30s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5"/>
</actions>
</resource-agent>
END
return $OCF_SUCCESS
}
# Validate-all action. No parameter checks are performed here; the call is
# only logged.
# Returns: $OCF_SUCCESS
validate_all_jboss() {
	ocf_log info "validate_all_jboss[${RESOURCE_NAME}]"
	return $OCF_SUCCESS
}
# ---- Main: read parameters, sanity-check the installation, dispatch ----
COMMAND=$1
# Resource parameters. The "${var-default}" form substitutes the default
# only when the OCF_RESKEY_* variable is unset; a variable that is set but
# empty is used as-is.
RESOURCE_NAME="${OCF_RESKEY_resource_name-${OCF_RESOURCE_INSTANCE}}"
CONSOLE="${OCF_RESKEY_console-/var/log/${RESOURCE_NAME}.log}"
SHUTDOWN_TIMEOUT="${OCF_RESKEY_shutdown_timeout-5}"
KILL_TIMEOUT="${OCF_RESKEY_kill_timeout-10}"
JBOSS_USER="${OCF_RESKEY_user-root}"
STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}"
PSTRING="${OCF_RESKEY_pstring-java -Dprogram.name=run.sh}"
RUN_OPTS="${OCF_RESKEY_run_opts--c default -l lpg4j}"
SHUTDOWN_OPTS="${OCF_RESKEY_shutdown_opts--s 127.0.0.1:1099}"
ROTATELOG_FLG="${OCF_RESKEY_rotate_consolelog-false}"
ROTATEVALUE="${OCF_RESKEY_rotate_value-86400}"
ROTATELOG_SUFFIX="${OCF_RESKEY_rotate_logsuffix-.%F}"
# Exactly one argument (the action) is required.
if [ $# -ne 1 ]; then
usage
exit $OCF_ERR_ARGS
fi
# meta-data and help/usage are answered before the JAVA_HOME/JBOSS_HOME
# checks below, so they work even when those directories are missing.
if [ "$COMMAND" = "meta-data" ]; then
metadata_jboss
exit $OCF_SUCCESS
fi
if [ "$COMMAND" = "help" -o "$COMMAND" = "usage" ]; then
usage
exit $OCF_SUCCESS
fi
# test if these two are set and if directories exist and if the
# required scripts/binaries exist; use OCF_ERR_INSTALLED
JAVA_HOME="${OCF_RESKEY_java_home-${JAVA_HOME}}"
JAVA_OPTS="${OCF_RESKEY_java_opts}"
JBOSS_HOME="${OCF_RESKEY_jboss_home}"
# Exit code used for "status" when the installation is missing.
LSB_STATUS_STOPPED=3
# With a missing JAVA_HOME or JBOSS_HOME directory: stop succeeds, monitor
# reports not-running and status reports stopped; every other action fails
# with OCF_ERR_INSTALLED.
if [ ! -d "$JAVA_HOME" -o ! -d "$JBOSS_HOME" ]; then
case $COMMAND in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
esac
ocf_log err "JAVA_HOME or JBOSS_HOME does not exist."
exit $OCF_ERR_INSTALLED
fi
export JAVA_HOME JAVA_OPTS JBOSS_HOME
# Same policy when the java binary itself is not executable.
JAVA=${JAVA_HOME}/bin/java
if [ ! -x "$JAVA" ]; then
case $COMMAND in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
esac
ocf_log err "java command does not exist."
exit $OCF_ERR_INSTALLED
fi
# Dispatch the action; each handler's return value becomes the exit code.
case "$COMMAND" in
start)
start_jboss
func_status=$?
exit $func_status
;;
stop)
stop_jboss
func_status=$?
exit $func_status
;;
status)
status_jboss
exit $?
;;
monitor)
monitor_jboss
func_status=$?
exit $func_status
;;
validate-all)
validate_all_jboss
exit $?
;;
*)
usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/mysql-proxy b/heartbeat/mysql-proxy
index 48c6df3c1..024e97e0d 100755
--- a/heartbeat/mysql-proxy
+++ b/heartbeat/mysql-proxy
@@ -1,719 +1,719 @@
#!/bin/sh
#
# Resource script for MySQL Proxy
#
# Description: Manages MySQL Proxy as an OCF resource in
# a high-availability setup.
#
# Tested with MySQL Proxy 0.8.1 and 0.8.3 on Debian 6.0.
#
# Based on the mysql and Pure-Ftpd OCF resource agents.
#
# Author: Raoul Bhatia <r.bhatia@ipax.at> : Original Author
# License: GNU General Public License (GPL)
#
#
# usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data}
#
# The "start" arg starts a MySQL Proxy instance
#
# The "stop" arg stops it.
#
# TODO
# * add in-depth monitoring by querying the mysql-proxy admin port
#
# Test via
# (note: this did not work with MySQL Proxy 0.8.1 and ocf-tester from resource-agents 3.9.2 on Debian 6.0)
#
# * /usr/sbin/ocf-tester -n mp -o binary="/usr/sbin/mysql-proxy" -o defaults_file="" -o parameters="--proxy-skip-profiling" \
# -o admin_address="127.0.0.1:4041" -o admin_username="root" -o admin_password="la" -o admin_lua_script="/usr/lib/mysql-proxy/lua/admin.lua" \
# -o proxy_backend_addresses="192.168.100.200:42006" -o proxy_address="/var/run/mysqld/mysqld.sock" /usr/lib/ocf/resource.d/heartbeat/mysql-proxy
#
#
# OCF parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_client_binary
# OCF_RESKEY_defaults_file
# OCF_RESKEY_proxy_backend_addresses
# OCF_RESKEY_proxy_read_only_backend_addresses
# OCF_RESKEY_proxy_address
# OCF_RESKEY_log_level
# OCF_RESKEY_keepalive
# OCF_RESKEY_plugins
# OCF_RESKEY_admin_address
# OCF_RESKEY_admin_username
# OCF_RESKEY_admin_password
# OCF_RESKEY_admin_lua_script
# OCF_RESKEY_test_table
# OCF_RESKEY_test_user
# OCF_RESKEY_test_passwd
# OCF_RESKEY_parameters
# OCF_RESKEY_pidfile
#
##########################################################################
# Initialization:
# Source the OCF shell function library; OCF_FUNCTIONS_DIR defaults to the
# directory under $OCF_ROOT only when it is not already set.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Parameter defaults. The "=" form assigns only when the variable is unset,
# so a parameter explicitly set to the empty string stays empty.
: ${OCF_RESKEY_binary="/usr/sbin/mysql-proxy"}
: ${OCF_RESKEY_client_binary="mysql"}
: ${OCF_RESKEY_defaults_file=""}
: ${OCF_RESKEY_proxy_backend_addresses="127.0.0.1:3306"}
: ${OCF_RESKEY_proxy_read_only_backend_addresses=""}
: ${OCF_RESKEY_proxy_address=":4040"}
: ${OCF_RESKEY_log_level=""}
: ${OCF_RESKEY_keepalive=""}
: ${OCF_RESKEY_plugins=""}
: ${OCF_RESKEY_admin_address="127.0.0.1:4041"}
: ${OCF_RESKEY_admin_username=""}
: ${OCF_RESKEY_admin_password=""}
: ${OCF_RESKEY_admin_lua_script=""}
: ${OCF_RESKEY_test_table="mysql.user"}
: ${OCF_RESKEY_test_user=""}
: ${OCF_RESKEY_test_passwd=""}
: ${OCF_RESKEY_parameters=""}
: ${OCF_RESKEY_pidfile="${HA_RSCTMP}/mysql-proxy-${OCF_RESOURCE_INSTANCE}.pid"}
# One-line usage string printed by usage().
USAGE="Usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data}"
##########################################################################
# Print the one-line usage string ($USAGE) on stderr.
# The expansion is quoted so the text is emitted verbatim: no whitespace
# collapsing and no pathname expansion of characters coming from $0.
usage() {
	printf '%s\n' "$USAGE" >&2
}
# Print the OCF resource-agent metadata (XML) for mysql-proxy on stdout.
# The heredoc is unquoted, so ${HA_RSCTMP} and ${OCF_RESOURCE_INSTANCE} are
# expanded into the advertised pidfile default at call time.
# The advertised default of proxy_read_only_backend_addresses is empty, in
# line with the OCF_RESKEY_proxy_read_only_backend_addresses default used
# by this agent and with the parameter's own description.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql-proxy">
<version>0.1</version>
<longdesc lang="en">
This script manages MySQL Proxy as an OCF resource in a high-availability setup.
The default monitor operation will verify that mysql-proxy is running.
The level 10 monitor operation is left out intentionally for possible future enhancements in conjunction with the admin plugin.
-The level 20 monitor operation will perform a SELECT on a given table to verify that the connection to a backend-server is actually working.
+The level 20 monitor operation will perform a SELECT on a given table to verify that the connection to a back-end server is actually working.
Tested with MySQL Proxy 0.8.1 and 0.8.3 on Debian 6.0.
</longdesc>
<shortdesc lang="en">Manages a MySQL Proxy instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Full path to the MySQL Proxy binary.
For example, "/usr/sbin/mysql-proxy".
</longdesc>
<shortdesc lang="en">Full path to MySQL Proxy binary</shortdesc>
<content type="string" default="/usr/sbin/mysql-proxy" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
-Location of the MySQL client binary
+Location of the MySQL client binary.
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="mysql" />
</parameter>
<parameter name="defaults_file" unique="0" required="0">
<longdesc lang="en">
Full path to a MySQL Proxy configuration file.
For example, "/etc/mysql-proxy.conf".
</longdesc>
<shortdesc lang="en">Full path to configuration file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="proxy_backend_addresses" unique="0" required="0">
<longdesc lang="en">
-Address:port of the remote backend-servers (default: 127.0.0.1:3306).
+Address:port of the remote back-end servers (default: 127.0.0.1:3306).
</longdesc>
-<shortdesc lang="en">MySQL Proxy backend-servers</shortdesc>
+<shortdesc lang="en">MySQL Proxy back-end servers</shortdesc>
<content type="string" default="127.0.0.1:3306" />
</parameter>
<parameter name="proxy_read_only_backend_addresses" unique="0" required="0">
<longdesc lang="en">
Address:port of the remote (read only) slave-server (default: ).
</longdesc>
-<shortdesc lang="en">MySql Proxy read only backend-servers</shortdesc>
+<shortdesc lang="en">MySql Proxy read only back-end servers</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="proxy_address" unique="0" required="0">
<longdesc lang="en">
-Listening address:port of the proxy-server (default: :4040).
+Listening address:port of the proxy server (default: :4040).
You can also specify a socket like "/tmp/mysql-proxy.sock".
</longdesc>
<shortdesc lang="en">MySQL Proxy listening address</shortdesc>
<content type="string" default=":4040" />
</parameter>
<parameter name="log_level" unique="0" required="0">
<longdesc lang="en">
Log all messages of level (error|warning|info|message|debug|) or higher.
An empty value disables logging.
</longdesc>
<shortdesc lang="en">MySQL Proxy log level.</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="keepalive" unique="0" required="0">
<longdesc lang="en">
Try to restart the proxy if it crashed (default: ).
Valid values: true or false. An empty value equals "false".
</longdesc>
<shortdesc lang="en">Use keepalive option</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="plugins" unique="0" required="0">
<longdesc lang="en">
Whitespace separated list of plugins to load (default: ).
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy plugins</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="admin_address" unique="0" required="0">
<longdesc lang="en">
Listening address:port of the admin plugin (default: 127.0.0.1:4041).
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy admin plugin listening address</shortdesc>
<content type="string" default="127.0.0.1:4041" />
</parameter>
<parameter name="admin_username" unique="0" required="0">
<longdesc lang="en">
Username for the admin plugin (default: ).
Required since MySQL Proxy 0.8.1, if the admin plugin is loaded.
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy admin plugin username</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="admin_password" unique="0" required="0">
<longdesc lang="en">
Password for the admin plugin (default: ).
Required since MySQL Proxy 0.8.1, if the admin plugin is loaded.
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy admin plugin password</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="admin_lua_script" unique="0" required="0">
<longdesc lang="en">
Script to execute by the admin plugin.
Required since MySQL Proxy 0.8.1, if the admin plugin is loaded.
Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter.
</longdesc>
<shortdesc lang="en">MySQL Proxy admin plugin lua script</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="test_table" unique="0" required="0">
<longdesc lang="en">
Table to be tested in monitor statement (in database.table notation)
</longdesc>
<shortdesc lang="en">MySQL test table</shortdesc>
<content type="string" default="mysql.user" />
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MySQL test user
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL test user password
</longdesc>
<shortdesc lang="en">MySQL test user password</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="parameters" unique="0" required="0">
<longdesc lang="en">
The MySQL Proxy daemon may be called with additional parameters.
Specify any of them here.
</longdesc>
<shortdesc lang="en">MySQL Proxy additional parameters</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="pidfile" unique="1" required="0">
<longdesc lang="en">PID file</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="${HA_RSCTMP}/mysql-proxy-${OCF_RESOURCE_INSTANCE}.pid" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30s" />
<action name="stop" timeout="30s" />
<action name="reload" timeout="30s" />
<action name="monitor" depth="0" timeout="20s" interval="60s" />
<action name="validate-all" timeout="30s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# Check whether a process can be signalled.
# Arguments: $1 - PID to test
# Returns:   the exit status of kill with signal 0 (0 when the signal
#            could be sent); kill's error output is discarded.
isRunning()
{
	kill -0 "$1" 2> /dev/null
}
# Report whether MySQL Proxy is running, based on the first line of
# ${pidfile}.
# Returns: the result of isRunning for that PID; non-zero (1) when the
#          pidfile is missing or its first line is empty.
mysqlproxy_status()
{
	local first_pid

	# No pidfile: MySQL Proxy is not running.
	[ -f "${pidfile}" ] || return 1

	first_pid=$(head -n 1 "${pidfile}")
	[ -n "$first_pid" ] || return 1

	# MySQL Proxy is probably running; confirm the PID is alive.
	isRunning "$first_pid"
}
# Start action: assemble the mysql-proxy command line from the agent's
# parameters and launch "$binary --daemon ...".
# Globals:  reads binary, pidfile, plugins, plugin_support, defaults_file,
#           log_level, keepalive, admin_*, proxy_address,
#           proxy_backend_addresses, proxy_read_only_backend_addresses,
#           parameters
# Returns:  $OCF_SUCCESS if already running or the binary exits 0,
#           $OCF_ERR_GENERIC if the binary exits non-zero.
mysqlproxy_start()
{
	local PARAM_PREFIX OPTIONS
	local p pa pba proba
	local pid_dir socket_dir
	local ret

	# if MySQL Proxy is running return success
	if mysqlproxy_status ; then
		ocf_log info "MySQL Proxy already running."
		return $OCF_SUCCESS
	fi

	PARAM_PREFIX=''

	# MySQL Proxy plugins to load
	# @TODO check if the plugins are actually available?
	if ocf_is_true "$plugin_support"; then
		for p in $plugins; do
			PARAM_PREFIX="$PARAM_PREFIX --plugins=$p"
		done
	fi

	# check if the MySQL Proxy defaults-file exist
	if [ -f "$defaults_file" ]; then
		PARAM_PREFIX="$PARAM_PREFIX --defaults-file=$defaults_file"
	fi

	# set log-level
	if [ -n "$log_level" ]; then
		PARAM_PREFIX="$PARAM_PREFIX --log-level=$log_level"
	fi

	# set keepalive
	if [ "$keepalive" = "true" ]; then
		PARAM_PREFIX="$PARAM_PREFIX --keepalive"
	fi

	# honor admin_* options
	if [ -n "$admin_username" ]; then
		PARAM_PREFIX="$PARAM_PREFIX --admin-username=$admin_username"
	fi
	if [ -n "$admin_password" ]; then
		PARAM_PREFIX="$PARAM_PREFIX --admin-password=$admin_password"
	fi
	if [ -n "$admin_lua_script" ]; then
		PARAM_PREFIX="$PARAM_PREFIX --admin-lua-script=$admin_lua_script"
	fi

	# make sure that the pid directory exists (paths are quoted so a
	# directory name is never split or glob-expanded)
	pid_dir=$(dirname "$pidfile")
	if [ ! -d "$pid_dir" ] ; then
		ocf_log info "Creating PID directory '$pid_dir'."
		mkdir -p "$pid_dir"
		#chown $OCF_RESKEY_user:$OCF_RESKEY_group $pid_dir # c/p from mysql ra; currently not needed
	fi

	# split multiple proxy-address options.
	# currently unsupported but let us hope for the future ;)
	for pa in $proxy_address; do
		[ -z "$pa" ] && continue
		OPTIONS=" $OPTIONS --proxy-address=$pa"
		# if $pa contains a slash, we are dealing with a socket
		# make sure that the socket directory exists
		case "$pa" in
		*/*)
			socket_dir=$(dirname "$pa")
			if [ ! -d "$socket_dir" ] ; then
				ocf_log info "Creating socket directory '$socket_dir'."
				mkdir -p "$socket_dir"
				#chown $OCF_RESKEY_user:$OCF_RESKEY_group $socket_dir # c/p from mysql ra; currently not needed
			fi
			;;
		esac
	done

	# split multiple proxy-backend-addresses options.
	for pba in $proxy_backend_addresses; do
		[ -z "$pba" ] && continue
		OPTIONS=" $OPTIONS --proxy-backend-addresses=$pba"
	done

	# split multiple proxy-read-only-backend-addresses options.
	for proba in $proxy_read_only_backend_addresses; do
		[ -z "$proba" ] && continue
		OPTIONS=" $OPTIONS --proxy-read-only-backend-addresses=$proba"
	done

	# build $OPTIONS and add admin-address and pidfile
	OPTIONS="$PARAM_PREFIX $OPTIONS --admin-address=$admin_address --pid-file=${pidfile}"

	# add additional parameters
	if [ -n "$parameters" ]; then
		OPTIONS="$OPTIONS $parameters"
	fi

	# start MySQL Proxy; $OPTIONS is intentionally unquoted so the
	# assembled string is split into individual arguments.
	#start-stop-daemon --start --quiet --pidfile $pidfile --make-pidfile --name mysql-proxy --startas $binary -b -- $OPTIONS
	$binary --daemon $OPTIONS
	ret=$?
	if [ $ret -ne 0 ]; then
		ocf_log err "MySQL Proxy returned error: " $ret
		return $OCF_ERR_GENERIC
	fi

	# @TODO add an initial monitoring action?
	return $OCF_SUCCESS
}
# Stop action: send the default signal of /bin/kill to the PID(s) in
# ${pidfile}, wait one second, and verify the proxy is gone. On success any
# socket files listed in $proxy_address and the pidfile are removed.
# Returns: $OCF_SUCCESS when the proxy was not running or stopped cleanly,
#          $OCF_ERR_GENERIC when kill failed or the proxy is still running
#          after the one-second grace period.
mysqlproxy_stop()
{
	local ret
	local pa

	# Nothing to do when the proxy is not running.
	mysqlproxy_status || return $OCF_SUCCESS

	#start-stop-daemon --stop --quiet --retry 3 --exec $binary --pidfile $pidfile
	/bin/kill $(cat "${pidfile}")
	ret=$?
	if [ $ret -ne 0 ]; then
		ocf_log err "MySQL Proxy returned an error while stopping: " $ret
		return $OCF_ERR_GENERIC
	fi

	# grant some time for shutdown and recheck
	sleep 1
	if mysqlproxy_status ; then
		ocf_log err "MySQL Proxy failed to stop."
		return $OCF_ERR_GENERIC
	fi

	# remove dangling socketfile, if specified
	for pa in $proxy_address; do
		[ -S "$pa" ] || continue
		ocf_log info "Removing dangling socket file '$pa'."
		rm -f "$pa"
	done

	# remove dangling pidfile
	if [ -f "${pidfile}" ]; then
		ocf_log info "Removing dangling pidfile '${pidfile}'."
		rm -f "${pidfile}"
	fi

	return $OCF_SUCCESS
}
# Reload action: send SIGHUP to the PID(s) in ${pidfile} when the proxy is
# running.
# Returns: 0 when the proxy is not running, otherwise the status of kill.
mysqlproxy_reload()
{
	# @TODO check if pidfile is empty
	# PID=`head -n 1 "${pidfile}"`
	# if [ ! -z "$PID" ] ; then
	mysqlproxy_status || return 0

	ocf_log info "Reloading MySQL Proxy."
	kill -s HUP $(cat ${pidfile})
}
# Monitor action.
#  - On a probe (OCF_RESKEY_CRM_meta_interval unset or 0) the configuration
#    is validated first and a validation failure is returned as-is.
#  - $OCF_NOT_RUNNING is returned when mysqlproxy_status fails.
#  - With OCF_CHECK_LEVEL=20 the in-depth check mysqlproxy_monitor_20 runs
#    and its non-zero status is returned.
# Returns: $OCF_SUCCESS when every applicable check passes.
mysqlproxy_monitor()
{
	if [ "${OCF_RESKEY_CRM_meta_interval:-0}" -eq "0" ]; then
		# in case of probe, monitor operation is surely treated as
		# under suspension. This will call start operation.
		# (c/p from ocf:heartbeat:sfex)
		mysqlproxy_validate_all || return $?
	fi

	mysqlproxy_status || return $OCF_NOT_RUNNING

	if [ $OCF_CHECK_LEVEL -eq 20 ]; then
		mysqlproxy_monitor_20 || return $?
	fi

	return $OCF_SUCCESS
}
# In-depth monitor (OCF_CHECK_LEVEL=20): for every entry in $proxy_address,
# run "SELECT COUNT(*)" on $OCF_RESKEY_test_table through the proxy using
# the $mysql client.
# An entry that is an existing socket file is passed as --socket; any other
# entry is treated as host:port, and each part is passed only when it is
# non-empty (":4040" yields just --port, "dbhost" yields just --host).
# Returns: $OCF_SUCCESS when all queries succeed, or when the check is
#          skipped because the test table is unset or both test user and
#          test password are empty; $OCF_ERR_GENERIC on the first failing
#          query.
mysqlproxy_monitor_20()
{
	local rc
	local mysql_options pa
	local mysql_server_parameter mysql_server_host mysql_server_port

	# Skip when: table empty, OR (user empty AND password empty).
	if [ -z "$OCF_RESKEY_test_table" ] ||
	   { [ -z "$OCF_RESKEY_test_user" ] && [ -z "$OCF_RESKEY_test_passwd" ]; }; then
		ocf_log warn "Missing proper configuration for OCF_CHECK_LEVEL=20 (test_table=[$OCF_RESKEY_test_table] test_user=[$OCF_RESKEY_test_user] test_password=[$OCF_RESKEY_test_passwd]). Not running in-depth monitoring."
		return $OCF_SUCCESS
	fi

	mysql_options="--connect_timeout=10 --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"

	# cycle each address
	for pa in $proxy_address; do
		# build correct connect parameter
		if [ -S "$pa" ]; then
			# we need to monitor a mysql socket
			mysql_server_parameter="--socket=$pa"
		else
			# we need to monitor a host address
			mysql_server_parameter=""
			# split host:port
			# @TODO correctly handle IPv6 address
			# @TODO correctly handle 0.0.0.0 address
			mysql_server_host=$(echo "$pa" | cut -d : -f 1)
			# -s: print nothing when there is no ':' at all, instead of
			# echoing the whole address back as the "port".
			mysql_server_port=$(echo "$pa" | cut -s -d : -f 2)
			# The operands must be quoted: an unquoted empty value turns
			# "[ -n ]" into a one-argument test that is always true.
			if [ -n "$mysql_server_host" ]; then
				mysql_server_parameter="$mysql_server_parameter --host=$mysql_server_host"
			fi
			if [ -n "$mysql_server_port" ]; then
				mysql_server_parameter="$mysql_server_parameter --port=$mysql_server_port"
			fi
		fi

		# Check for test table
		ocf_run $mysql $mysql_server_parameter $mysql_options \
			-e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table"
		rc=$?
		if [ $rc -ne 0 ]; then
			ocf_log err "Failed to select from $OCF_RESKEY_test_table: " $rc
			return $OCF_ERR_GENERIC
		fi
	done

	return $OCF_SUCCESS
}
# Validate the agent configuration.
# Globals:  reads binary, mysql, log_level, version, admin_*, plugin_support,
#           OCF_CHECK_LEVEL, __OCF_ACTION; may write plugins and
#           has_plugin_admin.
# Returns:  $OCF_SUCCESS, or $OCF_ERR_CONFIGURED for an invalid log level.
# Exits:    with $OCF_ERR_CONFIGURED (terminating the agent) when a required
#           admin_* parameter is missing.
mysqlproxy_validate_all()
{
	# local variables
	local config_error=0
	local ret

	# check that the MySQL Proxy binary exists and can be executed
	check_binary "$binary"

	# check MySQL client binary only if in-depth monitoring is requested
	# do not break backwards compatibility otherwise
	if [ "$OCF_CHECK_LEVEL" -gt 0 ]; then
		check_binary "$mysql"
	fi

	# check for valid log-level (grep -E replaces the obsolescent egrep,
	# which newer GNU grep releases warn about on every call)
	if ! echo $log_level | grep -Eq "^(error|warning|info|message|debug|)$"; then
		ocf_log err "MySQL Proxy log level '$log_level' not in valid range error|warning|info|message|debug"
		return $OCF_ERR_CONFIGURED
	fi

	# if we're running MySQL Proxy > 0.8.1 and there is any admin parameter set,
	# explicitly load the admin (and the proxy) plugin.
	# (version 0.8.2 does not load the admin plugin by default anymore)
	ocf_version_cmp "$version" "0.8.1"
	ret=$?
	if [ "$ret" -eq 2 ]; then
		# simple check: concat all parameters and check if the string has non-zero length
		if [ -n "$admin_username$admin_password$admin_lua_script$admin_address" ]; then
			plugins="proxy admin"
			has_plugin_admin=1
		else
			has_plugin_admin=0
		fi
	fi

	# check for required admin_* parameters for 0.8.1 and 0.8.2 (with admin module)
	# translated: if (version == 0.8.1 or (version > 0.8.1 and has_plugin_admin))
	if [ "$ret" -eq 1 ] ||
	   { [ "$ret" -eq 2 ] && [ "$has_plugin_admin" -eq 1 ]; }; then
		if [ -z "$admin_username" ]; then
			ocf_log err "Missing required parameter \"admin_username\""
			config_error=1
		fi
		if [ -z "$admin_password" ]; then
			ocf_log err "Missing required parameter \"admin_password\""
			config_error=1
		fi
		if [ -z "$admin_lua_script" ]; then
			ocf_log err "Missing required parameter \"admin_lua_script\""
			config_error=1
		fi
		# check if the admin_lua_script, if specified, exists
		# (logged only; this does not mark the configuration as invalid)
		if [ -n "$admin_lua_script" ] && [ ! -e "$admin_lua_script" ]; then
			ocf_log err "MySQL Proxy admin lua script '$admin_lua_script' does not exist or is not readable."
		fi
	fi

	# issue a warning during start if the user wants to load a plugin
	# but this version of MySQL Proxy does not support the plugin architecture.
	if [ -n "$plugins" ] && ocf_is_false "$plugin_support" && [ "$__OCF_ACTION" = 'start' ]; then
		ocf_log warn "You are running MySQL Proxy version '$version'. This version does not support the plugin architecture. Please use version 0.7.0 or later to load the plugins '$plugins'."
	fi

	# exit in case we have found relevant config errors
	if [ $config_error -eq 1 ]; then
		exit $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}
#
# Main
#
# Exactly one argument (the action) is required.
if [ $# -ne 1 ]; then
usage
exit $OCF_ERR_ARGS
fi
# Copy the OCF_RESKEY_* parameters into the short variable names used by
# the functions above.
pidfile=$OCF_RESKEY_pidfile
binary=$OCF_RESKEY_binary
defaults_file=$OCF_RESKEY_defaults_file
proxy_backend_addresses=$OCF_RESKEY_proxy_backend_addresses
proxy_read_only_backend_addresses=$OCF_RESKEY_proxy_read_only_backend_addresses
admin_address=$OCF_RESKEY_admin_address
admin_username=$OCF_RESKEY_admin_username
admin_password=$OCF_RESKEY_admin_password
admin_lua_script=$OCF_RESKEY_admin_lua_script
proxy_address=$OCF_RESKEY_proxy_address
log_level=$OCF_RESKEY_log_level
keepalive=$OCF_RESKEY_keepalive
# Normalise the plugin list: one name per line, sorted, duplicates removed.
plugins=`echo $OCF_RESKEY_plugins | tr "[:space:]" "\n" | sort -u`
mysql=$OCF_RESKEY_client_binary
parameters=$OCF_RESKEY_parameters
# Assume no plugin support until the version check below says otherwise.
plugin_support=false
has_plugin_admin=0 # 0 because this simplifies the if statements
# debugging stuff
#echo OCF_RESKEY_binary=$OCF_RESKEY_binary >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_defaults_file=$OCF_RESKEY_defaults_file >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_proxy_backend_addresses=$OCF_RESKEY_proxy_backend_addresses >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_proxy_read_only_backend_addresses=$OCF_RESKEY_proxy_read_only_backend_addresses >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_proxy_address=$OCF_RESKEY_proxy_address >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_log_level=$OCF_RESKEY_log_level >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_keepalive=$OCF_RESKEY_keepalive >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_admin_address=$OCF_RESKEY_admin_address >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_admin_username=$OCF_RESKEY_admin_username >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_admin_password=$OCF_RESKEY_admin_password >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_admin_lua_script=$OCF_RESKEY_admin_lua_script >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_parameters=$OCF_RESKEY_parameters >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
#echo OCF_RESKEY_pidfile=$OCF_RESKEY_pidfile >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE
# handle some parameters before performing any additional checks
# (meta-data and usage do not need the mysql-proxy binary)
case $1 in
meta-data) meta_data
exit $?
;;
usage) usage
exit $OCF_SUCCESS
;;
esac
# determine MySQL Proxy version
# (last field of the "--version" output line that starts with "mysql-proxy")
check_binary $binary
version=`$binary --version | grep ^mysql-proxy | awk '{print $NF}'`
# version 0.7.0 (and later) support the plugin architecture and load the admin plugin by default
# version 0.8.1 loads admin plugin by default and requires the admin parameters to be set
# version 0.8.2 does not load the admin plugin by default anymore
ocf_version_cmp "$version" "0.7.0"
ret=$?
# Results 1 and 2 are treated as "0.7.0 or later" here.
if [ $ret -eq 1 -o $ret -eq 2 ]; then
plugin_support=true
has_plugin_admin=1
fi
# perform action
# start and stop validate the configuration first; reload, status and
# monitor do not call the validation from here.
case $1 in
start) mysqlproxy_validate_all &&
mysqlproxy_start
exit $?
;;
stop) mysqlproxy_validate_all &&
mysqlproxy_stop
exit $?
;;
reload) mysqlproxy_reload
exit $?
;;
status) if mysqlproxy_status; then
ocf_log info "MySQL Proxy is running."
exit $OCF_SUCCESS
else
ocf_log info "MySQL Proxy is stopped."
exit $OCF_NOT_RUNNING
fi
;;
monitor) mysqlproxy_monitor
exit $?
;;
validate-all) mysqlproxy_validate_all
exit $?
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index aaaf27a2f..2f62df4b1 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -1,484 +1,484 @@
#!/bin/sh
# nfsserver
#
# Description: Manages nfs server as OCF resource
# by hxinwei@gmail.com
# License: GNU General Public License v2 (GPLv2) and later
# Load the OCF shell function library; a library named by
# OCF_DEBUG_LIBRARY takes precedence over the regular one.
if [ -z "$OCF_DEBUG_LIBRARY" ]; then
    : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
    . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
else
    . $OCF_DEBUG_LIBRARY
fi

# Defaults used when the matching resource parameter is not set
DEFAULT_INIT_SCRIPT="/etc/init.d/nfsserver"
DEFAULT_NOTIFY_CMD="/sbin/sm-notify"
DEFAULT_NOTIFY_FOREGROUND="false"
DEFAULT_RPCPIPEFS_DIR="/var/lib/nfs/rpc_pipefs"

# Script-wide state; EXEC_MODE 0 means "not determined yet"
EXEC_MODE=0
SELINUX_ENABLED=-1
STATD_PATH="/var/lib/nfs"
STATD_DIR=""
# nfsserver_meta_data: print the resource agent metadata (XML) on stdout.
#
# The here-document delimiter is unquoted, so the DEFAULT_* variables
# referenced in the text are expanded when the function runs.
#
# Returns: $OCF_SUCCESS
nfsserver_meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nfsserver">
<version>1.0</version>
<longdesc lang="en">
Nfsserver helps to manage the Linux nfs server as a failover-able resource in Linux-HA.
It depends on Linux specific NFS implementation details, so is considered not portable to other platforms yet.
</longdesc>
<shortdesc lang="en">Manages an NFS server</shortdesc>
<parameters>
<parameter name="nfs_init_script" unique="0" required="0">
<longdesc lang="en">
The default init script shipped with the Linux distro.
The nfsserver resource agent offloads the start/stop/monitor
work to the init script because the procedure to start/stop/monitor
nfsserver varies on different Linux distro. In the event that this
option is not set, this agent will attempt to use an init script at
this location, ${DEFAULT_INIT_SCRIPT}, or detect a systemd unit-file
to use in the event that no init script is detected.
</longdesc>
<shortdesc lang="en">
Init script for nfsserver
</shortdesc>
<content type="string" default="auto detected" />
</parameter>
<parameter name="nfs_notify_cmd" unique="0" required="0">
<longdesc lang="en">
-The tool to send out NSM reboot notification, it should be either sm-notify or rpc.statd.
+The tool to send out NSM reboot notification; it should be either sm-notify or rpc.statd.
Failover of nfsserver can be considered as rebooting to different machines.
-The nfsserver resource agent use this command to notify all clients about the happening of failover.
+The nfsserver resource agent use this command to notify all clients about the occurrence of failover.
</longdesc>
<shortdesc lang="en">
The tool to send out notification.
</shortdesc>
<content type="string" default="$DEFAULT_NOTIFY_CMD" />
</parameter>
<parameter name="nfs_notify_foreground" unique="0" required="0">
<longdesc lang="en">
Keeps the notify tool attached to its controlling terminal and running in the foreground.
</longdesc>
<shortdesc lang="en">
Keeps the notify tool running in the foreground.
</shortdesc>
<content type="boolean" default="$DEFAULT_NOTIFY_FOREGROUND" />
</parameter>
<parameter name="nfs_smnotify_retry_time" unique="0" required="0">
<longdesc lang="en">
Specifies the length of sm-notify retry time, in minutes, to continue retrying notifications to unresponsive hosts.
If this option is not specified, sm-notify attempts to send notifications for 15 minutes. Specifying a value of 0
causes sm-notify to continue sending notifications to unresponsive peers until it is manually killed.
</longdesc>
<shortdesc lang="en">
-Specifies the length of sm-notify retry time(minutes).
+Specifies the length of sm-notify retry time (minutes).
</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="nfs_shared_infodir" unique="0" required="1">
<longdesc lang="en">
The nfsserver resource agent will save nfs related information in this specific directory.
And this directory must be able to fail-over before nfsserver itself.
</longdesc>
<shortdesc lang="en">
Directory to store nfs server related information.
</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="nfs_ip" unique="0" required="1">
<longdesc lang="en">
Comma separated list of floating IP addresses used to access the nfs service
</longdesc>
<shortdesc lang="en">
IP addresses.
</shortdesc>
<content type="string"/>
</parameter>
<parameter name="rpcpipefs_dir" unique="0" required="0">
<longdesc lang="en">
-The mount point for the sunrpc file system. Default is $DEFAULT_RPCPIPEFS_DIR .
-This script will mount(bind) nfs_shared_infodir on /var/lib/nfs/ (can not be changed),
+The mount point for the sunrpc file system. Default is $DEFAULT_RPCPIPEFS_DIR.
+This script will mount (bind) nfs_shared_infodir on /var/lib/nfs/ (cannot be changed),
and this script will mount the sunrpc file system on $DEFAULT_RPCPIPEFS_DIR (default, can be changed by this parameter).
-If you want to move only rpc_pipefs/ (e.g. to keep rpc_pipefs/ local) from default , please set this value.
+If you want to move only rpc_pipefs/ (e.g. to keep rpc_pipefs/ local) from default, please set this value.
</longdesc>
<shortdesc lang="en">
The mount point for the sunrpc file system.
</shortdesc>
<content type="string" default="$DEFAULT_RPCPIPEFS_DIR" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="40" />
<action name="stop" timeout="20s" />
<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
# The metadata text above is static apart from the expanded defaults
return $OCF_SUCCESS
}
# nfsserver_usage: print the one-line synopsis of supported actions on stdout.
nfsserver_usage() {
    printf 'usage: %s {start|stop|monitor|status|validate-all|meta-data}\n' "$0"
}
# Exactly one command line argument is accepted.
[ $# -eq 1 ] || {
    nfsserver_usage
    exit $OCF_ERR_ARGS
}

# meta-data and usage/help are answered here, before any resource
# parameter is looked at; every other action falls through.
if [ "$__OCF_ACTION" = "meta-data" ]; then
    nfsserver_meta_data
    exit $OCF_SUCCESS
elif [ "$__OCF_ACTION" = "usage" ] || [ "$__OCF_ACTION" = "help" ]; then
    nfsserver_usage
    exit $OCF_SUCCESS
fi
# fp: the shared info directory that gets bind-mounted over /var/lib/nfs
fp="$OCF_RESKEY_nfs_shared_infodir"

# Fall back to the built-in defaults only when the parameter is unset
: ${OCF_RESKEY_nfs_notify_cmd="$DEFAULT_NOTIFY_CMD"}
: ${OCF_RESKEY_nfs_notify_foreground="$DEFAULT_NOTIFY_FOREGROUND"}

# Without an explicit rpcpipefs_dir the rpc_pipefs directory is created
# below the shared info directory and unmounted from the default location;
# with one, the same directory is used for both.
# The parameter is quoted so that the test is well-formed for any value.
if [ -z "${OCF_RESKEY_rpcpipefs_dir}" ]; then
    rpcpipefs_make_dir="$fp/rpc_pipefs"
    rpcpipefs_umount_dir="${DEFAULT_RPCPIPEFS_DIR}"
else
    rpcpipefs_make_dir="${OCF_RESKEY_rpcpipefs_dir}"
    rpcpipefs_umount_dir="${OCF_RESKEY_rpcpipefs_dir}"
fi
# Use statd folder if it exists
[ -d "/var/lib/nfs/statd" ] && {
    STATD_DIR="statd"
    STATD_PATH="/var/lib/nfs/statd"
}

# SELinux information. We are taking the permissions from
# the current statd dir and applying it to the HA one that is
# being mounted in its place.
# SELINUX_ENABLED holds the exit status of the probe (0 = enabled).
if which restorecon > /dev/null 2>&1 && selinuxenabled; then
    SELINUX_ENABLED=0
    SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')"
    export SELINUX_LABEL
else
    SELINUX_ENABLED=$?
fi
##
# set_exec_mode: decide how the NFS server is controlled and record the
# result in EXEC_MODE.
#
# EXEC_MODE values
#  1  user init script or default init script
#  2  systemd
#
# On error, this function will terminate the process
# with error code $OCF_ERR_INSTALLED
##
set_exec_mode()
{
    # EXEC_MODE already decided by an earlier call: nothing to do.
    [ $EXEC_MODE -ne 0 ] && return 0

    # A user supplied init script must exist for us to continue;
    # check_binary will exit the process if it does not.
    if [ -n "$OCF_RESKEY_nfs_init_script" ]; then
        check_binary ${OCF_RESKEY_nfs_init_script}
        EXEC_MODE=1
        return 0
    fi

    # Otherwise prefer the default init script when it is present.
    if which $DEFAULT_INIT_SCRIPT > /dev/null 2>&1; then
        OCF_RESKEY_nfs_init_script=$DEFAULT_INIT_SCRIPT
        EXEC_MODE=1
        return 0
    fi

    # Last of all, attempt systemd: both the nfs-server and the nfs-lock
    # unit files have to be listed.
    if which systemctl > /dev/null 2>&1 &&
        systemctl list-unit-files | grep nfs-server > /dev/null &&
        systemctl list-unit-files | grep nfs-lock > /dev/null; then
        EXEC_MODE=2
        return 0
    fi

    ocf_log err "No init script or systemd unit file detected for nfs server"
    exit $OCF_ERR_INSTALLED
}
# nfs_systemd_exec: run one systemctl sub-command against both the
# nfs-server and the nfs-lock service.
#
# Arguments: $1 - systemctl sub-command
# Returns:   the common exit status of both calls, or $OCF_ERR_GENERIC
#            when the two calls did not return the same status
nfs_systemd_exec()
{
    local cmd=$1
    local server_res
    local lock_res
    local order
    local unit
    local unit_rc

    # "stop" handles the server before the lock service; every other
    # command handles the lock service first.
    if [ "$cmd" = "stop" ]; then
        order="nfs-server nfs-lock"
    else
        order="nfs-lock nfs-server"
    fi

    for unit in $order; do
        systemctl $cmd $unit.service
        unit_rc=$?
        if [ "$unit" = "nfs-server" ]; then
            server_res=$unit_rc
        else
            lock_res=$unit_rc
        fi
    done

    if [ $lock_res -ne $server_res ]; then
        # If one is running and the other isn't, or for whatever other reason
        # the return code's aren't the same, this is bad.
        ocf_log err "Systemd services nfs-lock and nfs-server are not in the same state after attempting $cmd command"
        return $OCF_ERR_GENERIC
    fi

    return $server_res
}
##
# nfs_exec: wrapper for init script and systemd calls.
#
# Arguments: $1 - command handed to the init script or to nfs_systemd_exec
# Returns:   the status of whichever backend was run
##
nfs_exec()
{
    local cmd=$1

    set_exec_mode

    if [ "$EXEC_MODE" = "1" ]; then
        ${OCF_RESKEY_nfs_init_script} $cmd
    elif [ "$EXEC_MODE" = "2" ]; then
        nfs_systemd_exec $cmd
    fi
}
# nfsserver_monitor: query the NFS server state through nfs_exec.
#
# The output of the status command is captured in a temporary file and
# written to the debug log.
#
# Returns: $OCF_SUCCESS      when the status command returned 0
#          $OCF_NOT_RUNNING  when it returned 3
#          $OCF_ERR_GENERIC  for any other status
nfsserver_monitor ()
{
    local fn
    local rc

    fn=`mktemp`
    # nfs_exec is run in the current shell (not in a command substitution)
    # so that whatever set_exec_mode caches stays visible to later calls.
    nfs_exec status > "$fn" 2>&1
    rc=$?
    # Quoted on purpose: the captured text must reach the log as one
    # argument, without word splitting or pathname expansion.
    ocf_log debug "$(cat "$fn")"
    rm -f "$fn"

    # Adapt LSB status code to OCF return code
    if [ $rc -eq 0 ]; then
        return $OCF_SUCCESS
    elif [ $rc -eq 3 ]; then
        return $OCF_NOT_RUNNING
    else
        return $OCF_ERR_GENERIC
    fi
}
# prepare_directory: create the directory layout below the shared info
# directory ($fp) and give it the expected ownership and SELinux label.
#
# Globals read: fp, rpcpipefs_make_dir, STATD_DIR, SELINUX_ENABLED,
#               SELINUX_LABEL
prepare_directory ()
{
    local dir

    # Every path is quoted so that directories containing whitespace are
    # created as configured instead of being split into several names.
    for dir in \
        "$fp" \
        "$rpcpipefs_make_dir" \
        "$fp/v4recovery" \
        "$fp/$STATD_DIR" \
        "$fp/$STATD_DIR/sm" \
        "$fp/$STATD_DIR/sm.ha" \
        "$fp/$STATD_DIR/sm.bak"; do
        [ -d "$dir" ] || mkdir -p "$dir"
    done

    # Hand the statd tree to rpcuser only when that user and group can be
    # resolved; a missing account is not an error, so id's complaint is
    # discarded. "user:group" is the portable chown separator.
    if [ -n "$(id -u rpcuser 2>/dev/null)" ] && [ -n "$(id -g rpcuser 2>/dev/null)" ]; then
        chown -R rpcuser:rpcuser "$fp/$STATD_DIR"
    fi

    # SELINUX_ENABLED is 0 when SELinux is active; the if form keeps the
    # function from reporting failure merely because SELinux is off.
    if [ "$SELINUX_ENABLED" -eq 0 ]; then
        chcon -R "$SELINUX_LABEL" "$fp"
    fi
}
# is_bound: tell whether something is mounted on the given mount point.
#
# Arguments: $1 - mount point to look for in the output of mount
# Returns:   0 when a matching "on <dir> type" entry exists, 1 otherwise
is_bound ()
{
    mount | grep -q "on $1 type" && return 0
    return 1
}
# bind_tree: bind-mount the shared info directory ($fp) on /var/lib/nfs,
# unless something is mounted there already.
#
# Returns: 0 when already bound, otherwise the exit status of mount
bind_tree ()
{
    local rc

    if is_bound /var/lib/nfs; then
        ocf_log debug "$fp is already bound to /var/lib/nfs"
        return 0
    fi

    # $fp is quoted so a directory name containing whitespace is passed
    # to mount as a single argument.
    mount --bind "$fp" /var/lib/nfs
    rc=$?

    # SELINUX_ENABLED is 0 when SELinux is active
    if [ "$SELINUX_ENABLED" -eq 0 ]; then
        restorecon /var/lib/nfs
    fi

    # Report what mount did rather than the outcome of the SELinux test
    return $rc
}
# unbind_tree: unmount the rpc_pipefs file system and then /var/lib/nfs,
# each only when mount reports it as mounted.
unbind_tree ()
{
    if mount | grep -q " on $rpcpipefs_umount_dir"; then
        umount -t rpc_pipefs $rpcpipefs_umount_dir
    fi

    if is_bound /var/lib/nfs; then
        umount /var/lib/nfs
    fi
}
# nfsserver_start: start the NFS server and send NSM reboot notifications
# for every address in nfs_ip.
#
# Returns: $OCF_SUCCESS when the server is (already) running,
#          otherwise the exit status of the failed start command
nfsserver_start ()
{
    local fn
    local rc
    local ip
    # Explicitly emptied: the option string is only ever appended to below.
    local opts=""

    if nfsserver_monitor; then
        ocf_log debug "NFS server is already started"
        return $OCF_SUCCESS
    fi

    prepare_directory
    bind_tree

    # Rebuild sm.ha from the current sm, sm.bak and state entries
    rm -rf "$STATD_PATH"/sm.ha/* > /dev/null 2>&1
    cp -rf "$STATD_PATH/sm" "$STATD_PATH/sm.bak" /var/lib/nfs/state "$STATD_PATH/sm.ha" > /dev/null 2>&1

    ocf_log info "Starting NFS server ..."
    fn=`mktemp`
    nfs_exec start > "$fn" 2>&1
    rc=$?
    # Quoted so the captured output is logged verbatim instead of being
    # word-split and glob-expanded.
    ocf_log debug "$(cat "$fn")"
    rm -f "$fn"

    if [ $rc -ne 0 ]; then
        ocf_log err "Failed to start NFS server"
        return $rc
    fi

    #Notify the nfs server has been moved or rebooted
    #The init script do that already, but with the hostname, which may be ignored by client
    #we have to do it again with the nfs_ip
    case ${OCF_RESKEY_nfs_notify_cmd##*/} in
        sm-notify)
            # run in foreground, if requested
            if ocf_is_true "$OCF_RESKEY_nfs_notify_foreground"; then
                opts="-d"
            fi

            if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then
                opts="$opts -m $OCF_RESKEY_nfs_smnotify_retry_time"
            fi

            opts="$opts -f -v"
            ;;

        rpc.statd)
            if ocf_is_true "$OCF_RESKEY_nfs_notify_foreground"; then
                opts="-F"
            fi
            opts="$opts -n"
            ;;
    esac

    # sm.ha is restored from the saved copy after each notification run,
    # so every address is notified from the same starting state.
    rm -rf "$STATD_PATH/sm.ha.save" > /dev/null 2>&1
    cp -rf "$STATD_PATH/sm.ha" "$STATD_PATH/sm.ha.save" > /dev/null 2>&1
    # $opts is left unquoted on purpose: it is a list of separate options.
    for ip in $(echo ${OCF_RESKEY_nfs_ip} | sed 's/,/ /g'); do
        ${OCF_RESKEY_nfs_notify_cmd} $opts $ip -P "$STATD_PATH/sm.ha"
        rm -rf "$STATD_PATH/sm.ha" > /dev/null 2>&1
        cp -rf "$STATD_PATH/sm.ha.save" "$STATD_PATH/sm.ha" > /dev/null 2>&1
    done

    ocf_log info "NFS server started"
    return $OCF_SUCCESS
}
# nfsserver_stop: stop the NFS server and, on success, undo the mounts
# set up for it.
#
# Returns: $OCF_SUCCESS when the stop command returned 0,
#          otherwise the exit status of the stop command
nfsserver_stop ()
{
    local fn
    local rc

    ocf_log info "Stopping NFS server ..."

    fn=`mktemp`
    nfs_exec stop > "$fn" 2>&1
    rc=$?
    # Quoted so the captured output is logged verbatim instead of being
    # word-split and glob-expanded.
    ocf_log debug "$(cat "$fn")"
    rm -f "$fn"

    if [ $rc -eq 0 ]; then
        unbind_tree
        ocf_log info "NFS server stopped"
        return $OCF_SUCCESS
    fi
    ocf_log err "Failed to stop NFS server"
    return $rc
}
# nfsserver_validate: check the environment and the resource parameters.
#
# Exits the process with $OCF_ERR_CONFIGURED on an invalid parameter;
# returns $OCF_SUCCESS otherwise.
nfsserver_validate ()
{
    local notify_tool

    ##
    # set_exec_mode will exit if nfs server is not installed
    ##
    set_exec_mode
    check_binary ${OCF_RESKEY_nfs_notify_cmd}

    if [ -z "${OCF_RESKEY_nfs_ip}" ]; then
        ocf_log err "nfs_ip not set"
        exit $OCF_ERR_CONFIGURED
    fi

    if [ -z "$OCF_RESKEY_nfs_shared_infodir" ]; then
        ocf_log err "nfs_shared_infodir not set"
        exit $OCF_ERR_CONFIGURED
    fi

    # The retry time is optional, but must be a decimal number when given
    if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ] &&
        ! ocf_is_decimal "$OCF_RESKEY_nfs_smnotify_retry_time"; then
        ocf_log err "Invalid nfs_smnotify_retry_time [$OCF_RESKEY_nfs_smnotify_retry_time]"
        exit $OCF_ERR_CONFIGURED
    fi

    # Only the base name of the notify command is compared
    notify_tool=${OCF_RESKEY_nfs_notify_cmd##*/}
    if [ "$notify_tool" != "sm-notify" ] && [ "$notify_tool" != "rpc.statd" ]; then
        ocf_log err "Invalid nfs_notify_cmd [$OCF_RESKEY_nfs_notify_cmd]"
        exit $OCF_ERR_CONFIGURED
    fi

    return $OCF_SUCCESS
}
# Running as a clone is rejected outright.
[ -z "$OCF_RESKEY_CRM_meta_clone" ] || {
    ocf_log err "THIS RA DO NOT SUPPORT CLONE MODE!"
    exit $OCF_ERR_CONFIGURED
}

# Parameters are validated before any action is carried out.
nfsserver_validate

# For start, stop and monitor the status of the handler becomes the
# status of the script, as it is the last command executed.
case "$__OCF_ACTION" in
    start)
        nfsserver_start
        ;;
    stop)
        nfsserver_stop
        ;;
    monitor)
        nfsserver_monitor
        ;;
    validate-all)
        exit $OCF_SUCCESS
        ;;
    *)
        nfsserver_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index 8d9fc3fa1..fa9427245 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -1,1836 +1,1836 @@
#!/bin/sh
#
# Description: Manages a PostgreSQL Server as an OCF High-Availability
# resource
#
# Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA
# Florian Haas (florian@linbit.com) -- makeover
# Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication
#
# Copyright: 2006-2012 Serge Dubrouski <sergeyfd@gmail.com>
# and other Linux-HA contributors
# License: GNU General Public License (GPL)
#
###############################################################################
# Initialization:
# OCF_FUNCTIONS_DIR is only given a value when it is not already set in
# the environment; the function library is then sourced from there.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#
# Get PostgreSQL Configuration parameter
#
get_pgsql_param() {
local param_name
param_name=$1
perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) {
\$dir=\$1;
\$dir =~ s/\s*\#.*//;
\$dir =~ s/^'(\S*)'/\$1/;
print \$dir;}"
perl -ne "$perl_code" < $OCF_RESKEY_config
}
# Defaults
OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl
OCF_RESKEY_psql_default=/usr/bin/psql
OCF_RESKEY_pgdata_default=/var/lib/pgsql/data
OCF_RESKEY_pgdba_default=postgres
OCF_RESKEY_pghost_default=""
OCF_RESKEY_pgport_default=5432
OCF_RESKEY_start_opt_default=""
# ctl_opt has an (empty) default like every other parameter, so that the
# name used by the metadata and by the start code is always defined
OCF_RESKEY_ctl_opt_default=""
OCF_RESKEY_pgdb_default=template1
OCF_RESKEY_logfile_default=/dev/null
OCF_RESKEY_stop_escalate_default=30
OCF_RESKEY_monitor_user_default=""
OCF_RESKEY_monitor_password_default=""
OCF_RESKEY_monitor_sql_default="select now();"
OCF_RESKEY_check_wal_receiver_default="false"
# Defaults for replication
OCF_RESKEY_rep_mode_default=none
OCF_RESKEY_node_list_default=""
OCF_RESKEY_restore_command_default=""
OCF_RESKEY_archive_cleanup_command_default=""
OCF_RESKEY_recovery_end_command_default=""
OCF_RESKEY_master_ip_default=""
OCF_RESKEY_repuser_default="postgres"
OCF_RESKEY_primary_conninfo_opt_default=""
OCF_RESKEY_restart_on_promote_default="false"
OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp"
OCF_RESKEY_xlog_check_count_default="3"
OCF_RESKEY_crm_attr_timeout_default="5"
OCF_RESKEY_stop_escalate_in_slave_default=30

# A default is applied only when the parameter is unset; a parameter that
# is set but empty is left alone.
: ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}}
: ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}}
: ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}}
: ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}}
: ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}}
: ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}}
# The configuration file defaults to postgresql.conf in the data directory
: ${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf}
: ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}}
: ${OCF_RESKEY_ctl_opt=${OCF_RESKEY_ctl_opt_default}}
: ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}}
: ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}}
: ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}}
: ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}}
: ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}}
: ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}}
: ${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}}
# for replication
: ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}}
: ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}}
: ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}}
: ${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}}
: ${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}}
: ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}}
: ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}}
: ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}}
: ${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}}
: ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}}
: ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}}
: ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}}
: ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}}
# usage: print the synopsis and a one-line description of the listed
# operations on stdout.
#
# Returns: $OCF_ERR_ARGS
usage() {
    cat <<USAGE_TEXT
usage: $0 start|stop|status|monitor|promote|demote|notify|meta-data|validate-all|methods
$0 manages a PostgreSQL Server as an HA resource.
The 'start' operation starts the PostgreSQL server.
The 'stop' operation stops the PostgreSQL server.
The 'status' operation reports whether the PostgreSQL is up.
The 'monitor' operation reports whether the PostgreSQL is running.
The 'promote' operation promotes the PostgreSQL server.
The 'demote' operation demotes the PostgreSQL server.
The 'validate-all' operation reports whether the parameters are valid.
The 'methods' operation reports on the methods $0 supports.
USAGE_TEXT
    return $OCF_ERR_ARGS
}
# meta_data: print the resource agent metadata (XML) on stdout.
#
# The here-document delimiter is unquoted, so every ${OCF_RESKEY_*}
# reference in the text is expanded when the function runs. The
# xlog_check_count entry uses OCF_RESKEY_xlog_check_count_default, the
# name under which that default is actually defined.
meta_data() {
    cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="pgsql">
<version>1.0</version>
<longdesc lang="en">
Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource.
</longdesc>
<shortdesc lang="en">Manages a PostgreSQL database instance</shortdesc>
<parameters>
<parameter name="pgctl" unique="0" required="0">
<longdesc lang="en">
Path to pg_ctl command.
</longdesc>
<shortdesc lang="en">pgctl</shortdesc>
<content type="string" default="${OCF_RESKEY_pgctl_default}" />
</parameter>
<parameter name="start_opt" unique="0" required="0">
<longdesc lang="en">
Start options (-o start_opt in pg_ctl). "-i -p 5432" for example.
</longdesc>
<shortdesc lang="en">start_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_start_opt_default}" />
</parameter>
<parameter name="ctl_opt" unique="0" required="0">
<longdesc lang="en">
Additional pg_ctl options (-w, -W etc..).
</longdesc>
<shortdesc lang="en">ctl_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_ctl_opt_default}" />
</parameter>
<parameter name="psql" unique="0" required="0">
<longdesc lang="en">
Path to psql command.
</longdesc>
<shortdesc lang="en">psql</shortdesc>
<content type="string" default="${OCF_RESKEY_psql_default}" />
</parameter>
<parameter name="pgdata" unique="0" required="0">
<longdesc lang="en">
Path to PostgreSQL data directory.
</longdesc>
<shortdesc lang="en">pgdata</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdata_default}" />
</parameter>
<parameter name="pgdba" unique="0" required="0">
<longdesc lang="en">
User that owns PostgreSQL.
</longdesc>
<shortdesc lang="en">pgdba</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdba_default}" />
</parameter>
<parameter name="pghost" unique="0" required="0">
<longdesc lang="en">
Hostname/IP address where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pghost</shortdesc>
<content type="string" default="${OCF_RESKEY_pghost_default}" />
</parameter>
<parameter name="pgport" unique="0" required="0">
<longdesc lang="en">
Port where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pgport</shortdesc>
<content type="integer" default="${OCF_RESKEY_pgport_default}" />
</parameter>
<parameter name="monitor_user" unique="0" required="0">
<longdesc lang="en">
PostgreSQL user that pgsql RA will user for monitor operations. If it's not set
pgdba user will be used.
</longdesc>
<shortdesc lang="en">monitor_user</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_user_default}" />
</parameter>
<parameter name="monitor_password" unique="0" required="0">
<longdesc lang="en">
Password for monitor user.
</longdesc>
<shortdesc lang="en">monitor_password</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_password_default}" />
</parameter>
<parameter name="monitor_sql" unique="0" required="0">
<longdesc lang="en">
SQL script that will be used for monitor operations.
</longdesc>
<shortdesc lang="en">monitor_sql</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_sql_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Path to the PostgreSQL configuration file for the instance.
</longdesc>
<shortdesc lang="en">Configuration file</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdata}/postgresql.conf" />
</parameter>
<parameter name="pgdb" unique="0" required="0">
<longdesc lang="en">
Database that will be used for monitoring.
</longdesc>
<shortdesc lang="en">pgdb</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdb_default}" />
</parameter>
<parameter name="logfile" unique="0" required="0">
<longdesc lang="en">
Path to PostgreSQL server log output file.
</longdesc>
<shortdesc lang="en">logfile</shortdesc>
<content type="string" default="${OCF_RESKEY_logfile_default}" />
</parameter>
<parameter name="socketdir" unique="0" required="0">
<longdesc lang="en">
-Unix socket directory for PostgeSQL
+Unix socket directory for PostgreSQL
</longdesc>
<shortdesc lang="en">socketdir</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="stop_escalate" unique="0" required="0">
<longdesc lang="en">
Number of shutdown retries (using -m fast) before resorting to -m immediate
</longdesc>
<shortdesc lang="en">stop escalation</shortdesc>
<content type="integer" default="${OCF_RESKEY_stop_escalate_default}" />
</parameter>
<parameter name="rep_mode" unique="0" required="0">
<longdesc lang="en">
Replication mode may be set to "async" or "sync" or "slave".
They require PostgreSQL 9.1 or later.
Once set, "async" and "sync" require node_list, master_ip, and
-restore_command parameters,as well as configuring postgresql
+restore_command parameters,as well as configuring PostgreSQL
for replication (in postgresql.conf and pg_hba.conf).
"slave" means that RA only makes recovery.conf before starting
-to connect to Primary which is running somewhere.
-It dosen't need Master/Slave setting.
+to connect to primary which is running somewhere.
+It dosen't need master/slave setting.
It requires master_ip restore_command parameters.
</longdesc>
<shortdesc lang="en">rep_mode</shortdesc>
<content type="string" default="${OCF_RESKEY_rep_mode_default}" />
</parameter>
<parameter name="node_list" unique="0" required="0">
<longdesc lang="en">
All node names. Please separate each node name with a space.
This is required for replication.
</longdesc>
<shortdesc lang="en">node list</shortdesc>
<content type="string" default="${OCF_RESKEY_node_list_default}" />
</parameter>
<parameter name="restore_command" unique="0" required="0">
<longdesc lang="en">
restore_command for recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">restore_command</shortdesc>
<content type="string" default="${OCF_RESKEY_restore_command_default}" />
</parameter>
<parameter name="archive_cleanup_command" unique="0" required="0">
<longdesc lang="en">
archive_cleanup_command for recovery.conf.
This is used for replication and is optional.
</longdesc>
<shortdesc lang="en">archive_cleanup_command</shortdesc>
<content type="string" default="${OCF_RESKEY_archive_cleanup_command_default}" />
</parameter>
<parameter name="recovery_end_command" unique="0" required="0">
<longdesc lang="en">
recovery_end_command for recovery.conf.
This is used for replication and is optional.
</longdesc>
<shortdesc lang="en">recovery_end_command</shortdesc>
<content type="string" default="${OCF_RESKEY_recovery_end_command_default}" />
</parameter>
<parameter name="master_ip" unique="0" required="0">
<longdesc lang="en">
Master's floating IP address to be connected from hot standby.
This parameter is used for "primary_conninfo" in recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">master ip</shortdesc>
<content type="string" default="${OCF_RESKEY_master_ip_default}" />
</parameter>
<parameter name="repuser" unique="0" required="0">
<longdesc lang="en">
User used to connect to the master server.
This parameter is used for "primary_conninfo" in recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">repuser</shortdesc>
<content type="string" default="${OCF_RESKEY_repuser_default}" />
</parameter>
<parameter name="primary_conninfo_opt" unique="0" required="0">
<longdesc lang="en">
primary_conninfo options of recovery.conf except host, port, user and application_name.
This is optional for replication.
</longdesc>
<shortdesc lang="en">primary_conninfo_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_primary_conninfo_opt_default}" />
</parameter>
<parameter name="restart_on_promote" unique="0" required="0">
<longdesc lang="en">
If this is true, RA deletes recovery.conf and restarts PostgreSQL
on promote to keep Timeline ID. It probably makes fail-over slower.
It's recommended to set on-fail of promote up as fence.
This is optional for replication.
</longdesc>
<shortdesc lang="en">restart_on_promote</shortdesc>
<content type="boolean" default="${OCF_RESKEY_restart_on_promote_default}" />
</parameter>
<parameter name="tmpdir" unique="0" required="0">
<longdesc lang="en">
Path to temporary directory.
This is optional for replication.
</longdesc>
<shortdesc lang="en">tmpdir</shortdesc>
<content type="string" default="${OCF_RESKEY_tmpdir_default}" />
</parameter>
<parameter name="xlog_check_count" unique="0" required="0">
<longdesc lang="en">
-Number of checking xlog on monitor before promote.
+Number of checks of xlog on monitor before promote.
This is optional for replication.
</longdesc>
<shortdesc lang="en">xlog check count</shortdesc>
<content type="integer" default="${OCF_RESKEY_xlog_check_count_default}" />
</parameter>
<parameter name="crm_attr_timeout" unique="0" required="0">
<longdesc lang="en">
The timeout of crm_attribute forever update command.
Default value is 5 seconds.
This is optional for replication.
</longdesc>
<shortdesc lang="en">The timeout of crm_attribute forever update command.</shortdesc>
<content type="integer" default="${OCF_RESKEY_crm_attr_timeout_default}" />
</parameter>
<parameter name="stop_escalate_in_slave" unique="0" required="0">
<longdesc lang="en">
Number of shutdown retries (using -m fast) before resorting to -m immediate
-in Slave state.
+in slave state.
This is optional for replication.
</longdesc>
<shortdesc lang="en">stop escalation_in_slave</shortdesc>
<content type="integer" default="${OCF_RESKEY_stop_escalate_in_slave_default}" />
</parameter>
<parameter name="check_wal_receiver" unique="0" required="0">
<longdesc lang="en">
If this is true, RA checks wal_receiver process on monitor
-and notify its status using "(resource name)-receiver-status" attribute.
-It's useful for checking whether PostgreSQL(Hot Standby) connects to primary.
+and notifies its status using "(resource name)-receiver-status" attribute.
+It's useful for checking whether PostgreSQL (hot standby) connects to primary.
The attribute shows status as "normal" or "ERROR".
</longdesc>
<shortdesc lang="en">check_wal_receiver</shortdesc>
<content type="boolean" default="${OCF_RESKEY_check_wal_receiver_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="30"/>
<action name="monitor" depth="0" timeout="30" interval="29" role="Master" />
<action name="promote" timeout="120" />
<action name="demote" timeout="120" />
<action name="notify" timeout="90" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
EOF
}
#
# Run the given command in the Resource owner environment...
#
# Up to two leading option words are recognised, in any order:
#   -q          passed on to ocf_run
#   warn | err  passed on to ocf_run as -warn / -err (default: -err)
# The remaining arguments form the command, which is run through su as
# $OCF_RESKEY_pgdba after changing into $OCF_RESKEY_pgdata.
#
runasowner() {
    local quietrun=""
    local loglevel="-err"
    local var

    # Two passes: one for each option word that may be present
    for var in 1 2; do
        if [ "$1" = "-q" ]; then
            quietrun="-q"
            shift 1
        elif [ "$1" = "warn" ] || [ "$1" = "err" ]; then
            loglevel="-$1"
            shift 1
        fi
    done

    ocf_run $quietrun $loglevel su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*"
}
#
# Shell escape
#
# Prints the arguments, joined by spaces, with every single quote
# replaced by '\'' so that the result can be placed inside a
# single-quoted shell string.
#
escape_string() {
    # printf instead of echo: echo treats some inputs as options (e.g. -n)
    # and, depending on the shell, interprets backslash sequences.
    printf '%s\n' "$*" | sed -e "s/'/'\\\\''/g"
}
#
# methods: What methods/operations do we support?
#
# Prints one operation name per line on stdout.
#
pgsql_methods() {
    printf '%s\n' \
        start \
        stop \
        status \
        monitor \
        promote \
        demote \
        notify \
        methods \
        meta-data \
        validate-all
}
#pgsql_real_start: Starts PostgreSQL
#
# Returns: $OCF_SUCCESS or $OCF_RUNNING_MASTER (whichever the monitor
#          reported) once the server answers,
#          $OCF_ERR_GENERIC when the server is already running in a
#          replication setup, recovery.conf could not be made, or pg_ctl
#          failed,
#          $OCF_ERR_PERM when the log file cannot be written
pgsql_real_start() {
    local pgctl_options
    local postgres_options
    local rc

    # Already running: fine for a plain instance, an error for replication
    if pgsql_status; then
        ocf_log info "PostgreSQL is already running. PID=`cat $PIDFILE`"
        is_replication && return $OCF_ERR_GENERIC
        return $OCF_SUCCESS
    fi

    # Remove postmaster.pid if it exists
    rm -f $PIDFILE

    # Remove backup_label if it exists
    if [ -f $BACKUPLABEL ] && ! is_replication; then
        ocf_log info "Removing $BACKUPLABEL. The previous backup might have failed."
        rm -f $BACKUPLABEL
    fi

    # Check if we need to create a log file
    check_log_file $OCF_RESKEY_logfile || {
        ocf_log err "PostgreSQL can't write to the log file: $OCF_RESKEY_logfile"
        return $OCF_ERR_PERM
    }

    # Check socket directory
    [ -n "$OCF_RESKEY_socketdir" ] && check_socket_dir

    # In "slave" mode recovery.conf is regenerated on every start
    if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
        rm -f $RECOVERY_CONF
        make_recovery_conf || return $OCF_ERR_GENERIC
    fi

    # Set options passed to the PostgreSQL server process
    postgres_options="-c config_file=${OCF_RESKEY_config}"
    [ -n "$OCF_RESKEY_pghost" ] &&
        postgres_options="$postgres_options -h $OCF_RESKEY_pghost"
    [ -n "$OCF_RESKEY_start_opt" ] &&
        postgres_options="$postgres_options $OCF_RESKEY_start_opt"

    # Set options passed to pg_ctl, with the server options tacked on
    # as a pass-through (-o) argument
    pgctl_options="$OCF_RESKEY_ctl_opt -D $OCF_RESKEY_pgdata -l $OCF_RESKEY_logfile -o '$postgres_options'"

    # Invoke pg_ctl
    if ! runasowner "unset PGUSER; unset PGPASSWORD; $OCF_RESKEY_pgctl $pgctl_options start"; then
        ocf_log err "Can't start PostgreSQL."
        return $OCF_ERR_GENERIC
    fi
    # Probably started.....
    ocf_log info "PostgreSQL start command sent."

    # Poll the monitor once per second until it reports a running server;
    # the loop itself has no upper bound.
    while :; do
        pgsql_real_monitor warn
        rc=$?
        if [ $rc -eq $OCF_SUCCESS ] || [ $rc -eq $OCF_RUNNING_MASTER ]; then
            break
        fi
        sleep 1
        ocf_log debug "PostgreSQL still hasn't started yet. Waiting..."
    done
    ocf_log info "PostgreSQL is started."

    return $rc
}
# pgsql_replication_start: start PostgreSQL in a replication setup.
#
# Returns: $OCF_SUCCESS when pgsql_real_start returned $OCF_SUCCESS,
#          $OCF_ERR_GENERIC in every other case
pgsql_replication_start() {
    local rc

    # initializing for replication
    change_pgsql_status "$NODENAME" "STOP"
    delete_master_baseline
    $CRM_MASTER -v $CAN_NOT_PROMOTE
    rm -f ${XLOG_NOTE_FILE}.* $REP_MODE_CONF $RECOVERY_CONF

    # The three steps run in order; the first failure aborts the start
    { make_recovery_conf && delete_xlog_location && set_async_mode_all; } ||
        return $OCF_ERR_GENERIC

    # A lock file that is still present blocks the start
    if [ -f $PGSQL_LOCK ]; then
        ocf_log err "My data may be inconsistent. You have to remove $PGSQL_LOCK file to force start."
        return $OCF_ERR_GENERIC
    fi

    # start
    pgsql_real_start
    rc=$?
    [ $rc -ne $OCF_SUCCESS ] && return $OCF_ERR_GENERIC

    change_pgsql_status "$NODENAME" "HS:alone"
    return $OCF_SUCCESS
}
#pgsql_start: pgsql_real_start() wrapper for replication
#
# Delegates to pgsql_replication_start in a replication setup and to
# pgsql_real_start otherwise; returns the status of the function called.
pgsql_start() {
    if is_replication; then
        pgsql_replication_start
    else
        pgsql_real_start
    fi
    return $?
}
#pgsql_promote: Promote PostgreSQL
#
# Returns: $OCF_SUCCESS        once the server runs as master,
#          $OCF_ERR_CONFIGURED when not in a replication mode,
#          $OCF_ERR_GENERIC    when the promotion (or the restart that
#                              replaces it) failed
pgsql_promote() {
    local target
    local rc

    if ! is_replication; then
        ocf_log err "Not in a replication mode."
        return $OCF_ERR_CONFIGURED
    fi
    rm -f ${XLOG_NOTE_FILE}.*

    # Mark every other node as disconnected and not promotable
    for target in $NODE_LIST; do
        if [ "$target" != "$NODENAME" ]; then
            change_data_status "$target" "DISCONNECT"
            change_master_score "$target" "$CAN_NOT_PROMOTE"
        fi
    done

    ocf_log info "Creating $PGSQL_LOCK."
    touch $PGSQL_LOCK
    show_master_baseline

    if ocf_is_true ${OCF_RESKEY_restart_on_promote}; then
        ocf_log info "Restarting PostgreSQL instead of promote."
        #stop : this function returns $OCF_SUCCESS only.
        pgsql_real_stop slave
        rm -f $RECOVERY_CONF
        pgsql_real_start
        rc=$?
        if [ $rc -ne $OCF_RUNNING_MASTER ]; then
            ocf_log err "Can't start PostgreSQL as primary on promote."
            # Only a start that did not return $OCF_SUCCESS is recorded
            # as STOP
            [ $rc -ne $OCF_SUCCESS ] && change_pgsql_status "$NODENAME" "STOP"
            return $OCF_ERR_GENERIC
        fi
    else
        if ! runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata promote"; then
            ocf_log err "Can't promote PostgreSQL."
            return $OCF_ERR_GENERIC
        fi
        ocf_log info "PostgreSQL promote command sent."

        # Poll once per second until the monitor reports a master; a
        # generic error aborts, any other status keeps waiting.
        while :; do
            pgsql_real_monitor warn
            rc=$?
            [ $rc -eq $OCF_RUNNING_MASTER ] && break
            if [ $rc -eq $OCF_ERR_GENERIC ]; then
                ocf_log err "Can't promote PostgreSQL."
                return $rc
            fi
            sleep 1
            ocf_log debug "PostgreSQL still hasn't promoted yet. Waiting..."
        done
        ocf_log info "PostgreSQL is promoted."
    fi

    change_data_status "$NODENAME" "LATEST"
    $CRM_MASTER -v $PROMOTE_ME
    change_pgsql_status "$NODENAME" "PRI"
    return $OCF_SUCCESS
}
# pgsql_demote: handle the "demote" action by stopping the primary.
# Returns $OCF_ERR_CONFIGURED outside replication mode, the status of
# pgsql_real_stop when the stop fails, and $OCF_SUCCESS otherwise.
pgsql_demote() {
    local stop_rc

    if ! is_replication; then
        ocf_log err "Not in a replication mode."
        return $OCF_ERR_CONFIGURED
    fi

    # This node must not be promoted again until it proves itself.
    $CRM_MASTER -v $CAN_NOT_PROMOTE
    delete_master_baseline

    if pgsql_status; then
        ocf_log info "Stopping PostgreSQL on demote."
        pgsql_real_stop master
        stop_rc=$?
        if [ "$stop_rc" -ne "$OCF_SUCCESS" ]; then
            change_pgsql_status "$NODENAME" "UNKNOWN"
            return $stop_rc
        fi
    else
        ocf_log info "PostgreSQL is already stopped on demote."
    fi

    change_pgsql_status "$NODENAME" "STOP"
    return $OCF_SUCCESS
}
#pgsql_real_stop: Stop PostgreSQL
# arg1 (optional): "slave" selects OCF_RESKEY_stop_escalate_in_slave as the
#                  escalation timeout instead of OCF_RESKEY_stop_escalate;
#                  "master" additionally allows removal of $PGSQL_LOCK.
# Tries "pg_ctl stop -m fast", waits up to the escalation timeout, then falls
# back to "-m immediate" and waits until pgsql_real_monitor reports
# $OCF_NOT_RUNNING. Always returns $OCF_SUCCESS.
pgsql_real_stop() {
    local rc
    local count
    local stop_escalate

    if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
        attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -D -q
    fi

    if ! pgsql_status
    then
        #Already stopped
        return $OCF_SUCCESS
    fi

    stop_escalate=$OCF_RESKEY_stop_escalate
    if [ "$1" = "slave" ]; then
        stop_escalate="$OCF_RESKEY_stop_escalate_in_slave"
    fi

    # Stop PostgreSQL, do not wait for clients to disconnect
    if [ $stop_escalate -gt 0 ]; then
        runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m fast"
    fi

    # stop waiting
    count=0
    while [ $count -lt $stop_escalate ]
    do
        if ! pgsql_status
        then
            #PostgreSQL stopped
            break;
        fi
        count=`expr $count + 1`
        sleep 1
    done

    if pgsql_status
    then
        #PostgreSQL is still up. Use another shutdown mode.
        # Report the timeout that was actually applied (it differs for slaves).
        ocf_log info "PostgreSQL failed to stop after ${stop_escalate}s using -m fast. Trying -m immediate..."
        runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m immediate"
    fi

    # Wait (without an upper bound) until the monitor confirms the shutdown.
    while :
    do
        pgsql_real_monitor
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]; then
            # An unnecessary debug log is prevented.
            break;
        fi
        sleep 1
        ocf_log debug "PostgreSQL still hasn't stopped yet. Waiting..."
    done

    # Remove postmaster.pid if it exists
    rm -f $PIDFILE

    # The lock is only dropped when a master stops and the slave list from the
    # notification environment is a single blank.
    # NOTE(review): presumably that means "no slaves were running" - confirm.
    if [ "$1" = "master" -a "$OCF_RESKEY_CRM_meta_notify_slave_uname" = " " ]; then
        ocf_log info "Removing $PGSQL_LOCK."
        rm -f $PGSQL_LOCK
    fi
    return $OCF_SUCCESS
}
# pgsql_replication_stop: "stop" action in replication mode.
# Makes the node non-promotable, clears its xlog location attribute and stops
# PostgreSQL as a slave. Returns the status of pgsql_real_stop when that
# fails, $OCF_SUCCESS otherwise.
pgsql_replication_stop() {
    local stop_rc

    $CRM_MASTER -v $CAN_NOT_PROMOTE
    delete_xlog_location

    if pgsql_status; then
        pgsql_real_stop slave
        stop_rc=$?
        if [ $stop_rc -ne $OCF_SUCCESS ]; then
            change_pgsql_status "$NODENAME" "UNKNOWN"
            return $stop_rc
        fi
        change_pgsql_status "$NODENAME" "STOP"
        set_async_mode_all
        delete_master_baseline
    else
        ocf_log info "PostgreSQL is already stopped."
        change_pgsql_status "$NODENAME" "STOP"
    fi

    return $OCF_SUCCESS
}
# pgsql_stop: entry point for the "stop" action.
# Uses pgsql_replication_stop when is_replication succeeds and
# pgsql_real_stop (without arguments) otherwise; returns the delegate's status.
pgsql_stop() {
    if is_replication; then
        pgsql_replication_stop
        return $?
    fi

    pgsql_real_stop
    return $?
}
#
# pgsql_status: is PostgreSQL up?
# Succeeds when $PIDFILE exists and the process named on its first line
# accepts signal 0 (checked as the database owner via runasowner).
# Leaves that process id in the global PID.
#
pgsql_status() {
    # No PID file
    [ -f $PIDFILE ] || return 1

    PID=$(head -n 1 $PIDFILE)
    runasowner "kill -s 0 $PID >/dev/null 2>&1"
}
# pgsql_wal_receiver_status: check for a "wal receiver process" whose parent
# is the postmaster recorded in $PIDFILE.
# Sets the $PGSQL_WAL_RECEIVER_STATUS_ATTR attribute to "normal" and returns 0
# when one is found; otherwise sets it to "ERROR", logs a warning and
# returns 1.
pgsql_wal_receiver_status() {
    local postmaster_pid
    local receiver_parent_pids

    postmaster_pid=$(head -n 1 $PIDFILE)
    # Third column of "ps -ef" is the parent pid of each matching process.
    receiver_parent_pids=$(ps -ef | tr -s " " | grep "[w]al receiver process" | cut -d " " -f 3)

    if ! echo "$receiver_parent_pids" | grep -q -w "$postmaster_pid"; then
        attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" -q
        ocf_log warn "wal receiver process is not running"
        return 1
    fi

    attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" -q
    return 0
}
#
# pgsql_real_monitor
# Checks that PostgreSQL is up and answers queries.
# arg1 (optional): log level used when reporting psql errors (default "err").
# Returns:
#   $OCF_NOT_RUNNING     pgsql_status failed
#   $OCF_ERR_GENERIC     a psql call failed or the recovery check printed
#                        something other than "t"/"f"
#   $OCF_SUCCESS         running as hot standby, or healthy without replication
#   $OCF_RUNNING_MASTER  running as primary in replication mode
#
pgsql_real_monitor() {
local loglevel
local rc
local output
# Set the log level of the error message
loglevel=${1:-err}
if ! pgsql_status
then
ocf_log info "PostgreSQL is down"
return $OCF_NOT_RUNNING
fi
# The result of the WAL receiver check is not used for the return value here.
if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
pgsql_wal_receiver_status
fi
if is_replication; then
#Check replication state
output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
$OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
-Atc \"${CHECK_MS_SQL}\""`
rc=$?
if [ $rc -ne 0 ]; then
report_psql_error $rc $loglevel
return $OCF_ERR_GENERIC
fi
case "$output" in
f) ocf_log debug "PostgreSQL is running as a primary."
# With the default monitor SQL the recovery check is sufficient; a custom
# monitor SQL falls through and is executed below.
if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then
return $OCF_RUNNING_MASTER
fi
;;
t) ocf_log debug "PostgreSQL is running as a hot standby."
return $OCF_SUCCESS;;
*) ocf_log err "$CHECK_MS_SQL output is $output"
return $OCF_ERR_GENERIC;;
esac
fi
# Overwrites OCF_RESKEY_monitor_sql with the output of escape_string
# (defined outside this section) before embedding it in single quotes.
OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"`
runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \
-c '$OCF_RESKEY_monitor_sql'"
rc=$?
if [ $rc -ne 0 ]; then
report_psql_error $rc $loglevel
return $OCF_ERR_GENERIC
fi
# Only a primary reaches this point in replication mode (standbys returned above).
if is_replication; then
return $OCF_RUNNING_MASTER
fi
return $OCF_SUCCESS
}
# pgsql_replication_monitor: replication-specific part of the monitor action.
# arg1: exit status of pgsql_real_monitor.
# Any status other than $OCF_SUCCESS / $OCF_RUNNING_MASTER is returned
# unchanged. For a master the node attributes are refreshed and
# control_slave_status is run ($OCF_ERR_GENERIC if that fails). For a slave
# the pgsql status is set to "HS:alone" when no master exists or when this
# node's data status is "DISCONNECT". Returns arg1 otherwise.
pgsql_replication_monitor() {
    local rc
    # Kept local so that the attribute lookup below cannot clobber a
    # variable of the same name owned by a caller or the global scope.
    local output

    rc=$1
    if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then
        return $rc
    fi

    # If I am Master
    if [ $rc -eq $OCF_RUNNING_MASTER ]; then
        change_data_status "$NODENAME" "LATEST"
        change_pgsql_status "$NODENAME" "PRI"
        control_slave_status || return $OCF_ERR_GENERIC
        return $rc
    fi

    # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor,
    # so I will get master node name using crm_mon -n
    crm_mon -n1 | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):]Master"
    if [ $? -ne 0 ] ; then
        # If I am Slave and Master is not exist
        ocf_log info "Master does not exist."
        change_pgsql_status "$NODENAME" "HS:alone"
        have_master_right
        if [ $? -eq 0 ]; then
            rm -f ${XLOG_NOTE_FILE}.*
        fi
    else
        output=`$CRM_ATTR_FOREVER -N "$NODENAME" \
            -n "$PGSQL_DATA_STATUS_ATTR" -G -q`
        if [ "$output" = "DISCONNECT" ]; then
            change_pgsql_status "$NODENAME" "HS:alone"
        fi
    fi
    return $rc
}
# pgsql_monitor: entry point for the "monitor" action.
# Runs pgsql_real_monitor; in replication mode its status is passed through
# pgsql_replication_monitor, otherwise it is returned directly.
pgsql_monitor() {
    local rc

    pgsql_real_monitor
    rc=$?

    if is_replication; then
        pgsql_replication_monitor $rc
        return $?
    fi
    return $rc
}
# pgsql_post_demote
# Post-demote notification handler. When another node was demoted and this
# node is not listed as a master, publish the master baseline and set the
# pgsql status to "HS:alone". Always returns $OCF_SUCCESS.
# Sets the global DEMOTE_NODE to the first (lower-cased) demoted node name.
pgsql_post_demote() {
    local master
    local i_am_master

    DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
    ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE"

    if [ "$DEMOTE_NODE" != "$NODENAME" ]; then
        # Compare whole node names: a substring match would treat "node1" as
        # present in "node10", and nothing is written to stdout.
        i_am_master=0
        for master in `echo $OCF_RESKEY_CRM_meta_notify_master_uname | tr '[A-Z]' '[a-z]'`; do
            if [ "$master" = "$NODENAME" ]; then
                i_am_master=1
                break
            fi
        done
        if [ $i_am_master -eq 0 ]; then
            show_master_baseline
            change_pgsql_status "$NODENAME" "HS:alone"
        fi
    fi
    return $OCF_SUCCESS
}
# pgsql_pre_promote: pre-promote notification handler.
# With three or more nodes in sync mode, a node that is not the promotion
# target compares its master baseline with the target's. If this node's
# baseline does not sort first, it sets its fail count to INFINITY and
# returns $OCF_ERR_GENERIC. Returns $OCF_SUCCESS in every other case.
# Sets the global PROMOTE_NODE to the first (lower-cased) promoted node name.
pgsql_pre_promote() {
    local master_baseline
    local my_master_baseline
    local cmp_location
    local number_of_nodes

    # If my data is newer than new master's one, I fail my resource.
    PROMOTE_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
        sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
    number_of_nodes=$(echo $NODE_LIST | wc -w)

    if [ $number_of_nodes -ge 3 -a \
         "$OCF_RESKEY_rep_mode" = "sync" -a \
         "$PROMOTE_NODE" != "$NODENAME" ]; then
        :
    else
        return $OCF_SUCCESS
    fi

    # Without a baseline on the new master there is nothing to compare.
    master_baseline=$($CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \
        "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null) || return $OCF_SUCCESS

    my_master_baseline=$($CRM_ATTR_REBOOT -N "$NODENAME" -n \
        "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null)

    # get older location
    cmp_location=$(printf "$master_baseline\n$my_master_baseline\n" | sort | head -1)
    if [ "$cmp_location" = "$my_master_baseline" ]; then
        return $OCF_SUCCESS
    fi

    ocf_log err "My data is newer than new master's one. New master's location : $master_baseline"
    $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY
    return $OCF_ERR_GENERIC
}
# pgsql_notify: dispatcher for the "notify" action (replication mode only).
# Reads the notification type and operation from
# OCF_RESKEY_CRM_meta_notify_type / OCF_RESKEY_CRM_meta_notify_operation:
#   pre/promote      -> pgsql_pre_promote (its status is returned)
#   post/promote     -> clear own xlog location; clear own master baseline
#                       unless this node is the promoted one
#   post/demote      -> pgsql_post_demote (its status is returned)
#   post/start|stop  -> on the master node, refresh the slaves' status
# Everything else, and non-replication mode, returns $OCF_SUCCESS.
pgsql_notify() {
    local type="${OCF_RESKEY_CRM_meta_notify_type}"
    local op="${OCF_RESKEY_CRM_meta_notify_operation}"
    local rc

    if ! is_replication; then
        return $OCF_SUCCESS
    fi

    ocf_log debug "notify: ${type} for ${op}"
    case "$type" in
        pre)
            case "$op" in
                promote)
                    pgsql_pre_promote
                    return $?
                    ;;
            esac
            ;;
        post)
            case "$op" in
                promote)
                    delete_xlog_location
                    PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
                        sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
                    if [ "$PROMOTE_NODE" != "$NODENAME" ]; then
                        delete_master_baseline
                    fi
                    return $OCF_SUCCESS
                    ;;
                demote)
                    pgsql_post_demote
                    return $?
                    ;;
                start|stop)
                    MASTER_NODE=`echo $OCF_RESKEY_CRM_meta_notify_master_uname | \
                        sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
                    # Compare the bare node names; a trailing blank on either
                    # side would make this test fail for every node.
                    if [ "$NODENAME" = "$MASTER_NODE" ]; then
                        control_slave_status
                    fi
                    return $OCF_SUCCESS
                    ;;
            esac
            ;;
    esac
    return $OCF_SUCCESS
}
# control_slave_status: run on the master to refresh the attributes of every
# other node in $NODE_LIST from the output of $CHECK_REPLICATION_STATE_SQL.
# Each output record is expected as "application_name|STATE|SYNC_STATE"; a node
# without a matching record is treated as "DISCONNECT".
# Returns 1 when the SQL query fails, 0 otherwise.
# Note: "state" and "sync_state" are not declared local in this function.
control_slave_status() {
local rc
local data_status
local target
local all_data_status
local tmp_data_status
local node_name
local number_of_nodes
all_data_status=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
$OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
-Atc \"${CHECK_REPLICATION_STATE_SQL}\""`
rc=$?
if [ $rc -eq 0 ]; then
if [ -n "$all_data_status" ]; then
# NOTE(review): sed works line by line, so this substitution probably never
# matches; the unquoted echo is what joins the records with blanks - confirm.
all_data_status=`echo $all_data_status | sed "s/\n/ /g"`
fi
else
report_psql_error $rc warn
return 1
fi
number_of_nodes=`echo $NODE_LIST | wc -w`
for target in $NODE_LIST; do
# Only the other nodes are evaluated here.
if [ "$target" = "$NODENAME" ]; then
continue
fi
# Default when the node has no record in the query output.
data_status="DISCONNECT"
if [ -n "$all_data_status" ]; then
for tmp_data_status in $all_data_status; do
node_name=`echo $tmp_data_status | cut -d "|" -f 1`
state=`echo $tmp_data_status | cut -d "|" -f 2`
sync_state=`echo $tmp_data_status | cut -d "|" -f 3`
ocf_log debug "node=$node_name, state=$state, sync_state=$sync_state"
if [ "$node_name" = "$target" ];then
data_status="$state|$sync_state"
break
fi
done
fi
case "$data_status" in
"STREAMING|SYNC")
# Synchronous standby: promotable.
change_data_status "$target" "$data_status"
change_master_score "$target" "$CAN_PROMOTE"
change_pgsql_status "$target" "HS:sync"
;;
"STREAMING|ASYNC")
change_data_status "$target" "$data_status"
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
# In sync mode an async standby is not promotable; add it to the
# synchronous standby list if it is not there yet.
change_master_score "$target" "$CAN_NOT_PROMOTE"
if ! is_sync_mode "$target"; then
set_sync_mode "$target"
fi
else
if [ $number_of_nodes -le 2 ]; then
change_master_score "$target" "$CAN_PROMOTE"
else
# I can't determine which slave's data is newest in async mode.
change_master_score "$target" "$CAN_NOT_PROMOTE"
fi
fi
change_pgsql_status "$target" "HS:async"
;;
"STREAMING|POTENTIAL")
change_data_status "$target" "$data_status"
change_master_score "$target" "$CAN_NOT_PROMOTE"
change_pgsql_status "$target" "HS:potential"
;;
"DISCONNECT")
# Not connected: not promotable; drop it from the synchronous standby
# list so the master does not wait for it. The pgsql status is left alone.
change_data_status "$target" "$data_status"
change_master_score "$target" "$CAN_NOT_PROMOTE"
if [ "$OCF_RESKEY_rep_mode" = "sync" ] && \
is_sync_mode "$target"; then
set_async_mode "$target"
fi
;;
*)
# Any other state (connected but not streaming): same handling as
# DISCONNECT, plus the pgsql status "HS:connected".
change_data_status "$target" "$data_status"
change_master_score "$target" "$CAN_NOT_PROMOTE"
if [ "$OCF_RESKEY_rep_mode" = "sync" ] && \
is_sync_mode "$target"; then
set_async_mode "$target"
fi
change_pgsql_status "$target" "HS:connected"
;;
esac
done
return 0
}
# have_master_right: decide whether this node may ask to become master while
# no master exists.
# Returns 0 (after setting the master score to $PROMOTE_ME) when this node's
# xlog location is the newest one and the locations of all nodes stayed
# unchanged for $OCF_RESKEY_xlog_check_count consecutive calls; returns 1
# otherwise. Exits with $OCF_ERR_GENERIC if the own xlog location cannot be
# published.
have_master_right() {
local old
local new
local output
local data_status
local node
local mylocation
local count
local newestXlog
local oldfile
local newfile
ocf_log debug "Checking if I have a master right."
data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \
"$PGSQL_DATA_STATUS_ATTR" -G -q`
# An empty data status is accepted; otherwise it must be one of the
# states listed for the configured rep_mode.
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
"$data_status" != "LATEST" ]; then
ocf_log warn "My data is out-of-date. status=$data_status"
return 1
fi
else
if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
"$data_status" != "STREAMING|ASYNC" -a \
"$data_status" != "LATEST" ]; then
ocf_log warn "My data is out-of-date. status=$data_status"
return 1
fi
fi
ocf_log info "My data status=$data_status."
show_xlog_location
if [ $? -ne 0 ]; then
ocf_log err "Failed to show my xlog location."
exit $OCF_ERR_GENERIC
fi
# Find the highest consecutive note file number (1..xlog_check_count) that
# already exists; the next snapshot is written to old+1.
old=0
for count in `seq $OCF_RESKEY_xlog_check_count`; do
if [ -f ${XLOG_NOTE_FILE}.$count ]; then
old=$count
continue
fi
break
done
new=`expr $old + 1`
# get xlog locations of all nodes
for node in ${NODE_LIST}; do
output=`$CRM_ATTR_REBOOT -N "$node" -n \
"$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null`
if [ $? -ne 0 ]; then
ocf_log warn "Can't get $node xlog location."
continue
else
ocf_log info "$node xlog location : $output"
echo "$node $output" >> ${XLOG_NOTE_FILE}.${new}
if [ "$node" = "$NODENAME" ]; then
mylocation=$output
fi
fi
done
# Compare this snapshot with the previous one; any change restarts the count.
oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null`
newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null`
if [ "$oldfile" != "$newfile" ]; then
# reset counter
rm -f ${XLOG_NOTE_FILE}.*
# NOTE(review): the snapshot text is used as the printf format string here
# and below; confirm it can never contain '%' or backslashes.
printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
return 1
fi
if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then
# Pick the node entry that sorts last by location (second field).
newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \
head -1 | cut -d " " -f 2`
if [ "$newestXlog" = "$mylocation" ]; then
ocf_log info "I have a master right."
$CRM_MASTER -v $PROMOTE_ME
return 0
fi
change_data_status "$NODENAME" "DISCONNECT"
ocf_log info "I don't have correct master data."
# reset counter
rm -f ${XLOG_NOTE_FILE}.*
printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
fi
return 1
}
# is_replication: succeeds unless OCF_RESKEY_rep_mode is "none" or "slave".
is_replication() {
    case "$OCF_RESKEY_rep_mode" in
        none|slave)
            return 1
            ;;
    esac
    return 0
}
# get_my_location: print this node's xlog location as a 16-character string.
# Runs $CHECK_XLOG_LOC_SQL, whose output is read as "replay|receive" with each
# location in "X/Y" form; X and Y are each left-padded with zeros to 8
# characters and concatenated, and the location that sorts last is printed.
# Returns 1 (printing nothing) when the query fails, 0 otherwise.
get_my_location() {
    local rc
    local output
    local replay_loc
    local receive_loc
    local hi_part
    local lo_part

    output=$(su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
        $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
        -Atc \"${CHECK_XLOG_LOC_SQL}\"")
    rc=$?
    if [ $rc -ne 0 ]; then
        report_psql_error $rc warn
        ocf_log err "Can't get my xlog location."
        return 1
    fi

    replay_loc=$(echo $output | cut -d "|" -f 1)
    receive_loc=$(echo $output | cut -d "|" -f 2)

    # Normalise the replay location.
    hi_part=$(echo "$replay_loc" | cut -d "/" -f 1)
    lo_part=$(echo "$replay_loc" | cut -d "/" -f 2)
    replay_loc="$(printf "%08s\n" $hi_part | sed "s/ /0/g")$(printf "%08s\n" $lo_part | sed "s/ /0/g")"

    # Normalise the receive location.
    hi_part=$(echo "$receive_loc" | cut -d "/" -f 1)
    lo_part=$(echo "$receive_loc" | cut -d "/" -f 2)
    receive_loc="$(printf "%08s\n" $hi_part | sed "s/ /0/g")$(printf "%08s\n" $lo_part | sed "s/ /0/g")"

    # Equal-width strings, so a reverse text sort puts the newer one first.
    printf "$replay_loc\n$receive_loc" | sort -r | head -1
    return 0
}
# show_xlog_location: publish this node's xlog location (from
# get_my_location) as the $PGSQL_XLOG_LOC_NAME node attribute.
# Returns 1 when the location cannot be determined, otherwise the status of
# the attribute update.
show_xlog_location() {
    local location

    if ! location=$(get_my_location); then
        return 1
    fi
    $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location"
}
# delete_xlog_location: delete this node's $PGSQL_XLOG_LOC_NAME attribute.
# Returns the status of the attribute command.
delete_xlog_location() {
    $CRM_ATTR_REBOOT \
        -N "$NODENAME" \
        -n "$PGSQL_XLOG_LOC_NAME" \
        -D
}
# show_master_baseline: issue a CHECKPOINT, then publish the current xlog
# location (from get_my_location) as the $PGSQL_MASTER_BASELINE attribute.
# A failing CHECKPOINT is only reported; the attribute is written regardless.
# Returns the status of the attribute update.
show_master_baseline() {
    local location

    runasowner -q err "$OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba -c 'CHECKPOINT'" \
        || report_psql_error $? warn

    location=$(get_my_location)
    ocf_log info "My master baseline : $location."
    $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location"
}
# delete_master_baseline: delete this node's $PGSQL_MASTER_BASELINE attribute.
# Returns the status of the attribute command.
delete_master_baseline() {
    $CRM_ATTR_REBOOT \
        -N "$NODENAME" \
        -n "$PGSQL_MASTER_BASELINE" \
        -D
}
# set_async_mode_all: in sync mode, overwrite $REP_MODE_CONF (as the database
# owner) with an empty synchronous_standby_names setting.
# Does nothing and returns 0 when rep_mode is not "sync"; returns 1 when the
# file cannot be written, 0 otherwise.
set_async_mode_all() {
    if [ "$OCF_RESKEY_rep_mode" != "sync" ]; then
        return 0
    fi

    ocf_log info "Set all nodes into async mode."
    if ! runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""; then
        ocf_log err "Can't set all nodes into async mode."
        return 1
    fi
    return 0
}
# set_async_mode: remove a node from the synchronous standby list.
# arg1: node name to remove.
# Reads the current list from the single-quoted value in $REP_MODE_CONF. When
# the list is empty nothing is changed and 0 is returned; otherwise the file
# is rewritten without arg1 and the status of reload_conf is returned.
set_async_mode() {
    local sync_node_in_conf

    sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2`
    if [ -z "$sync_node_in_conf" ]; then
        ocf_log info "$1 is already in async mode."
        return 0
    fi

    ocf_log info "Setup $1 into async mode."
    # Drop only list entries that are exactly arg1 (plus empty entries);
    # removing it as a substring would also mangle names such as "node10"
    # when "node1" is removed.
    sync_node_in_conf=`printf '%s\n' "$sync_node_in_conf" | tr ',' '\n' | \
        grep -v -x -F -e "$1" | sed '/^$/d' | paste -s -d ',' -`
    echo "synchronous_standby_names = '$sync_node_in_conf'" > "$REP_MODE_CONF"

    ocf_log info "All synced nodes : \"$sync_node_in_conf\""
    reload_conf
}
# set_sync_mode: append a node to the synchronous standby list.
# arg1: node name to add.
# Rewrites $REP_MODE_CONF with arg1 appended to the existing single-quoted
# list (or as the only entry when the list is empty) and returns the status
# of reload_conf.
set_sync_mode() {
    local sync_node_in_conf
    local new_list

    sync_node_in_conf=$(cat $REP_MODE_CONF | cut -d "'" -f 2)
    ocf_log info "Setup $1 into sync mode."

    if [ -z "$sync_node_in_conf" ]; then
        new_list="$1"
    else
        new_list="$sync_node_in_conf,$1"
    fi
    echo "synchronous_standby_names = '$new_list'" > "$REP_MODE_CONF"

    # Log what actually ended up in the file.
    sync_node_in_conf=$(cat $REP_MODE_CONF | cut -d "'" -f 2)
    ocf_log info "All synced nodes : \"$sync_node_in_conf\""
    reload_conf
}
# is_sync_mode: succeeds when arg1 appears in $REP_MODE_CONF delimited on both
# sides by a comma, a single quote or a blank.
is_sync_mode() {
    local node_pattern

    node_pattern="[,' ]$1[,' ]"
    cat $REP_MODE_CONF | grep -q -e "$node_pattern"
}
# reload_conf: run "pg_ctl reload" as the database owner.
# Returns 0 on success, 1 (after logging an error) on failure.
reload_conf() {
    # Invoke pg_ctl
    if ! runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload"; then
        ocf_log err "Can't reload configuration file."
        return 1
    fi

    ocf_log info "Reload configuration file."
    return 0
}
# user_recovery_conf: print the optional recovery.conf settings.
# archive_cleanup_command and recovery_end_command are each emitted only when
# the corresponding OCF_RESKEY_* parameter is non-empty.
user_recovery_conf() {
    # put archive_cleanup_command and recovery_end_command only when defined by user
    [ -z "$OCF_RESKEY_archive_cleanup_command" ] ||
        echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'"

    [ -z "$OCF_RESKEY_recovery_end_command" ] ||
        echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'"
}
# make_recovery_conf: (re)create $RECOVERY_CONF for a hot standby.
# The file is first touched as the database owner, then overwritten with the
# standby settings followed by the output of user_recovery_conf.
# Returns 1 when the file cannot be created, 0 otherwise.
make_recovery_conf() {
    if ! runasowner "touch $RECOVERY_CONF"; then
        ocf_log err "Can't create recovery.conf."
        return 1
    fi

    {
        cat <<END
standby_mode = 'on'
primary_conninfo = 'host=${OCF_RESKEY_master_ip} port=${OCF_RESKEY_pgport} user=${OCF_RESKEY_repuser} application_name=${NODENAME} ${OCF_RESKEY_primary_conninfo_opt}'
restore_command = '${OCF_RESKEY_restore_command}'
recovery_target_timeline = 'latest'
END
        user_recovery_conf
    } > $RECOVERY_CONF

    ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}"
    return 0
}
# change pgsql-status.
# arg1:node, arg2: value
# Updates the $PGSQL_STATUS_ATTR attribute of arg1 when it differs from arg2.
# Offline nodes are skipped, and another node's "STOP"/"UNKNOWN" status is
# never overwritten from here. Returns 1 only when the update itself fails.
change_pgsql_status() {
    local output

    is_node_online $1 || return 0

    output=$($CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null)
    [ "$output" != "$2" ] || return 0

    # If slave's disk is broken, RA cannot read PID file
    # and misjudges the PostgreSQL as down while it is running.
    # It causes overwriting of pgsql-status by Master because replication is still connected.
    case "$output" in
        STOP|UNKNOWN)
            if [ "$1" != "$NODENAME" ]; then
                ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited."
                return 0
            fi
            ;;
    esac

    ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2."
    if ! $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2"; then
        ocf_log err "Can't change $PGSQL_STATUS_ATTR."
        return 1
    fi
    return 0
}
# change pgsql-data-status.
# arg1:node, arg2: value
# Keeps writing arg2 into the $PGSQL_DATA_STATUS_ATTR attribute of arg1
# (through exec_func_with_timeout) until a read-back shows that value.
# Unknown nodes are skipped. Returns 1 when a write attempt reports failure,
# 0 otherwise.
change_data_status() {
    local output

    node_exist $1 || return 0

    while :; do
        output=$($CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null)
        # Done as soon as the stored value matches.
        [ "$output" != "$2" ] || break

        ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2."
        if ! exec_func_with_timeout "$CRM_ATTR_FOREVER" \
                "-N $1 -n $PGSQL_DATA_STATUS_ATTR -v \"$2\"" \
                $OCF_RESKEY_crm_attr_timeout; then
            ocf_log err "Can't change $PGSQL_DATA_STATUS_ATTR."
            return 1
        fi
    done
    return 0
}
# set master-score
# arg1:node, arg2: score, arg3: resource
# Updates the "master-<arg3>" attribute of node arg1 to arg2, but only when
# the attribute currently has a (non-empty) value that differs from arg2.
# Returns 1 when the update fails, 0 otherwise.
set_master_score() {
    local current_score

    current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null`
    if [ -n "$current_score" -a "$current_score" != "$2" ]; then
        ocf_log info "Changing $3 master score on $1 : $current_score->$2."
        # Write to the node passed as arg1 - the same node that was read and
        # logged above - rather than to whatever "target" a caller has set.
        $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2"
        if [ $? -ne 0 ]; then
            ocf_log err "Can't change master score."
            return 1
        fi
    fi
    return 0
}
# change master-score
# arg1:node, arg2: score
# Applies set_master_score for node arg1. When $OCF_RESOURCE_INSTANCE carries
# an instance suffix ("name:N"), every instance 0..clone_max-1 except this
# one is updated; otherwise the plain resource name is used.
# Offline nodes are skipped. Returns 1 as soon as one update fails.
change_master_score() {
    local instance

    is_node_online $1 || return 0

    case "$OCF_RESOURCE_INSTANCE" in
        *:*)
            # If Pacemaker version is 1.0.x
            instance=0
            until [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; do
                if [ "${RESOURCE_NAME}:${instance}" != "$OCF_RESOURCE_INSTANCE" ]; then
                    set_master_score $1 $2 "${RESOURCE_NAME}:${instance}" || return 1
                fi
                instance=$((instance + 1))
            done
            ;;
        *)
            # If globally-unique=false and Pacemaker version is 1.1.8 or higher
            # Master/Slave resource has no instance number
            set_master_score $1 $2 ${RESOURCE_NAME} || return 1
            ;;
    esac
    return 0
}
# report_psql_error: log why a psql call failed.
# arg1: psql exit status, arg2 (optional): log level for the generic and the
#       connection-error message (default "err").
# Exit statuses 1 and 3 are always logged at level "err".
report_psql_error()
{
    local psql_rc
    local level

    psql_rc=$1
    level=${2:-err}

    ocf_log $level "PostgreSQL $OCF_RESKEY_pgdb isn't running"
    case "$psql_rc" in
        1)
            ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command."
            ;;
        2)
            ocf_log $level "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command."
            ;;
        3)
            ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command."
            ;;
    esac
}
#
# timeout management function
# arg1 : command
# arg2 : command's args
# arg3 : timeout(s)
#
# Runs "arg1 arg2" in the background and polls it once per second. When it
# is still running after arg3 polls it is killed with signal 9 and 0 is
# returned; otherwise the command's own exit status is returned.
#
exec_func_with_timeout() {
    local func_pid
    local count
    local rc

    # arg2 is re-parsed with eval so that quoted values inside it survive.
    $1 $(eval echo $2) &
    func_pid=$!

    count=0
    while kill -s 0 $func_pid >/dev/null 2>&1; do
        sleep 1
        count=$((count + 1))
        [ $count -ge $3 ] || continue

        ocf_log debug "Execute $1 time out."
        kill -s 9 $func_pid >/dev/null 2>&1
        return 0
    done

    wait $func_pid
}
# is_node_online: succeeds when the lower-cased "crm_mon -1 -n" output has a
# "node <arg1>" line (name followed by a blank or a colon) that does not
# contain "offline".
is_node_online() {
    crm_mon -1 -n \
        | tr '[A-Z]' '[a-z]' \
        | grep -e "^node $1 " -e "^node $1:" \
        | grep -q -v "offline"
}
# node_exist: succeeds when the lower-cased "crm_mon -1 -n" output has a line
# starting with "node <arg1>".
node_exist() {
    local line_prefix

    line_prefix="^node $1"
    crm_mon -1 -n | tr '[A-Z]' '[a-z]' | grep -q "$line_prefix"
}
# check_binary2: succeeds when have_binary finds command arg1; otherwise logs
# an error and returns 1.
check_binary2() {
    have_binary "$1" && return 0

    ocf_log err "Setup problem: couldn't find command: $1"
    return 1
}
# check_config: check that configuration file arg1 is a regular file.
# Returns 0 when it is, 1 when it is missing during a probe, and 2 when it is
# missing otherwise.
check_config() {
    local rc=0

    if [ -f "$1" ]; then
        return $rc
    fi

    if ocf_is_probe; then
        ocf_log info "Configuration file is $1 not readable during probe."
        rc=1
    else
        ocf_log err "Configuration file $1 doesn't exist"
        rc=2
    fi
    return $rc
}
# Validate most critical parameters
# Returns $OCF_SUCCESS when everything checked here is acceptable, otherwise
# one of $OCF_ERR_INSTALLED, $OCF_ERR_PERM or $OCF_ERR_CONFIGURED.
# Besides validating, this function has side effects: it may set
# OCF_RESKEY_socketdir, add/remove the include directive for $REP_MODE_CONF in
# $OCF_RESKEY_config, and create $OCF_RESKEY_tmpdir.
pgsql_validate_all() {
local version
local check_config_rc
local rep_mode_string
if ! check_binary2 "$OCF_RESKEY_pgctl" ||
! check_binary2 "$OCF_RESKEY_psql"; then
return $OCF_ERR_INSTALLED
fi
check_config "$OCF_RESKEY_config"
check_config_rc=$?
# 2: config file missing outside a probe; 1: missing during a probe (tolerated).
[ $check_config_rc -eq 2 ] && return $OCF_ERR_INSTALLED
# Default the socket directory from the config file only if it is unset.
[ $check_config_rc -eq 0 ] && : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`}
getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1
if [ ! $? -eq 0 ]; then
ocf_log err "User $OCF_RESKEY_pgdba doesn't exist";
return $OCF_ERR_INSTALLED;
fi
if ocf_is_probe; then
ocf_log info "Don't check $OCF_RESKEY_pgdata during probe"
else
if ! runasowner "test -w $OCF_RESKEY_pgdata"; then
ocf_log err "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba"
return $OCF_ERR_PERM;
fi
fi
# monitor_user and monitor_password must be given together.
if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ]
then
ocf_log err "monitor password can't be empty"
return $OCF_ERR_CONFIGURED
fi
if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ]
then
ocf_log err "monitor_user has to be set if monitor_password is set"
return $OCF_ERR_CONFIGURED
fi
# Checks shared by every replication-related rep_mode (everything but "none").
if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
version=`cat $OCF_RESKEY_pgdata/PG_VERSION`
# The smaller of "$version" and "9.1" (numeric sort) must be 9.1.
if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then
ocf_log err "Replication mode needs PostgreSQL 9.1 or higher."
return $OCF_ERR_INSTALLED
fi
if [ ! -n "$OCF_RESKEY_master_ip" ]; then
ocf_log err "master_ip can't be empty."
return $OCF_ERR_CONFIGURED
fi
fi
if is_replication; then
if ! ocf_is_ms; then
ocf_log err "Replication(rep_mode=async or sync) requires Master/Slave configuration."
return $OCF_ERR_CONFIGURED
fi
if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then
ocf_log err "Invalid rep_mode : $OCF_RESKEY_rep_mode"
return $OCF_ERR_CONFIGURED
fi
if [ ! -n "$NODE_LIST" ]; then
ocf_log err "node_list can't be empty."
return $OCF_ERR_CONFIGURED
fi
# Keep the include directive in the config file in step with rep_mode:
# present for "sync", absent for "async".
if [ $check_config_rc -eq 0 ]; then
rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA"
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
if ! grep -q "$rep_mode_string" $OCF_RESKEY_config; then
ocf_log info "adding include directive into $OCF_RESKEY_config"
echo "$rep_mode_string" >> $OCF_RESKEY_config
fi
else
if grep -q "$rep_mode_string" $OCF_RESKEY_config; then
ocf_log info "deleting include directive from $OCF_RESKEY_config"
# Slashes in the directive are escaped for use inside the sed address.
# NOTE(review): the ${var//pat/repl} expansion is not POSIX sh - confirm
# that the interpreter running this script supports it.
sed -i "/${rep_mode_string//\//\\/}/d" $OCF_RESKEY_config
fi
fi
fi
if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then
ocf_log err "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba"
return $OCF_ERR_PERM
fi
fi
if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
if ocf_is_ms; then
ocf_log err "Replication(rep_mode=slave) does not support Master/Slave configuration."
return $OCF_ERR_CONFIGURED
fi
fi
return $OCF_SUCCESS
}
#
# Check if we need to create a log file
# arg1: path of the log file. A missing file is created and handed over to
# $OCF_RESKEY_pgdba and its primary group (4th field of the passwd entry).
# Returns 0 when the database owner can write to the file, 1 otherwise.
#
check_log_file() {
    if [ ! -f "$1" ]; then
        touch $1 > /dev/null 2>&1
        chown $OCF_RESKEY_pgdba:$(getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4) $1
    fi

    #Check if $OCF_RESKEY_pgdba can write to the log file
    runasowner "test -w $1" || return 1
    return 0
}
#
# Check socket directory
# An existing $OCF_RESKEY_socketdir is probed by creating (and removing) a
# test file as the database owner. A missing one is created, given to
# $OCF_RESKEY_pgdba and its primary group, and set to mode 2775.
# Any failure terminates the agent with $OCF_ERR_PERM.
#
check_socket_dir() {
    if [ -d "$OCF_RESKEY_socketdir" ]; then
        runasowner "touch $OCF_RESKEY_socketdir/test.$$" || {
            ocf_log err "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        }
        rm $OCF_RESKEY_socketdir/test.$$
    else
        mkdir "$OCF_RESKEY_socketdir" || {
            ocf_log err "Can't create directory $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        }

        chown $OCF_RESKEY_pgdba:$(getent passwd \
            $OCF_RESKEY_pgdba | cut -d ":" -f 4) "$OCF_RESKEY_socketdir" || {
            ocf_log err "Can't change ownership for $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        }

        if ! chmod 2775 "$OCF_RESKEY_socketdir"; then
            ocf_log err "Can't change permissions for $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        fi
    fi
}
#
# 'main' starts here...
#
# Exactly one argument (the action name) is expected.
if [ $# -ne 1 ]
then
usage
exit $OCF_ERR_GENERIC
fi
# Paths and names derived from the resource parameters.
PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid
BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label
# Resource name without the ":N" instance suffix.
RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1`
PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status"
RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf
# Node names are compared in lower case throughout.
NODENAME=`uname -n | tr '[A-Z]' '[a-z]'`
# Settings that are only defined in replication mode.
if is_replication; then
REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf
PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock
XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note
# These hold a command plus options and are expanded unquoted on purpose.
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount"
# Master score values.
CAN_NOT_PROMOTE="-INFINITY"
CAN_PROMOTE="100"
PROMOTE_ME="1000"
# SQL statements used by the monitor and replication helpers.
CHECK_MS_SQL="select pg_is_in_recovery()"
CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()"
CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication"
# Node attribute names.
PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status"
PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status"
PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc"
PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline"
NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'`
fi
# Actions that are answered before any validation takes place.
case "$1" in
methods) pgsql_methods
exit $?;;
meta-data) meta_data
exit $OCF_SUCCESS;;
esac
pgsql_validate_all
rc=$?
[ "$1" = "validate-all" ] && exit $rc
# Validation failed: stop is still reported as successful, monitor/status as
# "not running"; every other action fails with the validation status.
if [ $rc -ne 0 ]
then
case "$1" in
stop) if is_replication; then
change_pgsql_status "$NODENAME" "UNKNOWN"
fi
exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $OCF_NOT_RUNNING;;
*) exit $rc;;
esac
fi
# Only root or the database owner may run the remaining actions.
US=`id -u -n`
if [ $US != root -a $US != $OCF_RESKEY_pgdba ]
then
ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba"
exit $OCF_ERR_GENERIC
fi
# make psql command options
if [ -n "$OCF_RESKEY_monitor_user" ]; then
# A dedicated monitor user is passed to psql through the environment.
PGUSER=$OCF_RESKEY_monitor_user; export PGUSER
PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD
psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb"
else
psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb"
fi
# An explicit host takes precedence over the socket directory.
if [ -n "$OCF_RESKEY_pghost" ]; then
psql_options="$psql_options -h $OCF_RESKEY_pghost"
else
if [ -n "$OCF_RESKEY_socketdir" ]; then
psql_options="$psql_options -h $OCF_RESKEY_socketdir"
fi
fi
# What kind of method was invoked?
case "$1" in
status) if pgsql_status
then
ocf_log info "PostgreSQL is up"
exit $OCF_SUCCESS
else
ocf_log info "PostgreSQL is down"
exit $OCF_NOT_RUNNING
fi;;
monitor) pgsql_monitor
exit $?;;
start) pgsql_start
exit $?;;
promote) pgsql_promote
exit $?;;
demote) pgsql_demote
exit $?;;
notify) pgsql_notify
exit $?;;
stop) pgsql_stop
exit $?;;
*)
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/pound b/heartbeat/pound
index 4fabb9317..d4ffa9fd3 100755
--- a/heartbeat/pound
+++ b/heartbeat/pound
@@ -1,321 +1,321 @@
#!/bin/sh
#
#
# Pound
#
# Description: Manage pound instances as a HA resource
#
# Author: Taro Matsuzawa <btm@tech.email.ne.jp>
#
# License: GNU General Public License (GPL)
#
# See usage() for more details
#
# OCF instance parameters:
# OCF_RESKEY_pid
# OCF_RESKEY_binary
# OCF_RESKEY_ctl_binary
# OCF_RESKEY_socket_path
# OCF_RESKEY_config
# OCF_RESKEY_name
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Default parameter values.
# Each default is recorded in an OCF_RESKEY_<name>_default variable and is
# applied only when the corresponding OCF_RESKEY_<name> is unset.

# The instance name comes first: the pidfile default below embeds it.
OCF_RESKEY_name_default="${OCF_RESOURCE_INSTANCE}"
: "${OCF_RESKEY_name=${OCF_RESKEY_name_default}}"

OCF_RESKEY_binary_default="pound"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"

OCF_RESKEY_ctl_binary_default="poundctl"
: "${OCF_RESKEY_ctl_binary=${OCF_RESKEY_ctl_binary_default}}"

OCF_RESKEY_pid_default="/var/run/pound_${OCF_RESKEY_name}.pid"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"

OCF_RESKEY_socket_path_default="/var/lib/pound/pound.cfg"
: "${OCF_RESKEY_socket_path=${OCF_RESKEY_socket_path_default}}"
# Print the resource-agent metadata XML for the pound agent on stdout.
# The here-document delimiter is unquoted, so the default="..." attributes
# are filled in from the OCF_RESKEY_*_default variables at call time.
# There is no explicit return; the function's status is that of cat.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="pound" version="0.1">
<version>1.0</version>
<longdesc lang="en">
-The Pound Resource Agent can manage pound instances.
+The Pound Resource Agent can manage Pound instances.
</longdesc>
<shortdesc lang="en">Manage a Pound instance</shortdesc>
<parameters>
<parameter name="config" unique="1" required="1">
<longdesc lang="en">
-The Pound configuration file that pound should manage, for example
+The Pound configuration file that Pound should manage, for example
"/etc/pound.cfg".
</longdesc>
<shortdesc lang="en">Pound configuration file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="name" unique="1">
<longdesc lang="en">
-Override the name of the instance that should be given to pound
+Override the name of the instance that should be given to Pound
(defaults to the resource identifier).
</longdesc>
<shortdesc lang="en">Instance name</shortdesc>
<content type="string" default="${OCF_RESKEY_name_default}" />
</parameter>
<parameter name="pid" unique="1">
<longdesc lang="en">
Write the process's PID to the specified file.
-The default will include the specified name, ie.:
-"/var/run/pound_production.pid". Unlike what this help message shows.
-It is most likely not necessary to change this parameter.
+The default will include the specified name, i.e.:
+"/var/run/pound_production.pid". Unlike what this help message shows,
+it is most likely not necessary to change this parameter.
</longdesc>
<shortdesc lang="en">Pidfile</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="binary">
<longdesc lang="en">
-This is used to start pound server.
+This is used to start Pound server.
Normally use pound.
</longdesc>
<shortdesc lang="en"></shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="ctl_binary">
<longdesc lang="en">
-This is used to watch pound status via unix socket.
+This is used to watch Pound status via Unix socket.
Normally use poundctl.
</longdesc>
<shortdesc lang="en"></shortdesc>
<content type="string" default="${OCF_RESKEY_ctl_binary_default}" />
</parameter>
<parameter name="socket_path">
<longdesc lang="en">
-Write the process's unix socket.
-This parameter is same 'Control' parameter in configuration file, ie.:
-Control "/var/lib/pound/pound.cfg"
+Write the process's Unix socket.
+This parameter is same 'Control' parameter in configuration file, i.e.:
+Control "/var/lib/pound/pound.cfg".
</longdesc>
<shortdesc lang="en"></shortdesc>
<content type="string" default="${OCF_RESKEY_socket_path_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0" />
<action name="status" timeout="20" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
}
#######################################################################
# Print a short usage summary (two lines) on stdout.
pound_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
# Report the state of the managed Pound instance.
#
# Reads:    OCF_RESKEY_pid, OCF_RESKEY_ctl_binary, OCF_RESKEY_socket_path
# Returns:  $OCF_SUCCESS      pidfile names a live process and the control
#                             binary exits 0 for the control socket
#           $OCF_ERR_GENERIC  pidfile exists but the process is gone, or the
#                             control binary reports an error
#           $OCF_NOT_RUNNING  no pidfile, or the pidfile is empty
pound_status() {
    local pid
    local rc
    local p_rc

    # check if pidfile exists and larger than 0 bytes
    # (quoted, so an empty or space-containing path cannot confuse the test)
    if [ -s "$OCF_RESKEY_pid" ]; then
        # it does, now check if the pid exists
        pid=$(cat "$OCF_RESKEY_pid")
        # $pid is deliberately left unquoted so stray whitespace in the
        # pidfile is tolerated exactly as before.
        ocf_run kill -s 0 $pid
        rc=$?
        if [ $rc -eq 0 ]; then
            ocf_log info "Pound is running"
            # ask the control binary whether the control socket answers
            ocf_run $OCF_RESKEY_ctl_binary -c "$OCF_RESKEY_socket_path"
            p_rc=$?
            if [ "$p_rc" -eq 0 ]; then
                ocf_log info "poundctl reports success"
                return $OCF_SUCCESS
            else
                ocf_log err "poundctl reports error"
                return $OCF_ERR_GENERIC
            fi
        else
            ocf_log err "Pound PID file exists, but pound is not running"
            return $OCF_ERR_GENERIC
        fi
    fi
    return $OCF_NOT_RUNNING
}
# Start Pound unless it is already running.
#
# Reads:    OCF_RESKEY_binary, OCF_RESKEY_config, OCF_RESKEY_pid
# Returns:  $OCF_SUCCESS         already running, or started and confirmed
#                                by pound_status
#           $OCF_ERR_CONFIGURED  the binary's "-c" configuration check failed
#           $OCF_ERR_GENERIC     the binary failed to launch, or pound_status
#                                reported an error while waiting
pound_start() {
    local rc
    local c_rc

    pound_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "Pound already running"
        return $OCF_SUCCESS
    fi

    # check configuration before start
    ocf_run $OCF_RESKEY_binary \
        -c -f "$OCF_RESKEY_config"
    c_rc=$?
    if [ "$c_rc" -ne 0 ]; then
        ocf_log err "Pound configuration file is not valid"
        return $OCF_ERR_CONFIGURED
    fi

    ocf_run $OCF_RESKEY_binary \
        -f "$OCF_RESKEY_config" \
        -p "$OCF_RESKEY_pid"
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "Pound failed to start"
        return $OCF_ERR_GENERIC
    fi

    # Spin waiting for pound to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        pound_status
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "Pound start failed"
            # return, like every other error path here, so the caller can
            # still log the result before exiting with it
            return $OCF_ERR_GENERIC
        fi
        sleep 2
    done

    ocf_log info "Pound started succesfully"
    return $OCF_SUCCESS
}
# Stop Pound: send SIGTERM, wait for the process to go away and fall back
# to SIGKILL when it is still reported after the wait.
#
# Reads:    OCF_RESKEY_pid, OCF_RESKEY_CRM_meta_timeout (milliseconds)
# Returns:  $OCF_SUCCESS      stopped, or was not running to begin with
#           $OCF_ERR_GENERIC  the SIGTERM could not be delivered
pound_stop() {
    local rc
    local pid
    local shutdown_timeout
    local count

    pound_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        ocf_log info "Pound already stopped"
        return $OCF_SUCCESS
    fi

    # kill the pound process
    pid=$(cat "$OCF_RESKEY_pid")
    ocf_run kill -s 0 $pid
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log warn "Pound pid is not a valid process. Assume it is already stopped"
        rm -f "$OCF_RESKEY_pid"
        return $OCF_SUCCESS
    fi

    ocf_run kill -s TERM $pid
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "Pound failed to stop"
        return $OCF_ERR_GENERIC
    fi

    # Wait until 5 seconds before the operation timeout. When the cluster
    # did not pass a timeout, fall back to the 20 seconds advertised for
    # the stop action in the metadata instead of failing the arithmetic.
    shutdown_timeout=$(( (${OCF_RESKEY_CRM_meta_timeout:-20000} / 1000) - 5 ))
    count=0
    while [ $count -lt $shutdown_timeout ]; do
        # check if process still exists
        ocf_run kill -s 0 $pid
        rc=$?
        if [ $rc -ne 0 ]; then
            # Pound stopped successfully, so let's delete the pidfile
            rm -f "$OCF_RESKEY_pid"
            break
        fi
        count=$((count + 1))
        sleep 1
        ocf_log info "Pound still hasn't stopped yet. Waiting..."
    done

    pound_status
    rc=$?
    if [ $rc -ne $OCF_NOT_RUNNING ]; then
        # Pound didn't quit on a SIGTERM, try SIGKILL
        ocf_log warn "Pound failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
        ocf_run kill -s KILL $pid
        # delete the pidfile
        rm -f "$OCF_RESKEY_pid"
    fi

    ocf_log info "Pound stopped"
    return $OCF_SUCCESS
}
# Check that the configured Pound configuration file exists.
#
# Reads:    OCF_RESKEY_config
# Returns:  $OCF_SUCCESS        the path names an existing regular file
#           $OCF_ERR_INSTALLED  otherwise, including an unset/empty path
pound_validate() {
    # Quoted: unquoted, an empty value collapsed the test to "[ -f ]",
    # which is true, so a missing required parameter passed validation.
    if [ -f "$OCF_RESKEY_config" ]; then
        return $OCF_SUCCESS
    else
        return $OCF_ERR_INSTALLED
    fi
}
# Dispatch on the requested action. Informational actions exit right away;
# the operational ones fall through so their status is logged before exit.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        pound_usage
        exit $OCF_SUCCESS
        ;;
    start)          pound_start ;;
    stop)           pound_stop ;;
    monitor|status) pound_status ;;
    validate-all)   pound_validate ;;
    *)
        pound_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/tomcat b/heartbeat/tomcat
index 27f71b59a..95cc49e3d 100755
--- a/heartbeat/tomcat
+++ b/heartbeat/tomcat
@@ -1,626 +1,626 @@
#!/bin/sh
#
# Description: Manages a Tomcat Server as an OCF High-Availability
# resource under Heartbeat/LinuxHA control
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
# Copyright (c) 2007 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
#
#######################################################################
# OCF parameters:
# OCF_RESKEY_tomcat_name - The name of the resource. Default is tomcat
# OCF_RESKEY_script_log - A destination of the log of this script. Default /var/log/OCF_RESKEY_tomcat_name.log
# OCF_RESKEY_tomcat_stop_timeout - Time-out at the time of the stop. Default is 5. DEPRECATED
# OCF_RESKEY_tomcat_suspend_trialcount - The re-try number of times awaiting a stop. Default is 10. DEPRECATED
# OCF_RESKEY_tomcat_user - A user name to start a resource.
# OCF_RESKEY_statusurl - URL for state confirmation. Default is http://127.0.0.1:8080
# OCF_RESKEY_max_stop_time - The max time it should take for proper shutdown. Restrictions, only Tomcat6.
# OCF_RESKEY_java_home - Home directory of Java. Default is none
# OCF_RESKEY_java_opts - Options to pass to Java JVM for start and stop. Default is none
# OCF_RESKEY_catalina_home - Home directory of Tomcat. Default is none
# OCF_RESKEY_catalina_base - Base directory of Tomcat. Default is OCF_RESKEY_catalina_home
# OCF_RESKEY_catalina_out - Log file name of Tomcat. Default is OCF_RESKEY_catalina_home/logs/catalina.out
# OCF_RESKEY_catalina_pid - A PID file name of Tomcat. Default is OCF_RESKEY_catalina_home/logs/catalina.pid
# OCF_RESKEY_tomcat_start_opts - Start options of Tomcat. Default is none.
# OCF_RESKEY_catalina_opts - CATALINA_OPTS environment variable. Default is none.
# OCF_RESKEY_catalina_tmpdir - CATALINA_TMPDIR environment variable. Default is none.
# OCF_RESKEY_catalina_rotate_log - Control catalina.out logrotation flag. Default is NO.
# OCF_RESKEY_catalina_rotatetime - catalina.out logrotation time span(seconds). Default is 86400.
# OCF_RESKEY_java_endorsed_dirs - JAVA_ENDORSED_DIRS environment variable. Default is none.
# OCF_RESKEY_logging_config - LOGGING_CONFIG environment variable. Default is none.
# OCF_RESKEY_logging_manager - LOGGING_MANAGER environment variable. Default is none.
###############################################################################
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
############################################################################
# Usage
# Print the list of supported actions on stdout.
# $0 and $WGETNAME are expanded into the text when the function is called.
usage()
{
    printf '%s\n' \
        "usage: $0 action" \
        "action:" \
        "start start Tomcat" \
        "stop stop Tomcat" \
        "status return the status of Tomcat, up or down" \
        "monitor return TRUE if Tomcat appears to be working." \
        "You have to have installed $WGETNAME for this to work." \
        "meta-data show meta data message" \
        "validate-all validate the instance parameters"
}
############################################################################
# Check tomcat service availability
# Probe the status URL with $WGET (up to 20 tries), discarding all output.
# Returns the exit status of the $WGET command.
isrunning_tomcat()
{
    # The URL is quoted so that characters such as "?" or "*" in it are
    # passed through literally instead of being treated as glob patterns.
    # $WGET stays unquoted because it is a command, possibly with options.
    $WGET --tries=20 -O /dev/null "$RESOURCE_STATUSURL" >/dev/null 2>&1
}
############################################################################
#
# Check whether the Tomcat process exists.
#
# The PID is taken from the first line of $CATALINA_PID when that file
# exists, otherwise from $rememberedPID.
# Returns the status of "kill -s 0" for that PID, or non-zero when no
# usable PID is available.
isalive_tomcat()
{
    # As the server stops, the PID file disappears. To avoid race conditions,
    # we will have remembered the PID of a running instance on script entry.
    local pid=$rememberedPID

    # If there is a PID file, use that (path quoted so it may contain spaces)
    if [ -f "$CATALINA_PID" ]; then
        ocf_log debug "Reading pid from $CATALINA_PID"
        # race conditions on PID file being removed by stopping tomcat...
        pid=$(head -n 1 "$CATALINA_PID")
    fi

    if [ -n "$pid" ] && [ "$pid" -gt 0 ]; then
        # Retry message for restraint
        ocf_log debug "Sending noop signal to $pid"
        kill -s 0 $pid >/dev/null 2>&1
        return $?
    fi

    # No PID file
    false
}
############################################################################
# Check tomcat process and service availability
# Combined health check: the process must exist and the status URL must
# answer.
# Returns $OCF_NOT_RUNNING when isalive_tomcat fails, $OCF_ERR_GENERIC when
# isrunning_tomcat fails, and $OCF_SUCCESS otherwise.
monitor_tomcat()
{
    if ! isalive_tomcat; then
        return $OCF_NOT_RUNNING
    fi
    if ! isrunning_tomcat; then
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
############################################################################
# Execute catalina.out log rotation
# Replace $CATALINA_OUT with a FIFO that is drained by a background
# rotatelogs process run as $RESOURCE_TOMCAT_USER.
# Sets ROTATELOGS to the rotatelogs binary that was found.
# Returns 1 (after logging a warning) when neither rotatelogs nor
# rotatelogs2 is executable under /usr/sbin.
rotate_catalina_out()
{
    local candidate
    local found=""

    # Look for rotatelogs/rotatelogs2, in that order
    for candidate in /usr/sbin/rotatelogs /usr/sbin/rotatelogs2; do
        if [ -x "$candidate" ]; then
            found=$candidate
            break
        fi
    done
    if [ -z "$found" ]; then
        ocf_log warn "rotatelogs command not found."
        return 1
    fi
    ROTATELOGS=$found

    # Clean up and set permissions on required files
    rm -rf "$CATALINA_HOME"/temp/* "$CATALINA_OUT"
    mkfifo -m700 "$CATALINA_OUT"
    chown --dereference "$RESOURCE_TOMCAT_USER" "$CATALINA_OUT" || true

    # -s is required because tomcat5.5's login shell is /bin/false
    su - -s /bin/sh $RESOURCE_TOMCAT_USER \
        -c "$ROTATELOGS -l \"$CATALINA_HOME/logs/catalina_%F.log\" $CATALINA_ROTATETIME" \
        < "$CATALINA_OUT" > /dev/null 2>&1 &
}
############################################################################
# Tomcat Command
# Print a small shell script on stdout that exports the Tomcat environment
# and then runs catalina.sh with the arguments given to this function.
# attemptTomcatCommand pipes this script into the shell started by su.
#
# Every exported value and the catalina.sh path are wrapped in double
# quotes in the generated script, so directories containing spaces survive
# being re-parsed by the receiving shell.
tomcatCommand()
{
cat<<-END_TOMCAT_COMMAND
export JAVA_HOME="${JAVA_HOME}"
export JAVA_OPTS="${JAVA_OPTS}"
export CATALINA_HOME="${CATALINA_HOME}"
export CATALINA_BASE="${CATALINA_BASE}"
export CATALINA_OUT="${CATALINA_OUT}"
export CATALINA_PID="${CATALINA_PID}"
export CATALINA_OPTS="${CATALINA_OPTS}"
export CATALINA_TMPDIR="${CATALINA_TMPDIR}"
export JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}"
export LOGGING_CONFIG="${LOGGING_CONFIG}"
export LOGGING_MANAGER="${LOGGING_MANAGER}"
"$CATALINA_HOME/bin/catalina.sh" $@
END_TOMCAT_COMMAND
}
# Run catalina.sh with the given arguments, appending all of its output to
# $TOMCAT_CONSOLE. When RESOURCE_TOMCAT_USER is the literal RUNASIS the
# script is executed directly; otherwise the script printed by
# tomcatCommand is piped into a shell started through su for that user.
attemptTomcatCommand()
{
    case "$RESOURCE_TOMCAT_USER" in
        RUNASIS)
            "$CATALINA_HOME/bin/catalina.sh" $@ >> "$TOMCAT_CONSOLE" 2>&1
            ;;
        *)
            tomcatCommand $@ | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1
            ;;
    esac
}
############################################################################
# Start Tomcat
# Start Tomcat and wait until monitor_tomcat reports success.
#
# Exits the script with the validation status when validate_all_tomcat
# fails. Returns $OCF_SUCCESS when Tomcat is already running or once it has
# come up; the wait loop has no limit of its own and relies on the caller's
# operation timeout.
start_tomcat()
{
    cd "$CATALINA_HOME/bin"

    validate_all_tomcat || exit $?

    monitor_tomcat
    if [ $? = $OCF_SUCCESS ]; then
        return $OCF_SUCCESS
    fi

    # Remove $CATALINA_PID if it exists (quoted: the path may contain spaces)
    rm -f "$CATALINA_PID"

    #ocf_log debug "catalina.out rotation FLG = ${CATALINA_ROTATE_LOG}"
    # Quoted so that an empty flag compares as "not YES" instead of
    # producing a test syntax error.
    if [ "${CATALINA_ROTATE_LOG}" = "YES" ]; then
        if rotate_catalina_out; then
            ocf_log debug "Rotate catalina.out succeeded."
        else
            ocf_log warn "Rotate catalina.out failed. Starting tomcat without catalina.out rotation."
        fi
    fi

    echo "$(date "+%Y/%m/%d %T"): start ===========================" >> "$TOMCAT_CONSOLE"

    ocf_log debug "CATALINA_OPTS value = ${CATALINA_OPTS}"
    # TOMCAT_START_OPTS is intentionally unquoted: it is a list of options.
    attemptTomcatCommand start ${TOMCAT_START_OPTS} &

    while true; do
        monitor_tomcat
        if [ $? = $OCF_SUCCESS ]; then
            break
        fi
        ocf_log debug "start_tomcat[$TOMCAT_NAME]: retry monitor_tomcat"
        sleep 3
    done

    return $OCF_SUCCESS
}
############################################################################
# Stop Tomcat
# Stop Tomcat through "catalina.sh stop <timeout> -force", then keep
# sending SIGKILL to the remembered PID for as long as isalive_tomcat still
# reports the process. Finally removes the PID file (and the catalina.out
# FIFO when log rotation is enabled).
#
# The stop timeout handed to catalina.sh is the operation timeout minus 5
# seconds unless MAX_STOP_TIME is set; MAX_STOP_TIME=0 selects the full
# operation timeout.
# Sets RA_TIMEOUT, STOP_TIMEOUT and lapse_sec. Always returns $OCF_SUCCESS.
stop_tomcat()
{
    RA_TIMEOUT=$((OCF_RESKEY_CRM_meta_timeout/1000))
    STOP_TIMEOUT=$((RA_TIMEOUT-5))
    if [ -n "$MAX_STOP_TIME" ]; then
        if [ "$MAX_STOP_TIME" -gt "$RA_TIMEOUT" ]; then
            ocf_log warn "max_stop_timeout must be shorter than the timeout of stop operation."
        fi
        if [ "$MAX_STOP_TIME" -eq 0 ]; then
            STOP_TIMEOUT=$RA_TIMEOUT
        else
            STOP_TIMEOUT=$MAX_STOP_TIME
        fi
    fi

    cd "$CATALINA_HOME/bin"

    memorize_pid # This lets monitoring continue to work reliably

    echo "$(date "+%Y/%m/%d %T"): stop ###########################" >> "$TOMCAT_CONSOLE"

    attemptTomcatCommand stop $STOP_TIMEOUT -force

    lapse_sec=0
    while isalive_tomcat; do
        sleep 1
        lapse_sec=$((lapse_sec + 1))
        ocf_log debug "stop_tomcat[$TOMCAT_NAME]: stop failed, killing with SIGKILL ($lapse_sec)"
        kill -KILL $rememberedPID
    done

    # Quoted so that an empty flag compares as "not YES" instead of
    # producing a test syntax error.
    if [ "${CATALINA_ROTATE_LOG}" = "YES" ]; then
        rm -f "$CATALINA_PID" "${CATALINA_OUT}"
    else
        rm -f "$CATALINA_PID"
    fi
    return $OCF_SUCCESS
}
# Print the resource-agent metadata XML for the tomcat agent on stdout:
# the supported parameters with their descriptions and the list of actions
# with their timeouts. Returns $OCF_SUCCESS.
metadata_tomcat()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="tomcat">
<version>1.0</version>
<longdesc lang="en">
Resource script for Tomcat. It manages a Tomcat instance as a cluster resource.
</longdesc>
<shortdesc lang="en">Manages a Tomcat servlet environment instance</shortdesc>
<parameters>
<parameter name="tomcat_name" unique="1" >
<longdesc lang="en"><![CDATA[
The name of the resource, added as a Java parameter in JAVA_OPTS:
-Dname=<tomcat_name> to Tomcat process on start. Used to ensure
process is still running and must be unique.
]]></longdesc>
<shortdesc>The name of the resource</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="script_log" unique="1">
<longdesc lang="en">
Log file, used during start and stop operations.
</longdesc>
<shortdesc>Log file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="tomcat_stop_timeout" unique="0">
<longdesc lang="en">
Time-out for stop operation. DEPRECATED
</longdesc>
<shortdesc>Time-out for the stop operation. DEPRECATED</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="tomcat_suspend_trialcount" unique="0">
<longdesc lang="en">
Maximum number of times to retry stop operation before suspending
and killing Tomcat. DEPRECATED. Does not retry.
</longdesc>
<shortdesc>Max retry count for stop operation. DEPRECATED</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="tomcat_user" unique="0">
<longdesc lang="en">
The user who starts Tomcat.
</longdesc>
<shortdesc>The user who starts Tomcat</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="statusurl" unique="0">
<longdesc lang="en">
URL for state confirmation.
</longdesc>
<shortdesc>URL for state confirmation</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="max_stop_time" unique="0">
<longdesc lang="en">
Number of seconds to wait during a stop before drastic measures
(force kill) are used on the tomcat process.
This number MUST be less than your cluster stop timeout for the resource.
The default value is five seconds before the timeout value of stop operation.
-When it is over this value, it stop a process in kill commands.
+When it is over this value, it stops a process in kill commands.
This parameter is only effective on Tomcat 6 or later.
</longdesc>
<shortdesc>The max time it should take for proper shutdown.</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="java_home" unique="0" required="1">
<longdesc lang="en">
Home directory of Java.
</longdesc>
<shortdesc>Home directory of Java</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="java_opts" unique="0">
<longdesc lang="en">
Java JVM options used on start and stop.
</longdesc>
<shortdesc>Java options parsed to JVM, used on start and stop.</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_home" unique="1" required="1">
<longdesc lang="en">
Home directory of Tomcat.
</longdesc>
<shortdesc>Home directory of Tomcat</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_base" unique="1">
<longdesc lang="en">
Instance directory of Tomcat
</longdesc>
<shortdesc>Instance directory of Tomcat, defaults to catalina_home</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_out" unique="1">
<longdesc lang="en">
Log file name of Tomcat
</longdesc>
<shortdesc>Log file name of Tomcat, defaults to catalina_home/logs/catalina.out</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_pid" unique="1">
<longdesc lang="en">
A PID file name for Tomcat.
</longdesc>
<shortdesc>A PID file name for Tomcat</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="tomcat_start_opts" unique="0">
<longdesc lang="en">
Tomcat start options.
</longdesc>
<shortdesc>Tomcat start options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_opts" unique="0">
<longdesc lang="en">
Catalina options, for the start operation only.
</longdesc>
<shortdesc>Catalina options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_tmpdir" unique="1">
<longdesc lang="en">
Temporary directory of Tomcat
</longdesc>
<shortdesc>Temporary directory of Tomcat, defaults to none</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_rotate_log" unique="0">
<longdesc lang="en">
Rotate catalina.out flag.
</longdesc>
<shortdesc>Rotate catalina.out flag</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_rotatetime" unique="0">
<longdesc lang="en">
catalina.out rotation interval (seconds).
</longdesc>
<shortdesc>catalina.out rotation interval (seconds)</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="java_endorsed_dirs" unique="1">
<longdesc lang="en">
Java_endorsed_dirs of tomcat
</longdesc>
-<shortdesc>Java_endorsed_dirs of tomcat, defaults to none</shortdesc>
+<shortdesc>Java_endorsed_dirs of Tomcat, defaults to none</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="logging_config" unique="1">
<longdesc lang="en">
Logging_config of tomcat
</longdesc>
-<shortdesc>Logging_config of tomcat, defaults to none</shortdesc>
+<shortdesc>Logging_config of Tomcat, defaults to none</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="logging_manager" unique="1">
<longdesc lang="en">
Logging_manager of tomcat
</longdesc>
-<shortdesc>Logging_manager of tomcat, defaults to none.</shortdesc>
+<shortdesc>Logging_manager of Tomcat, defaults to none.</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5"/>
</actions>
</resource-agent>
END
return $OCF_SUCCESS
}
# Validate the instance parameters.
#
# Checks that $WGET is available (via check_binary), that MAX_STOP_TIME is
# not negative and, when the status URL names a port, that
# $CATALINA_HOME/conf/server.xml contains a matching port="<n>" attribute.
#
# Sets misconfigured, notinstalled, wrongpermissions and port.
# Returns:  $OCF_ERR_CONFIGURED  MAX_STOP_TIME is negative
#           $OCF_ERR_INSTALLED   the status URL port is not in server.xml
#           $OCF_ERR_PERM        wrongpermissions was raised (no check in
#                                this function currently raises it)
#           $OCF_SUCCESS         otherwise
validate_all_tomcat()
{
    ocf_log info "validate_all_tomcat[$TOMCAT_NAME]"
    misconfigured=0
    notinstalled=0
    wrongpermissions=0

    check_binary $WGET

    if [ -n "$MAX_STOP_TIME" ] && [ "$MAX_STOP_TIME" -lt 0 ]; then
        ocf_log err "max_stop_time must be set to a value greater than 0."
        misconfigured=1
    fi

    # "a colon followed by a digit anywhere in the URL", written as a
    # portable case pattern because the script runs under /bin/sh where
    # the bash-only [[ ... =~ ... ]] is not available.
    case "$RESOURCE_STATUSURL" in
        *:[0-9]*)
            # text after the last colon, cut at the first slash
            port=${RESOURCE_STATUSURL##*:}
            port=${port%%/*}
            ocf_log debug "Tomcat port is $port"
            ocf_log debug "grep port=\"$port\" $CATALINA_HOME/conf/server.xml"
            if [ "$port" -gt 0 ]; then
                # path quoted so CATALINA_HOME may contain spaces
                grep "port=\"$port\"" "$CATALINA_HOME/conf/server.xml" > /dev/null 2>&1
                if [ $? -ne 0 ]; then
                    ocf_log err "Your configured status URL specifies a port ($port), but the server does not have a connector listening to that port in $CATALINA_HOME/conf/server.xml"
                    notinstalled=1
                fi
            fi
            ;;
    esac

    if [ $misconfigured -gt 0 ]; then
        return $OCF_ERR_CONFIGURED
    fi
    if [ $notinstalled -gt 0 ]; then
        return $OCF_ERR_INSTALLED
    fi
    if [ $wrongpermissions -gt 0 ]; then
        return $OCF_ERR_PERM
    fi
    return $OCF_SUCCESS
}
# As we stop Tomcat, it removes its own PID file; we still want to know
# what the PID was. Stores the content of $CATALINA_PID in rememberedPID
# when the file exists and leaves rememberedPID untouched otherwise.
memorize_pid()
{
    # path quoted so it may contain spaces
    if [ -f "$CATALINA_PID" ]; then
        rememberedPID=$(cat "$CATALINA_PID")
    fi
}
#
### tomcat RA environment variables
#
# The "${var-default}" form applies the default only when the parameter is
# unset; a parameter that is set but empty is taken as given.

# Requested action
COMMAND=$1

# Agent-level settings
TOMCAT_NAME=${OCF_RESKEY_tomcat_name-tomcat}
TOMCAT_CONSOLE=${OCF_RESKEY_script_log-/var/log/$TOMCAT_NAME.log}
RESOURCE_TOMCAT_USER=${OCF_RESKEY_tomcat_user-RUNASIS}
RESOURCE_STATUSURL=${OCF_RESKEY_statusurl-http://127.0.0.1:8080}
MAX_STOP_TIME=$OCF_RESKEY_max_stop_time
TOMCAT_START_OPTS=$OCF_RESKEY_tomcat_start_opts

# Java settings
JAVA_HOME=$OCF_RESKEY_java_home
JAVA_OPTS=$OCF_RESKEY_java_opts
JAVA_ENDORSED_DIRS=$OCF_RESKEY_java_endorsed_dirs

# Catalina locations; the log and PID file defaults hang off CATALINA_HOME
CATALINA_HOME=$OCF_RESKEY_catalina_home
CATALINA_BASE=${OCF_RESKEY_catalina_base-$OCF_RESKEY_catalina_home}
CATALINA_OUT=${OCF_RESKEY_catalina_out-$CATALINA_HOME/logs/catalina.out}
CATALINA_PID=${OCF_RESKEY_catalina_pid-$CATALINA_HOME/logs/catalina.pid}
CATALINA_TMPDIR=$OCF_RESKEY_catalina_tmpdir

# Catalina options always carry -Dname=<tomcat_name>
CATALINA_OPTS="-Dname=$TOMCAT_NAME $OCF_RESKEY_catalina_opts"

# catalina.out rotation
CATALINA_ROTATE_LOG=${OCF_RESKEY_catalina_rotate_log-NO}
CATALINA_ROTATETIME=${OCF_RESKEY_catalina_rotatetime-86400}

# Logging
LOGGING_CONFIG=$OCF_RESKEY_logging_config
LOGGING_MANAGER=$OCF_RESKEY_logging_manager

# Exit code used by the "status" action when the installation is missing
LSB_STATUS_STOPPED=3
# Exactly one argument, the action, is accepted.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# Informational actions need no working installation.
case "$COMMAND" in
    help|usage)
        usage
        exit $OCF_SUCCESS
        ;;
    meta-data)
        metadata_tomcat
        exit $OCF_SUCCESS
        ;;
esac

# Without the Java or Tomcat directories nothing can be running here, so
# stop/monitor/status answer accordingly; other actions are an error.
if [ ! -d "$JAVA_HOME" ] || [ ! -d "$CATALINA_HOME" ] || [ ! -d "$CATALINA_BASE" ]; then
    case $COMMAND in
        stop)    exit $OCF_SUCCESS ;;
        monitor) exit $OCF_NOT_RUNNING ;;
        status)  exit $LSB_STATUS_STOPPED ;;
    esac
    ocf_log err "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist."
    exit $OCF_ERR_INSTALLED
fi

export JAVA_HOME JAVA_OPTS CATALINA_HOME CATALINA_BASE CATALINA_OUT CATALINA_PID CATALINA_OPTS CATALINA_TMPDIR JAVA_ENDORSED_DIRS LOGGING_CONFIG LOGGING_MANAGER

# The same fallback applies when the java executable itself is missing.
JAVA=${JAVA_HOME}/bin/java
if [ ! -x "$JAVA" ]; then
    case $COMMAND in
        stop)    exit $OCF_SUCCESS ;;
        monitor) exit $OCF_NOT_RUNNING ;;
        status)  exit $LSB_STATUS_STOPPED ;;
    esac
    ocf_log err "java command does not exist."
    exit $OCF_ERR_INSTALLED
fi
#
# ------------------
# the main script
# ------------------
#
# Run the requested action and exit with its status.
case "$COMMAND" in
    start)
        ocf_log debug "[$TOMCAT_NAME] Enter tomcat start"
        start_tomcat
        rc=$?
        ocf_log debug "[$TOMCAT_NAME] Leave tomcat start $rc"
        exit $rc
        ;;
    stop)
        ocf_log debug "[$TOMCAT_NAME] Enter tomcat stop"
        stop_tomcat
        rc=$?
        ocf_log debug "[$TOMCAT_NAME] Leave tomcat stop $rc"
        exit $rc
        ;;
    status)
        # Human-readable variant of monitor; both outcomes exit here.
        monitor_tomcat || {
            echo tomcat instance $TOMCAT_NAME is stopped
            exit $OCF_NOT_RUNNING
        }
        echo tomcat instance $TOMCAT_NAME is running
        exit $OCF_SUCCESS
        ;;
    monitor)
        #ocf_log debug "[$TOMCAT_NAME] Enter tomcat monitor"
        monitor_tomcat
        rc=$?
        #ocf_log debug "[$TOMCAT_NAME] Leave tomcat monitor $rc"
        exit $rc
        ;;
    meta-data)
        metadata_tomcat
        exit $?
        ;;
    validate-all)
        validate_all_tomcat
        exit $?
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
diff --git a/heartbeat/varnish b/heartbeat/varnish
index 99e2092df..b9d7df3fe 100755
--- a/heartbeat/varnish
+++ b/heartbeat/varnish
@@ -1,414 +1,414 @@
#!/bin/sh
#
#
# Varnish
#
# Description: Manage varnish instances as a HA resource
#
# Author: Léon Keijser <keijser@stone-it.com>
#
# License: GNU General Public License (GPL)
#
# See usage() for more details
#
# OCF instance parameters:
# OCF_RESKEY_pid
# OCF_RESKEY_binary
# OCF_RESKEY_client_binary
# OCF_RESKEY_config
# OCF_RESKEY_name
# OCF_RESKEY_listen_address
# OCF_RESKEY_mgmt_address
# OCF_RESKEY_ttl
# OCF_RESKEY_varnish_user
# OCF_RESKEY_varnish_group
# OCF_RESKEY_backend_type
# OCF_RESKEY_backend_size
# OCF_RESKEY_backend_file
# OCF_RESKEY_worker_threads
#
#######################################################################
# Initialization:
# Keep OCF_FUNCTIONS_DIR if the caller set it (even to an empty value);
# otherwise fall back to ${OCF_ROOT}/lib/heartbeat.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
# Source the shared OCF shell function library from that directory.
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Default parameter values.
# Each default is defined and then applied only when the corresponding
# OCF_RESKEY_* variable is not set at all.

# The instance name goes first: the pid and backend_file defaults embed it.
OCF_RESKEY_name_default=${OCF_RESOURCE_INSTANCE}
: "${OCF_RESKEY_name=${OCF_RESKEY_name_default}}"

# Executables
OCF_RESKEY_binary_default=varnishd
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
OCF_RESKEY_client_binary_default=varnishadm
: "${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}"

# PID file and listen address
OCF_RESKEY_pid_default=/var/run/varnishd_${OCF_RESKEY_name}.pid
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
OCF_RESKEY_listen_address_default=0.0.0.0:80
: "${OCF_RESKEY_listen_address=${OCF_RESKEY_listen_address_default}}"

# Cache TTL and the unprivileged user/group of the child process
OCF_RESKEY_ttl_default=600
: "${OCF_RESKEY_ttl=${OCF_RESKEY_ttl_default}}"
OCF_RESKEY_varnish_user_default=varnish
: "${OCF_RESKEY_varnish_user=${OCF_RESKEY_varnish_user_default}}"
OCF_RESKEY_varnish_group_default=varnish
: "${OCF_RESKEY_varnish_group=${OCF_RESKEY_varnish_group_default}}"

# Storage backend
OCF_RESKEY_backend_type_default=malloc
: "${OCF_RESKEY_backend_type=${OCF_RESKEY_backend_type_default}}"
OCF_RESKEY_backend_size_default=1G
: "${OCF_RESKEY_backend_size=${OCF_RESKEY_backend_size_default}}"
OCF_RESKEY_backend_file_default=/var/lib/varnish/${OCF_RESKEY_name}.bin
: "${OCF_RESKEY_backend_file=${OCF_RESKEY_backend_file_default}}"

# Worker threads (min,max,timeout)
OCF_RESKEY_worker_threads_default=100,3000,120
: "${OCF_RESKEY_worker_threads=${OCF_RESKEY_worker_threads_default}}"
# Print the resource agent metadata (XML) on stdout.
# The heredoc delimiter is unquoted, so the ${OCF_RESKEY_*_default}
# references below are expanded to the defaults computed at start-up.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="varnish" version="0.1">
<version>1.0</version>
<longdesc lang="en">
The Varnish Resource Agent can manage several varnishd
instances throughout the cluster. It does so by creating
a unique PID file and requires a unique listen address
and name for each instance.
</longdesc>
<shortdesc lang="en">Manage a Varnish instance</shortdesc>
<parameters>
<parameter name="config" unique="1" required="1">
<longdesc lang="en">
-The VCL configuration file that varnish should manage, for example
+The VCL configuration file that Varnish should manage, for example
"/etc/varnish/default.vcl".
</longdesc>
<shortdesc lang="en">VCL file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="name" unique="1">
<longdesc lang="en">
-Override the name of the instance that should be given to varnish
+Override the name of the instance that should be given to Varnish
(defaults to the resource identifier).
</longdesc>
<shortdesc lang="en">Instance name</shortdesc>
<content type="string" default="${OCF_RESKEY_name_default}" />
</parameter>
<parameter name="pid" unique="1">
<longdesc lang="en">
Write the process's PID to the specified file.
-The default will include the specified name, ie.:
-"/var/run/varnish_production.pid". Unlike what this help message shows.
-It is most likely not necessary to change this parameter.
+The default will include the specified name, i.e.:
+"/var/run/varnish_production.pid". Unlike what this help message shows,
+it is most likely not necessary to change this parameter.
</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="listen_address" unique="1">
<longdesc lang="en">
Listen on this address:port, for example "192.168.1.1:80"
</longdesc>
<shortdesc lang="en">Listen address</shortdesc>
<content type="string" default="${OCF_RESKEY_listen_address_default}" />
</parameter>
<parameter name="mgmt_address" unique="1" required="1">
<longdesc lang="en">
Provide a management interface, for example "127.0.0.1:2222"
</longdesc>
<shortdesc lang="en">Management interface</shortdesc>
<content type="string" />
</parameter>
<parameter name="ttl">
<longdesc lang="en">
-Specifies a hard minimum time to live for cached documents.
+Specify a hard minimum time to live for cached documents.
</longdesc>
<shortdesc lang="en">TTL</shortdesc>
<content type="integer" default="${OCF_RESKEY_ttl_default}" />
</parameter>
<parameter name="varnish_user">
<longdesc lang="en">
-Specifies the name of an unprivileged user to which the
+Specify the name of an unprivileged user to which the
child process should switch before it starts accepting
connections.
</longdesc>
<shortdesc lang="en">Unprivileged user</shortdesc>
<content type="string" default="${OCF_RESKEY_varnish_user_default}" />
</parameter>
<parameter name="varnish_group">
<longdesc lang="en">
-Specifies the name of an unprivileged group to which
+Specify the name of an unprivileged group to which
the child process should switch before it starts accepting
connections.
</longdesc>
<shortdesc lang="en">Unprivileged group</shortdesc>
<content type="string" default="${OCF_RESKEY_varnish_group_default}" />
</parameter>
<parameter name="backend_type">
<longdesc lang="en">
Use the specified storage backend. Valid options are
'malloc' for memory and 'file' for a file backend.
</longdesc>
<shortdesc lang="en">Backend type</shortdesc>
<content type="string" default="${OCF_RESKEY_backend_type_default}" />
</parameter>
<parameter name="backend_size">
<longdesc lang="en">
Specify the size of the backend. For example "1G".
</longdesc>
<shortdesc lang="en">Backend size</shortdesc>
<content type="string" default="${OCF_RESKEY_backend_size_default}" />
</parameter>
<parameter name="backend_file" unique="1">
<longdesc lang="en">
Specify the backend filename if you use backend_type file.
For example /var/lib/varnish/mybackend.bin
</longdesc>
<shortdesc lang="en">Backend file</shortdesc>
<content type="string" default="${OCF_RESKEY_backend_file_default}" />
</parameter>
<parameter name="worker_threads">
<longdesc lang="en">
Start at least min but no more than max worker
threads with the specified idle timeout.
Syntax: min[,max[,timeout]]
For example: 100,3000,120
</longdesc>
<shortdesc lang="en">Worker threads</shortdesc>
<content type="string" default="${OCF_RESKEY_worker_threads_default}" />
</parameter>
<parameter name="client_binary">
<longdesc lang="en">
-This is used to control varnish via a CLI. It's currently
+This is used to control Varnish via a CLI. It's currently
only used to check the status of the running child process.
</longdesc>
<shortdesc lang="en">Varnish admin utility</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0" />
<action name="status" timeout="20" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
}
#######################################################################
# Print a short usage summary for this agent on stdout.
varnish_usage() {
  printf '%s\n' \
    "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
    "Expects to have a fully populated OCF RA-compliant environment set."
}
# Report the state of the varnishd instance described by OCF_RESKEY_pid.
# Returns:
#   OCF_SUCCESS     - PID from the pidfile is alive and the client binary's
#                     "status" command succeeds
#   OCF_ERR_GENERIC - pidfile exists but the process is gone, or the
#                     "status" command fails
#   OCF_NOT_RUNNING - no pidfile, or the pidfile is empty
varnish_status() {
  local pid
  local rc
  local v_rc

  # Quoted on purpose: with an empty OCF_RESKEY_pid an unquoted "[ -s ]"
  # collapses to a one-argument test, which is always true.
  if [ ! -s "$OCF_RESKEY_pid" ]; then
    return $OCF_NOT_RUNNING
  fi

  # The pidfile has content; check that the process it names exists.
  pid=$(cat "$OCF_RESKEY_pid")
  ocf_run kill -s 0 "$pid"
  rc=$?
  if [ $rc -ne 0 ]; then
    ocf_log err "Varnish PID file exists, but varnishd is not running"
    return $OCF_ERR_GENERIC
  fi
  ocf_log info "Varnish is running"

  # Check that the child process is started and varnish reports its
  # status as ok through the management interface.
  ocf_run "$OCF_RESKEY_client_binary" -T "$OCF_RESKEY_mgmt_address" status
  v_rc=$?
  if [ "$v_rc" -eq 0 ]; then
    ocf_log info "Varnish child reported running"
    return $OCF_SUCCESS
  fi
  ocf_log err "Varnish child not running"
  return $OCF_ERR_GENERIC
}
# Start varnishd with the configured parameters and wait until
# varnish_status reports OCF_SUCCESS.
# Returns:
#   OCF_SUCCESS        - already running, or started and verified
#   OCF_ERR_CONFIGURED - backend_type is neither "malloc" nor "file"
#   OCF_ERR_GENERIC    - varnishd could not be launched, or failed while
#                        coming up
varnish_start() {
  local rc
  local backend_options

  varnish_status
  rc=$?
  if [ $rc -eq $OCF_SUCCESS ]; then
    ocf_log info "Varnish already running"
    return $OCF_SUCCESS
  fi

  # Work out the storage specification for the selected backend.
  case "$OCF_RESKEY_backend_type" in
    malloc)
      backend_options="$OCF_RESKEY_backend_size"
      ;;
    file)
      backend_options="$OCF_RESKEY_backend_file,$OCF_RESKEY_backend_size"
      ;;
    *)
      # Not implemented; say so instead of failing silently.
      ocf_log err "Unsupported backend_type '$OCF_RESKEY_backend_type' (expected 'malloc' or 'file')"
      return $OCF_ERR_CONFIGURED
      ;;
  esac

  # Every value is quoted so that it reaches varnishd as exactly one argument.
  ocf_run "$OCF_RESKEY_binary" \
    -P "$OCF_RESKEY_pid" \
    -a "$OCF_RESKEY_listen_address" \
    -f "$OCF_RESKEY_config" \
    -T "$OCF_RESKEY_mgmt_address" \
    -t "$OCF_RESKEY_ttl" \
    -u "$OCF_RESKEY_varnish_user" \
    -g "$OCF_RESKEY_varnish_group" \
    -w "$OCF_RESKEY_worker_threads" \
    -s "$OCF_RESKEY_backend_type,$backend_options" \
    -n "$OCF_RESKEY_name"
  rc=$?
  if [ $rc -ne 0 ]; then
    ocf_log err "Varnish failed to start"
    return $OCF_ERR_GENERIC
  fi

  # Spin waiting for varnishd to come up.
  # Let the CRM/LRM time us out if required.
  while true; do
    varnish_status
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] && break
    if [ $rc -ne $OCF_NOT_RUNNING ]; then
      ocf_log err "Varnish start failed"
      # return (not exit), like every other failure path of this function,
      # so the caller can still log the result code
      return $OCF_ERR_GENERIC
    fi
    sleep 2
  done

  ocf_log info "Varnish started succesfully"
  return $OCF_SUCCESS
}
# Stop varnishd: send SIGTERM, wait for the process to exit and fall back
# to SIGKILL when it is still reported after the grace period.
# Returns:
#   OCF_SUCCESS     - already stopped, stale pidfile cleaned up, or stopped
#   OCF_ERR_GENERIC - SIGTERM could not be delivered to a live process
varnish_stop() {
  local rc
  local pid
  local shutdown_timeout
  local count

  varnish_status
  rc=$?
  if [ $rc -eq $OCF_NOT_RUNNING ]; then
    ocf_log info "Varnish already stopped"
    return $OCF_SUCCESS
  fi

  pid=$(cat "$OCF_RESKEY_pid")

  # A pidfile naming a process that no longer exists means varnishd is
  # already down. Clean up and report success rather than failing the stop
  # because the signal below cannot be delivered.
  if ! kill -s 0 "$pid" 2>/dev/null; then
    ocf_log info "Varnish process is already gone, removing stale PID file"
    rm -f "$OCF_RESKEY_pid"
    return $OCF_SUCCESS
  fi

  # kill the varnish process
  ocf_run kill -s TERM "$pid"
  rc=$?
  if [ $rc -ne 0 ]; then
    ocf_log err "Varnish failed to stop"
    return $OCF_ERR_GENERIC
  fi

  # Grace period: the operation timeout (milliseconds) minus 5 seconds.
  # Falls back to 20000 ms, the stop timeout advertised in the metadata,
  # when OCF_RESKEY_CRM_meta_timeout is not set.
  shutdown_timeout=$(( (${OCF_RESKEY_CRM_meta_timeout:-20000} / 1000) - 5 ))
  count=0
  while [ $count -lt $shutdown_timeout ]; do
    # check if process still exists
    ocf_run kill -s 0 "$pid"
    rc=$?
    if [ $rc -ne 0 ]; then
      # Varnish stopped successfully, so let's delete the pidfile
      rm -f "$OCF_RESKEY_pid"
      break
    fi
    count=$((count + 1))
    sleep 1
    ocf_log info "Varnish still hasn't stopped yet. Waiting..."
  done

  varnish_status
  rc=$?
  if [ $rc -ne $OCF_NOT_RUNNING ]; then
    # varnish didn't quit on a SIGTERM, try SIGKILL
    ocf_log warn "Varnish failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
    ocf_run kill -s KILL "$pid"
    # delete the pidfile
    rm -f "$OCF_RESKEY_pid"
  fi

  ocf_log info "Varnish stopped"
  return $OCF_SUCCESS
}
# Validate the configuration: the VCL file named by OCF_RESKEY_config must
# be an existing regular file.
# Returns OCF_SUCCESS when it is, OCF_ERR_INSTALLED otherwise.
varnish_validate() {
  # Quoted on purpose: an empty value must fail the test instead of
  # collapsing to the always-true one-argument form "[ -f ]".
  if [ -f "$OCF_RESKEY_config" ]; then
    return $OCF_SUCCESS
  fi
  ocf_log err "Varnish configuration file '$OCF_RESKEY_config' does not exist"
  return $OCF_ERR_INSTALLED
}
# Dispatch the requested action. meta-data, usage/help and unknown actions
# exit immediately; the other actions fall through so that their result
# code is logged before exiting with it.
case "$__OCF_ACTION" in
  start)          varnish_start ;;
  stop)           varnish_stop ;;
  status|monitor) varnish_status ;;
  validate-all)   varnish_validate ;;
  meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
  help|usage)
    varnish_usage
    exit $OCF_SUCCESS
    ;;
  *)
    varnish_usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
# Status of the action function that ran above.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/zabbixserver b/heartbeat/zabbixserver
index ddec371e4..fe7266ec5 100755
--- a/heartbeat/zabbixserver
+++ b/heartbeat/zabbixserver
@@ -1,336 +1,336 @@
#!/bin/sh
#
#
# zabbixserver OCF RA for zabbix_server daemon
#
# Copyright (c) 2012 Krzysztof Gajdemski <songo@debian.org.pl>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
# Keep OCF_FUNCTIONS_DIR if the caller set it (even to an empty value);
# otherwise fall back to ${OCF_ROOT}/lib/heartbeat.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
# Source the shared OCF shell function library from that directory.
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
#
# Defaults: each one is applied only when the corresponding
# OCF_RESKEY_* variable is not set at all.
#
OCF_RESKEY_binary_default="zabbix_server"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"

OCF_RESKEY_pid_default="/var/run/zabbix-server/zabbix_server.pid"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"

OCF_RESKEY_config_default=""
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"

# seconds to sleep between polls while waiting for threads cleanup
sleepint=1
#
# Functions
#
# Print the resource agent metadata (XML) on stdout.
zabbixserver_meta_data() {
# The heredoc delimiter is unquoted, so the ${OCF_RESKEY_*_default}
# references in the text are expanded when the function runs.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="zabbixserver">
<version>0.0.1</version>
<longdesc lang="en">
-This is a zabbixserver Resource Agent for zabbix_server monitoring
+This is a Zabbix server Resource Agent for zabbix_server monitoring
daemon. See: http://www.zabbix.com/
</longdesc>
<shortdesc lang="en">Zabbix server resource agent</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the zabbix_server binary.
</longdesc>
<shortdesc lang="en">Zabbix server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="pid" unique="1" required="0">
<longdesc lang="en">
Path to zabbix_server pidfile. As it's created by daemon itself
-it must be the same as specified in the zabbix configuration file
+it must be the same as specified in the Zabbix configuration file
with parameter 'PidFile='.
</longdesc>
<shortdesc lang="en">Path to pidfile</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="config" unique="1" required="0">
<longdesc lang="en">
Path to zabbix_server configuration file. Assumed server default
if not specified.
</longdesc>
<shortdesc lang="en">Path to configuration file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0"/>
<action name="validate-all" timeout="20" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Print a short usage summary for this agent on stdout.
zabbixserver_usage() {
  printf '%s\n' \
    "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
    "Expects to have a fully populated OCF RA-compliant environment set."
}
#
# Get an actual PID from a given pidfile.
#   $1 - path to the pidfile
# Prints the first line of the file when that line contains a digit and
# returns 0. Returns 1 without output when the file does not exist or its
# first line holds no digit.
#
getpid() {
  local pidfile
  local pid

  pidfile=$1
  # pidfile doesn't exist
  [ -f "$pidfile" ] || return 1

  # Only the first line matters. The status of read is ignored on purpose:
  # it is non-zero for a last line without trailing newline, yet the
  # variable is still filled.
  pid=
  IFS= read -r pid < "$pidfile"
  case "$pid" in
    *[0-9]*)
      printf '%s\n' "$pid"
      return 0
      ;;
  esac
  return 1
}
#
# Check that the directory holding the pidfile (OCF_RESKEY_pid) exists.
# Returns 0 when it does, 1 otherwise. The failure is logged at "err"
# severity, lowered to "info" during a probe.
#
check_piddir() {
  local piddir
  local severity

  # lower severity to info during probe
  severity=err
  ocf_is_probe && severity=info

  # Quoted so that a path containing spaces is treated as one path.
  piddir=$(dirname "$OCF_RESKEY_pid")
  if [ ! -d "$piddir" ]; then
    ocf_log "$severity" "PID directory ${piddir} doesn't exist"
    return 1
  fi
  return 0
}
#
# Check for the server configuration file.
#   $1 - path to the configuration file (may be empty)
# Returns 1 only when a path was given, the file is missing and this is
# not a probe; returns 0 in every other case.
#
check_config() {
  local cfg
  cfg=$1

  # nothing to check unless the user specified a file
  [ -n "$cfg" ] || return 0
  # the file is there
  [ -f "$cfg" ] && return 0

  # during a probe a missing file is only worth a note
  if ocf_is_probe; then
    ocf_log info "Can't read configuration file $cfg during probe"
    return 0
  fi

  ocf_log err "Can't read configuration file $cfg"
  return 1
}
#
# Start Zabbix daemon.
# Runs OCF_RESKEY_binary through ocf_run, adding "--config <file>" when
# OCF_RESKEY_config is non-empty. Returns the status of ocf_run.
#
startserver() {
  # Build the command as an argument list (the function's own positional
  # parameters) so a binary or configuration path containing spaces is
  # passed on as one word.
  set -- "$OCF_RESKEY_binary"

  # use additional parameters if specified
  if [ -n "$OCF_RESKEY_config" ]; then
    set -- "$@" --config "$OCF_RESKEY_config"
  fi

  ocf_log debug "Starting server using command: $*"
  ocf_run "$@"
}
#
# Check the process status.
#   $1 - PID of the process to check
# Returns the status of "kill -s 0" as run through ocf_run; all output
# is discarded.
#
process_status() {
  # signal 0 delivers nothing, it only tests whether the parent process
  # can be signalled
  ocf_run -q kill -s 0 $1 >/dev/null 2>&1
}
#
# start the agent
# Returns OCF_SUCCESS when the server is already running or was started and
# is seen by the monitor; OCF_ERR_GENERIC when the daemon cannot be started.
#
zabbixserver_start() {
  local rc

  # check the resource status
  zabbixserver_monitor
  rc=$?
  case "$rc" in
    $OCF_SUCCESS)
      ocf_log info "Resource is already running"
      return $OCF_SUCCESS
      ;;
    $OCF_NOT_RUNNING)
      ;;
    *)
      exit $OCF_ERR_GENERIC
      ;;
  esac

  # remove stale pidfile if it exists (path quoted so that spaces are safe;
  # -f tolerates the file vanishing between the test and the removal)
  if [ -f "$OCF_RESKEY_pid" ]; then
    ocf_log info "Removing stale pidfile"
    rm -f -- "$OCF_RESKEY_pid"
  fi

  startserver
  if [ $? -ne 0 ]; then
    ocf_log err "Can't start Zabbix server"
    return $OCF_ERR_GENERIC
  fi

  # Poll until the monitor sees the server; there is no upper bound here.
  while ! zabbixserver_monitor; do
    ocf_log debug "Resource has not started yet, waiting"
    sleep $sleepint
  done

  return $OCF_SUCCESS
}
#
# stop the agent
# Returns OCF_SUCCESS when the server is already stopped or was stopped;
# OCF_ERR_GENERIC when the PID cannot be determined or the process cannot
# be signalled.
#
zabbixserver_stop() {
  local pid
  local rc

  # check the resource status
  zabbixserver_monitor
  rc=$?
  case "$rc" in
    $OCF_SUCCESS)
      ;;
    $OCF_NOT_RUNNING)
      ocf_log info "Resource is already stopped"
      return $OCF_SUCCESS
      ;;
    *)
      exit $OCF_ERR_GENERIC
      ;;
  esac

  # The pidfile path is quoted so that it reaches getpid as one argument.
  pid=$(getpid "$OCF_RESKEY_pid")
  if [ $? -ne 0 ]; then
    ocf_log err "Can't find process PID"
    return $OCF_ERR_GENERIC
  fi

  # kill the process
  ocf_run -q kill "$pid"
  if [ $? -ne 0 ]; then
    ocf_log err "Can't stop process (PID $pid)"
    return $OCF_ERR_GENERIC
  fi

  # Wait until the parent process terminates.
  # NOTE: The parent may be still waiting for its children. A regular monitor
  # function will not detect this condition because the pidfile may be
  # removed just now.
  while process_status "$pid"; do
    ocf_log debug "Waiting for process to terminate..."
    sleep $sleepint
  done

  # wait if it stops really
  while zabbixserver_monitor; do
    ocf_log debug "Resource has not stopped yet, waiting"
    sleep $sleepint
  done

  # remove stale pidfile if it exists
  if [ -f "$OCF_RESKEY_pid" ]; then
    ocf_log debug "Pidfile still exists, removing"
    rm -f -- "$OCF_RESKEY_pid"
  fi

  return $OCF_SUCCESS
}
#
# resource monitor
# Returns OCF_SUCCESS when a PID can be read from OCF_RESKEY_pid and
# process_status succeeds for it; OCF_NOT_RUNNING otherwise.
#
zabbixserver_monitor() {
  local pid

  # The pidfile path is quoted so that it reaches getpid as one argument.
  pid=$(getpid "$OCF_RESKEY_pid")
  if [ $? -eq 0 ]; then
    process_status "$pid"
    if [ $? -eq 0 ]; then
      ocf_log debug "Resource is running"
      return $OCF_SUCCESS
    fi
  fi

  ocf_log info "Resource is not running"
  return $OCF_NOT_RUNNING
}
#
# validate configuration
# Returns OCF_ERR_INSTALLED when the pidfile directory check or the
# configuration file check fails, OCF_SUCCESS otherwise.
#
zabbixserver_validate_all() {
  check_piddir || return $OCF_ERR_INSTALLED
  # Quoted so that a path containing spaces is checked as one file name.
  check_config "$OCF_RESKEY_config" || return $OCF_ERR_INSTALLED
  return $OCF_SUCCESS
}
#
# main
#
# No parameter is mandatory; only the server binary must be present.
OCF_REQUIRED_PARAMS=""
OCF_REQUIRED_BINARIES="$OCF_RESKEY_binary"
# "$@" hands the command-line arguments over unchanged; an unquoted $*
# would re-split them and expand glob characters.
ocf_rarun "$@"

File Metadata

Mime Type
text/x-diff
Expires
Sun, Jul 20, 7:30 PM (3 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2081332
Default Alt Text
(321 KB)

Event Timeline