Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/heartbeat/CTDB b/heartbeat/CTDB
index 9a6f96cde..3419b544b 100755
--- a/heartbeat/CTDB
+++ b/heartbeat/CTDB
@@ -1,757 +1,757 @@
#!/bin/sh
#
# OCF Resource Agent for managing CTDB
#
# Copyright (c) 2009-2010 Novell Inc., Tim Serong
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
# OVERVIEW
#
# When run by itself, CTDB can handle IP failover and includes scripts
# to manage various services (Samba, Winbind, HTTP, etc.). When run as
# a resource in a Pacemaker cluster, this additional functionality
# should not be used; instead one should define separate resources for
# CTDB, Samba, Winbind, IP addresses, etc.
#
# As of 2010-11-17, there is no separate OCF Samba or Winbind RA, so
# it is still possible to configure CTDB so that it manages these
# resources itself. In future, once Samba and Winbind RAs are
# available, this ability will be deprecated and ultimately removed.
#
# This RA intentionally provides no ability to configure CTDB such that
# it manages IP failover, HTTP, NFS, etc.
#
#
# TODO:
# - ctdb_stop doesn't really support multiple independent CTDB instances,
# unless they're running from distinct ctdbd binaries (it uses pkill
# $OCF_RESKEY_ctdbd_binary if "ctdb stop" doesn't work, which it might
# not under heavy load - this will kill all ctdbd instances on the
# system). OTOH, running multiple CTDB instances per node is, well,
# AFAIK, completely crazy. Can't run more than one in a vanilla CTDB
# cluster, with the CTDB init script. So it might be nice to address
# this for complete semantic correctness of the RA, but shouldn't
# actually cause any trouble in real life.
# - As much as possible, get rid of auto config generation
# - Especially smb.conf
# - Verify timeouts are sane
# - Monitor differentiate between error and not running?
# - Do we need to verify globally unique setting?
# - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on
# current nodes)
# - Look at enabling set_ctdb_variables() if necessary.
# - Probably possible for sysconfig file to not be restored if
# CTDB dies unexpectedly.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Default parameter values:
# Some distro's ctdb package stores the persistent db in /var/lib/ctdb,
# others store in /var/ctdb. This attempts to detect the correct default
# directory.
var_prefix="/var/lib/ctdb"
if [ ! -d "$var_prefix" ] && [ -d "/var/ctdb" ]; then
	var_prefix="/var/ctdb"
fi
# Runtime state goes to /run where it exists, otherwise to the legacy
# /var/run.  This check has to look at (and update) run_prefix: acting on
# var_prefix here would move the socket and database defaults into /var/run.
run_prefix="/run"
if [ ! -d "$run_prefix" ] && [ -d "/var/run" ]; then
	run_prefix="/var/run"
fi
: ${OCF_RESKEY_ctdb_manages_samba:=no}
: ${OCF_RESKEY_ctdb_manages_winbind:=no}
: ${OCF_RESKEY_ctdb_service_smb:=""}
: ${OCF_RESKEY_ctdb_service_nmb:=""}
: ${OCF_RESKEY_ctdb_service_winbind:=""}
: ${OCF_RESKEY_ctdb_samba_skip_share_check:=yes}
: ${OCF_RESKEY_ctdb_monitor_free_memory:=100}
: ${OCF_RESKEY_ctdb_start_as_disabled:=no}
: ${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb}
: ${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb}
: ${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd}
: ${OCF_RESKEY_ctdb_socket:=${var_prefix}/ctdb.socket}
: ${OCF_RESKEY_ctdb_dbdir:=${var_prefix}}
: ${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb}
: ${OCF_RESKEY_ctdb_rundir:=${run_prefix}/ctdb}
: ${OCF_RESKEY_ctdb_debuglevel:=2}
: ${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf}
: ${OCF_RESKEY_smb_passdb_backend:=tdbsam}
: ${OCF_RESKEY_smb_idmap_backend:=tdb2}
#######################################################################
# Print the OCF resource-agent metadata (XML) for this agent on stdout.
# The here-document delimiter is unquoted, so the default values shown for
# ctdb_socket, ctdb_dbdir and ctdb_rundir are expanded from the current
# OCF_RESKEY_* variables at the time this function runs.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="CTDB" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This resource agent manages CTDB, allowing one to use Clustered Samba in a
Linux-HA/Pacemaker cluster. You need a shared filesystem (e.g. OCFS2 or GFS2) on
which the CTDB lock will be stored. Create /etc/ctdb/nodes containing a list
of private IP addresses of each node in the cluster, then configure this RA
as a clone. This agent expects the samba and windbind resources
to be managed outside of CTDB's control as a separate set of resources controlled
by the cluster manager. The optional support for enabling CTDB management of these
daemons will be depreciated.
For more information see http://linux-ha.org/wiki/CTDB_(resource_agent)
</longdesc>
<shortdesc lang="en">CTDB Resource Agent</shortdesc>
<parameters>
<parameter name="ctdb_recovery_lock" unique="1" required="1">
<longdesc lang="en">
The location of a shared lock file, common across all nodes.
This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock
</longdesc>
<shortdesc lang="en">CTDB shared lock file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ctdb_manages_samba" unique="0" required="0">
<longdesc lang="en">
Should CTDB manage starting/stopping the Samba service for you?
This will be deprecated in future, in favor of configuring a
separate Samba resource.
</longdesc>
<shortdesc lang="en">Should CTDB manage Samba?</shortdesc>
<content type="boolean" default="no" />
</parameter>
<parameter name="ctdb_manages_winbind" unique="0" required="0">
<longdesc lang="en">
Should CTDB manage starting/stopping the Winbind service for you?
This will be deprecated in future, in favor of configuring a
separate Winbind resource.
</longdesc>
<shortdesc lang="en">Should CTDB manage Winbind?</shortdesc>
<content type="boolean" default="no" />
</parameter>
<parameter name="ctdb_service_smb" unique="0" required="0">
<longdesc lang="en">
Name of smb init script. Only necessary if CTDB is managing
Samba directly. Will usually be auto-detected.
</longdesc>
<shortdesc lang="en">Name of smb init script</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ctdb_service_nmb" unique="0" required="0">
<longdesc lang="en">
Name of nmb init script. Only necessary if CTDB is managing
Samba directly. Will usually be auto-detected.
</longdesc>
<shortdesc lang="en">Name of nmb init script</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ctdb_service_winbind" unique="0" required="0">
<longdesc lang="en">
Name of winbind init script. Only necessary if CTDB is managing
Winbind directly. Will usually be auto-detected.
</longdesc>
<shortdesc lang="en">Name of winbind init script</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ctdb_samba_skip_share_check" unique="0" required="0">
<longdesc lang="en">
If there are very many shares it may not be feasible to check that all
of them are available during each monitoring interval. In that case
this check can be disabled.
</longdesc>
<shortdesc lang="en">Skip share check during monitor?</shortdesc>
<content type="boolean" default="yes" />
</parameter>
<parameter name="ctdb_monitor_free_memory" unique="0" required="0">
<longdesc lang="en">
If the amount of free memory drops below this value the node will
become unhealthy and ctdb and all managed services will be shutdown.
Once this occurs, the administrator needs to find the reason for the
OOM situation, rectify it and restart ctdb with "service ctdb start".
</longdesc>
<shortdesc lang="en">Minimum amount of free memory (MB)</shortdesc>
<content type="integer" default="100" />
</parameter>
<parameter name="ctdb_start_as_disabled" unique="0" required="0">
<longdesc lang="en">
When set to yes, the CTDB node will start in DISABLED mode and not
host any public ip addresses.
</longdesc>
<shortdesc lang="en">Start CTDB disabled?</shortdesc>
<content type="boolean" default="no" />
</parameter>
<parameter name="ctdb_config_dir" unique="0" required="0">
<longdesc lang="en">
The directory containing various CTDB configuration files.
The "nodes" and "notify.sh" scripts are expected to be
in this directory, as is the "events.d" subdirectory.
</longdesc>
<shortdesc lang="en">CTDB config file directory</shortdesc>
<content type="string" default="/etc/ctdb" />
</parameter>
<parameter name="ctdb_binary" unique="0" required="0">
<longdesc lang="en">
Full path to the CTDB binary.
</longdesc>
<shortdesc lang="en">CTDB binary path</shortdesc>
<content type="string" default="/usr/bin/ctdb" />
</parameter>
<parameter name="ctdbd_binary" unique="0" required="0">
<longdesc lang="en">
Full path to the CTDB cluster daemon binary.
</longdesc>
<shortdesc lang="en">CTDB Daemon binary path</shortdesc>
<content type="string" default="/usr/sbin/ctdbd" />
</parameter>
<parameter name="ctdb_socket" unique="1" required="0">
<longdesc lang="en">
Full path to the domain socket that ctdbd will create, used for
local clients to attach and communicate with the ctdb daemon.
</longdesc>
<shortdesc lang="en">CTDB socket location</shortdesc>
<content type="string" default="${OCF_RESKEY_ctdb_socket}" />
</parameter>
<parameter name="ctdb_dbdir" unique="1" required="0">
<longdesc lang="en">
The directory to put the local CTDB database files in.
Persistent database files will be put in ctdb_dbdir/persistent.
</longdesc>
<shortdesc lang="en">CTDB database directory</shortdesc>
<content type="string" default="${OCF_RESKEY_ctdb_dbdir}" />
</parameter>
<parameter name="ctdb_logfile" unique="0" required="0">
<longdesc lang="en">
Full path to log file. To log to syslog instead, use the
value "syslog".
</longdesc>
<shortdesc lang="en">CTDB log file location</shortdesc>
<content type="string" default="/var/log/ctdb/log.ctdb" />
</parameter>
<parameter name="ctdb_rundir" unique="0" required="0">
<longdesc lang="en">
Full path to ctdb runtime directory, used for storage of socket
lock state.
</longdesc>
<shortdesc lang="en">CTDB runtime directory location</shortdesc>
<content type="string" default="${OCF_RESKEY_ctdb_rundir}" />
</parameter>
<parameter name="ctdb_debuglevel" unique="0" required="0">
<longdesc lang="en">
What debug level to run at (0-10). Higher means more verbose.
</longdesc>
<shortdesc lang="en">CTDB debug level</shortdesc>
<content type="integer" default="2" />
</parameter>
<parameter name="smb_conf" unique="0" required="0">
<longdesc lang="en">
Path to default samba config file. Only necessary if CTDB
is managing Samba.
</longdesc>
<shortdesc lang="en">Path to smb.conf</shortdesc>
<content type="string" default="/etc/samba/smb.conf" />
</parameter>
<parameter name="smb_private_dir" unique="1" required="0">
<longdesc lang="en">
The directory for smbd to use for storing such files as
smbpasswd and secrets.tdb. Old versions of CTBD (prior to 1.0.50)
required this to be on shared storage. This parameter should not
be set for current versions of CTDB, and only remains in the RA
for backwards compatibility.
</longdesc>
<shortdesc lang="en">Samba private dir (deprecated)</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="smb_passdb_backend" unique="0" required="0">
<longdesc lang="en">
Which backend to use for storing user and possibly group
information. Only necessary if CTDB is managing Samba.
</longdesc>
<shortdesc lang="en">Samba passdb backend</shortdesc>
<content type="string" default="tdbsam" />
</parameter>
<parameter name="smb_idmap_backend" unique="0" required="0">
<longdesc lang="en">
Which backend to use for SID/uid/gid mapping. Only necessary
if CTDB is managing Samba.
</longdesc>
<shortdesc lang="en">Samba idmap backend</shortdesc>
<content type="string" default="tdb2" />
</parameter>
<parameter name="smb_fileid_algorithm" unique="0" required="0">
<longdesc lang="en">
Which fileid:algorithm to use with vfs_fileid. The correct
value depends on which clustered filesystem is in use, e.g.:
for OCFS2, this should be set to "fsid". Only necessary if
CTDB is managing Samba.
</longdesc>
<shortdesc lang="en">Samba VFS fileid algorithm</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" timeout="20" interval="10" depth="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
# Figure out path to /etc/sysconfig/ctdb (same logic as
# loadconfig() from /etc/ctdb/functions).  The first file that exists
# wins; CTDB_SYSCONFIG is left as it was when none of them exists.
# The configurable directory is quoted so that a path containing
# whitespace does not break the test.
if [ -f /etc/sysconfig/ctdb ]; then
	CTDB_SYSCONFIG=/etc/sysconfig/ctdb
elif [ -f /etc/default/ctdb ]; then
	CTDB_SYSCONFIG=/etc/default/ctdb
elif [ -f "$OCF_RESKEY_ctdb_config_dir/ctdb" ]; then
	CTDB_SYSCONFIG="$OCF_RESKEY_ctdb_config_dir/ctdb"
fi
# Backup paths
CTDB_SYSCONFIG_BACKUP=${CTDB_SYSCONFIG}.ctdb-ra-orig
# Run the ctdb command-line tool against this instance's socket.
# All arguments are handed through to ctdb unchanged.
# Both the -t and -T values default to CTDB's own (3 and 120); when the
# cluster manager supplied an operation timeout (in milliseconds) that
# timeout, converted to seconds, is used for both instead.
invoke_ctdb() {
	local cmd_timeout=3 cmd_timelimit=120

	if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
		cmd_timeout=$((OCF_RESKEY_CRM_meta_timeout / 1000))
		cmd_timelimit=$cmd_timeout
	fi

	$OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket -t $cmd_timeout -T $cmd_timelimit "$@"
}
# Enable any event scripts that are explicitly required.
# Any others will ultimately be invoked or not based on how they ship
# with CTDB, but will generally have no effect, because the relevant
# CTDB_MANAGES_* options won't be set in /etc/sysconfig/ctdb.
#
# Each of the three scripts is made executable (u+x) when its condition
# holds and non-executable (a-x) otherwise:
#   10.interface - a public_addresses file exists in the config dir
#   11.routing   - a static-routes file exists in the config dir
#   50.samba     - CTDB manages Samba and/or Winbind
enable_event_scripts() {
	local event_dir=$OCF_RESKEY_ctdb_config_dir/events.d
	local mode

	mode=a-x
	[ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && mode=u+x
	chmod $mode $event_dir/10.interface

	mode=a-x
	[ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ] && mode=u+x
	chmod $mode $event_dir/11.routing

	mode=a-x
	if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" ||
	   ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then
		mode=u+x
	fi
	chmod $mode $event_dir/50.samba
}
# Push every CTDB_SET_<name>=<value> shell variable to the running daemon
# with "ctdb setvar <name> <value>".
# This function has no effect (currently no way to set CTDB_SET_*)
# but remains here in case we need it in future.
# Returns $OCF_SUCCESS when every setvar call succeeded (or there was
# nothing to set) and $OCF_ERR_GENERIC when at least one of them failed.
set_ctdb_variables() {
	local rv=$OCF_SUCCESS

	# The loop is the last stage of a pipeline and may therefore run in
	# a subshell, where assignments to this function's variables would
	# be lost.  Failures are reported through the exit status of the
	# brace group instead.
	set | grep '^CTDB_SET_' | cut -d_ -f3- | {
		failed=0
		while IFS= read -r v; do
			# split at the first "=" only, so that a value may
			# itself contain "="
			invoke_ctdb setvar "${v%%=*}" "${v#*=}" || failed=1
		done
		[ $failed -eq 0 ]
	} || rv=$OCF_ERR_GENERIC

	return $rv
}
# Add necessary settings to /etc/samba/smb.conf. In a perfect world,
# we'd be able to generate a new, temporary, smb.conf file somewhere,
# something like:
# include = /etc/samba/smb.conf
# [global]
# clustering = yes
# # ...etc...
# Unfortunately, we can't do this, because there's no way to tell the
# smb init script where the temporary config is, so we just edit
# the default config file.
#
# Does nothing (and returns 0) unless ctdb_manages_samba is true.
# Otherwise the [global] section of $OCF_RESKEY_smb_conf is rewritten:
# existing lines for the settings this RA owns are dropped and a block
# delimited by "# CTDB-RA: Begin" / "# CTDB-RA: End" is inserted right
# after the "[global]" line.  The result is written to a temporary file
# next to smb.conf which then replaces it.
init_smb_conf() {
	# Don't screw around with the config if CTDB isn't managing Samba!
	ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0

	# replace these things in smb.conf
	local repl='# CTDB-RA:|passdb backend|clustering|idmap backend|private dir|ctdbd socket'

	local private_dir
	[ -n "$OCF_RESKEY_smb_private_dir" ] && private_dir="\tprivate dir = $OCF_RESKEY_smb_private_dir\n"

	local vfs_fileid
	local do_vfs=0
	if [ -n "$OCF_RESKEY_smb_fileid_algorithm" ]; then
		repl="${repl}|fileid:algorithm|fileid:mapping"
		vfs_fileid="\tfileid:algorithm = $OCF_RESKEY_smb_fileid_algorithm\n"
		if sed -n '/^[[:space:]]*\[global\]/,/^[[:space:]]*\[/p' $OCF_RESKEY_smb_conf | \
			grep -Eq '^[[:space:]]*vfs objects'; then
			# vfs objects already specified, will append fileid to existing line
			do_vfs=1
		else
			vfs_fileid="$vfs_fileid\tvfs objects = fileid\n"
		fi
	fi

	# The "vfs objects" pattern below must accept the same indentation
	# (any amount of leading whitespace, including none) as the grep
	# above; otherwise do_vfs is set but fileid is never appended.
	awk '
		/^[[:space:]]*\[/ { global = 0 }
		/^[[:space:]]*\[global\]/ { global = 1 }
		{
			if(global) {
				if ('$do_vfs' && $0 ~ /^[[:space:]]*vfs objects/ && $0 !~ /fileid/) {
					print $0" fileid"
				} else if ($0 !~ /^[[:space:]]*('"$repl"')/) {
					print
				}
			} else {
				print
			}
		}' $OCF_RESKEY_smb_conf | sed "/^[[:space:]]*\[global\]/ a\\
\t# CTDB-RA: Begin auto-generated section (do not change below)\n\
\tpassdb backend = $OCF_RESKEY_smb_passdb_backend\n\
\tclustering = yes\n\
\tidmap backend = $OCF_RESKEY_smb_idmap_backend\n\
\tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir$vfs_fileid\
\t# CTDB-RA: End auto-generated section (do not change above)" > $OCF_RESKEY_smb_conf.$$
	mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf
}
# Remove the auto-generated "# CTDB-RA: Begin" ... "# CTDB-RA: End" block
# from $OCF_RESKEY_smb_conf again.  The filtered copy is written to a
# temporary file next to smb.conf, which then replaces it.
# Does nothing (and returns 0) unless ctdb_manages_samba is true.
cleanup_smb_conf() {
	if ! ocf_is_true "$OCF_RESKEY_ctdb_manages_samba"; then
		return 0
	fi

	local scratch="$OCF_RESKEY_smb_conf.$$"
	sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' $OCF_RESKEY_smb_conf > $scratch
	mv -f $scratch $OCF_RESKEY_smb_conf
}
# Append "<name>=<value>" to $CTDB_SYSCONFIG.
#   $1 - setting name
#   $2 - value; when empty nothing is written and the function returns 1
append_ctdb_sysconfig() {
	if [ -z "$2" ]; then
		return 1
	fi
	echo "$1=$2" >> $CTDB_SYSCONFIG
}
# Replace $CTDB_SYSCONFIG with a minimal, auto-generated file holding only
# what the RA parameters ask for.  Unless the existing file already carries
# the "# CTDB-RA: Auto-generated" marker it is first copied to
# $CTDB_SYSCONFIG_BACKUP; a failed copy is logged as a warning only.
generate_ctdb_sysconfig() {
	local skip_share_check=no
	local manages_samba=no
	local manages_winbind=no

	if ! grep -qa '# CTDB-RA: Auto-generated' $CTDB_SYSCONFIG &&
	   ! cp -p $CTDB_SYSCONFIG $CTDB_SYSCONFIG_BACKUP; then
		ocf_log warn "Unable to backup $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP"
	fi

	ocf_log info "Generating new $CTDB_SYSCONFIG"

	# Maintainers beware: a given parameter may have to be written
	# here (for the CTDB event scripts to pick up), passed to ctdbd on
	# its command line, or both.  Check the CTDB sources and manpages.
	# CTDB_START_AS_DISABLED, for example, would be pointless in this
	# file because it is purely a ctdbd command line argument.
	ocf_is_true "$OCF_RESKEY_ctdb_samba_skip_share_check" && skip_share_check=yes
	ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && manages_samba=yes
	ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind" && manages_winbind=yes

	cat >$CTDB_SYSCONFIG <<EOF
# CTDB-RA: Auto-generated by ${0}, backup is at $CTDB_SYSCONFIG_BACKUP
CTDB_MONITOR_FREE_MEMORY=$OCF_RESKEY_ctdb_monitor_free_memory
CTDB_SAMBA_SKIP_SHARE_CHECK=$skip_share_check
CTDB_MANAGES_SAMBA=$manages_samba
CTDB_MANAGES_WINBIND=$manages_winbind
EOF

	# Service names are only written when they were configured
	append_ctdb_sysconfig CTDB_SERVICE_SMB $OCF_RESKEY_ctdb_service_smb
	append_ctdb_sysconfig CTDB_SERVICE_NMB $OCF_RESKEY_ctdb_service_nmb
	append_ctdb_sysconfig CTDB_SERVICE_WINBIND $OCF_RESKEY_ctdb_service_winbind
}
# Print a two-line usage summary for this agent on stdout.
ctdb_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}
# Start ctdbd for this resource.
# Steps: return early if the monitor already reports success, validate the
# configuration, refuse to start when a persistent database fails
# tdbdump, update smb.conf, regenerate the CTDB sysconfig, set up the
# event scripts, launch ctdbd and then poll "ctdb status" once per second
# until this node is no longer reported as UNHEALTHY.
# Returns $OCF_SUCCESS, the error code from ctdb_validate, or
# $OCF_ERR_GENERIC for any other failure.
ctdb_start() {
# Do nothing if already running
ctdb_monitor && return $OCF_SUCCESS
# Make sure config is adequate
ctdb_validate
rv=$?
[ $rv -ne 0 ] && return $rv
# Die if databases are corrupted
persistent_db_dir="${OCF_RESKEY_ctdb_dbdir}/persistent"
mkdir -p $persistent_db_dir 2>/dev/null
# Errors from ls (e.g. no database files yet) are discarded.  The
# redirection target must be exactly /dev/null: a trailing "$" would make
# the shell write to a file literally named "/dev/null$".
for pdbase in $(ls $persistent_db_dir/*.tdb.[0-9] 2>/dev/null) ; do
/usr/bin/tdbdump $pdbase >/dev/null 2>/dev/null || {
- ocf_log err "Persistent database $pdbase is corrupted! CTDB will not start."
+ ocf_exit_reason "Persistent database $pdbase is corrupted! CTDB will not start."
return $OCF_ERR_GENERIC
}
done
# Add necessary configuration to smb.conf
init_smb_conf
if [ $? -ne 0 ]; then
- ocf_log err "Failed to update $OCF_RESKEY_smb_conf."
+ ocf_exit_reason "Failed to update $OCF_RESKEY_smb_conf."
return $OCF_ERR_GENERIC
fi
# Generate new CTDB sysconfig
generate_ctdb_sysconfig
enable_event_scripts
# Use logfile by default, or syslog if asked for
local log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
log_option="--syslog"
elif [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then
# ensure the logfile's directory exists, otherwise ctdb will fail to start
mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile)
fi
# ensure ctdb's rundir exists, otherwise it will fail to start
mkdir -p $OCF_RESKEY_ctdb_rundir 2>/dev/null
# public addresses file (should not be present, but need to set for correctness if it is)
local pub_addr_option=""
[ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \
pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses"
# start as disabled
local start_as_disabled="--start-as-disabled"
ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled=""
# Start her up
$OCF_RESKEY_ctdbd_binary \
--reclock=$OCF_RESKEY_ctdb_recovery_lock \
--nlist=$OCF_RESKEY_ctdb_config_dir/nodes \
--socket=$OCF_RESKEY_ctdb_socket \
--dbdir=$OCF_RESKEY_ctdb_dbdir \
--dbdir-persistent=$OCF_RESKEY_ctdb_dbdir/persistent \
--event-script-dir=$OCF_RESKEY_ctdb_config_dir/events.d \
--notification-script=$OCF_RESKEY_ctdb_config_dir/notify.sh \
--transport=tcp \
$start_as_disabled $log_option $pub_addr_option \
-d $OCF_RESKEY_ctdb_debuglevel
if [ $? -ne 0 ]; then
# cleanup smb.conf
cleanup_smb_conf
- ocf_log err "Failed to execute $OCF_RESKEY_ctdbd_binary."
+ ocf_exit_reason "Failed to execute $OCF_RESKEY_ctdbd_binary."
return $OCF_ERR_GENERIC
else
# Wait a bit for CTDB to stabilize
# (until start times out if necessary)
while true; do
# Initial sleep is intentional (ctdb init script
# has sleep after ctdbd start, but before invoking
# ctdb to talk to it)
sleep 1
status=$(invoke_ctdb status 2>/dev/null)
if [ $? -ne 0 ]; then
# CTDB will be running, kill it before returning
ctdb_stop
- ocf_log err "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status"
+ ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status"
return $OCF_ERR_GENERIC
fi
if ! echo $status | grep -qs 'UNHEALTHY (THIS'; then
# Status does not say this node is unhealthy,
# so we're good to go. Do a bit of final
# setup and (hopefully) return success.
set_ctdb_variables
return $?
fi
done
fi
# NOTE(review): both branches of the if above leave only via return, so
# the lines below look unreachable; presumably the operation timeout
# enforced by the cluster manager ends a start that never stabilizes.
# ctdbd will (or can) actually still be running at this point, so kill it
ctdb_stop
- ocf_log err "Timeout waiting for CTDB to stabilize"
+ ocf_exit_reason "Timeout waiting for CTDB to stabilize"
return $OCF_ERR_GENERIC
}
# Stop ctdbd.
# First asks the daemon to shut down via "ctdb shutdown", then waits for
# the process to disappear, checking once per second.  After more than
# ten checks every further iteration sends SIGKILL to ctdbd and to
# anything running from the events.d directory.  smb.conf is cleaned up
# afterwards.
# Returns $OCF_SUCCESS when no ctdbd was running, when the shutdown
# command succeeded, or when no ctdbd is left at the end;
# $OCF_ERR_GENERIC otherwise.
ctdb_stop() {
	# Nothing to do when no ctdbd process exists
	if ! pkill -0 -f $OCF_RESKEY_ctdbd_binary; then
		return $OCF_SUCCESS
	fi

	# Polite request first; remember whether it worked
	invoke_ctdb shutdown >/dev/null 2>&1
	rv=$?

	# Wait for the daemon to go away, escalating to SIGKILL
	count=0
	while pkill -0 -f $OCF_RESKEY_ctdbd_binary; do
		sleep 1
		count=$(($count + 1))
		if [ $count -gt 10 ]; then
			ocf_log info "killing ctdbd "
			pkill -9 -f $OCF_RESKEY_ctdbd_binary
			pkill -9 -f ${OCF_RESKEY_ctdb_config_dir}/events.d/
		fi
	done

	cleanup_smb_conf

	# Success on a clean shutdown, or after an unclean one as long as
	# no ctdbd survived the killing
	if [ $rv -eq $OCF_SUCCESS ] || ! pkill -0 -f $OCF_RESKEY_ctdbd_binary; then
		return $OCF_SUCCESS
	fi

	# Neither the shutdown command nor kill -9 worked
	return $OCF_ERR_GENERIC
}
# Report the state of the local CTDB node based on "ctdb status".
# Returns:
#   $OCF_NOT_RUNNING  - the status call failed and its output contains
#                       "Connection refused" or "No such file or directory"
#   $OCF_SUCCESS      - the output marks this node as OK or DISABLED
#   $OCF_ERR_GENERIC  - the status call failed for another reason, or this
#                       node is in any other state
ctdb_monitor() {
local status
# "ctdb status" exits non-zero if CTDB isn't running.
# It can also exit non-zero if there's a timeout (ctdbd blocked,
# stalled, massive load, or otherwise wedged). If it's actually
# not running, STDERR will say "Errno:Connection refused(111)",
# whereas if it's wedged, it'll say various other unpleasant things.
# stderr is captured together with stdout so it can be inspected below
status=$(invoke_ctdb status 2>&1)
if [ $? -ne 0 ]; then
if echo $status | grep -qs 'Connection refused'; then
return $OCF_NOT_RUNNING
elif echo $status | grep -qs 'No such file or directory'; then
return $OCF_NOT_RUNNING
else
- ocf_log err "CTDB status call failed: $status"
+ ocf_exit_reason "CTDB status call failed: $status"
return $OCF_ERR_GENERIC
fi
fi
# "(THIS" picks the status entry that ctdb flags as the local node
if echo $status | grep -Eqs '(OK|DISABLED) \(THIS'; then
return $OCF_SUCCESS
fi
- ocf_log err "CTDB status is bad: $status"
+ ocf_exit_reason "CTDB status is bad: $status"
return $OCF_ERR_GENERIC
}
# Check that the environment and the resource parameters are usable.
# Returns:
#   $OCF_ERR_INSTALLED  - no CTDB sysconfig file was found, or Samba is to
#                         be managed but smb_conf does not exist
#   $OCF_ERR_ARGS       - the nodes file is missing, or a file cannot be
#                         created in the recovery lock's directory
#   $OCF_ERR_CONFIGURED - ctdb_recovery_lock is not set
#   $OCF_SUCCESS        - otherwise
# An existing public_addresses file only produces a warning.
ctdb_validate() {
# Required binaries (full path to tdbdump is intentional, as that's
# what's used in ctdb_start, which was lifted from the init script)
for binary in pkill /usr/bin/tdbdump; do
check_binary $binary
done
if [ -z "$CTDB_SYSCONFIG" ]; then
- ocf_log err "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)"
+ ocf_exit_reason "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)"
return $OCF_ERR_INSTALLED
fi
if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && [ ! -f "$OCF_RESKEY_smb_conf" ]; then
- ocf_log err "Samba config file '$OCF_RESKEY_smb_conf' does not exist."
+ ocf_exit_reason "Samba config file '$OCF_RESKEY_smb_conf' does not exist."
return $OCF_ERR_INSTALLED
fi
if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then
ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!"
fi
if [ ! -f "$OCF_RESKEY_ctdb_config_dir/nodes" ]; then
- ocf_log err "$OCF_RESKEY_ctdb_config_dir/nodes does not exist."
+ ocf_exit_reason "$OCF_RESKEY_ctdb_config_dir/nodes does not exist."
return $OCF_ERR_ARGS
fi
if [ -z "$OCF_RESKEY_ctdb_recovery_lock" ]; then
- ocf_log err "ctdb_recovery_lock not specified."
+ ocf_exit_reason "ctdb_recovery_lock not specified."
return $OCF_ERR_CONFIGURED
fi
# Probe the lock directory by creating (and removing again) a scratch
# file named after this shell's PID
lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock")
touch "$lock_dir/$$" 2>/dev/null
if [ $? != 0 ]; then
- ocf_log err "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
+ ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
return $OCF_ERR_ARGS
fi
rm "$lock_dir/$$"
return $OCF_SUCCESS
}
# Dispatch on the requested action.  meta-data, usage/help and unknown
# actions exit inside the case statement; for the remaining actions the
# handler's return code is logged at debug level and becomes the exit
# status of the agent.
case "$__OCF_ACTION" in
meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
usage|help)
	ctdb_usage
	exit $OCF_SUCCESS
	;;
start)
	ctdb_start
	;;
stop)
	ctdb_stop
	;;
monitor)
	ctdb_monitor
	;;
validate-all)
	ctdb_validate
	;;
*)
	ctdb_usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/Delay b/heartbeat/Delay
index f50539110..9cfa939d6 100755
--- a/heartbeat/Delay
+++ b/heartbeat/Delay
@@ -1,223 +1,223 @@
#!/bin/sh
#
#
# Support: linux-ha@lists.linux-ha.org
# License: GNU General Public License (GPL)
#
# This script is a test resource for introducing delay.
#
# usage: $0 {start|stop|status|monitor|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_startdelay
# OCF_RESKEY_stopdelay
# OCF_RESKEY_mondelay
#
#
# OCF_RESKEY_startdelay defaults to 30 (seconds)
# OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay
# OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay
#
#
# This is really a test resource script.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the one-line usage summary on stdout.
usage() {
	printf '%s\n' "usage: $0 {start|stop|status|monitor|meta-data|validate-all}"
}
# Print the OCF resource-agent metadata (XML) for the Delay agent on
# stdout.  The text contains nothing to expand, so the here-document is
# emitted literally.
meta_data() {
	cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Delay">
<version>1.0</version>
<longdesc lang="en">
This script is a test resource for introducing delay.
</longdesc>
<shortdesc lang="en">Waits for a defined timespan</shortdesc>
<parameters>
<parameter name="startdelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on start operation.
</longdesc>
<shortdesc lang="en">Start delay</shortdesc>
<content type="integer" default="30" />
</parameter>
<parameter name="stopdelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on stop operation.
Defaults to "startdelay" if unspecified.
</longdesc>
<shortdesc lang="en">Stop delay</shortdesc>
<content type="integer" default="30" />
</parameter>
<parameter name="mondelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on monitor operation.
Defaults to "startdelay" if unspecified.
</longdesc>
<shortdesc lang="en">Monitor delay</shortdesc>
<content type="integer" default="30" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30" />
<action name="stop" timeout="30" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
}
# Query the state of this instance's pseudo resource.  The return code is
# whatever ha_pseudo_resource reports for the "monitor" operation.
Delay_stat() {
	ha_pseudo_resource Delay_$OCF_RESOURCE_INSTANCE monitor
}
# Log and report whether the Delay resource is active.
# Returns $OCF_SUCCESS when Delay_stat succeeds, $OCF_NOT_RUNNING otherwise.
Delay_Status() {
	if ! Delay_stat; then
		ocf_log info "Delay is stopped"
		return $OCF_NOT_RUNNING
	fi
	ocf_log info "Delay is running OK"
	return $OCF_SUCCESS
}
# Monitor operation: validate the parameters quietly, wait for
# $OCF_RESKEY_mondelay seconds, then report the current status.
# The return code is that of Delay_Status.
Delay_Monitor() {
	Delay_Validate_All -q
	sleep ${OCF_RESKEY_mondelay}
	Delay_Status
}
# Start operation.
# When the resource is already active this only logs that fact.
# Otherwise the parameters are validated quietly, the pseudo resource is
# started and the function sleeps for $OCF_RESKEY_startdelay seconds.
# Returns $OCF_SUCCESS, or $OCF_ERR_PERM when starting the pseudo
# resource failed (the delay is applied in either case).
Delay_Start() {
	if Delay_stat; then
		ocf_log info "Delay already running."
		return $OCF_SUCCESS
	fi

	Delay_Validate_All -q
	ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start
	rc=$?
	sleep $OCF_RESKEY_startdelay
	[ $rc -eq 0 ] || return $OCF_ERR_PERM
	return $OCF_SUCCESS
}
# Stop operation.
# When the resource is not active this only logs that fact.
# Otherwise the parameters are validated quietly, the pseudo resource is
# stopped and the function sleeps for $OCF_RESKEY_stopdelay seconds.
# Returns $OCF_SUCCESS, or $OCF_ERR_PERM when stopping the pseudo
# resource failed (the delay is applied in either case).
Delay_Stop() {
	if ! Delay_stat; then
		ocf_log info "Delay already stopped."
		return $OCF_SUCCESS
	fi

	Delay_Validate_All -q
	ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop
	rc=$?
	sleep $OCF_RESKEY_stopdelay
	[ $rc -eq 0 ] || return $OCF_ERR_PERM
	return $OCF_SUCCESS
}
# Check if all the arguments are valid numbers, a string is considered valid if:
# 1. It does not contain any character but digits and period ".";
# 2. The period "." does not occur more than once
# Returns $OCF_SUCCESS when every argument is valid (also when there are
# no arguments) and $OCF_ERR_ARGS at the first invalid one.
Are_Valid_Numbers() {
	for i in "$@"; do
		# Matching is done with case patterns, which are never subject
		# to pathname expansion.  An unquoted bracket expression handed
		# to grep can be replaced by the name of a matching file in the
		# current directory before grep ever sees it.
		case "$i" in
		*[!0-9.]*|*.*.*)
			return $OCF_ERR_ARGS
			;;
		esac
	done
	return $OCF_SUCCESS
}
# Validate the startdelay, stopdelay and mondelay parameters.
# Usage: Delay_Validate_All [-q]
#   -q  do not print "Validate OK" when validation succeeds
# Returns $OCF_SUCCESS when all three values pass Are_Valid_Numbers.
# On failure the whole agent exits with $OCF_ERR_ARGS.
Delay_Validate_All() {
# Be quiet when specified -q option _and_ validation succeeded
# getopts tracks its position in the global OPTIND, so start over at the
# first argument on every call
OPTIND=1
getopts "q" option
# option is "?" when no option was given; it is quoted so that the "?"
# cannot be expanded to the name of a single-character file
if test "$option" = "q"; then
quiet=yes
else
quiet=no
fi
shift $(($OPTIND -1))
if Are_Valid_Numbers $OCF_RESKEY_startdelay $OCF_RESKEY_stopdelay \
$OCF_RESKEY_mondelay; then
if test $quiet = "no"; then
echo "Validate OK"
fi
# _Return_ on validation success
return $OCF_SUCCESS
else
- echo "Some of the instance parameters are invalid"
+ ocf_exit_reason "Some of the instance parameters are invalid"
# _Exit_ on validation failure
exit $OCF_ERR_ARGS
fi
}
# Exactly one argument, the action, is required
[ $# -eq 1 ] || {
	usage
	exit $OCF_ERR_ARGS
}

# Parameter defaults: stop and monitor delays follow the start delay
: ${OCF_RESKEY_startdelay=30}
: ${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay}
: ${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay}

# Dispatch on the action.  meta-data, usage and unknown actions exit
# inside the case statement; for all others the handler's return code
# becomes the exit status of the agent.
case "$1" in
meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
usage)
	usage
	exit $OCF_SUCCESS
	;;
start)
	Delay_Start
	;;
stop)
	Delay_Stop
	;;
monitor)
	Delay_Monitor
	;;
status)
	Delay_Status
	;;
validate-all)
	Delay_Validate_All
	;;
*)
	usage
	exit $OCF_ERR_ARGS
	;;
esac
exit $?
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 00ba454c8..cefe52bb9 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -1,883 +1,883 @@
#!/bin/sh
#
# Support: linux-ha@lists.linux-ha.org
# License: GNU General Public License (GPL)
#
# Filesystem
# Description: Manages a Filesystem on a shared storage medium.
# Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
# Original Release: 25 Oct 2000
#
# usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_device
# OCF_RESKEY_directory
# OCF_RESKEY_fstype
# OCF_RESKEY_options
# OCF_RESKEY_statusfile_prefix
# OCF_RESKEY_run_fsck
# OCF_RESKEY_fast_stop
# OCF_RESKEY_force_clones
#
#OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0
# Or a -U or -L option for mount, or an NFS mount specification
#OCF_RESKEY_directory : the mount point for the filesystem
#OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2
#OCF_RESKEY_options : options to be given to the mount command via -o
#OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring
#OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no
#OCF_RESKEY_fast_stop : fast stop: yes(default)/no
#OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts
# for each brick in a glusterfs setup
#
#
# This assumes you want to manage a filesystem on a shared (SCSI) bus,
# on a replicated device (such as DRBD), or a network filesystem (such
# as NFS or Samba).
#
# Do not put this filesystem in /etc/fstab. This script manages all of
# that for you.
#
# NOTE: If 2 or more nodes mount the same file system read-write, and
# that file system is not designed for that specific purpose
# (such as GFS or OCFS2), and is not a network file system like
# NFS or Samba, then the filesystem is going to become
# corrupted.
#
# As a result, you should use this together with the stonith
# option and redundant, independent communications paths.
#
# If you don't do this, don't blame us when you scramble your
# disk.
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
DFLT_STATUSDIR=".Filesystem_status/"
# Variables used by multiple methods
HOSTOS=`uname`
# The status file is going to an extra directory, by default
#
prefix=${OCF_RESKEY_statusfile_prefix}
: ${prefix:=$DFLT_STATUSDIR}
suffix="${OCF_RESOURCE_INSTANCE}"
[ "$OCF_RESKEY_CRM_meta_clone" ] &&
suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone"
suffix="${suffix}_`uname -n`"
STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix
#######################################################################
# Print a one-line summary of the supported actions to stdout.
usage() {
	printf 'usage: %s {start|stop|status|monitor|validate-all|meta-data}\n' "$0"
}
# Print the resource agent meta-data (XML) to stdout.
# The here-document is unquoted, so $DFLT_STATUSDIR is expanded into the
# default value advertised for the statusfile_prefix parameter.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Filesystem">
<version>1.1</version>
<longdesc lang="en">
Resource script for Filesystem. It manages a Filesystem on a
shared storage medium.
The standard monitor operation of depth 0 (also known as probe)
checks if the filesystem is mounted. If you want deeper tests,
set OCF_CHECK_LEVEL to one of the following values:
10: read first 16 blocks of the device (raw read)
This doesn't exercise the filesystem at all, but the device on
which the filesystem lives. This is noop for non-block devices
such as NFS, SMBFS, or bind mounts.
20: test if a status file can be written and read
The status file must be writable by root. This is not always the
case with an NFS mount, as NFS exports usually have the
"root_squash" option set. In such a setup, you must either use
read-only monitoring (depth=10), export with "no_root_squash" on
your NFS server, or grant world write permissions on the
directory where the status file is to be placed.
</longdesc>
<shortdesc lang="en">Manages filesystem mounts</shortdesc>
<parameters>
<parameter name="device" required="1">
<longdesc lang="en">
The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="directory" required="1">
<longdesc lang="en">
The mount point for the filesystem.
</longdesc>
<shortdesc lang="en">mount point</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="fstype" required="1">
<longdesc lang="en">
The type of filesystem to be mounted.
</longdesc>
<shortdesc lang="en">filesystem type</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="options">
<longdesc lang="en">
Any extra options to be given as -o options to mount.
For bind mounts, add "bind" here and set fstype to "none".
We will do the right thing for options such as "bind,ro".
</longdesc>
<shortdesc lang="en">options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="statusfile_prefix">
<longdesc lang="en">
The prefix to be used for a status file for resource monitoring
with depth 20. If you don't specify this parameter, all status
files will be created in a separate directory.
</longdesc>
<shortdesc lang="en">status file prefix</shortdesc>
<content type="string" default="$DFLT_STATUSDIR" />
</parameter>
<parameter name="run_fsck">
<longdesc lang="en">
Specify how to decide whether to run fsck or not.
"auto" : decide to run fsck depending on the fstype(default)
"force" : always run fsck regardless of the fstype
"no" : do not run fsck ever.
</longdesc>
<shortdesc lang="en">run_fsck</shortdesc>
<content type="string" default="auto" />
</parameter>
<parameter name="fast_stop">
<longdesc lang="en">
Normally, we expect no users of the filesystem and the stop
operation to finish quickly. If you cannot control the filesystem
users easily and want to prevent the stop action from failing,
then set this parameter to "no" and add an appropriate timeout
for the stop operation.
</longdesc>
<shortdesc lang="en">fast stop</shortdesc>
<content type="boolean" default="yes" />
</parameter>
<parameter name="force_clones">
<longdesc lang="en">
The use of a clone setup for local filesystems is forbidden
by default. For special setups like glusterfs, cloning a mount
of a local device with a filesystem like ext4 or xfs independently
on several nodes is a valid use case.
Only set this to "true" if you know what you are doing!
</longdesc>
<shortdesc lang="en">allow running as a clone, regardless of filesystem type</shortdesc>
<content type="boolean" default="false" />
</parameter>
<parameter name="force_unmount">
<longdesc lang="en">
This option allows specifying how to handle processes that are
currently accessing the mount directory.
"true" : Default value, kill processes accessing mount point
"safe" : Kill processes accessing mount point using methods that
avoid functions that could potentially block during process
detection
"false" : Do not kill any processes.
The 'safe' option uses shell logic to walk the /procs/ directory
for pids using the mount point while the default option uses the
fuser cli tool. fuser is known to perform operations that can potentially
block if unresponsive nfs mounts are in use on the system.
</longdesc>
<shortdesc lang="en">Kill processes before unmount</shortdesc>
<content type="boolean" default="true" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="notify" timeout="60" />
<action name="monitor" depth="0" timeout="40" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#
# Ask the kernel to flush the buffers of the device given as $1.
# Meant to be called after unmounting and before mounting; probably
# unnecessary on 2.4 and later kernels, but harmless.
#
# Does nothing (and returns 0) unless $BLOCKDEV is available and the
# global $blockdevice is "yes"; otherwise returns blockdev's exit status.
#
flushbufs() {
	have_binary $BLOCKDEV || return 0
	[ "$blockdevice" = "yes" ] || return 0
	$BLOCKDEV --flushbufs $1
	return $?
}
# Take advantage of /etc/mtab if present, use portable mount command
# otherwise. Normalize format to "dev mountpoint fstype".
# Succeed when the global $options contains "bind" as a whole word.
is_bind_mount() {
	local opts="$options"
	echo "$opts" | grep -w bind >/dev/null 2>&1
}
# Print the mount table as "dev mountpoint fstype", one mount per line.
# Source preference: /proc/mounts (skipped for bind mounts), then a
# readable /etc/mtab, and finally the output of $MOUNT.
list_mounts() {
	local src=""

	if [ -e /proc/mounts ] && ! is_bind_mount; then
		src=/proc/mounts
	elif [ -f /etc/mtab -a -r /etc/mtab ]; then
		src=/etc/mtab
	fi

	if [ -z "$src" ]; then
		# No table file available: parse the mount command output
		$MOUNT | cut -d' ' -f1,3,5
		return
	fi
	cut -d' ' -f1-3 < $src
}
# Resolve the device actually mounted on $MOUNTPOINT and, if it is a
# block device, set the global blockdevice=yes.
# Nothing is done when blockdevice is already "yes" or when $FSTYPE is
# one of the listed network/virtual filesystem types.
determine_blockdevice() {
	[ $blockdevice = "yes" ] && return

	case "$FSTYPE" in
	nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|none)
		return 0
		;;
	esac

	# Get the current real device name, if possible.
	# (specified devname could be -L or -U...)
	DEVICE=$(list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1)
	[ -b "$DEVICE" ] && blockdevice=yes
	return 0
}
# Print the mount points found below the path given as $1 (the path
# itself is not included), in reverse sort order.
list_submounts() {
	local parent="$1"
	list_mounts |
		grep " ${parent}/" |
		cut -d' ' -f2 |
		sort -r
}
# kernels < 2.6.26 can't handle bind remounts
# Logs a warning when the mount options contain "ro" and the running
# kernel is a 2.6.x release with x < 26. Only logs; the return status is
# whatever the last command executed happens to yield.
bind_kernel_check() {
# Nothing to check unless a read-only mount was requested
echo "$options" | grep -w ro >/dev/null 2>&1 ||
return
# awk exits 1 for 2.6.x kernels whose third version component (with any
# non-numeric suffix stripped) is below 26
uname -r | awk -F. '
$1==2 && $2==6 {
sub("[^0-9].*","",$3);
if ($3<26)
exit(1);
}'
[ $? -ne 0 ] &&
ocf_log warn "kernel `uname -r` cannot handle read only bind mounts"
}
# Second step of a bind mount that carries extra options: remount
# $MOUNTPOINT with "bind" replaced by "remount" in $options.
# Returns 0 without doing anything for non-bind mounts or when the
# options are exactly "-o bind"; otherwise returns the mount status.
bind_mount() {
	if ! is_bind_mount || [ "$options" = "-o bind" ]; then
		return 0	# make sure to return OK
	fi

	bind_kernel_check
	bind_opts=$(echo $options | sed 's/bind/remount/')
	$MOUNT $bind_opts $MOUNTPOINT
}
# Succeed when $OCF_RESKEY_options contains $1 as a whole word.
is_option() {
	local wanted="$1"
	echo $OCF_RESKEY_options | grep -w "$wanted" >/dev/null 2>&1
}
# Decide whether fsck should run before mounting.
# $OCF_RESKEY_run_fsck: "force" -> yes, "no" -> no, ""/"auto" -> depends
# on $FSTYPE. Any other value is logged, reset to "auto" and handled as
# "auto". Returns 0 when fsck is needed, 1 otherwise.
is_fsck_needed() {
	case $OCF_RESKEY_run_fsck in
	force)
		return 0
		;;
	no)
		return 1
		;;
	""|auto)
		;;
	*)
		ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'"
		OCF_RESKEY_run_fsck="auto"
		;;
	esac

	# auto mode: the filesystem types listed here never get an fsck
	case $FSTYPE in
	ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs)
		return 1
		;;
	esac
	return 0
}
# Check that the kernel knows the filesystem type $FSTYPE, trying to load
# the matching module when it is not listed in /proc/filesystems.
# glusterfs is looked up as "fuse".
# Returns $OCF_SUCCESS when support is found (or the check is skipped),
# $OCF_ERR_INSTALLED otherwise.
fstype_supported()
{
local support="$FSTYPE"
local rc
# NOTE(review): this test returns success on every OS *except* OpenBSD,
# which contradicts the comment below and leaves the rest of the function
# reachable only on OpenBSD -- the comparison looks inverted; confirm.
if [ "X${HOSTOS}" != "XOpenBSD" ];then
# skip checking /proc/filesystems for obsd
return $OCF_SUCCESS
fi
if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then
: No FSTYPE specified, rely on the system has the right file-system support already
return $OCF_SUCCESS
fi
# support fuse-filesystems (e.g. GlusterFS)
case $FSTYPE in
glusterfs) support="fuse";;
esac
grep -w "$support"'$' /proc/filesystems >/dev/null
if [ $? -eq 0 ]; then
# found the fs type
return $OCF_SUCCESS
fi
# if here, we should attempt to load the module and then
# check the if the filesystem support exists again.
$MODPROBE $support >/dev/null
if [ $? -ne 0 ]; then
- ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernal module"
+ ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernal module"
return $OCF_ERR_INSTALLED
fi
# It is possible for the module to load and not be complete initialized
# before we check /proc/filesystems again. Give this a few trys before
# giving up entirely.
# Up to 5 attempts, one second apart
for try in $(seq 5); do
grep -w "$support"'$' /proc/filesystems >/dev/null
if [ $? -eq 0 ] ; then
# yes. found the filesystem after doing the modprobe
return $OCF_SUCCESS
fi
ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again"
sleep 1
done
- ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems"
+ ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems"
return $OCF_ERR_INSTALLED
}
#
# START: Start up the filesystem
#
# Sequence: return early if already mounted; verify filesystem support;
# for block devices check that the device exists and optionally fsck it;
# create the mount point if missing; flush buffers; mount.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC; a missing device, a missing
# mount point or missing filesystem support terminates the script with
# $OCF_ERR_INSTALLED.
#
Filesystem_start()
{
# See if the device is already mounted.
if Filesystem_status >/dev/null 2>&1 ; then
ocf_log info "Filesystem $MOUNTPOINT is already mounted."
return $OCF_SUCCESS
fi
fstype_supported || exit $OCF_ERR_INSTALLED
# Check the filesystem & auto repair.
# NOTE: Some filesystem types don't need this step... Please modify
# accordingly
if [ $blockdevice = "yes" ]; then
if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then
- ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
+ ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
exit $OCF_ERR_INSTALLED
fi
if is_fsck_needed; then
ocf_log info "Starting filesystem check on $DEVICE"
if [ -z "$FSTYPE" ]; then
$FSCK -p $DEVICE
else
$FSCK -t $FSTYPE -p $DEVICE
fi
# NOTE: if any errors at all are detected, it returns non-zero
# if the error is >= 4 then there is a big problem
if [ $? -ge 4 ]; then
- ocf_log err "Couldn't sucessfully fsck filesystem for $DEVICE"
+ ocf_exit_reason "Couldn't sucessfully fsck filesystem for $DEVICE"
return $OCF_ERR_GENERIC
fi
fi
fi
# Create the mount point when it does not exist yet
[ -d "$MOUNTPOINT" ] ||
ocf_run mkdir -p $MOUNTPOINT
if [ ! -d "$MOUNTPOINT" ] ; then
- ocf_log err "Couldn't find directory [$MOUNTPOINT] to use as a mount point"
+ ocf_exit_reason "Couldn't find directory [$MOUNTPOINT] to use as a mount point"
exit $OCF_ERR_INSTALLED
fi
flushbufs $DEVICE
# Mount the filesystem.
# fstype "none" additionally runs bind_mount after a successful mount;
# an empty fstype lets mount work without -t.
case "$FSTYPE" in
none) $MOUNT $options $DEVICE $MOUNTPOINT &&
bind_mount
;;
"") $MOUNT $options $DEVICE $MOUNTPOINT ;;
*) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;;
esac
if [ $? -ne 0 ]; then
- ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT"
+ ocf_exit_reason "Couldn't mount filesystem $DEVICE on $MOUNTPOINT"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
# end of Filesystem_start
# Print the PIDs of processes using the directory given as $1.
# The method depends on the global $FORCE_UNMOUNT:
#   true value : fuser -m (fstat on OpenBSD)
#   "safe"     : scan /proc for symlinks pointing into the directory and
#                for memory mappings that mention it
#   otherwise  : print nothing
get_pids()
{
	local dir=$1
	local procs
	local mmap_procs

	if ocf_is_true "$FORCE_UNMOUNT"; then
		# $dir is quoted so that a path containing whitespace reaches
		# the tool as a single argument
		if [ "X${HOSTOS}" = "XOpenBSD" ]; then
			fstat | grep "$dir" | awk '{print $3}'
		else
			$FUSER -m "$dir" 2>/dev/null
		fi
	elif [ "$FORCE_UNMOUNT" = "safe" ]; then
		procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}')
		mmap_procs=$(grep " ${dir}" /proc/[0-9]*/maps | awk -F/ '{print $3}')
		# Fixed format string: the collected data is never interpreted
		# as printf directives
		printf '%s\n%s' "${procs}" "${mmap_procs}" | sort | uniq
	fi
}
# Send signal $2 to every process that get_pids reports for directory $1.
# Each signalled process is logged; when there are none, that is logged
# instead.
signal_processes() {
	local dir=$1
	local sig=$2
	local pids pid

	# fuser returns a non-zero return code if none of the
	# specified files is accessed or in case of a fatal
	# error.
	pids=$(get_pids "$dir")
	if [ -n "$pids" ]; then
		for pid in $pids; do
			ocf_log info "sending signal $sig to: $(ps -f $pid | tail -1)"
			kill -s $sig $pid
		done
	else
		ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'"
	fi
}
# Run one unmount attempt on $1 (honouring $umount_force) and then
# consult the mount table: $OCF_SUCCESS when the mount point is no longer
# listed, $OCF_ERR_GENERIC when it still is.
try_umount() {
	local mnt=$1

	$UMOUNT $umount_force $mnt
	if list_mounts | grep -q " $mnt " >/dev/null 2>&1; then
		return $OCF_ERR_GENERIC
	fi
	ocf_log info "unmounted $mnt successfully"
	return $OCF_SUCCESS
}
# Unmount $1, signalling the processes that use it between attempts.
# $2 is the time budget: timeout/2 one-second rounds are spent with TERM
# and then the same number with KILL.
# Returns $OCF_SUCCESS as soon as an unmount attempt succeeds,
# $OCF_ERR_GENERIC when all rounds are used up.
fs_stop() {
local SUB=$1 timeout=$2 sig cnt
for sig in TERM KILL; do
cnt=$((timeout/2)) # try half time with TERM
while [ $cnt -gt 0 ]; do
try_umount $SUB &&
return $OCF_SUCCESS
- ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig"
+ ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig"
signal_processes $SUB $sig
cnt=$((cnt-1))
sleep 1
done
done
return $OCF_ERR_GENERIC
}
#
# STOP: Unmount the filesystem
#
# Unmounts every filesystem mounted below $MOUNTPOINT and then
# $MOUNTPOINT itself, flushes the device buffers and returns $rc.
#
Filesystem_stop()
{
# See if the device is currently mounted
Filesystem_status >/dev/null 2>&1
if [ $? -eq $OCF_NOT_RUNNING ]; then
# Already unmounted, wonderful.
rc=$OCF_SUCCESS
else
# Wipe the status file, but continue with a warning if
# removal fails -- the file system might be read only
if [ $OCF_CHECK_LEVEL -eq 20 ]; then
rm -f ${STATUSFILE}
if [ $? -ne 0 ]; then
ocf_log warn "Failed to remove status file ${STATUSFILE}."
fi
fi
# Determine the real blockdevice this is mounted on (if
# possible) prior to unmounting.
determine_blockdevice
# For networked filesystems, there's merit in trying -f:
case "$FSTYPE" in
nfs4|nfs|cifs|smbfs) umount_force="-f" ;;
esac
# Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
local timeout
for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do
ocf_log info "Trying to unmount $SUB"
# fast_stop: fixed 6 second budget; otherwise the operation timeout
# (milliseconds, 20000 when unset) converted to seconds
if ocf_is_true "$FAST_STOP"; then
timeout=6
else
timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"}
timeout=$((timeout/1000))
fi
fs_stop $SUB $timeout
# NOTE(review): rc is overwritten on every iteration, so only the result
# for the last entry ($MOUNTPOINT itself) is returned -- confirm intended.
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
- ocf_log err "Couldn't unmount $SUB, giving up!"
+ ocf_exit_reason "Couldn't unmount $SUB, giving up!"
fi
done
fi
flushbufs $DEVICE
return $rc
}
# end of Filesystem_stop
#
# STATUS: is the filesystem mounted or not?
#
# Sets the globals rc and msg. Returns $OCF_SUCCESS when $MOUNTPOINT is
# listed in the mount table, $OCF_NOT_RUNNING otherwise. The message is
# logged only for the "status" operation.
#
Filesystem_status()
{
	# Start from "not mounted" and upgrade when the mount table disagrees
	rc=$OCF_NOT_RUNNING
	msg="$MOUNTPOINT is unmounted (stopped)"
	if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then
		rc=$OCF_SUCCESS
		msg="$MOUNTPOINT is mounted (running)"
	fi

	# Special case "monitor" to check whether the UUID cached and
	# on-disk still match?
	if [ "$OP" = "status" ]; then
		ocf_log info "$msg"
	fi
	return $rc
}
# end of Filesystem_status
# Note: the read/write tests below will stall in case the
# underlying block device (or in the case of a NAS mount, the
# NAS server) has gone away. In that case, if I/O does not
# return to normal in time, the operation hits its timeout
# and it is up to the CRM to initiate appropriate recovery
# actions (such as fencing the node).
#
# MONITOR 10: read the device
#
# Reads one 4k block from $DEVICE with O_DIRECT. A no-op returning
# $OCF_SUCCESS when $blockdevice is "no"; returns $OCF_ERR_GENERIC when
# dd fails.
#
Filesystem_monitor_10()
{
if [ "$blockdevice" = "no" ] ; then
ocf_log warn "$DEVICE is not a block device, monitor 10 is noop"
return $OCF_SUCCESS
fi
dd_opts="iflag=direct bs=4k count=1"
# Capture only dd's stderr; the data that was read is discarded
err_output=`dd if=$DEVICE $dd_opts 2>&1 >/dev/null`
if [ $? -ne 0 ]; then
- ocf_log err "Failed to read device $DEVICE"
+ ocf_exit_reason "Failed to read device $DEVICE"
ocf_log err "dd said: $err_output"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
#
# MONITOR 20: write and read a status file
#
# Writes the resource instance name to $STATUSFILE with dd (creating the
# containing directory if needed), then checks that the file exists and
# can be read. Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC on the first
# failing step.
#
Filesystem_monitor_20()
{
if [ "$blockdevice" = "no" ] ; then
# O_DIRECT not supported on cifs/smbfs
dd_opts="oflag=sync bs=4k conv=fsync,sync"
else
# Writing to the device in O_DIRECT mode is imperative
# to bypass caches.
dd_opts="oflag=direct,sync bs=4k conv=fsync,sync"
fi
status_dir=`dirname $STATUSFILE`
[ -d "$status_dir" ] || mkdir -p "$status_dir"
# dd's stderr is captured so it can be logged on failure
err_output=`echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1`
if [ $? -ne 0 ]; then
- ocf_log err "Failed to write status file ${STATUSFILE}"
+ ocf_exit_reason "Failed to write status file ${STATUSFILE}"
ocf_log err "dd said: $err_output"
return $OCF_ERR_GENERIC
fi
test -f ${STATUSFILE}
if [ $? -ne 0 ]; then
- ocf_log err "Cannot stat the status file ${STATUSFILE}"
+ ocf_exit_reason "Cannot stat the status file ${STATUSFILE}"
return $OCF_ERR_GENERIC
fi
cat ${STATUSFILE} > /dev/null
if [ $? -ne 0 ]; then
- ocf_log err "Cannot read the status file ${STATUSFILE}"
+ ocf_exit_reason "Cannot read the status file ${STATUSFILE}"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
# MONITOR: check the mount state and, when $OCF_CHECK_LEVEL is greater
# than 0, run the deeper check for that level (10 or 20).
# Returns the status of Filesystem_status when the filesystem is not
# mounted, the status of the deeper check otherwise, or
# $OCF_ERR_CONFIGURED for an unsupported check level.
Filesystem_monitor()
{
Filesystem_status
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
return $rc
fi
# rc is always $OCF_SUCCESS here because of the early return above
if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then
case "$OCF_CHECK_LEVEL" in
10) Filesystem_monitor_10; rc=$?;;
20) Filesystem_monitor_20; rc=$?;;
*)
- ocf_log err "unsupported monitor level $OCF_CHECK_LEVEL"
+ ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"
rc=$OCF_ERR_CONFIGURED
;;
esac
fi
return $rc
}
# end of Filesystem_monitor
#
# VALIDATE_ALL: Are the instance parameters valid?
# FIXME!! The only part that's useful is the return code.
# This code always returns $OCF_SUCCESS (!)
#
# Problems that are found are only logged:
#  - a warning when $MOUNTPOINT is set but is not a directory
#  - an info message when $FSTYPE is found neither in /proc/filesystems
#    nor in the modules.dep of the running kernel
# When $OCF_CHECK_LEVEL is greater than 0, check_binary verifies that dd
# is available.
#
Filesystem_validate_all()
{
	# Expansions are quoted so that an empty value or a value containing
	# whitespace cannot break the test expression
	if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then
		ocf_log warn "Mountpoint $MOUNTPOINT does not exist"
	fi

	# Check if the $FSTYPE is workable
	# NOTE: Without inserting the $FSTYPE module, this step may be imprecise
	# TODO: This is Linux specific crap.
	if [ -n "$FSTYPE" ] && [ "$FSTYPE" != none ]; then
		if ! cut -f2 /proc/filesystems | grep -q "^${FSTYPE}\$"; then
			modpath=/lib/modules/$(uname -r)
			moddep=$modpath/modules.dep

			# Do we have $FSTYPE in modules.dep?
			if ! cut -d' ' -f1 "$moddep" | grep -q "^$modpath.*$FSTYPE\.k\?o:$"; then
				ocf_log info "It seems we do not have $FSTYPE support"
			fi
		fi
	fi

	# If we are supposed to do monitoring with status files, then
	# we need a utility to write in O_DIRECT mode.
	if [ "$OCF_CHECK_LEVEL" -gt 0 ]; then
		check_binary dd
		# Note: really old coreutils version do not support
		# the "oflag" option for dd. We don't check for that
		# here. In case dd does not support oflag, monitor is
		# bound to fail, with dd spewing an error message to
		# the logs. On such systems, we must do without status
		# file monitoring.
	fi

	#TODO: How to check the $options ?
	return $OCF_SUCCESS
}
#
# set the blockdevice variable to "no" or "yes"
#
# "no" for the listed network/virtual filesystem types, for loop mounts,
# when $DEVICE is a mount option (-U/-L style) and when $DEVICE is a
# directory; "yes" otherwise. A warning is logged when the device is
# neither a block device nor a directory, except during "start".
#
set_blockdevice_var() {
	blockdevice=no

	# these are definitely not block devices
	case $FSTYPE in
	nfs4|nfs|smbfs|cifs|none|glusterfs|ceph) return;;
	esac

	if is_option "loop"; then
		return
	fi

	case $DEVICE in
	-*)
		# Oh... An option to mount instead... Typically -U or -L
		return 0
		;;
	/dev/null)
		# Special case for BSC
		blockdevice=yes
		return 0
		;;
	esac

	if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then
		ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
	fi
	[ -d "$DEVICE" ] || blockdevice=yes
	return 0
}
# Check the arguments passed to this script
if [ $# -ne 1 ]; then
usage
exit $OCF_ERR_ARGS
fi
# Check the OCF_RESKEY_ environment variables...
# force_unmount defaults to "yes" when the parameter is not set
FORCE_UNMOUNT="yes"
if [ -n "${OCF_RESKEY_force_unmount}" ]; then
FORCE_UNMOUNT=$OCF_RESKEY_force_unmount
fi
DEVICE=$OCF_RESKEY_device
FSTYPE=$OCF_RESKEY_fstype
# $options carries the "-o" flag itself, ready to be passed to mount
if [ ! -z "$OCF_RESKEY_options" ]; then
options="-o $OCF_RESKEY_options"
fi
# fast_stop defaults to "yes" (the := form also sets OCF_RESKEY_fast_stop)
FAST_STOP=${OCF_RESKEY_fast_stop:="yes"}
OP=$1
# These operations do not require instance parameters
case $OP in
meta-data) meta_data
exit $OCF_SUCCESS
;;
usage) usage
exit $OCF_SUCCESS
;;
esac
if [ x = x"$DEVICE" ]; then
- ocf_log err "Please set OCF_RESKEY_device to the device to be managed"
+ ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed"
exit $OCF_ERR_CONFIGURED
fi
set_blockdevice_var
# Normalize instance parameters:
# It is possible that OCF_RESKEY_directory has one or even multiple trailing "/".
# But the output of `mount` and /proc/mounts do not.
if [ -z "$OCF_RESKEY_directory" ]; then
# A missing directory is tolerated only for non-start operations on
# block devices
if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then
- ocf_log err "Please specify the directory"
+ ocf_exit_reason "Please specify the directory"
exit $OCF_ERR_CONFIGURED
fi
else
MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//')
: ${MOUNTPOINT:=/}
# At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
# TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
# kill the whole system. Is that a good idea?
fi
# Check to make sure the utilities are found
if [ "X${HOSTOS}" != "XOpenBSD" ];then
check_binary $MODPROBE
check_binary $FUSER
fi
check_binary $FSCK
check_binary $MOUNT
check_binary $UMOUNT
if [ "$OP" != "monitor" ]; then
ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT"
fi
# Operations that do not need the clone-safety check below
case $OP in
status) Filesystem_status
exit $?
;;
monitor) Filesystem_monitor
exit $?
;;
validate-all) Filesystem_validate_all
exit $?
;;
stop) Filesystem_stop
exit $?
;;
esac
# CLUSTERSAFE: 0 = must not run as a clone, 1 = fine as a clone,
# 2 = allowed as a clone, with warnings
CLUSTERSAFE=0
is_option "ro" &&
CLUSTERSAFE=2
case $FSTYPE in
nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2)
CLUSTERSAFE=1 # this is kind of safe too
;;
# add here CLUSTERSAFE=0 for all filesystems which are not
# cluster aware and which, even if when mounted read-only,
# could still modify parts of it such as journal/metadata
ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs)
if ocf_is_true "$OCF_RESKEY_force_clones"; then
CLUSTERSAFE=2
else
CLUSTERSAFE=0 # these are not allowed
fi
;;
esac
if ocf_is_clone; then
case $CLUSTERSAFE in
0)
- ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
+ ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
ocf_log err "DO NOT RUN IT AS A CLONE!"
ocf_log err "Politely refusing to proceed to avoid data corruption."
exit $OCF_ERR_CONFIGURED
;;
2)
ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!"
if ocf_is_true "$OCF_RESKEY_force_clones"; then
ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so."
else
ocf_log warn "But we'll let it run because it is mounted read-only."
ocf_log warn "Please make sure that it's meta data is read-only too!"
fi
;;
esac
fi
# Only "start" is left at this point; anything else is unimplemented
case $OP in
start) Filesystem_start
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index e8c0f77d1..8163c0c26 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -1,486 +1,486 @@
#!/bin/sh
#
# Description: IPsrcaddr - Preferred source address modification
#
# Author: John Sutton <john@scl.co.uk>
# Support: linux-ha@lists.linux-ha.org
# License: GNU General Public License (GPL)
# Copyright: SCL Internet
#
# Based on the IPaddr script.
#
# This script manages the preferred source address associated with
# packets which originate on the localhost and are routed through the
# default route. By default, i.e. without the use of this script or
# similar, these packets will carry the IP of the primary i.e. the
# non-aliased interface. This can be a nuisance if you need to ensure
# that such packets carry the same IP irrespective of which host in
# a redundant cluster they actually originate from.
#
# It can add a preferred source address, or remove one.
#
# usage: IPsrcaddr {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg adds a preferred source address.
#
# Surprisingly, the "stop" arg removes it. :-)
#
# NOTES:
#
# 1) There must be one and not more than 1 default route! Mainly because
# I can't see why you should have more than one. And if there is more
# than one, we would have to box clever to find out which one is to be
# modified, or we would have to pass its identity as an argument.
#
# 2) The script depends on Alexey Kuznetsov's ip utility from the
# iproute aka iproute2 package.
#
# 3) No checking is done to see if the passed in IP address can
# reasonably be associated with the interface on which the default
# route exists. So unless you want to deliberately spoof your source IP,
# check it! Normally, I would expect that your haresources looks
# something like:
#
# nodename ip1 ip2 ... ipN IPsrcaddr::ipX
#
# where ipX is one of the ip1 to ipN.
#
# OCF parameters are as below:
# OCF_RESKEY_ipaddress
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################

# Usage line printed by usage()
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"

# Command prefixes used to show and to change the default route
CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0"
CMDCHANGE="$IP2UTIL route change to "

# Kernel name as reported by uname -s
SYSTYPE="$(uname -s)"
# Print the usage line ($USAGE) to stderr.
usage() {
	echo $USAGE 1>&2
}
# Print the resource agent meta-data (XML) to stdout.
# Declares the parameters ipaddress (required) and cidr_netmask, and the
# actions start, stop, monitor, validate-all and meta-data.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="IPsrcaddr">
<version>1.0</version>
<longdesc lang="en">
Resource script for IPsrcaddr. It manages the preferred source address
modification.
</longdesc>
<shortdesc lang="en">Manages the preferred source address for outgoing IP packets</shortdesc>
<parameters>
<parameter name="ipaddress" unique="0" required="1">
<longdesc lang="en">
The IP address.
</longdesc>
<shortdesc lang="en">IP address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="cidr_netmask">
<longdesc lang="en">
The netmask for the interface in CIDR format. (ie, 24), or in
dotted quad notation 255.255.255.0).
</longdesc>
<shortdesc lang="en">Netmask</shortdesc>
<content type="string" default=""/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Report the given message and terminate the script with $OCF_ERR_GENERIC.
# All arguments are joined into a single message.
errorexit() {
- ocf_log err "$*"
+ ocf_exit_reason "$*"
exit $OCF_ERR_GENERIC
}
#
# We can distinguish 3 cases: no preferred source address, a
# preferred source address exists which matches that specified, and one
# exists but doesn't match that specified. srca_read() returns 1,0,2
# respectively.
#
# The output of route show is something along the lines of:
#
# default via X.X.X.X dev eth1 src Y.Y.Y.Y
#
# where the src clause "src Y.Y.Y.Y" may or may not be present

# Bracket expression matching one space or tab. Built with printf because
# "echo -en" is not portable: an echo without option support prints "-en"
# itself, which would add the characters "-", "e" and "n" to the set.
WS="[$(printf ' \t')]"
# One to three decimal digits
OCTET="[0-9]\{1,3\}"
# Dotted-quad IPv4 address
IPADDR="\($OCTET\.\)\{3\}$OCTET"
# "src <address>"; the address is sed group 3 within MATCHROUTE
SRCCLAUSE="src$WS$WS*\($IPADDR\)"
# Group 1: text before the src clause; group 5: text after it
MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)"
FINDIF=$HA_BIN/findif
# findif needs that to be set
export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress
# Read the default route and compare its preferred source address with $1.
#
# Globals written: DEFROUTE (raw output of $CMDSHOW), SRCIP (address of the
# "src" clause, empty when absent), ROUTE_WO_SRC (the route with the clause
# removed).
# Returns: 0 - a source address is set and equals $1
#          1 - no source address is set
#          2 - a source address is set but differs from $1
# Calls errorexit when the route cannot be read, when more than one line is
# returned, or when there is no default route.
srca_read() {
  local route_lines

  # Quoting the substitution keeps the output as one string, newlines intact
  if ! DEFROUTE="$($CMDSHOW)"; then
    errorexit "command '$CMDSHOW' failed"
  fi

  # More than one line of output means more than one default route
  route_lines=$(echo "$DEFROUTE" | wc -l)
  if [ 1 -ne $route_lines ]; then
    errorexit "more than 1 default route exists"
  fi

  # A single (empty) line still means there is no default route at all
  if [ -z "$DEFROUTE" ]; then
    errorexit "no default route exists"
  fi

  # Address of the src clause, if any ...
  SRCIP=$(echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p")
  # ... and the route with that clause cut out
  ROUTE_WO_SRC=$(echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/")

  if [ -z "$SRCIP" ]; then
    return 1
  fi
  if [ $SRCIP = $1 ]; then
    return 0
  fi
  return 2
}
#
# Add (or change if it already exists) the preferred source address
# The exit code should conform to LSB exit codes.
#
# Make $1 the preferred source address, both on the $NETWORK route of
# $INTERFACE and on the default route. Nothing is changed when srca_read
# reports that $1 is already in place.
# Globals: rc is written; NETWORK, INTERFACE, CMDCHANGE and ROUTE_WO_SRC
# (set by srca_read) are read.
srca_start() {
  srca_read $1
  rc=$?

  if [ $rc != 0 ]; then
    # Not set yet, or set to another address: (re)write both routes
    if ! ip route replace $NETWORK dev $INTERFACE src $1; then
      errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed"
    fi
    if ! $CMDCHANGE $ROUTE_WO_SRC src $1; then
      errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed"
    fi
    rc=$?
    return $rc
  fi

  rc=$OCF_SUCCESS
  ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
  return $rc
}
#
# Remove (if it exists) the preferred source address.
# If one exists but it's not the same as the one specified, that's
# an error. Maybe that's the wrong behaviour because if this fails
# then when IPaddr releases the associated interface (if there is one)
# your default route will also get dropped ;-(
# The exit code should conform to LSB exit codes.
#
# Drop the preferred source address $1 from the $NETWORK route of
# $INTERFACE and from the default route.
# When no source address is set the agent exits with $OCF_SUCCESS; when the
# one that is set differs from $1, errorexit is called.
# Globals: rc is written; NETWORK, INTERFACE, CMDCHANGE and ROUTE_WO_SRC
# (set by srca_read) are read.
srca_stop() {
  srca_read $1
  rc=$?

  case $rc in
    1)
      # We do not have a preferred source address for now
      ocf_log info "No preferred source address defined, nothing to stop"
      exit $OCF_SUCCESS
      ;;
    2)
      errorexit "The address you specified to stop does not match the preferred source address"
      ;;
  esac

  if ! ip route replace $NETWORK dev $INTERFACE; then
    errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed"
  fi
  if ! $CMDCHANGE $ROUTE_WO_SRC; then
    errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed"
  fi
  return $?
}
# Report whether $1 is the preferred source address of the default route.
# Prints a one-line verdict on stdout and returns $OCF_SUCCESS,
# $OCF_NOT_RUNNING or $OCF_ERR_GENERIC for srca_read results 0, 1 and 2.
srca_status() {
  local read_rc

  srca_read $1
  read_rc=$?

  if [ $read_rc -eq 0 ]; then
    echo "OK"
    return $OCF_SUCCESS
  elif [ $read_rc -eq 1 ]; then
    echo "No preferred source address defined"
    return $OCF_NOT_RUNNING
  elif [ $read_rc -eq 2 ]; then
    echo "Preferred source address has incorrect value"
    return $OCF_ERR_GENERIC
  fi
}
# An unreliable IP address checking function, which only catches the _obvious_ violations...
#
# It accepts IPv4 address in dotted quad notation, for example "192.168.1.1"
#
# 100% confidence whenever it reports "negative",
# but may get false "positive" answer.
#
# Sanity-check the dotted-quad IPv4 address given as $1.
# The function's status is that of the case statement below: non-zero for
# the shapes rejected with "false", zero otherwise.
# Exits the agent with $OCF_ERR_CONFIGURED when the first field is 127.
# Note: "ip" is assigned without "local", so it is a global.
CheckIP() {
ip="$1"
case $ip in
*[!0-9.]*) #got invalid char
false;;
.*|*.) #begin or end by ".", which is invalid
false;;
*..*) #consecutive ".", which is invalid
false;;
*.*.*.*.*) #four decimal dots, which is too many
false;;
*.*.*.*) #exactly three decimal dots, candidate, evaluate each field
# Split the address on "." into the positional parameters $1..$4
local IFS=.
set -- $ip
if
( [ $1 -le 254 ] && [ $2 -le 254 ] && [ $3 -le 254 ] && [ $4 -le 254 ] )
then
if [ $1 -eq 127 ]; then
- ocf_log err "IP address [$ip] is a loopback address, thus can not be preferred source address"
+ ocf_exit_reason "IP address [$ip] is a loopback address, thus can not be preferred source address"
exit $OCF_ERR_CONFIGURED
fi
else
# NOTE(review): this branch is taken when some field is greater than 254,
# and "true" makes the function report success for such an address.
# Confirm that this is intended (see the "false positive" remark above).
true
fi
;;
*) #less than three decimal dots
false;;
esac
return $? # This return is unnecessary, this comment too :)
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
# Scan the output of "$IFCONFIG $IFCONFIG_A_OPT" for the interface whose
# address equals $BASEIP (read from the global, not from the argument) and
# print that interface name.
find_interface_solaris() {
# awk puts an empty line in front of every line containing ": " except the
# very first input line, so each interface's lines end with a blank line.
$IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' |
while read ifname linkstuff
do
# The ":" lines are no-ops; they only show the values in "set -x" traces
: ifname = $ifname
# The line after the interface line is read as "<inet> <addr> <rest>"
read inet addr junk
: inet = $inet addr = $addr
# Skip the remaining lines of this interface, up to the blank separator
while
read line && [ "X$line" != "X" ]
do
: Nothing
done
# This doesn't look right for a box with multiple NICs.
# It looks like it always selects the first interface on
# a machine. Yet, we appear to use the results for this case too...
# Strip trailing colons from the interface name
ifname=`echo "$ifname" | sed s'%:*$%%'`
case $addr in
addr:$BASEIP) echo $ifname; return $OCF_SUCCESS;;
$BASEIP) echo $ifname; return $OCF_SUCCESS;;
esac
done
# NOTE(review): the loop above is the last stage of a pipeline. In shells
# that run it in a subshell, the "return" statements inside it only leave
# that subshell, so this line is reached even after a match - verify.
return $OCF_ERR_GENERIC
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
# Print the name of the interface that carries the IPv4 address $BASEIP
# (read from the global; the positional argument is not used), based on
# the one-line-per-address output of "$IP2UTIL -o -f inet addr show".
# Interfaces named ipsec<N> are ignored.
# Returns $OCF_SUCCESS when a name was printed, $OCF_ERR_GENERIC otherwise.
find_interface_generic() {
  local iface

  # Compare the address field exactly (the part of field 4 before "/").
  # A plain grep for " $BASEIP" is an unanchored regular expression, so
  # 10.0.0.1 would also select the interface holding 10.0.0.10, and the
  # dots would match any character.
  iface=$($IP2UTIL -o -f inet addr show \
    | ${AWK:-awk} -v ip="$BASEIP" '
        $2 ~ /^ipsec[0-9][0-9]*$/ { next }
        { split($4, parts, "/"); if (parts[1] == ip) print $2 }')

  if [ -z "$iface" ]; then
    return $OCF_ERR_GENERIC
  fi
  echo $iface
  return $OCF_SUCCESS
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
# Print the interface serving $BASEIP, using the Solaris lookup when
# $SYSTYPE is "SunOS" and the generic lookup otherwise.
# The result is also left in the global IF. The function always returns
# $OCF_SUCCESS, even when nothing was found (the output is then empty).
find_interface() {
  if [ "$SYSTYPE" = "SunOS" ]; then
    IF=$(find_interface_solaris $BASEIP)
  else
    IF=$(find_interface_generic $BASEIP)
  fi
  echo $IF
  return $OCF_SUCCESS
}
# Check whether this host currently serves the IP address $1.
# Returns $OCF_SUCCESS when it does and $OCF_NOT_RUNNING when it does not.
# On Linux/SunOS the agent exits with $OCF_ERR_CONFIGURED when the address
# sits on an interface whose name starts with "lo".
# Globals written: BASEIP, IF, and SYSTYPE (rewritten for Darwin).
ip_status() {
BASEIP="$1"
case "$SYSTYPE" in
Darwin)
# Treat Darwin the same as the other BSD variants (matched as *BSD)
SYSTYPE="${SYSTYPE}BSD"
;;
*)
;;
esac
case "$SYSTYPE" in
*BSD)
# Look for an "inet" line of the ifconfig output that carries the address
$IFCONFIG $IFCONFIG_A_OPT | grep "inet.*[: ]$BASEIP " >/dev/null 2>&1
if [ $? = 0 ]; then
return $OCF_SUCCESS
else
return $OCF_NOT_RUNNING
fi;;
Linux|SunOS)
IF=`find_interface "$BASEIP"`
if [ -z "$IF" ]; then
return $OCF_NOT_RUNNING
fi
case $IF in
lo*)
- ocf_log err "IP address [$BASEIP] is served by loopback, thus can not be preferred source address"
+ ocf_exit_reason "IP address [$BASEIP] is served by loopback, thus can not be preferred source address"
exit $OCF_ERR_CONFIGURED
;;
*)return $OCF_SUCCESS;;
esac
;;
*)
# NOTE(review): IF is not assigned on this path, so the test below sees
# whatever value (if any) an earlier call left in the global - confirm.
if [ -z "$IF" ]; then
return $OCF_NOT_RUNNING
else
return $OCF_SUCCESS
fi;;
esac
}
# Validate the environment and the configured address (global $ipaddress).
# Exits with $OCF_ERR_CONFIGURED when CheckIP rejects the address and with
# $OCF_ERR_INSTALLED when ip_status reports that this host does not serve
# it. During a probe (ocf_is_probe) the ip_status check is skipped and
# $OCF_SUCCESS is returned.
srca_validate_all() {
check_binary $AWK
check_binary $IFCONFIG
# The IP address should be in good shape
if CheckIP "$ipaddress"; then
:
else
- ocf_log err "Invalid IP address [$ipaddress]"
+ ocf_exit_reason "Invalid IP address [$ipaddress]"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_probe; then
return $OCF_SUCCESS
fi
# We should serve this IP address of course
if ip_status "$ipaddress"; then
:
else
- ocf_log err "We are not serving [$ipaddress], hence can not make it a preferred source address"
+ ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
exit $OCF_ERR_INSTALLED
fi
}
# Main program: exactly one argument (the action name) is required.
if
( [ $# -ne 1 ] )
then
usage
exit $OCF_ERR_ARGS
fi
# These operations do not require the OCF instance parameters to be set
case $1 in
meta-data) meta_data
exit $OCF_SUCCESS
;;
usage) usage
exit $OCF_SUCCESS
;;
*)
;;
esac
# Every remaining action needs the ipaddress parameter
if
[ -z "$OCF_RESKEY_ipaddress" ]
then
# usage
- ocf_log err "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
+ ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
exit $OCF_ERR_CONFIGURED
fi
ipaddress="$OCF_RESKEY_ipaddress"
# On Linux the full validation runs before every action
if [ "x$SYSTYPE" = "xLinux" ]; then
srca_validate_all
fi
# The first word of the findif output is used as the interface name;
# a findif failure ends the agent with findif's own exit status.
findif_out=`$FINDIF -C`
rc=$?
[ $rc -ne 0 ] && {
- ocf_log err "[$FINDIF -C] failed"
+ ocf_exit_reason "[$FINDIF -C] failed"
exit $rc
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
# First word of the link-scope route on $INTERFACE that matches the address
NETWORK=`ip route list dev $INTERFACE scope link match $ipaddress|grep -o '^[^ ]*'`
# Dispatch the action; the agent exits with the handler's status
case $1 in
start) srca_start $ipaddress
;;
stop) srca_stop $ipaddress
;;
status) srca_status $ipaddress
;;
monitor) srca_status $ipaddress
;;
validate-all) srca_validate_all
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
#
# Version 0.3 2002/11/04 17:00:00 John Sutton <john@scl.co.uk>
# Name changed from IPsrcroute to IPsrcaddr and now reports errors
# using ha_log rather than on stderr.
#
# Version 0.2 2002/11/02 17:00:00 John Sutton <john@scl.co.uk>
# Changed status output to "OK" to satisfy ResourceManager's
# we_own_resource() function.
#
# Version 0.1 2002/11/01 17:00:00 John Sutton <john@scl.co.uk>
# First effort but does the job?
#
diff --git a/heartbeat/LVM b/heartbeat/LVM
index 7d86fb417..b466fa34e 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -1,691 +1,691 @@
#!/bin/sh
#
#
# LVM
#
# Description: Manages an LVM volume as an HA resource
#
#
# Author: Alan Robertson
# Support: linux-ha@lists.linux-ha.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 - 2005 International Business Machines, Inc.
#
# This code significantly inspired by the LVM resource
# in FailSafe by Lars Marowsky-Bree
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_volgrpname
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the usage text on stdout. The list of operations comes from
# LVM_methods, joined with "|", and is left in the global "methods".
usage() {
  methods=$(echo $(LVM_methods) | tr ' ' '|')
  printf '%s\n' \
    "usage: $0 $methods" \
    "$0 manages an Linux Volume Manager volume (LVM) as an HA resource" \
    "The 'start' operation brings the given volume online" \
    "The 'stop' operation takes the given volume offline" \
    "The 'status' operation reports whether the volume is available" \
    "The 'monitor' operation reports whether the volume seems present" \
    "The 'validate-all' operation checks whether the OCF parameters are valid" \
    "The 'meta-data' operation show meta data" \
    "The 'methods' operation reports on the methods $0 supports"
}
# default for "tag"
OUR_TAG="pacemaker"
# Print the resource agent metadata XML on stdout. The here-document is
# unquoted, so the current value of $OUR_TAG is substituted as the
# advertised default of the "tag" parameter.
# The description of "exclusive" refers to lvm.conf (it previously named
# the file "lvm.con").
meta_data() {
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="LVM">
<version>1.0</version>
<longdesc lang="en">
Resource script for LVM. It manages an Linux Volume Manager volume (LVM)
as an HA resource.
</longdesc>
<shortdesc lang="en">Controls the availability of an LVM Volume Group</shortdesc>
<parameters>
<parameter name="volgrpname" unique="0" required="1">
<longdesc lang="en">
The name of volume group.
</longdesc>
<shortdesc lang="en">Volume group name</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="exclusive" unique="0" required="0">
<longdesc lang="en">
If set, the volume group will be activated exclusively. This option works one of
two ways. If the volume group has the cluster attribute set, then the volume group
will be activated exclusively using clvmd across the cluster. If the cluster attribute
is not set, the volume group will be activated exclusively using a tag and the volume_list
filter. When the tag option is in use, the volume_list in lvm.conf must be initialized. This
can be as simple as setting 'volume_list = []' depending on your setup.
</longdesc>
<shortdesc lang="en">Exclusive activation</shortdesc>
<content type="boolean" default="false" />
</parameter>
<parameter name="tag" unique="0" required="0">
<longdesc lang="en">
If "exclusive" is set on a non clustered volume group, this overrides the tag to be used.
</longdesc>
<shortdesc lang="en">Exclusive activation tag</shortdesc>
<content type="string" default="$OUR_TAG" />
</parameter>
<parameter name="partial_activation" unique="0" required="0">
<longdesc lang="en">
If set, the volume group will be activated even only partial of the physical
volumes available. It helps to set to true, when you are using mirroring
logical volumes.
</longdesc>
<shortdesc lang="en">Activate VG even with partial PV only</shortdesc>
<content type="string" default="false" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30" />
<action name="stop" timeout="30" />
<action name="status" timeout="30" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="methods" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
EOF
}
#
# methods: What methods/operations do we support?
#
# List the operations this agent supports, one name per line, on stdout.
LVM_methods() {
  printf '%s\n' \
    start \
    stop \
    status \
    monitor \
    methods \
    validate-all \
    meta-data \
    usage
}
##
# returns mode
#
# 0 = normal (non-exclusive) local activation
# 1 = tagged-exclusive activation
# 2 = clvm-exclusive activation
##
# Cached activation mode; empty until get_vg_mode has worked it out in the
# current shell.
VG_MODE=
# Print the activation mode of $OCF_RESKEY_volgrpname on stdout:
#   0 - exclusive activation not requested
#   1 - exclusive requested; 6th character of the VG attr string is not "c"
#   2 - exclusive requested; 6th character of the VG attr string is "c"
# The value is stored in VG_MODE and reused when VG_MODE is already set.
get_vg_mode() {
  local vg_attr

  if [ -z "$VG_MODE" ]; then
    VG_MODE=0
    if ocf_is_true "$OCF_RESKEY_exclusive"; then
      vg_attr=$(vgs -o attr --noheadings $OCF_RESKEY_volgrpname | tr -d ' ')
      case $vg_attr in
        ?????c*) VG_MODE=2 ;;
        *)       VG_MODE=1 ;;
      esac
    fi
  fi
  echo "$VG_MODE"
}
##
# Verify tags setup
##
# Check the lvm.conf prerequisites for tag-based exclusive activation.
# Returns $OCF_ERR_GENERIC when "lvm dumpconfig activation/volume_list"
# fails, or when its output contains "@$OUR_TAG" or the volume group name
# in double quotes; returns $OCF_SUCCESS otherwise.
# Note: the output of the lvm/grep commands is not redirected here.
verify_tags_environment()
{
##
# The volume_list must be initialized to something in order to
# guarantee our tag will be filtered on startup
##
if ! lvm dumpconfig activation/volume_list; then
- ocf_log err "LVM: Improper setup detected"
- ocf_log err "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
+ ocf_log err "LVM: Improper setup detected"
+ ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
return $OCF_ERR_GENERIC
fi
##
# Our tag must _NOT_ be in the volume_list. This agent
# overrides the volume_list during activation using the
# special tag reserved for cluster activation
##
if lvm dumpconfig activation/volume_list | grep -e "\"@$OUR_TAG\"" -e "\"${OCF_RESKEY_volgrpname}\""; then
ocf_log err "LVM: Improper setup detected"
- ocf_log err "The volume_list in lvm.conf must not contain the cluster tag, \"$OUR_TAG\", or volume group, $OCF_RESKEY_volgrpname"
+ ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"$OUR_TAG\", or volume group, $OCF_RESKEY_volgrpname"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
# Warn (via ocf_log warn) when no initrd image is newer than lvm.conf.
#
# Arguments (both optional, callers in this file pass none):
#   $1 - directory holding the initrd images   (default: /boot)
#   $2 - path of the LVM configuration file    (default: /etc/lvm/lvm.conf)
# Returns 0 in all cases; the result is only a log message.
check_initrd_warning()
{
  local boot_dir="${1:-/boot}"
  local lvm_conf="${2:-/etc/lvm/lvm.conf}"
  local img
  local have_initrd=0

  # First check to see if there is an initrd img we can safely
  # compare timestamps against. If not, don't even bother with
  # this check. This is known to work in rhel/fedora distros.
  # The wildcards must stay outside the quotes: a quoted pattern is taken
  # literally, never matches, and would disable the check altogether.
  for img in "$boot_dir"/*"$(uname -r)"*.img; do
    if [ -e "$img" ]; then
      have_initrd=1
      break
    fi
  done
  if [ $have_initrd -eq 0 ]; then
    return 0
  fi

  ##
  # Now check to see if the initrd has been updated.
  # If not, the machine could boot and activate the VG outside
  # the control of pacemaker
  ##
  # '*.img' is quoted so that find receives the pattern itself rather than
  # whatever it happens to match in the current directory.
  if [ -z "$(find "$boot_dir" -name '*.img' -newer "$lvm_conf")" ]; then
    ocf_log warn "LVM: Improper setup detected"
    ocf_log warn "* initrd image needs to be newer than lvm.conf"
    # While dangerous if not done the first time, there are many
    # cases where we don't simply want to fail here. Instead,
    # keep warning until the user remakes the initrd - or has
    # it done for them by upgrading the kernel.
    #
    # initrd can be updated using this command.
    # dracut -H -f /boot/initramfs-$(uname -r).img $(uname -r)
    #
  fi
  return 0
}
##
# does this vg have our tag
##
# Compare the tags reported by vgs for $OCF_RESKEY_volgrpname (spaces
# removed) with $OUR_TAG.
# Returns: 0 - the tag string equals $OUR_TAG
#          1 - the volume group carries no tag
#          2 - some other tag string is set
check_tags()
{
  local vg_tags

  vg_tags=$(vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' ')
  case "$vg_tags" in
    "")
      # No-one owns this VG yet
      return 1
      ;;
    "$OUR_TAG")
      # yep, this is ours
      return 0
      ;;
  esac
  # some other tag is set on this vg
  return 2
}
# Remove every tag from $OCF_RESKEY_volgrpname.
# Returns $OCF_SUCCESS when vgs reports no tag afterwards and
# $OCF_ERR_GENERIC when some tag is still present.
strip_tags()
{
local i
# vgs prints the tags comma-separated; turn the commas into spaces so the
# loop visits one tag at a time
for i in `vgs --noheadings -o tags $OCF_RESKEY_volgrpname | sed s/","/" "/g`; do
ocf_log info "Stripping tag, $i"
# LVM version 2.02.98 allows changing tags if PARTIAL
vgchange --deltag $i $OCF_RESKEY_volgrpname
done
# Re-read the tags to verify that none is left
if [ ! -z `vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' '` ]; then
- ocf_log err "Failed to remove ownership tags from $OCF_RESKEY_volgrpname"
+ ocf_exit_reason "Failed to remove ownership tags from $OCF_RESKEY_volgrpname"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
# Make $OUR_TAG the tag of $OCF_RESKEY_volgrpname.
# Nothing is done when check_tags reports that the tag is already ours;
# foreign tags are stripped first.
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when stripping or adding fails.
set_tags()
{
check_tags
case $? in
0)
# we already own it.
return $OCF_SUCCESS
;;
2)
# other tags are set, strip them before setting
if ! strip_tags; then
return $OCF_ERR_GENERIC
fi
;;
*)
: ;;
esac
vgchange --addtag $OUR_TAG $OCF_RESKEY_volgrpname
if [ $? -ne 0 ]; then
- ocf_log err "Failed to add ownership tag to $OCF_RESKEY_volgrpname"
+ ocf_exit_reason "Failed to add ownership tag to $OCF_RESKEY_volgrpname"
return $OCF_ERR_GENERIC
fi
ocf_log info "New tag \"$OUR_TAG\" added to $OCF_RESKEY_volgrpname"
return $OCF_SUCCESS
}
#
# Return LVM status (silently)
#
# Determine whether volume group $1 is active.
#   $1 - volume group name
#   $2 - optional; when non-empty the state is also printed on stdout
# Returns 0 when /dev/$1 is a directory with at least one entry (and, in
# tagged-exclusive mode, the tag checks pass), $OCF_NOT_RUNNING when the
# group is not available, and $OCF_ERR_GENERIC when a tag check fails.
# Note: "loglevel" is assigned without "local", so it is a global.
LVM_status() {
local rc=1
loglevel="debug"
# Set the log level of the error message
# (only when $2 is empty: "warn" during a probe, "info" when the current
# operation is "stop", "err" otherwise)
if [ "X${2}" = "X" ]; then
loglevel="err"
if ocf_is_probe; then
loglevel="warn"
else
if [ ${OP_METHOD} = "stop" ]; then
loglevel="info"
fi
fi
fi
# The group counts as active when /dev/<vg> exists and is not empty
if [ -d /dev/$1 ]; then
test "`cd /dev/$1 && ls`" != ""
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log err "VG $1 with no logical volumes is not supported by this RA!"
+ ocf_exit_reason "VG $1 with no logical volumes is not supported by this RA!"
fi
fi
if [ $rc -ne 0 ]; then
ocf_log $loglevel "LVM Volume $1 is not available (stopped)"
rc=$OCF_NOT_RUNNING
else
case $(get_vg_mode) in
1) # exclusive with tagging.
# If vg is running, make sure the correct tag is present. Otherwise we
# can not guarantee exclusive activation.
if ! check_tags; then
- ocf_log err "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\""
+ ocf_exit_reason "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\""
rc=$OCF_ERR_GENERIC
fi
# make sure the environment for tags activation is still valid
if ! verify_tags_environment; then
rc=$OCF_ERR_GENERIC
fi
# let the user know if their initrd is older than lvm.conf.
check_initrd_warning
;;
*)
: ;;
esac
fi
if [ "X${2}" = "X" ]; then
# status call return
return $rc
fi
# Report on LVM volume status to stdout...
if [ $rc -eq 0 ]; then
echo "Volume $1 is available (running)"
else
echo "Volume $1 is not available (stopped)"
fi
return $rc
}
# Print the vgchange activation options for the current configuration.
# The "-a" flag gets a suffix chosen by get_vg_mode (0: "ly"; 1: "y" plus a
# --config override limiting volume_list to "@$OUR_TAG"; 2: "ey").
# "--partial" is appended when partial_activation is true, and
# "--monitor y" when the resource is a clone.
get_activate_options()
{
  local mode_suffix=""
  local result

  case $(get_vg_mode) in
    0) mode_suffix="ly" ;;
    1) mode_suffix="y --config activation{volume_list=[\"@${OUR_TAG}\"]}" ;;
    2) mode_suffix="ey" ;;
  esac
  result="-a${mode_suffix}"

  ocf_is_true "$OCF_RESKEY_partial_activation" && result="${result} --partial"

  # for clones (clustered volume groups), we'll also have to force
  # monitoring, even if disabled in lvm.conf.
  ocf_is_clone && result="${result} --monitor y"

  echo $result
}
##
# Attempt to deactivate vg cluster wide and then start the vg exclusively
##
# Deactivate every logical volume of $OCF_RESKEY_volgrpname with
# "lvchange -an", then run vgchange with the activation options again.
# Returns $OCF_ERR_GENERIC when a volume whose attr string matches
# "????ao*" is found or when a deactivation fails; otherwise the status of
# the final vgchange (run through ocf_run).
# Note: the function's positional parameters are replaced by the lvs output.
retry_exclusive_start()
{
local vgchange_options=$(get_activate_options)
# Deactivate each LV in the group one by one cluster wide
# (the lvs output is consumed as consecutive "<name> <attr>" pairs)
set -- $(lvs -o name,attr --noheadings $OCF_RESKEY_volgrpname 2> /dev/null)
while [ $# -ge 2 ]; do
case $2 in
????ao*)
# open LVs cannot be deactivated.
return $OCF_ERR_GENERIC;;
*)
if ! lvchange -an $OCF_RESKEY_volgrpname/$1; then
- ocf_log err "Unable to perform required deactivation of $OCF_RESKEY_volgrpname/$1 before starting"
+ ocf_exit_reason "Unable to perform required deactivation of $OCF_RESKEY_volgrpname/$1 before starting"
return $OCF_ERR_GENERIC
fi
;;
esac
shift 2
done
ocf_run vgchange $vgchange_options $OCF_RESKEY_volgrpname
}
#
# Enable LVM volume
#
# Activate volume group $1.
# Runs vgscan, sets the ownership tag in tagged-exclusive mode, then calls
# vgchange with the options from get_activate_options. In clvm-exclusive
# mode a failed activation is retried once through retry_exclusive_start
# after a 5 second pause.
# Returns $OCF_SUCCESS when LVM_status confirms the activation,
# $OCF_ERR_GENERIC when tagging or activation fails, and $OCF_NOT_RUNNING
# when the group is not active afterwards.
LVM_start() {
local vgchange_options=$(get_activate_options)
local vg=$1
local clvmd=0
# TODO: This MUST run vgimport as well
ocf_log info "Activating volume group $vg"
# With LVM major version 1 the group name is passed to vgscan
if [ "$LVM_MAJOR" -eq "1" ]; then
ocf_run vgscan $vg
else
ocf_run vgscan
fi
case $(get_vg_mode) in
2)
clvmd=1
;;
1)
if ! set_tags; then
return $OCF_ERR_GENERIC
fi
;;
*)
: ;;
esac
if ! ocf_run vgchange $vgchange_options $vg; then
if [ $clvmd -eq 0 ]; then
return $OCF_ERR_GENERIC
fi
# Failure to exclusively activate cluster vg.:
# This could be caused by a remotely active LV, Attempt
# to disable volume group cluster wide and try again.
# Allow for some settling
sleep 5
if ! retry_exclusive_start; then
return $OCF_ERR_GENERIC
fi
fi
if LVM_status $vg; then
: OK Volume $vg activated just fine!
return $OCF_SUCCESS
else
- ocf_log err "LVM: $vg did not activate correctly"
+ ocf_exit_reason "LVM: $vg did not activate correctly"
return $OCF_NOT_RUNNING
fi
}
#
# Disable the LVM volume
#
# Deactivate volume group $1.
# Returns $OCF_SUCCESS right away when vgs does not know the group.
# Otherwise vgchange is attempted up to 10 times (1 second apart, with an
# "udevadm settle" when udevadm is available) until it succeeds and
# LVM_status no longer reports the group as active. In tagged-exclusive
# mode the tags are stripped after a successful stop.
# Returns 0 on success, $OCF_ERR_GENERIC when every attempt failed, or the
# status of strip_tags.
# Note: the loop variable "i" is not declared local.
LVM_stop() {
local res=$OCF_ERR_GENERIC
local vgchange_options="-aln"
local vg=$1
if ! vgs $vg > /dev/null 2>&1; then
ocf_log info "Volume group $vg not found"
return $OCF_SUCCESS
fi
ocf_log info "Deactivating volume group $vg"
# Tagged-exclusive mode uses "-an" instead of "-aln"
case $(get_vg_mode) in
1) vgchange_options="-an" ;;
esac
for i in $(seq 10)
do
ocf_run vgchange $vgchange_options $vg
res=$?
# LVM_status succeeding here means the group is still active
if LVM_status $vg; then
- ocf_log err "LVM: $vg did not stop correctly"
+ ocf_exit_reason "LVM: $vg did not stop correctly"
res=1
fi
if [ $res -eq 0 ]; then
break
fi
res=$OCF_ERR_GENERIC
ocf_log warn "$vg still Active"
ocf_log info "Retry deactivating volume group $vg"
sleep 1
which udevadm > /dev/null 2>&1 && udevadm settle --timeout=5
done
case $(get_vg_mode) in
1)
if [ $res -eq 0 ]; then
strip_tags
res=$?
fi
;;
esac
return $res
}
#
# Check whether the OCF instance parameters are valid
#
# Validate the configuration of volume group $VOLUME.
# Exits with $OCF_ERR_GENERIC when vgck or vgdisplay fails, when the tag
# environment is invalid (tagged-exclusive mode) or when clvmd is not
# running (clvm-exclusive mode); exits with $OCF_ERR_CONFIGURED when
# exclusive activation is requested for a clone.
# Returns $OCF_SUCCESS otherwise. An enabled lvmetad only causes a warning.
LVM_validate_all() {
check_binary $AWK
##
# lvmetad is a daemon that caches lvm metadata to improve the
# performance of LVM commands. This daemon should never be used when
# volume groups exist that are being managed by the cluster. The lvmetad
# daemon introduces a response lag, where certain LVM commands look like
# they have completed (like vg activation) when in fact the command
# is still in progress by the lvmetad. This can cause reliability issues
# when managing volume groups in the cluster. For Example, if you have a
# volume group that is a dependency for another application, it is possible
# the cluster will think the volume group is activated and attempt to start
# the application before volume group is really accessible... lvmetad is bad.
##
lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1
if [ $? -eq 0 ]; then
# for now warn users that lvmetad is enabled and that they should disable it. In the
# future we may want to consider refusing to start, or killing the lvmetad daemon.
ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process"
fi
##
# Off-the-shelf tests...
##
VGOUT=`vgck ${VOLUME} 2>&1`
if [ $? -ne 0 ]; then
- ocf_log err "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
+ ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
exit $OCF_ERR_GENERIC
fi
##
# Does the Volume Group exist?
##
if [ "$LVM_MAJOR" = "1" ]; then
VGOUT=`vgdisplay ${VOLUME} 2>&1`
else
VGOUT=`vgdisplay -v ${VOLUME} 2>&1`
fi
if [ $? -ne 0 ]; then
- ocf_log err "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
+ ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
exit $OCF_ERR_GENERIC
fi
##
# If exclusive activation is not enabled, then
# further checking of proper setup is not necessary
##
if ! ocf_is_true "$OCF_RESKEY_exclusive"; then
return $OCF_SUCCESS;
fi
##
# Having cloned lvm resources with exclusive vg activation makes no sense at all.
##
if ocf_is_clone; then
- ocf_log err "cloned lvm resources can not be activated exclusively"
+ ocf_exit_reason "cloned lvm resources can not be activated exclusively"
exit $OCF_ERR_CONFIGURED
fi
##
# Make sure the cluster attribute is set and clvmd is up when exclusive
# activation is enabled. Otherwise we can't exclusively activate the volume group.
##
case $(get_vg_mode) in
1) # exclusive activation using tags
if ! verify_tags_environment; then
exit $OCF_ERR_GENERIC
fi
;;
2) # exclusive activation with clvmd
##
# verify is clvmd running
##
if ! ps -C clvmd > /dev/null 2>&1; then
- ocf_log err "$OCF_RESKEY_volgrpname has the cluster attribute set, but 'clvmd' is not running"
+ ocf_exit_reason "$OCF_RESKEY_volgrpname has the cluster attribute set, but 'clvmd' is not running"
exit $OCF_ERR_GENERIC
fi
;;
*)
: ;;
esac
return $OCF_SUCCESS
}
#
# 'main' starts here...
#
# Exactly one argument (the operation name) is required.
if
[ $# -ne 1 ]
then
usage
exit $OCF_ERR_ARGS
fi
# Operations that need neither the volume group name nor the LVM version
case $1 in
meta-data) meta_data
exit $OCF_SUCCESS;;
methods) LVM_methods
exit $?;;
usage) usage
exit $OCF_SUCCESS;;
*) ;;
esac
if
[ -z "$OCF_RESKEY_volgrpname" ]
then
- ocf_log err "You must identify the volume group name!"
+ ocf_exit_reason "You must identify the volume group name!"
exit $OCF_ERR_CONFIGURED
fi
# Get the LVM version number, for this to work we assume(thanks to panjiam):
#
# LVM1 outputs like this
#
# # vgchange --version
# vgchange: Logical Volume Manager 1.0.3
# Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10)
#
# LVM2 and higher versions output in this format
#
# # vgchange --version
# LVM version: 2.00.15 (2004-04-19)
# Library version: 1.00.09-ioctl (2004-03-31)
# Driver version: 4.1.0
LVM_VERSION=`vgchange --version 2>&1 | \
$AWK '/Logical Volume Manager/ {print $5"\n"; exit; }
/LVM version:/ {printf $3"\n"; exit;}'`
rc=$?
if
( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] )
then
- ocf_log err "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?"
+ ocf_exit_reason "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?"
exit $OCF_ERR_INSTALLED
fi
# Major version: everything before the first "." of the version string
LVM_MAJOR="${LVM_VERSION%%.*}"
VOLUME=$OCF_RESKEY_volgrpname
OP_METHOD=$1
# A configured "tag" parameter overrides the default ownership tag
if [ -n "$OCF_RESKEY_tag" ]; then
OUR_TAG=$OCF_RESKEY_tag
fi
# What kind of method was invoked?
case "$1" in
start)
LVM_validate_all
LVM_start $VOLUME
exit $?;;
stop) LVM_stop $VOLUME
exit $?;;
status) LVM_status $VOLUME $1
exit $?;;
monitor) LVM_status $VOLUME
exit $?;;
validate-all) LVM_validate_all
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/MailTo b/heartbeat/MailTo
index acf673078..3936c39de 100755
--- a/heartbeat/MailTo
+++ b/heartbeat/MailTo
@@ -1,191 +1,191 @@
#!/bin/sh
#
# Resource script for MailTo
#
# Author: Alan Robertson <alanr@unix.sh>
#
# Description: sends email to a sysadmin whenever a takeover occurs.
#
# Note: This command requires an argument, unlike normal init scripts.
#
# This can be given in the haresources file as:
#
# You can also give a mail subject line or even multiple addresses
# MailTo::alanr@unix.sh::BigImportantWebServer
# MailTo::alanr@unix.sh,spoppi@gmx.de::BigImportantWebServer
#
# This will then be put into the message subject and body.
#
# OCF parameters are as below:
# OCF_RESKEY_email
# OCF_RESKEY_subject
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2005 International Business Machines
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Command line of this invocation, quoted in the body of the mail
ARGS="${0} ${*}"
# Node name as printed by "uname -n", used in the mail subject
us=$(uname -n)
# Print the one-line usage synopsis on stdout.
usage() {
  printf '%s\n' "Usage: $0 {start|stop|status|monitor|meta-data|validate-all}"
}
# Print the resource agent metadata XML on stdout: the "email" and
# "subject" parameters and the supported actions with their timeouts.
# The here-document is emitted as written (it contains no shell expansions).
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="MailTo">
<version>1.0</version>
<longdesc lang="en">
This is a resource agent for MailTo. It sends email to a sysadmin whenever
a takeover occurs.
</longdesc>
<shortdesc lang="en">Notifies recipients by email in the event of resource takeover</shortdesc>
<parameters>
<parameter name="email" unique="0" required="1">
<longdesc lang="en">
The email address of sysadmin.
</longdesc>
<shortdesc lang="en">Email address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="subject" unique="0">
<longdesc lang="en">
The subject of the email.
</longdesc>
<shortdesc lang="en">Subject</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="10" />
<action name="stop" timeout="10" />
<action name="status" depth="0" timeout="10" interval="10" />
<action name="monitor" depth="0" timeout="10" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
}
# Send one mail through $MAILCMD to the address(es) in $email.
#   $1 - subject line, passed to the mail command with -s
# The message body is built from the globals $Subject and $ARGS, not from
# the arguments. Returns the exit status of the mail command.
MailProgram() {
$MAILCMD -s "$1" "$email" <<EOF
$Subject
Command line was:
$ARGS
EOF
return $?
}
# Print the subject prefix: all arguments when the first one has at least
# two characters, the text "Resource Group" otherwise.
SubjectLine() {
  if [ ${#1} -ge 2 ]; then
    echo $@
  else
    echo "Resource Group"
  fi
}
# Mail a "Takeover in progress" notice, then record the pseudo resource
# MailTo_<instance> as started. The subject is left in the global Subject.
# Returns the status of ha_pseudo_resource.
MailToStart() {
  local prefix
  local stamp

  prefix=$(SubjectLine $subject)
  stamp=$(date)
  Subject="${prefix} Takeover in progress at ${stamp} on ${us}"
  MailProgram "$Subject" $1
  ha_pseudo_resource MailTo_${OCF_RESOURCE_INSTANCE} start
}
# Mail a "Migrating resource away" notice, then record the pseudo resource
# MailTo_<instance> as stopped. The subject is left in the global Subject.
# Returns the status of ha_pseudo_resource.
MailToStop () {
  local prefix
  local stamp

  prefix=$(SubjectLine $subject)
  stamp=$(date)
  Subject="${prefix} Migrating resource away at ${stamp} from ${us}"
  MailProgram "$Subject" $1
  ha_pseudo_resource MailTo_${OCF_RESOURCE_INSTANCE} stop
}
# Report the recorded state of the pseudo resource MailTo_<instance>:
# prints "running" and returns $OCF_SUCCESS when its monitor succeeds,
# prints "stopped" and returns $OCF_NOT_RUNNING otherwise.
MailToStatus () {
  # ocf_log warn "Don't stat/monitor me! MailTo is a pseudo resource agent, so the status reported may be incorrect"
  ha_pseudo_resource MailTo_${OCF_RESOURCE_INSTANCE} monitor || {
    echo "stopped"
    return $OCF_NOT_RUNNING
  }
  echo "running"
  return $OCF_SUCCESS
}
# Verify that a mail command is configured and available.
# Exits with $OCF_ERR_INSTALLED when $MAILCMD is empty; otherwise hands
# $MAILCMD to check_binary and returns $OCF_SUCCESS.
MailToValidateAll () {
if [ -z "$MAILCMD" ]; then
- ocf_log err "MAILCMD not set: complain to the packager"
+ ocf_exit_reason "MAILCMD not set: complain to the packager"
exit $OCF_ERR_INSTALLED
fi
check_binary "$MAILCMD"
return $OCF_SUCCESS
}
#
# See how we were called.
#
# The order in which heartbeat provides arguments to resource
# scripts is broken. It should be fixed.
#
# Exactly one argument (the action name) is required; note that a wrong
# argument count exits with $OCF_ERR_GENERIC here.
if
( [ $# -ne 1 ] )
then
usage
exit $OCF_ERR_GENERIC
fi
# Actions that are served without the email parameter being set
case $1 in
meta-data) meta_data
exit $OCF_SUCCESS
;;
status|monitor) MailToStatus
exit $?
;;
usage) usage
exit $OCF_SUCCESS
;;
*) ;;
esac
if
[ -z "$OCF_RESKEY_email" ]
then
- ocf_log err "At least 1 Email address has to be given!"
+ ocf_exit_reason "At least 1 Email address has to be given!"
exit $OCF_ERR_CONFIGURED
fi
email=$OCF_RESKEY_email
subject=$OCF_RESKEY_subject
# Validation runs before every remaining action, so "validate-all" itself
# has nothing left to do in the dispatch below
MailToValidateAll
case $1 in
start) MailToStart
;;
stop) MailToStop
;;
validate-all) ;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
diff --git a/heartbeat/Route b/heartbeat/Route
index 9a49a2605..cfed2b095 100755
--- a/heartbeat/Route
+++ b/heartbeat/Route
@@ -1,313 +1,313 @@
#!/bin/sh
#
# Route OCF RA. Enables and disables network routes.
#
# (c) 2008-2010 Florian Haas, Dejan Muhamedagic,
# and Linux-HA contributors
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the resource-agent metadata (XML) for the Route agent on stdout:
# description, the parameters destination/device/gateway/source/table and
# the supported actions with their advertised timeouts.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Route" version="0.1">
<version>1.0</version>
<longdesc lang="en">
Enables and disables network routes.
Supports host and net routes, routes via a gateway address,
and routes using specific source addresses.
This resource agent is useful if a node's routing table
needs to be manipulated based on node role assignment.
Consider the following example use case:
- One cluster node serves as an IPsec tunnel endpoint.
- All other nodes use the IPsec tunnel to reach hosts
in a specific remote network.
Then, here is how you would implement this scheme making use
of the Route resource agent:
- Configure an ipsec LSB resource.
- Configure a cloned Route OCF resource.
- Create an order constraint to ensure
that ipsec is started before Route.
- Create a colocation constraint between the
ipsec and Route resources, to make sure no instance
of your cloned Route resource is started on the
tunnel endpoint itself.
</longdesc>
<shortdesc lang="en">Manages network routes</shortdesc>
<parameters>
<parameter name="destination" unique="1" required="1">
<longdesc lang="en">
The destination network (or host) to be configured for the route.
Specify the netmask suffix in CIDR notation (e.g. "/24").
If no suffix is given, a host route will be created.
Specify "0.0.0.0/0" or "default" if you want this resource to set
the system default route.
</longdesc>
<shortdesc lang="en">Destination network</shortdesc>
<content type="string" />
</parameter>
<parameter name="device" unique="1">
<longdesc lang="en">
The outgoing network device to use for this route.
</longdesc>
<shortdesc lang="en">Outgoing network device</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="gateway" unique="1">
<longdesc lang="en">
The gateway IP address to use for this route.
</longdesc>
<shortdesc lang="en">Gateway IP address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="source" unique="1">
<longdesc lang="en">
The source IP address to be configured for the route.
</longdesc>
<shortdesc lang="en">Source IP address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="table" unique="0">
<longdesc lang="en">
The routing table to be configured for the route.
</longdesc>
<shortdesc lang="en">Routing table</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10"
depth="0"/>
<action name="reload" timeout="20" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
}
#######################################################################
# Build the route specification handed to "ip route (add|del|show)" and
# print it on stdout.  It always starts with "to <destination>"; the
# "dev", "via", "src" and "table" clauses are appended, in that order, for
# each corresponding parameter that is non-empty.  The result is also left
# in the (non-local) variable route_spec.
create_route_spec() {
    route_spec="to ${OCF_RESKEY_destination}"
    route_spec="${route_spec}${OCF_RESKEY_device:+ dev ${OCF_RESKEY_device}}"
    route_spec="${route_spec}${OCF_RESKEY_gateway:+ via ${OCF_RESKEY_gateway}}"
    route_spec="${route_spec}${OCF_RESKEY_source:+ src ${OCF_RESKEY_source}}"
    route_spec="${route_spec}${OCF_RESKEY_table:+ table ${OCF_RESKEY_table}}"
    echo "$route_spec"
}
# Print a one-line synopsis of the supported actions on stdout.
# "reload" is listed because the action dispatcher at the bottom of this
# script and the agent metadata both support it.
route_usage() {
    cat <<END
usage: $0 {start|stop|status|monitor|reload|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
# Start action: add the configured route unless route_status already
# reports it present.  Returns $OCF_SUCCESS when the route exists or was
# added, $OCF_ERR_GENERIC when "ip route add" fails.
route_start() {
route_status
status=$?
if [ $status -eq $OCF_SUCCESS ]; then
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : already started."
return $OCF_SUCCESS
fi
# Left unquoted on purpose below: the spec must split into separate words.
route_spec="$(create_route_spec)"
if ip route add $route_spec; then
ocf_log info "${OCF_RESOURCE_INSTANCE} Added network route: $route_spec"
return $OCF_SUCCESS
else
- ocf_log error "${OCF_RESOURCE_INSTANCE} Failed to add network route: $route_spec"
+ ocf_exit_reason "${OCF_RESOURCE_INSTANCE} Failed to add network route: $route_spec"
fi
return $OCF_ERR_GENERIC
}
# Stop action: delete the configured route if route_status reports it
# present.  Returns $OCF_SUCCESS when the route was removed or was already
# absent.  Returns $OCF_ERR_GENERIC when "ip route del" fails or when
# route_status returned anything other than success/not-running.
route_stop() {
route_status
status=$?
case $status in
$OCF_SUCCESS)
route_spec="$(create_route_spec)"
if ip route del $route_spec; then
ocf_log info "${OCF_RESOURCE_INSTANCE} Removed network route: $route_spec"
return $OCF_SUCCESS
else
- ocf_log error "${OCF_RESOURCE_INSTANCE} Failed to remove network route: $route_spec"
+ ocf_exit_reason "${OCF_RESOURCE_INSTANCE} Failed to remove network route: $route_spec"
fi
;;
$OCF_NOT_RUNNING)
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : already stopped."
return $OCF_SUCCESS
;;
esac
# Reached after a failed delete or an unexpected route_status result.
return $OCF_ERR_GENERIC
}
# Report whether the configured route is present, based on
# "ip $addr_family route show <spec>":
#   - the command fails              -> $OCF_ERR_GENERIC
#   - it succeeds with empty output  -> $OCF_NOT_RUNNING (route absent)
#   - it succeeds and prints a route -> $OCF_SUCCESS
# The command output is kept in the (non-local) variable show_output.
route_status() {
    show_output="$(ip $addr_family route show $(create_route_spec) 2>/dev/null)" ||
        return $OCF_ERR_GENERIC
    [ -n "$show_output" ] || return $OCF_NOT_RUNNING
    return $OCF_SUCCESS
}
# Validate the resource configuration.
# Returns:
#   $OCF_ERR_ARGS       clone_node_max is not 1 for a clone, "destination" is
#                       missing, or neither "device" nor "gateway" is given
#   $OCF_ERR_INSTALLED  the device is not known to "ip link show", or (only
#                       when ocf_is_probe is false) the source address is not
#                       found in "ip address show" / the gateway cannot be
#                       resolved by "ip route get"
#   $OCF_SUCCESS        otherwise
route_validate() {
# If we're running as a clone, are the clone meta attrs OK?
if [ "${OCF_RESKEY_CRM_meta_clone}" ]; then
if [ "${OCF_RESKEY_CRM_meta_clone_node_max}" != 1 ]; then
- ocf_log error "Misconfigured clone parameters. Must set meta attribute \"clone_node_max\" to 1, got ${OCF_RESKEY_CRM_meta_clone_node_max}."
+ ocf_exit_reason "Misconfigured clone parameters. Must set meta attribute \"clone_node_max\" to 1, got ${OCF_RESKEY_CRM_meta_clone_node_max}."
return $OCF_ERR_ARGS
fi
fi
# Did we get a destination?
if [ -z "${OCF_RESKEY_destination}" ]; then
- ocf_log error "Missing required parameter \"destination\"."
+ ocf_exit_reason "Missing required parameter \"destination\"."
return $OCF_ERR_ARGS
fi
# Did we get either a device or a gateway address?
if [ -z "${OCF_RESKEY_device}" -a -z "${OCF_RESKEY_gateway}" ]; then
- ocf_log error "Must specifiy either \"device\", or \"gateway\", or both."
+ ocf_exit_reason "Must specifiy either \"device\", or \"gateway\", or both."
return $OCF_ERR_ARGS
fi
# If a device has been configured, is it available on this system?
if [ -n "${OCF_RESKEY_device}" ]; then
if ! ip link show ${OCF_RESKEY_device} >/dev/null 2>&1; then
- ocf_log error "Network device ${OCF_RESKEY_device} appears not to be available on this system."
+ ocf_exit_reason "Network device ${OCF_RESKEY_device} appears not to be available on this system."
# OCF_ERR_ARGS prevents the resource from running anywhere at all,
# maybe another node has the interface?
# OCF_ERR_INSTALLED just prevents starting on this particular node.
return $OCF_ERR_INSTALLED
fi
fi
# The following tests must return $OCF_ERR_INSTALLED, but only if
# the resource is actually running (i.e., not during probes)
if ! ocf_is_probe; then
# If a source address has been configured, is it available on
# this system?
if [ -n "${OCF_RESKEY_source}" ]; then
# grep -w matches the address as a whole word anywhere in the output.
if ! ip address show | grep -w ${OCF_RESKEY_source} >/dev/null 2>&1; then
- ocf_log error "Source address ${OCF_RESKEY_source} appears not to be available on this system."
+ ocf_exit_reason "Source address ${OCF_RESKEY_source} appears not to be available on this system."
# same reason as with _device:
return $OCF_ERR_INSTALLED
fi
fi
# If a gateway address has been configured, is it reachable?
if [ -n "${OCF_RESKEY_gateway}" ]; then
if ! ip route get ${OCF_RESKEY_gateway} >/dev/null 2>&1; then
- ocf_log error "Gateway address ${OCF_RESKEY_gateway} is unreachable."
+ ocf_exit_reason "Gateway address ${OCF_RESKEY_gateway} is unreachable."
# same reason as with _device:
return $OCF_ERR_INSTALLED
fi
fi
fi
return $OCF_SUCCESS
}
# These two actions must always succeed
case $__OCF_ACTION in
meta-data) meta_data
# OCF variables are not set when querying meta-data
exit 0
;;
usage|help) route_usage
exit $OCF_SUCCESS
;;
esac
# Don't do anything if the necessary utilities aren't present
for binary in ip grep; do
check_binary $binary
done
# Every remaining action requires a valid configuration; the exit code is
# whatever route_validate returned.
route_validate || exit $?
# Choose the address family passed to "ip" in route_status: a colon in the
# destination selects IPv6, anything else IPv4.
case $OCF_RESKEY_destination in
*:*) addr_family="-6" ;;
*) addr_family="-4" ;;
esac
case $__OCF_ACTION in
start) route_start;;
stop) route_stop;;
status|monitor) route_status;;
# reload simply re-runs route_start.
reload) ocf_log info "Reloading..."
route_start
;;
validate-all) ;;
*) route_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
# rc is the status of the branch selected above.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
exit $rc
diff --git a/heartbeat/SendArp b/heartbeat/SendArp
index 675070cbb..b67404f24 100755
--- a/heartbeat/SendArp
+++ b/heartbeat/SendArp
@@ -1,267 +1,267 @@
#!/bin/sh
#
#
# Copyright (c) 2006, Huang Zhen <zhen.huang@gmail.com>
# Converting original heartbeat RA to OCF RA.
#
# Copyright (C) 2004 Horms <horms@verge.net.au>
#
# Based on IPaddr2: Copyright (C) 2003 Tuomo Soini <tis@foobar.fi>
#
# License: GNU General Public License (GPL)
# Support: linux-ha@lists.linux-ha.org
#
# This script sends out gratuitous ARP for an IP address
#
# It can be used _instead_ of the IPaddr2 or IPaddr resource
# to send gratuitous arp for an IP address on a given interface,
# without adding the address to that interface. I.e. if for
# some reason you want to send gratuitous arp for addresses
# managed by IPaddr2 or IPaddr on an additional interface.
#
# OCF parameters are as below:
# OCF_RESKEY_ip
# OCF_RESKEY_nic
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Helper binary, pidfile directory and the resource parameters.
SENDARP=$HA_BIN/send_arp
SENDARPPIDDIR=${HA_RSCTMP}
BASEIP="$OCF_RESKEY_ip"
INTERFACE="$OCF_RESKEY_nic"
# Always empty here; sendarp_validate treats a non-empty value as an error.
RESIDUAL=""
# One pidfile per IP address.
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP"
# "background" falls back to "yes" when the parameter is unset or empty.
BACKGROUND=${OCF_RESKEY_background:-"yes"}
# Set default values
# (each assignment only takes effect when the variable is not already set)
: ${ARP_INTERVAL_MS=200} # milliseconds between ARPs
: ${ARP_REPEAT=5} # repeat count
: ${ARP_BACKGROUND=$BACKGROUND} # no to run in foreground
: ${ARP_NETMASK=ffffffffffff} # netmask for ARP
#######################################################################
# Print the resource-agent metadata (XML) for the SendArp agent on stdout:
# description, the parameters ip/nic/background and the supported actions.
sendarp_meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SendArp" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This RA can be used _instead_ of the IPaddr2 or IPaddr RA to
send gratuitous ARP for an IP address on a given interface,
without adding the address to that interface. For example,
if for some reason you wanted to send gratuitous ARP for
addresses managed by IPaddr2 or IPaddr on an additional
interface.
</longdesc>
<shortdesc lang="en">Broadcasts unsolicited ARP announcements</shortdesc>
<parameters>
<parameter name="ip" unique="0" required="1">
<longdesc lang="en">
The IP address for sending ARP packet.
</longdesc>
<shortdesc lang="en">IP address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="nic" unique="0" required="1">
<longdesc lang="en">
The NIC for sending ARP packet.
</longdesc>
<shortdesc lang="en">NIC</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="background" unique="0" required="0">
<longdesc lang="en">
Send ARPs in background. Set to false if you want to test if
sending ARPs succeeded.
</longdesc>
<shortdesc lang="en">Send ARPs in background</shortdesc>
<content type="boolean" default="true" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" depth="0" timeout="20" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
}
#######################################################################
# Print a short synopsis of the supported actions on stdout.
sendarp_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
#
# Send gratuitous arp
#
# Start action: send gratuitous ARPs for $BASEIP on $INTERFACE using
# $SENDARP, then record the pseudo resource as started.
# Returns $OCF_ERR_CONFIGURED when sendarp_validate reports that, and
# $OCF_SUCCESS when sendarp_monitor already reports success.  In foreground
# mode a failing $SENDARP yields $OCF_ERR_GENERIC; in background mode the
# outcome of $SENDARP cannot influence the return code.
sendarp_start() {
local rc
sendarp_validate
if [ $? = $OCF_ERR_CONFIGURED ]; then
return $OCF_ERR_CONFIGURED
fi
sendarp_monitor
if [ $? = $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
# Optional site configuration; sourcing it may override the ARP_* defaults.
[ -r ${HA_CONFDIR}/arp_config ] && . ${HA_CONFDIR}/arp_config
# NOTE(review): TARGET_INTERFACE is not assigned anywhere in the visible
# part of this script -- presumably it comes from the environment or from
# arp_config; confirm.
if [ -r "${HA_CONFDIR}/arp_config:${TARGET_INTERFACE}" ]; then
. "${HA_CONFDIR}/arp_config:${TARGET_INTERFACE}"
fi
ARGS="-i $ARP_INTERVAL_MS -r $ARP_REPEAT -p $SENDARPPIDFILE $INTERFACE $BASEIP auto $BASEIP $ARP_NETMASK"
ocf_log debug "$SENDARP $ARGS"
rc=$OCF_SUCCESS
if ocf_is_true $ARP_BACKGROUND; then
# not possible to check the status without wait! we can
# just log the outcome
# and wait-ing would be equal to not running in
# background
($SENDARP $ARGS ||
- ocf_log err "Could not send gratuitous arps") &
+ ocf_exit_reason "Could not send gratuitous arps") &
else
$SENDARP $ARGS || {
- ocf_log err "Could not send gratuitous arps"
+ ocf_exit_reason "Could not send gratuitous arps"
rc=$OCF_ERR_GENERIC
}
fi
if [ $rc -eq $OCF_SUCCESS ]; then
ha_pseudo_resource SendArp_${OCF_RESOURCE_INSTANCE} start
fi
return $rc
}
#
# Stop sending gratuitous arp
#
# Stop action: terminate a send_arp process recorded in $SENDARPPIDFILE (if
# the file exists) and mark the pseudo resource as stopped.
# Returns $OCF_SUCCESS when sendarp_monitor already reports not-running,
# when there is no pidfile, or when the recorded process was signalled
# successfully; returns $OCF_ERR_GENERIC when kill fails.
# The result is also left in the (non-local) variable rc.
sendarp_stop() {
    sendarp_monitor
    [ $? -eq $OCF_NOT_RUNNING ] && return $OCF_SUCCESS

    rc=$OCF_SUCCESS
    if [ -f "$SENDARPPIDFILE" ]; then
        if kill $(cat "$SENDARPPIDFILE"); then
            ocf_log info "killed previously running send_arp for $BASEIP"
            rm -f "$SENDARPPIDFILE"
            rc=$OCF_SUCCESS
        else
            ocf_log warn "Could not kill previously running send_arp for $BASEIP"
            rc=$OCF_ERR_GENERIC
        fi
    fi

    if [ "$rc" = "$OCF_SUCCESS" ]; then
        ocf_log info "SendArp for $BASEIP/$INTERFACE released"
        ha_pseudo_resource SendArp_${OCF_RESOURCE_INSTANCE} stop
    else
        ocf_log warn "SendArp for $BASEIP/$INTERFACE NOT released"
    fi
    return $rc
}
#
# Monitor action: returns $OCF_SUCCESS while $SENDARPPIDFILE exists;
# otherwise the result is whatever ha_pseudo_resource reports for the
# pseudo resource.
sendarp_monitor() {
    [ ! -f "$SENDARPPIDFILE" ] || return $OCF_SUCCESS
    ha_pseudo_resource SendArp_${OCF_RESOURCE_INSTANCE} monitor
}
# Validate the configuration: both $INTERFACE and $BASEIP must be non-empty
# and $RESIDUAL must be empty.  Returns $OCF_ERR_CONFIGURED if any of these
# conditions is violated, $OCF_SUCCESS otherwise.
sendarp_validate() {
    [ -n "$INTERFACE" ] || return $OCF_ERR_CONFIGURED
    [ -n "$BASEIP" ] || return $OCF_ERR_CONFIGURED
    [ -z "$RESIDUAL" ] || return $OCF_ERR_CONFIGURED
    return $OCF_SUCCESS
}
# Action dispatcher.  Branches without an explicit exit let the script end
# with the status of the function they call.
case $__OCF_ACTION in
meta-data) sendarp_meta_data
exit $OCF_SUCCESS
;;
start) sendarp_start
;;
stop) sendarp_stop
;;
monitor) sendarp_monitor
;;
# status is monitor plus a human-readable "running"/"stopped" line.
status) sendarp_monitor
if [ $? = $OCF_SUCCESS ]; then
echo "running"
exit $OCF_SUCCESS;
else
echo "stopped"
exit $OCF_NOT_RUNNING;
fi
;;
validate-all) sendarp_validate
;;
usage|help) sendarp_usage
exit $OCF_SUCCESS
;;
*) sendarp_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
diff --git a/heartbeat/Squid b/heartbeat/Squid
index 28e2db524..70c7c3d67 100755
--- a/heartbeat/Squid
+++ b/heartbeat/Squid
@@ -1,446 +1,446 @@
#!/bin/bash
#
# Description: Manages a Squid Server provided by NTT OSSC as an
# OCF High-Availability resource under Heartbeat/LinuxHA control
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
# Copyright (c) 2008 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
#
#######################################################################
# OCF parameters:
# OCF_RESKEY_squid_exe : Executable file
# OCF_RESKEY_squid_conf : Configuration file
# OCF_RESKEY_squid_pidfile: Process id file
# OCF_RESKEY_squid_port : Port number
# OCF_RESKEY_debug_mode : Debug mode
# OCF_RESKEY_debug_log : Debug log file
# OCF_RESKEY_squid_stop_timeout:
# Number of seconds to await to confirm a
# normal stop method
#
# OCF_RESKEY_squid_exe, OCF_RESKEY_squid_conf, OCF_RESKEY_squid_pidfile
# and OCF_RESKEY_squid_port must be specified. Each of the remaining
# parameters either has a default value or derives its value from
# OCF_RESKEY_squid_conf when no explicit value is given.
###############################################################################
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Print the list of supported actions on stdout.  Always returns
# $OCF_ERR_ARGS.
usage()
{
    cat <<EOF
usage: $0 action
action:
start : start a new squid instance
stop : stop the running squid instance
status : return the status of squid, run or down
monitor : return TRUE if the squid appears to be working.
meta-data : show meta data message
validate-all: validate the instance parameters
EOF
    return $OCF_ERR_ARGS
}
# Print the resource-agent metadata (XML) for the Squid agent on stdout and
# return $OCF_SUCCESS.
#
# The "debug_log" description refers to the "squid_conf" parameter, which is
# the configuration-file parameter this agent actually defines.
metadata_squid()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Squid">
<version>1.0</version>
<longdesc lang="en">
The resource agent of Squid.
This manages a Squid instance as an HA resource.
</longdesc>
<shortdesc lang="en">Manages a Squid proxy server instance</shortdesc>
<parameters>
<parameter name="squid_exe" required="1" unique="0">
<longdesc lang="en">
This is a required parameter. This parameter specifies squid's
executable file.
</longdesc>
<shortdesc>Executable file</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="squid_conf" required="1" unique="1">
<longdesc lang="en">
This is a required parameter. This parameter specifies a configuration file
for a squid instance managed by this RA.
</longdesc>
<shortdesc>Configuration file</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="squid_pidfile" required="1" unique="1">
<longdesc lang="en">
This is a required parameter. This parameter specifies a process id file
for a squid instance managed by this RA.
</longdesc>
<shortdesc>Pidfile</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="squid_port" required="1" unique="1">
<longdesc lang="en">
This is a required parameter. This parameter specifies a port number
for a squid instance managed by this RA. If plural ports are used,
you must specify only one of them.
</longdesc>
<shortdesc>Port number</shortdesc>
<content type="integer" default=""/>
</parameter>
<parameter name="squid_stop_timeout" unique="0">
<longdesc lang="en">
On stop, a squid shutdown is invoked first. If the resource
doesn't stop within this timeout, we resort to stopping
processes by sending signals and finally KILLing them.
</longdesc>
<shortdesc>how long to wait for squid shutdown to stop the
instance before resorting to kill</shortdesc>
<content type="integer" default="10"/>
</parameter>
<parameter name="debug_mode" unique="0">
<longdesc lang="en">
This is an optional parameter.
This RA runs in debug mode when this parameter includes 'x' or 'v'.
If 'x' is included, both of STDOUT and STDERR redirect to the logfile
specified by "debug_log", and then the builtin shell option 'x' is turned on.
It is similar about 'v'.
</longdesc>
<shortdesc>Debug mode</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="debug_log" unique="0">
<longdesc lang="en">
This is an optional and omittable parameter.
This parameter specifies a destination file for debug logs
and works only if this RA run in debug mode. Refer to "debug_mode"
about debug mode. If no value is given but it's required, it's made by the
following rules: "/var/log/" as a directory part, the basename of
the configuration file given by "squid_conf" as a basename part,
".log" as a suffix.
</longdesc>
<shortdesc>A destination of the debug log</shortdesc>
<content type="string" default=""/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="60s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5"/>
</actions>
</resource-agent>
END
return $OCF_SUCCESS
}
# Refresh the global array SQUID_PIDS with three independent lookups:
#   [0] output of pgrep -f for $PROCESS_PATTERN
#   [1] first field of each line of $SQUID_PIDFILE, kept only if
#       /proc/<pid>/exe points at $SQUID_EXE (cleared otherwise)
#   [2] pid of the first TCP listener on $SQUID_PORT according to netstat
# A lookup that finds nothing leaves an empty string in its slot.
get_pids()
{
    SQUID_PIDS=( )

    # Seek by pattern
    SQUID_PIDS[0]=$(pgrep -f "$PROCESS_PATTERN")

    # Seek by pidfile
    SQUID_PIDS[1]=$(awk '1{print $1}' $SQUID_PIDFILE 2>/dev/null)
    if [[ -n "${SQUID_PIDS[1]}" ]]; then
        typeset exe
        if exe=$(ls -l "/proc/${SQUID_PIDS[1]}/exe"); then
            # Keep only the link target that follows "-> ".
            exe=${exe##*-> }
            [[ "$exe" = $SQUID_EXE ]] || SQUID_PIDS[1]=""
        else
            SQUID_PIDS[1]=""
        fi
    fi

    # Seek by port
    SQUID_PIDS[2]=$(
        netstat -apn |
        awk '/tcp.*:'$SQUID_PORT' .*LISTEN/ && $7~/^[1-9]/ {
sub("\\/.*", "", $7); print $7; exit}')
}
# Succeeds (0) only when all three SQUID_PIDS slots filled in by get_pids
# are non-empty; returns 1 otherwise.
are_all_pids_found()
{
    [[ -n "${SQUID_PIDS[0]}" && -n "${SQUID_PIDS[1]}" && -n "${SQUID_PIDS[2]}" ]]
}
# Check that the pid taken from the pidfile (SQUID_PIDS[1]) equals the pid
# of the process listening on the port (SQUID_PIDS[2]).
# Returns $OCF_SUCCESS when they match, $OCF_ERR_GENERIC (after reporting
# the mismatch) otherwise.
are_pids_sane()
{
if [[ "${SQUID_PIDS[1]}" = "${SQUID_PIDS[2]}" ]]; then
return $OCF_SUCCESS
else
- ocf_log err "$SQUID_NAME:Pid unmatch"
+ ocf_exit_reason "$SQUID_NAME:Pid unmatch"
return $OCF_ERR_GENERIC
fi
}
# Succeeds (0) when neither the pattern lookup (SQUID_PIDS[0]) nor the port
# lookup (SQUID_PIDS[2]) found a process; returns 1 otherwise.  The pidfile
# slot is not consulted.
is_squid_dead()
{
    [[ -z "${SQUID_PIDS[0]}" && -z "${SQUID_PIDS[2]}" ]]
}
# Determine the state of the squid instance by polling get_pids.
# Returns:
#   $OCF_SUCCESS      all three pid lookups are non-empty
#   $OCF_NOT_RUNNING  is_squid_dead succeeds
#   $OCF_ERR_GENERIC  the lookups are still inconsistent after more than
#                     $SQUID_CONFIRM_TRIALCOUNT retries, one second apart
monitor_squid()
{
typeset trialcount=0
while true; do
get_pids
if are_all_pids_found; then
# NOTE(review): the status of are_pids_sane is discarded, so a pid
# mismatch is reported by that function but still yields success here.
are_pids_sane
return $OCF_SUCCESS
fi
if is_squid_dead; then
return $OCF_NOT_RUNNING
fi
# Neither fully up nor fully down: log the three lookups and retry.
ocf_log info "$SQUID_NAME:Inconsistent processes:" \
"${SQUID_PIDS[0]},${SQUID_PIDS[1]},${SQUID_PIDS[2]}"
(( trialcount = trialcount + 1 ))
if (( trialcount > SQUID_CONFIRM_TRIALCOUNT )); then
- ocf_log err "$SQUID_NAME:Inconsistency of processes remains unsolved"
+ ocf_exit_reason "$SQUID_NAME:Inconsistency of processes remains unsolved"
return $OCF_ERR_GENERIC
fi
sleep 1
done
}
# Start action.
# Returns the monitor_squid result unchanged when the instance is not in
# the "not running" state (already running, or inconsistent).  Otherwise
# launches squid via ocf_run and waits, polling once per second, until all
# pid lookups agree.  Returns $OCF_ERR_GENERIC when the launch command
# fails.  The wait loop has no upper bound of its own.
#
# $SQUID_OPTS is split on whitespace into separate arguments, so an empty
# value adds no argument at all and "-D -N" is passed as two options.
# Only the first line of $SQUID_OPTS is used.
start_squid()
{
    typeset status
    typeset -a extra_opts

    monitor_squid
    status=$?

    if [[ $status != $OCF_NOT_RUNNING ]]; then
        return $status
    fi

    # read -a performs word splitting without pathname expansion.
    read -r -a extra_opts <<< "$SQUID_OPTS"
    ocf_run $SQUID_EXE -f "$SQUID_CONF" "${extra_opts[@]}"
    status=$?
    if [[ $status != $OCF_SUCCESS ]]; then
        return $OCF_ERR_GENERIC
    fi

    while true; do
        get_pids
        if are_all_pids_found && are_pids_sane; then
            return $OCF_SUCCESS
        fi
        ocf_log info "$SQUID_NAME:Waiting for squid to be invoked"
        sleep 1
    done

    return $OCF_ERR_GENERIC
}
# Stop action.
# First asks squid to shut down ("-k shutdown") and polls once per second
# until is_squid_dead succeeds or more than $SQUID_STOP_TIMEOUT polls have
# passed.  If that does not stop the instance (or the shutdown command
# itself fails), sends SIGKILL to the pids found by pattern and by port,
# repeating every second until is_squid_dead succeeds.
# On success the pidfile is removed and $OCF_SUCCESS is returned; the kill
# loop has no upper bound of its own.
stop_squid()
{
    typeset lapse_sec=0

    if ocf_run $SQUID_EXE -f $SQUID_CONF -k shutdown; then
        while :; do
            get_pids
            if is_squid_dead; then
                rm -f $SQUID_PIDFILE
                return $OCF_SUCCESS
            fi
            (( ++lapse_sec > SQUID_STOP_TIMEOUT )) && break
            sleep 1
            ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \
                "stop NORM $lapse_sec/$SQUID_STOP_TIMEOUT"
        done
    fi

    # Graceful shutdown failed or timed out: escalate to SIGKILL.
    until
        get_pids
        ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \
            "try to stop by SIGKILL:${SQUID_PIDS[0]} ${SQUID_PIDS[2]}"
        kill -KILL ${SQUID_PIDS[0]} ${SQUID_PIDS[2]}
        sleep 1
        is_squid_dead
    do
        :
    done
    rm -f $SQUID_PIDFILE
    return $OCF_SUCCESS
}
# Status action.
# NOTE(review): this unconditionally returns $OCF_SUCCESS without looking
# at any process state -- confirm that "status" is meant to be a no-op.
status_squid()
{
return $OCF_SUCCESS
}
# validate-all action: logs the instance name and returns $OCF_SUCCESS.
# No checks happen in this function itself; the parameter checks in the
# main script body have already run (and exited on error) before the
# dispatcher calls it.
validate_all_squid()
{
ocf_log info "validate_all_squid[$SQUID_NAME]"
return $OCF_SUCCESS
}
# No-op command; it only becomes visible when shell tracing is active.
: === Debug ${0##*/} $1 ===
# meta-data needs no parameters, so it is answered before any validation.
if [[ "$1" = "meta-data" ]]; then
metadata_squid
exit $?
fi
SQUID_CONF="${OCF_RESKEY_squid_conf}"
if [[ -z "$SQUID_CONF" ]]; then
- ocf_log err "SQUID_CONF is not defined"
+ ocf_exit_reason "SQUID_CONF is not defined"
exit $OCF_ERR_CONFIGURED
fi
# Instance name: basename of the configuration file without its last
# ".suffix".
SQUID_NAME="${SQUID_CONF##*/}"
SQUID_NAME="${SQUID_NAME%.*}"
# ".log" is appended both to the default path and to a user-supplied
# debug_log value.
DEBUG_LOG="${OCF_RESKEY_debug_log-/var/log/squid_${SQUID_NAME}_debug}.log"
# Collect the requested shell debug flags ('x' and/or 'v').
DEBUG_MODE=""
case $OCF_RESKEY_debug_mode in
*x*) DEBUG_MODE="${DEBUG_MODE}x";;
esac
case $OCF_RESKEY_debug_mode in
*v*) DEBUG_MODE="${DEBUG_MODE}v";;
esac
# In debug mode all further stdout/stderr is appended to $DEBUG_LOG and the
# collected flags are switched on.
if [ -n "$DEBUG_MODE" ]; then
PS4='\d \t \h '"${1-unknown} "
export PS4
exec 1>>$DEBUG_LOG 2>&1
set -$DEBUG_MODE
fi
SQUID_EXE="${OCF_RESKEY_squid_exe}"
if [[ -z "$SQUID_EXE" ]]; then
- ocf_log err "SQUID_EXE is not defined"
+ ocf_exit_reason "SQUID_EXE is not defined"
exit $OCF_ERR_CONFIGURED
fi
if [[ ! -x "$SQUID_EXE" ]]; then
- ocf_log err "$SQUID_EXE is not found"
+ ocf_exit_reason "$SQUID_EXE is not found"
exit $OCF_ERR_CONFIGURED
fi
SQUID_PIDFILE="${OCF_RESKEY_squid_pidfile}"
if [[ -z "$SQUID_PIDFILE" ]]; then
- ocf_log err "SQUID_PIDFILE is not defined"
+ ocf_exit_reason "SQUID_PIDFILE is not defined"
exit $OCF_ERR_CONFIGURED
fi
SQUID_PORT="${OCF_RESKEY_squid_port}"
if [[ -z "$SQUID_PORT" ]]; then
- ocf_log err "SQUID_PORT is not defined"
+ ocf_exit_reason "SQUID_PORT is not defined"
exit $OCF_ERR_CONFIGURED
fi
# Optional settings with defaults; the "-" form applies a default only when
# the parameter is unset.
SQUID_OPTS="${OCF_RESKEY_squid_opts}"
SQUID_PIDS=( )
SQUID_CONFIRM_TRIALCOUNT="${OCF_RESKEY_squid_confirm_trialcount-3}"
SQUID_STOP_TIMEOUT="${OCF_RESKEY_squid_stop_timeout-10}"
SQUID_SUSPEND_TRIALCOUNT="${OCF_RESKEY_squid_suspend_trialcount-10}"
# Pattern handed to pgrep -f in get_pids.
PROCESS_PATTERN="$SQUID_EXE -f $SQUID_CONF"
COMMAND=$1
# Action dispatcher.  Every known action exits with the status of its
# handler.  An unknown action prints the usage text and the script then
# ends with the status of usage.
case "$COMMAND" in
start)
ocf_log debug "[$SQUID_NAME] Enter squid start"
start_squid
func_status=$?
ocf_log debug "[$SQUID_NAME] Leave squid start $func_status"
exit $func_status
;;
stop)
ocf_log debug "[$SQUID_NAME] Enter squid stop"
stop_squid
func_status=$?
ocf_log debug "[$SQUID_NAME] Leave squid stop $func_status"
exit $func_status
;;
status)
status_squid
exit $?
;;
monitor)
#ocf_log debug "[$SQUID_NAME] Enter squid monitor"
monitor_squid
func_status=$?
#ocf_log debug "[$SQUID_NAME] Leave squid monitor $func_status"
exit $func_status
;;
validate-all)
validate_all_squid
exit $?
;;
*)
usage
;;
esac
# vim: set sw=4 ts=4 :
diff --git a/heartbeat/Xinetd b/heartbeat/Xinetd
index 1d1be0b58..b6a7b56e2 100755
--- a/heartbeat/Xinetd
+++ b/heartbeat/Xinetd
@@ -1,250 +1,250 @@
#!/bin/sh
#
# Startup/shutdown script for services managed by xinetd.
#
# Copyright (C) 2003 Charlie Brooks
# Copyright (C) 2011 Ulrich Windl
#
# WARNING: Tested ONLY on SLES11 SP1 at this time.
#
# Author: Charlie Brooks <ha@HBCS.Org>
# Description: given parameters of a service name and start|stop|status,
# will enable, disable or report on a specified xinetd service
# Config: all services must have a descriptor file in /etc/xinetd.d
# Support: linux-ha@lists.linux-ha.org
# License: GNU General Public License (GPL)
#
# OCF parameters are as below:
# OCF_RESKEY_service
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Name of the managed xinetd service and the path of its descriptor file.
service=$OCF_RESKEY_service
SVCDEF=/etc/xinetd.d/$service
#######################################################################
# Print the resource-agent metadata (XML) for the Xinetd agent on stdout:
# description, the single "service" parameter and the supported actions.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Xinetd">
<version>1.0</version>
<longdesc lang="en">
Resource script for Xinetd. It starts/stops services managed
by xinetd by enabling or disabling them in the configuration file.
The xinetd daemon itself must be running: we are not going to start or
stop it ourselves.
All services should have a line saying either "disable=yes" or "disable=no".
The script just changes those settings before reloading xinetd.
Important: in case the services managed by the cluster are the
only ones enabled, you should specify the -stayalive option for
xinetd or it will exit on Heartbeat stop. Alternatively, you may
enable some internal service such as echo.
</longdesc>
<shortdesc lang="en">Manages a service of Xinetd</shortdesc>
<parameters>
<parameter name="service" unique="0" required="1">
<longdesc lang="en">
The name of the service managed by xinetd.
</longdesc>
<shortdesc lang="en">service name</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="restart" timeout="20s" />
<action name="status" depth="0" timeout="10" interval="10" />
<action name="monitor" depth="0" timeout="10" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Print the pid of every process whose command name, as listed by
# "ps -e -o pid,comm", is exactly "xinetd".  Prints nothing when there is
# none.
get_xinetd_pid() {
    ps -e -o pid,comm |
        $AWK '$2 == "xinetd" { print $1 }'
}
# force xinetd to reload the service descriptions
# Ask the running xinetd to re-read its service descriptions by sending it
# SIGHUP.  Exits the script with $OCF_ERR_GENERIC when no xinetd process is
# found or the signal cannot be delivered.
hup_inetd () {
# don't rely on the pid file, but lookup xinetd in the list of
# processes
local pid
pid=`get_xinetd_pid`
if [ "$pid" ]; then
# $pid is left unquoted: get_xinetd_pid prints one pid per matching
# process, so several pids may be signalled at once.
if kill -s HUP $pid; then
ocf_log info "asked xinetd to reload by sending SIGHUP to process $pid!"
else
- ocf_log err "could not send SIGHUP to process $pid!"
+ ocf_exit_reason "could not send SIGHUP to process $pid!"
exit $OCF_ERR_GENERIC
fi
else
- ocf_log err "xinetd process not found!"
+ ocf_exit_reason "xinetd process not found!"
exit $OCF_ERR_GENERIC
fi
}
# check "disable = X", printing X
# check_service FILE
#
# Print the value assigned to "disable" in the service descriptor FILE, as
# extracted by sed (one line per matching "disable = X" line).  An empty
# line is printed when the file contains no such setting.
check_service()
{
    local result

    ocf_log "info" "checking \"disable\" in $1"
    result=$(sed -nre 's/^[ ]*disable[ ]*=[ ]*([^ ]+)[# ]*/\1/p' $1)
    echo "$result"
}
# change "disable = X" to desired value
# change_service VALUE FILE
#
# Rewrite every "disable = X" line in FILE, in place, so that X becomes
# VALUE.  Returns 0 when sed succeeds, 1 (after logging an error) when it
# does not.
change_service()
{
    ocf_log "info" "setting \"disable = $1\" in $2"
    sed -i -re 's/^([ ]*disable[ ]*=[ ]*)([^ ]+)([# ]*)/\1'"$1"'\3/' $2 &&
        return 0
    ocf_log "err" "could not edit $2"
    return 1
}
# Report the state of the service according to the "disable" setting in
# $SVCDEF:
#   "no" or no setting at all -> prints "running", returns $OCF_SUCCESS
#   "yes"                     -> prints "stopped", returns $OCF_NOT_RUNNING
#   anything else             -> prints "unknown", returns $OCF_ERR_CONFIGURED
xup_status () {
    local disabled

    disabled="$(check_service $SVCDEF)"
    case "${disabled:-no}" in
    no)
        echo running
        return $OCF_SUCCESS
        ;;
    yes)
        echo stopped
        return $OCF_NOT_RUNNING
        ;;
    *)
        echo unknown
        return $OCF_ERR_CONFIGURED
        ;;
    esac
}
# Start action: enable the service by setting "disable = no" in $SVCDEF and
# asking xinetd to reload.
# Exits the script with $OCF_SUCCESS when xup_status already reports
# "running".  Returns $OCF_ERR_GENERIC when the descriptor file could not
# be edited, so that a failed edit is not reported as a successful start;
# otherwise returns the status of hup_inetd.
xup_start () {
    if [ "running" = "$(xup_status)" ]; then
        ocf_log info "service $service already started"
        exit $OCF_SUCCESS
    fi
    ocf_log "info" "enabling in $SVCDEF"
    if ! change_service "no" $SVCDEF; then
        return $OCF_ERR_GENERIC
    fi
    hup_inetd
}
# Stop action: disable the service by setting "disable = yes" in $SVCDEF
# and asking xinetd to reload.
# Exits the script with $OCF_SUCCESS when xup_status already reports
# "stopped".  Returns $OCF_ERR_GENERIC when the descriptor file could not
# be edited, so that a service that is still enabled is not reported as
# stopped; otherwise returns the status of hup_inetd.
xup_stop () {
    if [ "stopped" = "$(xup_status)" ]; then
        ocf_log info "service $service already stopped"
        exit $OCF_SUCCESS
    fi
    ocf_log "info" "disabling in $SVCDEF"
    if ! change_service "yes" $SVCDEF; then
        return $OCF_ERR_GENERIC
    fi
    hup_inetd
}
# Print a one-line synopsis of the supported actions; always returns 0.
xup_usage () {
    printf '%s\n' "Usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data}"
    return 0
}
# validate-all action: the service descriptor $SVCDEF must exist as a
# regular file.  Returns $OCF_ERR_INSTALLED when it does not, $OCF_SUCCESS
# otherwise.
xup_validate_all () {
if [ ! -f "$SVCDEF" ]; then
- ocf_log err "service $service missing $SVCDEF"
+ ocf_exit_reason "service $service missing $SVCDEF"
return $OCF_ERR_INSTALLED
fi
return $OCF_SUCCESS
}
# Exactly one argument (the action name) is accepted.
if [ $# -ne 1 ]; then
xup_usage
exit $OCF_ERR_ARGS
fi
# These operations do not require OCF instance parameters to be set
case "$1" in
meta-data)
meta_data
exit $OCF_SUCCESS
;;
usage)
xup_usage
exit $OCF_SUCCESS
;;
esac
# A missing "service" parameter is a configuration error for start; every
# other action reports the resource as not running.
if [ -z "$OCF_RESKEY_service" ]; then
- ocf_log err "please define \"service\" parameter"
+ ocf_exit_reason "please define \"service\" parameter"
if [ "$1" = "start" ]; then
exit $OCF_ERR_CONFIGURED
else
exit $OCF_NOT_RUNNING
fi
fi
# Is xinetd running at all
# Without a running daemon: stop succeeds, start fails, and status/monitor
# report "not running" when ocf_is_probe is true and an error otherwise.
# Other actions fall through.
if [ -z "`get_xinetd_pid`" ]; then
case "$1" in
stop) exit $OCF_SUCCESS;;
start)
- ocf_log err "xinetd not running, we manage just xinetd services, not the daemon itself"
+ ocf_exit_reason "xinetd not running, we manage just xinetd services, not the daemon itself"
exit $OCF_ERR_INSTALLED
;;
status|monitor)
if ocf_is_probe; then
exit $OCF_NOT_RUNNING
else
- ocf_log err "xinetd stopped"
+ ocf_exit_reason "xinetd stopped"
exit $OCF_ERR_GENERIC
fi
;;
esac
fi
# Make sure the OCF_RESKEY_service is a valid xinetd service name
if [ ! -f $SVCDEF ]; then
- ocf_log err "service definition $SVCDEF not found!"
+ ocf_exit_reason "service definition $SVCDEF not found!"
if [ "$1" = "start" ]; then
exit $OCF_ERR_INSTALLED
else
exit $OCF_NOT_RUNNING
fi
fi
# See how we were called.
# The script exits with the status of the selected branch.
case "$1" in
start)
xup_start
;;
stop)
xup_stop
;;
# restart re-invokes this script; only the status of the final "start" is
# propagated.
restart)
$0 stop
$0 start
;;
status)
xup_status
;;
# monitor is status without the textual output.
monitor)
xup_status > /dev/null
;;
validate-all)
xup_validate_all
;;
*)
xup_usage
exit $OCF_ERR_UNIMPLEMENTED
esac
exit $?
diff --git a/heartbeat/apache b/heartbeat/apache
index 1e573375d..ab7c43f9d 100755
--- a/heartbeat/apache
+++ b/heartbeat/apache
@@ -1,655 +1,655 @@
#!/bin/sh
#
# High-Availability Apache/IBMhttp control script
#
# apache (aka IBMhttpd)
#
# Description: starts/stops apache web servers.
#
# Author: Alan Robertson
# Sun Jiang Dong
#
# Support: linux-ha@lists.linux-ha.org
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2002-2005 International Business Machines
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf
# node1 10.0.0.170 IBMhttpd
#
# Our parsing of the Apache config files is very rudimentary.
# It'll work with lots of different configurations - but not every
# possible configuration.
#
# Patches are being accepted ;-)
#
# OCF parameters:
# OCF_RESKEY_configfile
# OCF_RESKEY_httpd
# OCF_RESKEY_port
# OCF_RESKEY_statusurl
# OCF_RESKEY_options
# OCF_RESKEY_testregex
# OCF_RESKEY_client
# OCF_RESKEY_testurl
# OCF_RESKEY_testregex10
# OCF_RESKEY_testconffile
# OCF_RESKEY_testname
# OCF_RESKEY_envfiles
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/apache-conf.sh
. ${OCF_FUNCTIONS_DIR}/http-mon.sh
HA_VARRUNDIR=${HA_VARRUN}
#######################################################################
#
# Configuration options - usually you don't need to change these
#
#######################################################################
#
IBMHTTPD=/opt/IBMHTTPServer/bin/httpd
# Space-separated list of httpd binaries to try, in order of preference.
HTTPDLIST="/sbin/httpd2 /usr/sbin/httpd2 /usr/sbin/apache2 /sbin/httpd /usr/sbin/httpd /usr/sbin/apache $IBMHTTPD"
MPM=/usr/share/apache2/find_mpm
# If the find_mpm helper is installed, append whatever it prints to the list.
if [ -x "$MPM" ]; then
	HTTPDLIST="$HTTPDLIST $($MPM 2>/dev/null)"
fi
LOCALHOST="http://localhost"
HTTPDOPTS="-DSTATUS"
# Default configuration file locations, one per supported layout.
DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf
DEFAULT_SUSECONFIG="/etc/apache2/httpd.conf"
DEFAULT_RHELCONFIG="/etc/httpd/conf/httpd.conf"
#
# You can also set
# HTTPD
# PORT
# STATUSURL
# CONFIGFILE
# in this section if what we're doing doesn't work for you...
#
# End of Configuration options
#######################################################################
CMD=`basename $0`
# The config-file-pathname is the pathname to the configuration
# file for this web server. Various appropriate defaults are
# assumed if no config file is specified. If this command is
# invoked as *IBM*, then the default config file name is
# $DEFAULT_IBMCONFIG, otherwise the default config file
# will be either $DEFAULT_RHELCONFIG or $DEFAULT_SUSECONFIG depending
# on which is detected.
# Print the help text (one line per supported action) on stdout.
usage() {
	printf '%s\n' \
		"usage: $0 action" \
		'action:' \
		'start start the web server' \
		'stop stop the web server' \
		'status return the status of web server, run or down' \
		'monitor return TRUE if the web server appears to be working.' \
		'For this to be supported you must configure mod_status' \
		'and give it a server-status URL. You have to have' \
		'installed either curl or wget for this to work.' \
		'meta-data show meta data message' \
		'validate-all validate the instance parameters'
}
# Print the contents of the pid file named by $PidFile.
# Returns non-zero (printing nothing) when $PidFile is unset/empty or does
# not name a regular file.
get_pid() {
	# $PidFile must be quoted: with an empty value an unquoted "[ -f ]" is
	# true, and the following "cat" would then block reading stdin.
	if [ -n "$PidFile" ] && [ -f "$PidFile" ]; then
		cat "$PidFile"
	else
		false
	fi
}
#
# return TRUE if a process with given PID is running
#
# Return success when a process with the given PID exists.
#   $1 - process id (decimal digits only)
# An empty or non-numeric argument is reported as "not running".
ProcessRunning() {
	local pid=$1
	# Without this guard an empty pid would test "/proc/", which always
	# exists, and "no pid" would be reported as a running process.
	case "$pid" in
	''|*[!0-9]*) return 1 ;;
	esac
	# Use /proc if it looks like it's here...
	if [ -d /proc ] && [ -d /proc/1 ]; then
		[ -d "/proc/$pid" ]
	else
		# This assumes we're running as root...
		kill -s 0 "$pid" >/dev/null 2>&1
	fi
}
# Quietly report whether the server is up: succeeds only when get_pid
# yields a pid and ProcessRunning confirms it; prints nothing itself.
silent_status() {
	local recorded_pid
	recorded_pid=$(get_pid)
	# No pid file (or an empty one) means "not running".
	[ -n "$recorded_pid" ] || return 1
	ProcessRunning $recorded_pid
}
# May be useful to add other distros in future
# Run distribution-specific checks of the default configuration.
# Only SUSE (detected via /etc/SuSE-release) has one; elsewhere succeed.
validate_default_config() {
	[ -e /etc/SuSE-release ] || return 0
	validate_default_suse_config
}
# When using the default /etc/apache2/httpd.conf on SUSE, the file
# /etc/apache2/sysconfig.d/include.conf is required to be present,
# but this is only generated if you run the apache init script
# (with contents derived from /etc/sysconfig/apache2). So, here,
# if we're using the default system config file and it requires
# that include, we run "/etc/init.d/apache2 configtest" to ensure
# the relevant config is generated and valid. We're also taking
# this opportunity to enable mod_status if it's not present.
# SUSE only: when the stock config file is in use and it includes the
# generated sysconfig.d/include.conf, enable mod_status (if a2enmod is
# available) and run the distribution's config test so that the include
# gets generated. Returns 1 when the config test fails, 0 otherwise.
validate_default_suse_config() {
	# Nothing to do unless we run from the default config file ...
	[ "$CONFIGFILE" = "$DEFAULT_SUSECONFIG" ] || return 0
	# ... and that file pulls in the generated include.
	grep -Eq '^Include[[:space:]]+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE" ||
		return 0

	# Best effort: the result of enabling mod_status is not checked.
	if [ -x "/usr/sbin/a2enmod" ]; then
		ocf_run -q /usr/sbin/a2enmod status
	fi

	if [ -e "/etc/init.d/apache2" ]; then
		# init script style, for crusty old SUSE
		ocf_run -q /etc/init.d/apache2 configtest || return 1
	elif [ -e "/usr/sbin/start_apache2" ]; then
		# systemd style, for shiny new SUSE
		ocf_run -q /usr/sbin/start_apache2 -t || return 1
	fi
	return 0
}
# Start the web server and wait until the monitor reports something other
# than "not running". There is no internal retry limit: the loop relies on
# the operation timeout imposed from outside.
# Returns $OCF_SUCCESS if already running, $OCF_ERR_CONFIGURED if the
# default-config validation fails, otherwise the final monitor result.
apache_start() {
	if silent_status; then
		ocf_log info "$CMD already running (pid $(get_pid))"
		return $OCF_SUCCESS
	fi

	validate_default_config || return $OCF_ERR_CONFIGURED

	# https://bugs.launchpad.net/ubuntu/+source/apache2/+bug/603211
	[ -d /var/run/apache2 ] || mkdir /var/run/apache2

	# Pass an explicit PidFile directive only when PIDFILE_DIRECTIVE is set.
	if [ -n "$PIDFILE_DIRECTIVE" ]; then
		ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE -c "PidFile $PidFile"
	else
		ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE
	fi

	# wait until the user set timeout
	tries=0
	apache_monitor
	ec=$?
	while [ $ec -eq $OCF_NOT_RUNNING ]; do
		tries=$((tries + 1))
		ocf_log info "waiting for apache $CONFIGFILE to come up"
		sleep 1
		apache_monitor
		ec=$?
	done

	# The server came up but is unhealthy: do not leave it behind.
	if [ $ec -ne 0 ] && silent_status; then
		apache_stop
	fi
	return $ec
}
# Signal any processes whose command line still matches
# "<httpd binary>.*<config file>", escalating SIGTERM -> SIGHUP -> SIGKILL
# with one second between steps. Stops as soon as nothing matches.
signal_children()
{
	local sig
	# Build the pattern once and always pass it quoted, so the shell can
	# neither glob-expand nor word-split it before pgrep/pkill see it.
	local pattern="$HTTPD.*$CONFIGFILE"

	for sig in SIGTERM SIGHUP SIGKILL ; do
		pgrep -f "$pattern" >/dev/null || break
		pkill -$sig -f "$pattern" >/dev/null
		ocf_log info "signal $sig sent to apache children"
		sleep 1
	done
}
# Ask the server to stop gracefully (signal WINCH) and wait for it to exit.
#   $1 - pid of the server process
# Waits up to half of the operation timeout (OCF_RESKEY_CRM_meta_timeout,
# in milliseconds) when that is set, otherwise up to 10 seconds.
# Returns 0 when the process is gone, 1 when it is still running.
graceful_stop()
{
	local tries=10
	local pid=$1

	# Try graceful stop for half timeout period if timeout period is present
	if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
		tries=$((($OCF_RESKEY_CRM_meta_timeout/1000) / 2))
	fi

	ocf_log info "Attempting graceful stop of apache PID $pid"
	kill -WINCH $pid >/dev/null
	while ProcessRunning $pid && [ $tries -gt 0 ]; do
		sleep 1
		tries=$((tries - 1))
	done

	# Judge by the process itself, not by the counter: the process may
	# have exited during the very last second of waiting.
	if ProcessRunning $pid; then
		# graceful stop didn't work, process still up.
		return 1
	fi
	return 0
}
# Send the default kill signal to the server up to 10 times, one second
# apart, for as long as the process keeps running.
#   $1 - pid of the server process
kill_stop()
{
	local pid=$1
	local attempt=0

	ocf_log info "Killing apache PID $pid"
	while ProcessRunning $pid && [ $attempt -lt 10 ]; do
		# don't sleep on the first try
		[ $attempt -eq 0 ] || sleep 1
		kill $pid >/dev/null
		attempt=$((attempt + 1))
	done
}
# Stop the web server.
# Tries graceful_stop first and falls back to kill_stop; signal_children
# is always called afterwards (also when the server was not running).
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when the process is still
# running after kill_stop.
apache_stop() {
local ret=$OCF_SUCCESS
local pid
if ! silent_status; then
ocf_log info "$CMD is not running."
# Still signal any processes matching this instance.
signal_children
return $ret
fi
pid=`get_pid`
graceful_stop $pid
if [ $? -ne 0 ]; then
# Graceful stop reported failure: escalate.
kill_stop $pid
if ProcessRunning $pid; then
- ocf_log info "$CMD still running ($pid). Killing pid failed."
+ ocf_exit_reason "$CMD still running ($pid). Killing pid failed."
ret=$OCF_ERR_GENERIC
fi
fi
if [ $ret -eq 0 ]; then
ocf_log info "$CMD stopped."
fi
signal_children
return $ret
}
# Extended monitor (dispatched by apache_monitor for check level 10).
# Takes the test URL and regex either from $TESTCONFFILE (via readtestconf)
# or from $TESTURL / $TESTREGEX10, fetches the URL with the client chosen
# by gethttpclient and greps the response (extended regex, case-insensitive).
# Returns $OCF_SUCCESS on a match, $OCF_ERR_CONFIGURED when
# is_testconf_sane fails, $OCF_ERR_GENERIC otherwise.
apache_monitor_10() {
if [ "$TESTCONFFILE" ]; then
readtestconf < $TESTCONFFILE
else
test_url="$TESTURL"
test_regex="$TESTREGEX10"
fi
whattorun=`gethttpclient`
fixtesturl
is_testconf_sane ||
return $OCF_ERR_CONFIGURED
if $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null
then
return $OCF_SUCCESS
else
# Only record a failure reason for real monitor runs, not for probes.
if ! ocf_is_probe; then
- ocf_log err "Failed to access httpd status page."
+ ocf_exit_reason "Failed to access httpd status page."
fi
return $OCF_ERR_GENERIC
fi
}
# If the user has not provided any basic monitoring
# information, allow the agent to verify the server is
# healthy and capable of processing requests by requesting
# the http header of website's index
# Fallback health check: request the HTTP header of the local index page.
# Only attempted when the user configured none of testregex, testregex10,
# testurl, statusurl or testconffile.
# Returns 1 when any of those is set, $OCF_ERR_GENERIC when the header
# request fails, 0 on success.
attempt_index_monitor_request() {
	local indexpage=""
	local user_setting

	for user_setting in \
		"$OCF_RESKEY_testregex" \
		"$OCF_RESKEY_testregex10" \
		"$OCF_RESKEY_testurl" \
		"$OCF_RESKEY_statusurl" \
		"$OCF_RESKEY_testconffile"
	do
		# Explicit monitoring configuration wins over this fallback.
		if [ -n "$user_setting" ]; then
			return 1
		fi
	done

	indexpage=$(buildlocalurl)
	if ! request_url_header $indexpage; then
		return $OCF_ERR_GENERIC
	fi
	ocf_log info "Successfully retrieved http header at $indexpage"
	return 0
}
# Basic monitor (dispatched by apache_monitor for check level 0).
# Succeeds when the page at $STATUSURL, fetched with ${ourhttpclient}_func,
# matches $TESTREGEX (extended regex, case-insensitive); otherwise falls
# back to attempt_index_monitor_request.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
apache_monitor_basic() {
if ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null
then
return $OCF_SUCCESS
fi
attempt_index_monitor_request
if [ $? -eq 0 ]; then
return $OCF_SUCCESS
fi
# Only record a failure reason for real monitor runs, not for probes.
if ! ocf_is_probe; then
- ocf_log err "Failed to access httpd status page."
+ ocf_exit_reason "Failed to access httpd status page."
fi
return $OCF_ERR_GENERIC
}
# Monitor entry point.
# Returns $OCF_NOT_RUNNING when silent_status fails, $OCF_ERR_INSTALLED
# when findhttpclient yields no client, otherwise the result of the
# monitor selected by the check level.
apache_monitor() {
silent_status
if [ $? -ne 0 ]; then
ocf_log info "$CMD not running"
return $OCF_NOT_RUNNING
fi
# Sets the global $ourhttpclient, which apache_monitor_basic uses.
ourhttpclient=`findhttpclient` # we'll need one
if [ -z "$ourhttpclient" ]; then
- ocf_log err "could not find a http client; make sure that either wget or curl is available"
+ ocf_exit_reason "could not find a http client; make sure that either wget or curl is available"
return $OCF_ERR_INSTALLED
fi
# NOTE(review): presumably ocf_check_level prints either 0 or 10 here; any
# other output matches no arm and the function then returns 0 - confirm.
case `ocf_check_level 10` in
0) apache_monitor_basic;;
10) apache_monitor_10;;
esac
}
# Print the default configuration file path: the SUSE location when that
# file exists, the RHEL location otherwise.
detect_default_config()
{
	local chosen=$DEFAULT_RHELCONFIG
	if [ -f $DEFAULT_SUSECONFIG ]; then
		chosen=$DEFAULT_SUSECONFIG
	fi
	echo $chosen
}
# Print the resource agent meta-data (XML) on stdout and return
# $OCF_SUCCESS.
# The here-document delimiter is unquoted, so $(detect_default_config) in
# the "configfile" parameter is expanded when the function runs. Nothing
# may be added inside the here-document without changing the XML output.
apache_meta_data(){
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="apache">
<version>1.0</version>
<longdesc lang="en">
This is the resource agent for the Apache Web server.
This resource agent operates both version 1.x and version 2.x Apache
servers.
The start operation ends with a loop in which monitor is
repeatedly called to make sure that the server started and that
it is operational. Hence, if the monitor operation does not
succeed within the start operation timeout, the apache resource
will end with an error status.
The monitor operation by default loads the server status page
which depends on the mod_status module and the corresponding
configuration file (usually /etc/apache2/mod_status.conf).
Make sure that the server status page works and that the access
is allowed *only* from localhost (address 127.0.0.1).
See the statusurl and testregex attributes for more details.
See also http://httpd.apache.org/
</longdesc>
<shortdesc lang="en">Manages an Apache Web server instance</shortdesc>
<parameters>
<parameter name="configfile" required="0" unique="1">
<longdesc lang="en">
The full pathname of the Apache configuration file.
This file is parsed to provide defaults for various other
resource agent parameters.
</longdesc>
<shortdesc lang="en">configuration file path</shortdesc>
<content type="string" default="$(detect_default_config)" />
</parameter>
<parameter name="httpd">
<longdesc lang="en">
The full pathname of the httpd binary (optional).
</longdesc>
<shortdesc lang="en">httpd binary path</shortdesc>
<content type="string" default="/usr/sbin/httpd" />
</parameter>
<parameter name="port" >
<longdesc lang="en">
A port number that we can probe for status information
using the statusurl.
This will default to the port number found in the
configuration file, or 80, if none can be found
in the configuration file.
</longdesc>
<shortdesc lang="en">httpd port</shortdesc>
<content type="integer" />
</parameter>
<parameter name="statusurl">
<longdesc lang="en">
The URL to monitor (the apache server status page by default).
If left unspecified, it will be inferred from
the apache configuration file.
If you set this, make sure that it succeeds *only* from the
localhost (127.0.0.1). Otherwise, it may happen that the cluster
complains about the resource being active on multiple nodes.
</longdesc>
<shortdesc lang="en">url name</shortdesc>
<content type="string" />
</parameter>
<parameter name="testregex">
<longdesc lang="en">
Regular expression to match in the output of statusurl.
Case insensitive.
</longdesc>
<shortdesc lang="en">monitor regular expression</shortdesc>
<content type="string" default="exists, but impossible to show in a human readable format (try grep testregex)"/>
</parameter>
<parameter name="client">
<longdesc lang="en">
Client to use to query to Apache. If not specified, the RA will
try to find one on the system. Currently, wget and curl are
supported. For example, you can set this parameter to "curl" if
you prefer that to wget.
</longdesc>
<shortdesc lang="en">http client</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="testurl">
<longdesc lang="en">
URL to test. If it does not start with "http", then it's
considered to be relative to the Listen address.
</longdesc>
<shortdesc lang="en">test url</shortdesc>
<content type="string" />
</parameter>
<parameter name="testregex10">
<longdesc lang="en">
Regular expression to match in the output of testurl.
Case insensitive.
</longdesc>
<shortdesc lang="en">extended monitor regular expression</shortdesc>
<content type="string" />
</parameter>
<parameter name="testconffile">
<longdesc lang="en">
A file which contains test configuration. Could be useful if
you have to check more than one web application or in case sensitive
info should be passed as arguments (passwords). Furthermore,
using a config file is the only way to specify certain
parameters.
Please see README.webapps for examples and file description.
</longdesc>
<shortdesc lang="en">test configuration file</shortdesc>
<content type="string" />
</parameter>
<parameter name="testname">
<longdesc lang="en">
Name of the test within the test configuration file.
</longdesc>
<shortdesc lang="en">test name</shortdesc>
<content type="string" />
</parameter>
<parameter name="options">
<longdesc lang="en">
Extra options to apply when starting apache. See man httpd(8).
</longdesc>
<shortdesc lang="en">command line options</shortdesc>
<content type="string" />
</parameter>
<parameter name="envfiles">
<longdesc lang="en">
Files (one or more) which contain extra environment variables.
If you want to prevent script from reading the default file, set
this parameter to empty string.
</longdesc>
<shortdesc lang="en">environment settings files</shortdesc>
<content type="string" default="/etc/apache2/envvars"/>
</parameter>
<parameter name="use_ipv6">
<longdesc lang="en">
We will try to detect if the URL (for monitor) is IPv6, but if
that doesn't work set this to true to enforce IPv6.
</longdesc>
<shortdesc lang="en">use ipv6 with http clients</shortdesc>
<content type="boolean" default="false"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="40s" />
<action name="stop" timeout="60s" />
<action name="status" timeout="30s" />
<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
return $OCF_SUCCESS
}
# Validate the instance configuration held in the globals $HTTPD and
# $CONFIGFILE.
# Returns $OCF_ERR_INSTALLED when $HTTPD is empty or not executable, or
# when $CONFIGFILE is not a regular file; $OCF_SUCCESS otherwise.
apache_validate_all() {
if [ -z "$HTTPD" ]; then
- ocf_log err "apache httpd program not found"
+ ocf_exit_reason "apache httpd program not found"
return $OCF_ERR_INSTALLED
fi
if [ ! -x "$HTTPD" ]; then
- ocf_log err "HTTPD $HTTPD not found or is not an executable!"
+ ocf_exit_reason "HTTPD $HTTPD not found or is not an executable!"
return $OCF_ERR_INSTALLED
fi
if [ ! -f $CONFIGFILE ]; then
- ocf_log err "Configuration file $CONFIGFILE not found!"
+ ocf_exit_reason "Configuration file $CONFIGFILE not found!"
return $OCF_ERR_INSTALLED
fi
return $OCF_SUCCESS
}
# Select the httpd binary and the default config file (globals HTTPD and
# DefaultConfig).
# When the script is invoked under a name containing "IBM", the IBM
# binary and config are used. Otherwise the first regular, executable
# entry of $HTTPDLIST is taken (HTTPD stays empty if there is none) and
# the default config comes from detect_default_config.
find_httpd_prog() {
	local candidate

	case $0 in
	*IBM*)
		HTTPD=$IBMHTTPD
		DefaultConfig=$DEFAULT_IBMCONFIG
		return
		;;
	esac

	HTTPD=
	for candidate in $HTTPDLIST; do
		if [ -f "$candidate" ] && [ -x "$candidate" ]; then
			HTTPD=$candidate
			break
		fi
	done

	# Let the user know that the $HTTPD used is not the one (s)he specified via $OCF_RESKEY_httpd
	if [ -n "$OCF_RESKEY_httpd" ] && [ -n "$HTTPD" ]; then
		ocf_log info "Using $HTTPD as HTTPD"
	fi
	DefaultConfig=$(detect_default_config)
}
# Copy the OCF instance parameters into the agent's globals, source the
# environment files, settle on an httpd binary and config file, and
# finally hand the config file to GetParams.
apache_getconfig() {
	# these variables are global
	HTTPD=$OCF_RESKEY_httpd
	PORT=$OCF_RESKEY_port
	STATUSURL=$OCF_RESKEY_statusurl
	CONFIGFILE=$OCF_RESKEY_configfile
	OPTIONS=$OCF_RESKEY_options
	CLIENT=$OCF_RESKEY_client
	TESTREGEX=${OCF_RESKEY_testregex:-'</ *html *>'}
	TESTURL=$OCF_RESKEY_testurl
	TESTREGEX10=$OCF_RESKEY_testregex10
	TESTCONFFILE=$OCF_RESKEY_testconffile
	TESTNAME=$OCF_RESKEY_testname

	# The default applies only when envfiles is unset; an explicitly empty
	# value is kept as is.
	: ${OCF_RESKEY_envfiles="/etc/apache2/envvars"}
	source_envfiles $OCF_RESKEY_envfiles

	# Look for a binary ourselves when the configured one is unusable.
	if [ -z "$HTTPD" ] || [ ! -f "$HTTPD" ] || [ ! -x "$HTTPD" ]; then
		find_httpd_prog
	fi
	CONFIGFILE=${CONFIGFILE:-$DefaultConfig}

	if [ -n "$HTTPD" ]; then
		httpd_basename=$(basename $HTTPD)
		# Keep only the part before the first "-" of the binary name.
		httpd_basename=${httpd_basename%%-*}
	fi
	GetParams $CONFIGFILE
}
# No parameters or binaries are declared as required.
OCF_REQUIRED_PARAMS=""
OCF_REQUIRED_BINARIES=""
# Hand the command line to ocf_rarun. "$@" passes every argument through
# unchanged, whereas an unquoted $* would re-split and glob-expand them.
ocf_rarun "$@"
diff --git a/heartbeat/clvm b/heartbeat/clvm
index ac79655a8..9d312cc27 100755
--- a/heartbeat/clvm
+++ b/heartbeat/clvm
@@ -1,410 +1,410 @@
#!/bin/bash
#
# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/ocf-directories
#######################################################################
# Print the resource agent meta-data (XML) on stdout.
# Nothing may be added inside the here-document without changing the XML
# output.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="clvm" version="0.9">
<version>1.0</version>
<longdesc lang="en">
This agent manages the clvmd daemon.
</longdesc>
<shortdesc lang="en">clvmd</shortdesc>
<parameters>
<parameter name="with_cmirrord" unique="0" required="0">
<longdesc lang="en">
Start with cmirrord (cluster mirror log daemon).
</longdesc>
<shortdesc lang="en">activate cmirrord</shortdesc>
<content type="boolean" default="false" />
</parameter>
<parameter name="daemon_options" unique="0">
<longdesc lang="en">
Options to clvmd. Refer to clvmd.8 for detailed descriptions.
</longdesc>
<shortdesc lang="en">Daemon Options</shortdesc>
<content type="string" default="-d0"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="90" />
<action name="monitor" timeout="90" interval="30" depth="0" />
<action name="reload" timeout="90" />
<action name="meta-data" timeout="10" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
}
#######################################################################
# Default daemon options when the parameter is unset or empty.
: ${OCF_RESKEY_daemon_options:="-d0"}

# Directory holding the daemons; falls back to /usr/sbin.
sbindir=${HA_SBIN_DIR:-/usr/sbin}

DAEMON="clvmd"
CMIRROR="cmirrord"
DAEMON_PATH="${sbindir}/${DAEMON}"
CMIRROR_PATH="${sbindir}/${CMIRROR}"
LVMCONF="${sbindir}/lvmconf"
LOCK_FILE="/var/lock/subsys/$DAEMON"

# The vg* tools do not live in sbindir on every distribution, so take the
# directory of whichever vgchange is found in PATH and fall back to
# sbindir when none is found.
vgchange_path=$(which vgchange 2> /dev/null)
vgtoolsdir=$(dirname $vgchange_path 2> /dev/null)
: "${vgtoolsdir:=$sbindir}"

LVM_VGCHANGE=${vgtoolsdir}/vgchange
LVM_VGDISPLAY=${vgtoolsdir}/vgdisplay
LVM_VGSCAN=${vgtoolsdir}/vgscan
# Leaving this in for legacy reasons. We do not want to advertise
# that the ability to set options via sysconfig exists; we want
# to expand the OCF-style options as necessary instead.
# Pull in the sysconfig files when they exist.
if [ -f /etc/sysconfig/cluster ]; then
	. /etc/sysconfig/cluster
fi
if [ -f /etc/sysconfig/$DAEMON ]; then
	. /etc/sysconfig/$DAEMON
fi

# Timeout in seconds: 90 by default, otherwise derived from the operation
# timeout, which is given in milliseconds.
CLVMD_TIMEOUT="90"
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
	CLVMD_TIMEOUT=$(( $OCF_RESKEY_CRM_meta_timeout / 1000 ))
fi
# Print the supported actions on stdout.
clvmd_usage()
{
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		'Expects to have a fully populated OCF RA-compliant environment set.'
}
# Check that the binaries needed for the current action are available.
# The existing note in this function says check_binary exits with
# OCF_ERR_INSTALLED when a binary is missing, so reaching the end means
# success.
clvmd_validate()
{
	# Needed for every action, including monitor.
	check_binary "pgrep"
	check_binary $DAEMON_PATH
	if ocf_is_true $OCF_RESKEY_with_cmirrord; then
		check_binary $CMIRROR_PATH
	fi

	# The remaining tools are not checked for the monitor action.
	[ "$__OCF_ACTION" = "monitor" ] && return $OCF_SUCCESS

	check_binary "killall"
	check_binary $LVM_VGCHANGE
	check_binary $LVM_VGDISPLAY
	check_binary $LVM_VGSCAN

	# Future validation checks here.
	return $OCF_SUCCESS
}
# Report whether a daemon is running.
#   $1 - process name as matched by pgrep
# A per-resource pid file under $HA_RSCTMP is used as a shortcut and is
# rewritten from the pgrep result whenever the shortcut misses.
# Returns $OCF_SUCCESS when running, $OCF_NOT_RUNNING when pgrep finds
# nothing, $OCF_ERR_GENERIC on any other pgrep status.
check_process()
{
local binary=$1
local pidfile="${HA_RSCTMP}/${binary}-${OCF_RESOURCE_INSTANCE}.pid"
local pid
ocf_log debug "Checking status for ${binary}."
if [ -e "$pidfile" ]; then
# The recorded pid only counts if its command line still names the binary.
cat /proc/$(cat $pidfile)/cmdline 2>/dev/null | grep -a "${binary}" > /dev/null 2>&1
if [ $? -eq 0 ];then
# shortcut without requiring pgrep to search through all procs
return $OCF_SUCCESS
fi
fi
# NOTE(review): if pgrep prints several pids, all of them are written to
# the pid file, and the /proc shortcut above presumably never matches on
# later calls - confirm.
pid=$(pgrep ${binary})
case $? in
0)
ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}."
echo "$pid" > $pidfile
return $OCF_SUCCESS;;
1)
rm -f "$pidfile" > /dev/null 2>&1
ocf_log info "$binary is not running"
return $OCF_NOT_RUNNING;;
*)
rm -f "$pidfile" > /dev/null 2>&1
- ocf_log err "Error encountered detecting pid status of $binary"
+ ocf_exit_reason "Error encountered detecting pid status of $binary"
return $OCF_ERR_GENERIC;;
esac
}
# Monitor action: report the state of clvmd (and of cmirrord when
# with_cmirrord is enabled).
# Returns the clvmd_validate failure code when validation fails,
# $OCF_ERR_GENERIC when the two daemons disagree, otherwise the
# check_process result for clvmd.
clvmd_status()
{
local rc
local mirror_rc
clvmd_validate
# Keep the validation result: after the logging call below "$?" holds the
# logger's status, not the validation failure.
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
- ocf_log error "Unable to monitor, Environment validation failed."
+ ocf_exit_reason "Unable to monitor, Environment validation failed."
return $rc
fi
check_process $DAEMON
rc=$?
mirror_rc=$rc
if ocf_is_true $OCF_RESKEY_with_cmirrord; then
check_process $CMIRROR
mirror_rc=$?
fi
# If these ever don't match, return error to force recovery
if [ $mirror_rc -ne $rc ]; then
return $OCF_ERR_GENERIC
fi
return $rc
}
# NOTE: replace this with vgs, once display filter per attr is implemented.
# Print the names of the volume groups whose vgdisplay record contains
# "Clustered". The output is split into records at each "VG Name" label,
# so the first field of a record is the volume group name.
clustered_vgs() {
	${LVM_VGDISPLAY} 2>/dev/null |
		awk -v RS="VG Name" '/Clustered/ { print $1 }'
}
# Poll once per second until a daemon has exited.
#   $1 - process name (handed to check_process)
#   $2 - number of seconds to keep polling
# Returns $OCF_SUCCESS once check_process reports $OCF_NOT_RUNNING,
# $OCF_ERR_GENERIC when the time is used up.
wait_for_process()
{
	local binary=$1
	local timeout=$2
	local elapsed=0

	ocf_log info "Waiting for $binary to exit"
	while [ $elapsed -le $timeout ]; do
		check_process $binary
		if ! [ $? -eq $OCF_NOT_RUNNING ]; then
			# Still there (or state unknown): wait and poll again.
			sleep 1
			elapsed=$((elapsed + 1))
			continue
		fi
		ocf_log info "$binary terminated"
		return $OCF_SUCCESS
	done
	return $OCF_ERR_GENERIC
}
# Compute how many seconds remain until a deadline.
#   $1 - deadline, on the same clock as $SECONDS
#   $2 - minimum value to report
# The result is delivered as the function's exit status (read it with
# "$?"; nothing is printed). Exit statuses are 8 bits wide, so the value
# is capped at 255 instead of being allowed to wrap around.
time_left()
{
	local end=$1
	local default=$2
	local now=$SECONDS
	local result=0

	result=$(( $end - $now ))
	if [ $result -lt $default ]; then
		result=$default
	fi
	# Without the cap, e.g. 300 remaining seconds would come back as 44.
	if [ $result -gt 255 ]; then
		result=255
	fi
	return $result
}
# Stop action: deactivate clustered volume groups, stop clvmd, then stop
# cmirrord when with_cmirrord is enabled.
# Returns $OCF_SUCCESS when nothing was running or everything stopped,
# $OCF_ERR_GENERIC when deactivation or signalling fails, otherwise the
# result of the last wait_for_process call.
clvmd_stop()
{
local LVM_VGS
local rc=$OCF_SUCCESS
# Deadline for the whole stop operation, on the $SECONDS clock.
local end=$(( $SECONDS + $CLVMD_TIMEOUT ))
clvmd_status
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi
check_process $DAEMON
if [ $? -ne $OCF_NOT_RUNNING ]; then
LVM_VGS="$(clustered_vgs)"
if [ -n "$LVM_VGS" ]; then
ocf_log info "Deactivating clustered VG(s):"
ocf_run ${LVM_VGCHANGE} -anl $LVM_VGS
if [ $? -ne 0 ]; then
- ocf_log error "Failed to deactivate volume groups, cluster vglist = $LVM_VGS"
+ ocf_exit_reason "Failed to deactivate volume groups, cluster vglist = $LVM_VGS"
return $OCF_ERR_GENERIC
fi
fi
ocf_log info "Signaling $DAEMON to exit"
killall -TERM $DAEMON
if [ $? != 0 ]; then
- ocf_log error "Failed to signal -TERM to $DAEMON"
+ ocf_exit_reason "Failed to signal -TERM to $DAEMON"
return $OCF_ERR_GENERIC
fi
wait_for_process $DAEMON $CLVMD_TIMEOUT
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
- ocf_log error "$DAEMON failed to exit"
+ ocf_exit_reason "$DAEMON failed to exit"
return $rc
fi
rm -f $LOCK_FILE
fi
check_process $CMIRROR
if [ $? -ne $OCF_NOT_RUNNING ] && ocf_is_true $OCF_RESKEY_with_cmirrord; then
local timeout
ocf_log info "Signaling $CMIRROR to exit"
killall -INT $CMIRROR
# time_left hands its result back as an exit status, hence "$?".
time_left $end 10; timeout=$?
wait_for_process $CMIRROR $timeout
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
killall -KILL $CMIRROR
time_left $end 10; timeout=$?
# Use the value computed on the line above: time_left prints nothing, so
# a command substitution here would pass an empty timeout and the wait
# would fail immediately.
wait_for_process $CMIRROR $timeout
rc=$?
fi
fi
return $rc
}
# Launch a daemon unless check_process already reports it as running.
#   $1 - full path of the binary
#   $2 - options to pass to it (optional)
# Returns $OCF_SUCCESS. A failed launch does not return: it terminates the
# whole script with $OCF_ERR_GENERIC.
# NOTE(review): only $2 is captured as options, so a caller passing an
# unquoted multi-word option string hands over just its first word -
# confirm against the call sites.
start_process()
{
local binary_path=$1
local opts=$2
check_process "$(basename $binary_path)"
if [ $? -ne $OCF_SUCCESS ]; then
ocf_log info "Starting $binary_path: "
ocf_run $binary_path $opts
# rc is not declared local in this function.
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log error "Failed to launch $binary_path, exit code $rc"
+ ocf_exit_reason "Failed to launch $binary_path, exit code $rc"
exit $OCF_ERR_GENERIC
fi
fi
return $OCF_SUCCESS
}
# Activate all volume groups; on failure stop the daemons again.
# Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
clvmd_activate_all()
{
	# Leaving the "volume group name" argument empty makes vgchange
	# act on every volume group.
	if ocf_run ${LVM_VGCHANGE} -aay; then
		return $OCF_SUCCESS
	fi

	ocf_log info "Failed to activate VG(s):"
	clvmd_stop
	return $OCF_ERR_GENERIC
}
# Start action: validate the environment, start cmirrord (when enabled)
# and clvmd, refresh the LVM cache, activate all volume groups and report
# the resulting status.
# Returns the clvmd_validate failure code when validation fails, the
# clvmd_activate_all result when the daemons were already running,
# otherwise the final clvmd_status result.
clvmd_start()
{
local rc=0
local CLVMDOPTS="-T${CLVMD_TIMEOUT} $OCF_RESKEY_daemon_options"
clvmd_validate
# Keep the validation result: after the logging call below "$?" holds the
# logger's status, not the validation failure.
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
- ocf_log error "Unable to start, Environment validation failed."
+ ocf_exit_reason "Unable to start, Environment validation failed."
return $rc
fi
clvmd_status
if [ $? -eq $OCF_SUCCESS ]; then
ocf_log debug "$DAEMON already started"
clvmd_activate_all
return $?;
fi
# autoset locking type to clustered when lvmconf tool is available
if [ -x "$LVMCONF" ]; then
$LVMCONF --enable-cluster > /dev/null 2>&1
fi
# if either of these fail, script will exit OCF_ERR_GENERIC
if ocf_is_true $OCF_RESKEY_with_cmirrord; then
start_process $CMIRROR_PATH
fi
start_process $DAEMON_PATH $CLVMDOPTS
# Refresh local cache.
#
# It's possible that new PVs were added to this, or other VGs
# while this node was down. So we run vgscan here to avoid
# any potential "Missing UUID" messages with subsequent
# LVM commands.
# The following step would be better and more informative to the user:
# 'action "Refreshing VG(s) local cache:" ${LVM_VGSCAN}'
# but it could show warnings such as:
# 'clvmd not running on node x-y-z Unable to obtain global lock.'
# and the action would be shown as FAILED when in reality it didn't.
# Ideally vgscan should have a startup mode that would not print
# unnecessary warnings.
${LVM_VGSCAN} > /dev/null 2>&1
touch $LOCK_FILE
# The activation result is not checked here; the status check that
# follows decides the return code.
clvmd_activate_all
clvmd_status
return $?
}
# Dispatch on the requested action. meta-data and unknown actions exit
# right away; every other handler's status is logged and used as the
# exit code.
case $__OCF_ACTION in
meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
start)
	clvmd_start
	;;
stop)
	clvmd_stop
	;;
monitor)
	clvmd_status
	;;
validate-all)
	clvmd_validate
	;;
usage|help)
	clvmd_usage
	;;
*)
	clvmd_usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
rc=$?

ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/conntrackd b/heartbeat/conntrackd
index bccb9393e..e81cda3e7 100755
--- a/heartbeat/conntrackd
+++ b/heartbeat/conntrackd
@@ -1,335 +1,335 @@
#!/bin/bash
#
#
# An OCF RA for conntrackd
# http://conntrack-tools.netfilter.org/
#
# Copyright (c) 2011 Dominik Klein
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Built-in defaults for the resource parameters.
OCF_RESKEY_binary_default=conntrackd
OCF_RESKEY_config_default=/etc/conntrackd/conntrackd.conf
# For users of versions prior to 1.2:
# Map renamed parameter "conntrackd" to "binary" if in use
# (precedence: an explicit "binary", then the legacy "conntrackd" parameter,
# then the built-in default).
: ${OCF_RESKEY_binary=${OCF_RESKEY_conntrackd-${OCF_RESKEY_binary_default}}}
# Fall back to the default configuration path when "config" is unset.
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
# Print the OCF resource-agent metadata (XML) for conntrackd on stdout.
# The parameter defaults are expanded from OCF_RESKEY_binary_default and
# OCF_RESKEY_config_default at call time.
meta_data()
{
	cat <<END_OF_METADATA
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="conntrackd">
<version>1.2</version>
<longdesc lang="en">
Master/Slave OCF Resource Agent for conntrackd
</longdesc>
<shortdesc lang="en">This resource agent manages conntrackd</shortdesc>
<parameters>
<parameter name="binary">
<longdesc lang="en">Name of the conntrackd executable.
If conntrackd is installed and available in the default PATH, it is sufficient to configure the name of the binary
For example "my-conntrackd-binary-version-0.9.14"
If conntrackd is installed somewhere else, you may also give a full path
For example "/packages/conntrackd-0.9.14/sbin/conntrackd"
</longdesc>
<shortdesc lang="en">Name of the conntrackd executable</shortdesc>
<content type="string" default="$OCF_RESKEY_binary_default"/>
</parameter>
<parameter name="config">
<longdesc lang="en">Full path to the conntrackd.conf file.
For example "/packages/conntrackd-0.9.14/etc/conntrackd/conntrackd.conf"</longdesc>
<shortdesc lang="en">Path to conntrackd.conf</shortdesc>
<content type="string" default="$OCF_RESKEY_config_default"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="30" />
<action name="promote" timeout="30" />
<action name="demote" timeout="30" />
<action name="notify" timeout="30" />
<action name="stop" timeout="30" />
<action name="monitor" timeout="20" interval="20" role="Slave" />
<action name="monitor" timeout="20" interval="10" role="Master" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END_OF_METADATA
}
# Check a Pacemaker meta attribute against an expected value.
#   $1 - meta attribute name; dashes are mapped to underscores to build the
#        name of the OCF_RESKEY_CRM_meta_* variable that is inspected
#   $2 - test(1) operator used for the comparison
#   $3 - expected value
# Returns 0 when the attribute is set and the comparison holds. Otherwise the
# mismatch is reported and the agent exits with OCF_ERR_CONFIGURED.
meta_expect()
{
local what=$1 whatvar=OCF_RESKEY_CRM_meta_${1//-/_} op=$2 expect=$3
# Indirect expansion: read the variable whose name is stored in whatvar.
local val=${!whatvar}
if [[ -n $val ]]; then
# [, not [[, or it won't work ;)
# ($op must be word-split into a real operator argument for test)
[ $val $op $expect ] && return
fi
- ocf_log err "meta parameter misconfigured, expected $what $op $expect, but found ${val:-unset}."
+ ocf_exit_reason "meta parameter misconfigured, expected $what $op $expect, but found ${val:-unset}."
exit $OCF_ERR_CONFIGURED
}
# Tell whether this instance is currently recorded as master.
# Returns the status of the pseudo-resource monitor on $statefile.
conntrackd_is_master()
{
	# You can't query conntrackd whether it is master or slave. It can be both at the same time.
	# This RA creates a statefile during promote and enforces master-max=1 and clone-node-max=1
	local state_rc
	ha_pseudo_resource $statefile monitor
	state_rc=$?
	return $state_rc
}
# Set this node's master score through crm_master.
#   $1 - score value to set
# Returns the exit status of crm_master.
conntrackd_set_master_score()
{
	local score=$1
	${HA_SBIN_DIR}/crm_master -Q -l reboot -v $score
}
# Determine the state of the local conntrackd instance.
#
# Globals:  OCF_RESKEY_config, OCF_RESKEY_binary, OCF_RESKEY_CRM_meta_interval,
#           master_score, slave_score (read); rc (written, deliberately not
#           local so existing users of the global keep working)
# Returns:  OCF_NOT_RUNNING    - the control socket named in the config is absent
#           OCF_ERR_GENERIC    - socket exists but the daemon does not answer -s
#           OCF_RUNNING_MASTER - daemon answers and conntrackd_is_master succeeds
#           OCF_SUCCESS        - daemon answers and this instance is not master
conntrackd_monitor() {
	rc=$OCF_NOT_RUNNING
	# It does not write a PID file, so check the socket exists after
	# extracting its path from the configuration file
	local conntrack_socket=$(awk '/^[ \t]*UNIX[ \t]*{/,/^[ \t]*}/ { if ($1 == "Path") { print $2 } }' $OCF_RESKEY_config)
	[ -S "$conntrack_socket" ] && rc=$OCF_SUCCESS
	if [ "$rc" -eq "$OCF_SUCCESS" ]; then
		# conntrackd is running
		# now see if it accepts queries
		if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -s > /dev/null 2>&1; then
			rc=$OCF_ERR_GENERIC
- ocf_log err "conntrackd is running but not responding to queries"
+ ocf_exit_reason "conntrackd is running but not responding to queries"
			# Report the failure immediately. Falling through would let
			# the master check below overwrite it with OCF_RUNNING_MASTER
			# and hide an unresponsive daemon from the cluster.
			return $rc
		fi
		if conntrackd_is_master; then
			rc=$OCF_RUNNING_MASTER
			# Restore master setting on probes
			if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then
				conntrackd_set_master_score $master_score
			fi
		else
			# Restore master setting on probes
			if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then
				conntrackd_set_master_score $slave_score
			fi
		fi
	fi
	return $rc
}
# Start conntrackd and bring it up in the slave role.
# Loops on conntrackd_monitor until the instance reports OCF_SUCCESS or
# OCF_ERR_GENERIC. There is no internal timeout: the CRM operation timeout is
# relied upon to abort a start that never gets there.
# Returns OCF_SUCCESS when the resync request (-n) succeeds, otherwise
# OCF_ERR_GENERIC.
conntrackd_start() {
rc=$OCF_ERR_GENERIC
# Keep trying to start the resource;
# wait for the CRM to time us out if this fails
while :; do
conntrackd_monitor
status=$?
case "$status" in
$OCF_SUCCESS)
# Running and answering queries: advertise the slave score first.
conntrackd_set_master_score $slave_score
# -n = request resync from the others
if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -n; then
- ocf_log err "$OCF_RESKEY_binary -C $OCF_RESKEY_config -n failed during start."
+ ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -n failed during start."
rc=$OCF_ERR_GENERIC
else
rc=$OCF_SUCCESS
fi
break
;;
$OCF_NOT_RUNNING)
# Launch the daemon (-d), then loop to re-check its state.
ocf_log info "Starting conntrackd"
$OCF_RESKEY_binary -C $OCF_RESKEY_config -d
;;
$OCF_RUNNING_MASTER)
# A freshly started instance must be slave: drop the master marker
# and loop to re-evaluate.
ocf_log warn "conntrackd already in master mode, demoting."
ha_pseudo_resource $statefile stop
;;
$OCF_ERR_GENERIC)
- ocf_log err "conntrackd start failed"
+ ocf_exit_reason "conntrackd start failed"
rc=$OCF_ERR_GENERIC
break
;;
esac
done
return $rc
}
# Stop conntrackd.
# Polls conntrackd_monitor in a loop and sends the kill request (-k) while the
# daemon is still reported as running or failed. The loop ends only once the
# monitor reports OCF_NOT_RUNNING; otherwise the CRM timeout ends the action.
# Returns OCF_SUCCESS once the instance is no longer running.
conntrackd_stop()
{
	rc=$OCF_ERR_GENERIC
	# Keep trying to bring down the resource;
	# wait for the CRM to time us out if this fails
	while true; do
		conntrackd_monitor
		status=$?
		if [ "$status" = "$OCF_SUCCESS" ] || [ "$status" = "$OCF_ERR_GENERIC" ]; then
			ocf_log info "Stopping conntrackd"
			$OCF_RESKEY_binary -C $OCF_RESKEY_config -k
		elif [ "$status" = "$OCF_NOT_RUNNING" ]; then
			rc=$OCF_SUCCESS
			break
		elif [ "$status" = "$OCF_RUNNING_MASTER" ]; then
			ocf_log warn "conntrackd still master"
		fi
	done
	return $rc
}
# Validate the agent's environment: the binary is checked with check_binary,
# the configuration file must exist, and the master-node-max, master-max and
# clone-node-max meta attributes must all equal 1.
# Returns OCF_SUCCESS, or OCF_ERR_INSTALLED when the config file is missing.
# A meta attribute mismatch makes meta_expect exit with OCF_ERR_CONFIGURED.
conntrackd_validate_all() {
# NOTE(review): check_binary presumably terminates the agent when the
# binary cannot be found - confirm against ocf-shellfuncs.
check_binary "$OCF_RESKEY_binary"
if ! [ -e "$OCF_RESKEY_config" ]; then
- ocf_log err "Config FILE $OCF_RESKEY_config does not exist"
+ ocf_exit_reason "Config FILE $OCF_RESKEY_config does not exist"
return $OCF_ERR_INSTALLED
fi
# The state-file based master tracking only works with a single master and
# a single instance per node.
meta_expect master-node-max = 1
meta_expect master-max = 1
meta_expect clone-node-max = 1
return $OCF_SUCCESS
}
# Promote this instance to master.
# Does nothing when the instance is already recorded as master. Otherwise the
# conntrackd commands -c, -f, -R and -B are run in that order, stopping at the
# first failure.
# Returns OCF_SUCCESS, or OCF_ERR_GENERIC when one of the commands failed.
conntrackd_promote() {
rc=$OCF_SUCCESS
if ! conntrackd_is_master; then
# -c = Commit the external cache to the kernel
# -f = Flush internal and external cache
# -R = resync with the kernel table
# -B = send a bulk update on the line
for parm in c f R B; do
if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm; then
- ocf_log err "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during promote."
+ ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during promote."
rc=$OCF_ERR_GENERIC
break
fi
done
# NOTE(review): the master marker and score are set even when a command
# above failed (rc is OCF_ERR_GENERIC) - confirm this is intended.
ha_pseudo_resource $statefile start
conntrackd_set_master_score $master_score
fi
return $rc
}
# Demote this instance to slave.
# Does nothing when the instance is not recorded as master. Otherwise the
# conntrackd commands -t and -n are run in that order, stopping at the first
# failure.
# Returns OCF_SUCCESS, or OCF_ERR_GENERIC when one of the commands failed.
conntrackd_demote() {
rc=$OCF_SUCCESS
if conntrackd_is_master; then
# -t = shorten kernel timers to remove zombies
# -n = request a resync from the others
for parm in t n; do
if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm; then
- ocf_log err "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during demote."
+ ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during demote."
rc=$OCF_ERR_GENERIC
break
fi
done
# The master marker is removed and the slave score set regardless of rc.
ha_pseudo_resource $statefile stop
conntrackd_set_master_score $slave_score
fi
return $rc
}
# Handle clone notifications by sending bulk updates (-B) to the peers:
#   * post-start: when this node is a master and the instance was started on
#     another node;
#   * pre-promote: when a promote is about to run on another node.
conntrackd_notify()
{
	local notify_type=$OCF_RESKEY_CRM_meta_notify_type
	local notify_op=$OCF_RESKEY_CRM_meta_notify_operation

	hostname=$(hostname)

	# OCF_RESKEY_CRM_meta_notify_master_uname is a whitespace separated list of master hostnames
	for master in $OCF_RESKEY_CRM_meta_notify_master_uname; do
		# if we are the master and an instance was just started on another node:
		# send a bulk update to allow failback
		if [ "$hostname" = "$master" ] &&
		   [ "$notify_type" = "post" ] &&
		   [ "$notify_op" = "start" ] &&
		   [ "$OCF_RESKEY_CRM_meta_notify_start_uname" != "$hostname" ]; then
			ocf_log info "Sending bulk update in post start to peers to allow failback"
			$OCF_RESKEY_binary -C $OCF_RESKEY_config -B
		fi
	done

	for tobepromoted in $OCF_RESKEY_CRM_meta_notify_promote_uname; do
		# if there is a promote action to be executed on another node:
		# send a bulk update to allow failback
		if [ "$hostname" != "$tobepromoted" ] &&
		   [ "$notify_type" = "pre" ] &&
		   [ "$notify_op" = "promote" ]; then
			ocf_log info "Sending bulk update in pre promote to peers to allow failback"
			$OCF_RESKEY_binary -C $OCF_RESKEY_config -B
		fi
	done
}
# Print a short usage summary for the agent on stdout.
conntrackd_usage()
{
	printf '%s\n' \
		"usage: $0 {start|stop|promote|demote|monitor|validate-all|meta-data}" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}
# Name of the pseudo-resource state file that marks this instance as master.
# Any ":<digit>..." part of the resource instance name is removed.
statefile=conntrackd.${OCF_RESOURCE_INSTANCE//:[0-9]*}.master
# Scores passed to conntrackd_set_master_score for the master and slave roles.
master_score=1000
slave_score=100
# The agent must be called with exactly one argument.
if [ $# -ne 1 ]; then
conntrackd_usage
exit $OCF_ERR_ARGS
fi
# First pass: actions that must work without a validated environment.
case $__OCF_ACTION in
meta-data)
meta_data
exit $OCF_SUCCESS
;;
usage)
conntrackd_usage
exit $OCF_SUCCESS
esac
# Everything except usage and meta-data must pass the validate test
# (a bare "exit" propagates the status returned by the validation).
conntrackd_validate_all || exit
# Second pass: run the requested action; no explicit exit follows, so the
# order of the statements below determines the agent's exit status.
case $__OCF_ACTION in
start)
conntrackd_start
;;
stop)
conntrackd_stop
;;
promote)
conntrackd_promote
;;
demote)
conntrackd_demote
;;
status|monitor)
conntrackd_monitor
;;
notify)
conntrackd_notify
;;
validate-all)
# Validation already ran (and passed) above; nothing more to do.
;;
*)
conntrackd_usage
exit $OCF_ERR_UNIMPLEMENTED
esac
# exit code is the exit code (return code) of the last command (shell function)
diff --git a/heartbeat/dhcpd b/heartbeat/dhcpd
index 835a78835..67b529e8b 100755
--- a/heartbeat/dhcpd
+++ b/heartbeat/dhcpd
@@ -1,545 +1,545 @@
#!/bin/sh
#
# Resource Agent for managing dhcpd resources.
#
# License: GNU General Public License (GPL)
# (c) 2011-2012 Chris Bowlby,
#
# A fair amount of this script has been pulled from the official dhcpd
# init script. Those portions have been integrated into this script to
# ensure consistent behavior between the resource agent and the
# original script. The copyrights and original authors are credited
# as follows:
#
# Copyright (c) 1996, 1997, 1998 S.u.S.E. GmbH
# Copyright (c) 1998, 1999, 2000, 2001 SuSE GmbH
# Copyright (c) 2002, 2003 SuSE Linux AG
# Copyright (c) 2004-2008 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# Author(s) : Rolf Haberrecker <rolf@suse.de>, 1997-1999
# Peter Poeml <poeml@suse.de>, 2000-2006
# Marius Tomaschewski <mt@suse.de>, 2006-2010
#
# and Linux-HA contributors
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
OCF_RESKEY_binary_default="dhcpd"
OCF_RESKEY_pid_default="/var/run/dhcpd.pid"
OCF_RESKEY_user_default=dhcpd
OCF_RESKEY_group_default=nogroup
OCF_RESKEY_config_default=""
OCF_RESKEY_chrooted_default="true"
OCF_RESKEY_chrooted_path_default="/var/lib/dhcp"
OCF_RESKEY_leases_default="/db/dhcpd.leases"
OCF_RESKEY_interface_default=""
OCF_RESKEY_includes_default=""
# Apply the defaults to every parameter the cluster did not set.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}
: ${OCF_RESKEY_chrooted=${OCF_RESKEY_chrooted_default}}
: ${OCF_RESKEY_chrooted_path=${OCF_RESKEY_chrooted_path_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_leases=${OCF_RESKEY_leases_default}}
: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}}
: ${OCF_RESKEY_includes=${OCF_RESKEY_includes_default}}
# To enable support for different versions of dhcp, we need
# to know what version we are being run against.
# The pipeline keeps the third dash-separated field of the version banner,
# then its first dot-separated component, and strips one leading letter.
# The sed script is quoted so that its bracket expression can never be
# expanded by the shell as a file name pattern.
DHCP_VERSION_MAJOR=$($OCF_RESKEY_binary --version 2>&1 | awk -F- '{print $3}' | awk -F. '{print $1}' | sed 's/^[a-zA-Z]//g')
# These files are always copied by default to ensure the chroot environment works.
DEFAULT_FILE_LIST="/etc/gai.conf /etc/nsswitch.conf /etc/resolv.conf /etc/host.conf /etc/hosts /etc/localtime /dev/urandom"
# Print the usage text of the dhcpd agent on stdout.
# Returns OCF_SUCCESS.
usage()
{
	printf '%s\n' \
		"usage: $0 start|stop|monitor|meta-data|validate-all" \
		"$0 manages the dhcp (dhcpd) server as an HA resource." \
		"The 'start' operation starts the dhcpd server." \
		"The 'stop' operation stops the dhcpd server." \
		"The 'monitor' operation reports whether the dhcpd service is running." \
		"The 'validate-all' operation reports whether the parameters are valid."
	return $OCF_SUCCESS
}
# Print the OCF resource-agent metadata (XML) for dhcpd on stdout.
# Every parameter default is expanded from the matching
# OCF_RESKEY_*_default variable at call time.
dhcpd_meta_data()
{
	cat <<END_OF_METADATA
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="dhcpd" version="0.1">
<version>0.1</version>
<longdesc lang="en">
Manage an ISC DHCP server service in a chroot environment.
</longdesc>
<shortdesc lang="en">Chrooted ISC DHCP server resource agent.</shortdesc>
<parameters>
<parameter name="config" unique="1" required="1">
<longdesc lang="en">
The absolute path to the DHCP server configuration file.
</longdesc>
<shortdesc lang="en">Configuration file</shortdesc>
<content type="string" default="$OCF_RESKEY_config_default"/>
</parameter>
<parameter name="chrooted" unique="1" required="0">
<longdesc lang="en">
Configure the dhcpd service to run in a chrooted or non-chrooted
mode.
</longdesc>
<shortdesc lang="en">Enable chroot mode</shortdesc>
<content type="boolean" default="$OCF_RESKEY_chrooted_default"/>
</parameter>
<parameter name="chrooted_path" unique="1" required="0">
<longdesc lang="en">
The absolute path of the chrooted DHCP environment.
</longdesc>
<shortdesc lang="en">The chrooted path</shortdesc>
<content type="string" default="$OCF_RESKEY_chrooted_path_default"/>
</parameter>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
The binary for the DHCP server process. An absolute path
definition is not required, but can be used to override
environment path.
</longdesc>
<shortdesc lang="en">dhcpd binary</shortdesc>
<content type="string" default="$OCF_RESKEY_binary_default"/>
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
The system user the DHCP server process will run as when
it is chrooted.
</longdesc>
<shortdesc lang="en">dhcpd owner</shortdesc>
<content type="string" default="$OCF_RESKEY_user_default"/>
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
The system group the DHCP server process will run as when
it is chrooted.
</longdesc>
<shortdesc lang="en">dhcpd group owner</shortdesc>
<content type="string" default="$OCF_RESKEY_group_default"/>
</parameter>
<parameter name="interface" unique="0" required="0">
<longdesc lang="en">
The network interface(s) the DHCP server process will
bind to. A blank value will bind the process to all
interfaces.
</longdesc>
<shortdesc lang="en">Network Interface</shortdesc>
<content type="string" default="$OCF_RESKEY_interface_default"/>
</parameter>
<parameter name="includes" unique="0" required="0">
<longdesc lang="en">
This parameter provides a means to copy include files
into the chrooted environment. If a dhcpd.conf file
contains a line similar to this:
include "/etc/named.keys";
Then an admin also has to tell the dhcpd RA that this
file should be pulled into the chrooted environment. This
is a space delimited list.
</longdesc>
<shortdesc lang="en">Include files</shortdesc>
<content type="string" default="$OCF_RESKEY_includes_default"/>
</parameter>
<parameter name="leases" unique="0" required="0">
<longdesc lang="en">
The leases database file, relative to chrooted_path.
</longdesc>
<shortdesc lang="en">Leases file</shortdesc>
<content type="string" default="$OCF_RESKEY_leases_default"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The path and filename of the PID file. It is relative
to chrooted_path.
</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="$OCF_RESKEY_pid_default"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END_OF_METADATA
}
# Validate most critical parameters
#
# Checks the dhcpd binary with check_binary, then (except on probes) the
# chroot path and configuration file, and finally the user account.
# Returns OCF_SUCCESS, or OCF_ERR_INSTALLED when the chroot path, the
# configuration file or the user is missing.
dhcpd_validate_all() {
check_binary $OCF_RESKEY_binary
# The file checks are skipped on probes.
if ! ocf_is_probe; then
# Test for the appropriate configuration files depending on if
# chroot mode is enabled.
if ocf_is_true $OCF_RESKEY_chrooted ; then
if ! test -e "$OCF_RESKEY_chrooted_path"; then
- ocf_log err "Path $OCF_RESKEY_chrooted_path does not exist."
+ ocf_exit_reason "Path $OCF_RESKEY_chrooted_path does not exist."
return $OCF_ERR_INSTALLED
fi
# NOTE(review): the -n operand always contains a "/" and is therefore
# never empty, so only the readability test is effective here - the
# intent was presumably to test "$OCF_RESKEY_config"; confirm.
if test -n "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config"; then
- ocf_log err "Configuration file $OCF_RESKEY_chrooted_path/$OCF_RESKEY_config doesn't exist"
+ ocf_exit_reason "Configuration file $OCF_RESKEY_chrooted_path/$OCF_RESKEY_config doesn't exist"
return $OCF_ERR_INSTALLED
fi
else
# Non-chrooted mode: a configured file must be readable in place.
if test -n "$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_config"; then
- ocf_log err "Configuration file $OCF_RESKEY_config doesn't exist"
+ ocf_exit_reason "Configuration file $OCF_RESKEY_config doesn't exist"
return $OCF_ERR_INSTALLED
fi
fi
fi
# The account the daemon runs as must be known to the system.
if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then
- ocf_log err "User $OCF_RESKEY_user doesn't exist"
+ ocf_exit_reason "User $OCF_RESKEY_user doesn't exist"
return $OCF_ERR_INSTALLED
fi
return $OCF_SUCCESS
}
# dhcpd_monitor. Check the dhcpd pid file to decide whether the server runs.
#
# Globals:  OCF_RESKEY_chrooted, OCF_RESKEY_chrooted_path, OCF_RESKEY_pid (read);
#           PIDF (written)
# Returns:  OCF_SUCCESS when ocf_pidfile_status succeeds for the pid file,
#           OCF_NOT_RUNNING otherwise.
dhcpd_monitor()
{
	if ocf_is_true $OCF_RESKEY_chrooted ; then
		# Chrooted mode: the pid file lives below the chroot path.
		PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid"
	else
		# Non-chrooted mode: the pid file sits in a "dhcpd" subdirectory
		# next to the configured location.
		PIDF=$(dirname $OCF_RESKEY_pid)/dhcpd/$(basename $OCF_RESKEY_pid)
	fi

	if ocf_pidfile_status $PIDF >/dev/null 2>&1; then
		return $OCF_SUCCESS
	fi
	return $OCF_NOT_RUNNING
}
# Initialize Chroot
#
# Prepare the chroot jail below $OCF_RESKEY_chrooted_path: create the
# directory skeleton, mount /proc for dhcpd >= 4, fix ownership, and copy the
# configuration, the default system files, the configured include files and
# the required shared libraries into the jail.
#
# Globals:  OCF_RESKEY_chrooted_path, OCF_RESKEY_pid, OCF_RESKEY_leases,
#           OCF_RESKEY_user, OCF_RESKEY_group, OCF_RESKEY_config,
#           OCF_RESKEY_includes, DHCP_VERSION_MAJOR (read);
#           DEFAULT_FILE_LIST, libdir, cplibs (written)
# Returns:  OCF_SUCCESS; OCF_ERR_CONFIGURED when the configuration file is
#           missing; OCF_ERR_INSTALLED when an include file is missing;
#           OCF_ERR_GENERIC when a file cannot be copied into the jail.
dhcpd_initialize_chroot() {
	# If we are running the initialization for the first time, we need to make
	# the new chrooted folder, in case we are not using the same default.
	if ! [ -d $OCF_RESKEY_chrooted_path ] ; then
		ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use."
	fi

	# Make sure all sub-paths are created if something went wrong during
	# a partial run.
	for i in db dev etc lib64 var/run; do
		mkdir -p $OCF_RESKEY_chrooted_path/$i
	done

	# If we are running version 4 of the dhcp server, we need to mount a proc partition.
	if [ $DHCP_VERSION_MAJOR -ge 4 ] ; then
		mkdir -p $OCF_RESKEY_chrooted_path/proc
		if ! [ -e $OCF_RESKEY_chrooted_path/proc/net/dev ] ; then
			mount -t proc -o ro proc $OCF_RESKEY_chrooted_path/proc > /dev/null 2>&1
		fi
	fi

	# If the folder to store the PID file does not exist, make it.
	if ! [ -d "$OCF_RESKEY_chrooted_path`dirname $OCF_RESKEY_pid`" ] ; then
		mkdir -p "$OCF_RESKEY_chrooted_path`dirname $OCF_RESKEY_pid`"
	fi

	# Ensure all permissions are in place if the folder was re-created.
	chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_chrooted_path/`dirname $OCF_RESKEY_leases`
	chown -R $OCF_RESKEY_user:$OCF_RESKEY_group "$OCF_RESKEY_chrooted_path/`dirname $OCF_RESKEY_pid`"

	## If there is no conf file, we can't initialize the chrooted
	## environment, return with "program not configured"
	if ! [ -f $OCF_RESKEY_config ] ; then
- ocf_log err "dhcpd has not been configured."
+ ocf_exit_reason "dhcpd has not been configured."
		return $OCF_ERR_CONFIGURED
	fi

	# If the leases file does not exist, create it, as this is a fresh install.
	if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then
		touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases
	fi

	# Remove the random device.
	test -e "$OCF_RESKEY_chrooted_path/dev/urandom" &&
		rm -f $OCF_RESKEY_chrooted_path/dev/urandom

	# Test for the existence of the defined include files, and append
	# them to the list of files to be copied.
	for i in $OCF_RESKEY_includes ; do
		if [ -e $i ] ; then
			DEFAULT_FILE_LIST="$DEFAULT_FILE_LIST $i"
		else
- ocf_log err "include file $i does not exist"
+ ocf_exit_reason "include file $i does not exist"
			return $OCF_ERR_INSTALLED
		fi
	done

	# Ensure all "modified" non-chrooted configuration files are copied into the chrooted environment.
	for i in $OCF_RESKEY_config $DEFAULT_FILE_LIST; do
		# First, lets make sure the directory exists within the chrooted environment.
		if test -d "$i" ; then
			mkdir -p $OCF_RESKEY_chrooted_path/$i
		elif test -e "$i" ; then
			mkdir -p "`dirname $OCF_RESKEY_chrooted_path/$i`"
		fi
		# Next, we copy the configuration file into place.
		cp -aL "$i" "$OCF_RESKEY_chrooted_path/${i%/*}/" > /dev/null 2>&1 ||
- { ocf_log err "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
+ { ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
	done

	# Name of the library directory inside the jail (e.g. lib64). Look it up
	# under the configured chroot path rather than the default location, so
	# that a non-default chrooted_path receives its libraries as well.
	libdir=$(basename $(echo $OCF_RESKEY_chrooted_path/lib*))
	if test -x /usr/bin/ldd ; then
		# Print the libraries from /$libdir that "$1" links against,
		# leaving out libc itself.
		get_ldd_deps()
		{
			/usr/bin/ldd "$1" | while read a b c d ; do
				[ -n "$c" ] || continue
				# Plain case patterns keep this valid under /bin/sh.
				case "$c" in
					*"/$libdir/libc."*) continue ;;
					*"/$libdir/lib"*) echo $c ;;
				esac
			done
		}
	else
		get_ldd_deps() { :; }
	fi

	# Resolver/NSS related libraries plus their dependencies, de-duplicated.
	cplibs=$(for i in /$libdir/libresolv.so.* /$libdir/libnss_*.so.* /$libdir/libpthread.so.0 /$libdir/libdl.so.2
	do
		if [ -s "$i" ] ; then
			echo "$i"
			get_ldd_deps "$i"
		fi
	done | sort -u)
	for i in $cplibs ; do
		if [ -s "$i" ]; then
			cp -pL "$i" "$OCF_RESKEY_chrooted_path/$libdir/" ||
- { ocf_log err "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
+ { ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
		fi
	done
	return $OCF_SUCCESS
}
# Initialize a non-chroot environment
#
# Makes sure the configuration file exists, creates the leases file below
# $OCF_RESKEY_chrooted_path when missing, and prepares the "dhcpd"
# subdirectory that holds the PID file.
# Returns OCF_SUCCESS, or OCF_ERR_CONFIGURED when the configuration file
# does not exist.
dhcpd_initialize() {
## If there is no conf file, we can't start a dhcp service.
if ! [ -f $OCF_RESKEY_config ] ; then
- ocf_log err "dhcpd has not been configured."
+ ocf_exit_reason "dhcpd has not been configured."
return $OCF_ERR_CONFIGURED
fi
# As with the standard DHCP init script, we can still use the
# chrooted default path for storing the leases file. This behavior
# is consistent with the existing /etc/init.d/dhcpd script.
# (Only a message is logged here; this block does not create the directory.)
if ! [ -d $OCF_RESKEY_chrooted_path ] ; then
ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use."
fi
# If the leases file does not exist, create it, as this is a fresh install.
if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then
touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases
fi
# If the PID storage path does not exist, make it, and set up the permissions.
# NOTE: This part of the script has a potential security flaw, in that if someone
# puts in /var/run as the path, it will change ownership to the dhcpd user
# and group. However, all that would do is allow that user to view the contents
# of the files, which they can do now anyway. If this becomes an issue, I can work
# in some changes.
# We need to append "dhcpd" to the path for the PID file storage folder, because
# if /var/run is used, that folder's permissions can not be changed, otherwise it affects
# more than just one application.
if ! [ -d `dirname $OCF_RESKEY_pid`/dhcpd ] ; then
mkdir -p `dirname $OCF_RESKEY_pid`/dhcpd
# Ownership is only changed for a user other than root ...
if [ -n "$OCF_RESKEY_user" -a "x$OCF_RESKEY_user" != "xroot" ] ; then
chown $OCF_RESKEY_user `dirname $OCF_RESKEY_pid`/dhcpd
fi
# ... and for a group other than wheel.
if [ -n "$OCF_RESKEY_group" -a "x$OCF_RESKEY_group" != "xwheel" ] ; then
chgrp $OCF_RESKEY_group `dirname $OCF_RESKEY_pid`/dhcpd
fi
fi
return $OCF_SUCCESS
}
# Start
#
# Start the dhcpd server, chrooted or not depending on OCF_RESKEY_chrooted,
# and wait until dhcpd_monitor reports it as running.
#
# Globals:  OCF_RESKEY_* parameters (read); DHCPD_ARGS, PIDF (written)
# Returns:  OCF_SUCCESS when dhcpd is (already) running;
#           OCF_ERR_INSTALLED when the environment cannot be initialized or
#           the daemon fails to launch. A failed validation exits the agent.
dhcpd_start() {
	# Lets make sure we are not already running.
	if dhcpd_monitor; then
		ocf_log info "dhcpd already running"
		return $OCF_SUCCESS
	fi

	# Only initialize the chrooted path(s) if chroot mode is enabled.
	if ocf_is_true $OCF_RESKEY_chrooted ; then
		dhcpd_initialize_chroot ||
- { ocf_log err "Could not fully initialize the chroot environment." ; return $OCF_ERR_INSTALLED; }
+ { ocf_exit_reason "Could not fully initialize the chroot environment." ; return $OCF_ERR_INSTALLED; }
	else
		dhcpd_initialize ||
- { ocf_log err "Could not fully initialize the runtime environment." ; return $OCF_ERR_INSTALLED; }
+ { ocf_exit_reason "Could not fully initialize the runtime environment." ; return $OCF_ERR_INSTALLED; }
	fi

	dhcpd_validate_all || exit

	# Define an empty string variable, to ensure it exists when needed.
	DHCPD_ARGS=""

	# To ensure consistent behavior with the standard DHCPD init script,
	# use the chrooted default path for storing a leases file, when not in
	# a chrooted environment.
	if ocf_is_true $OCF_RESKEY_chrooted ; then
		DHCPD_ARGS="$DHCPD_ARGS -chroot $OCF_RESKEY_chrooted_path -lf $OCF_RESKEY_leases"
	else
		DHCPD_ARGS="$DHCPD_ARGS -lf $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases"
	fi

	if [ -n "$OCF_RESKEY_user" ]; then
		DHCPD_ARGS="$DHCPD_ARGS -user $OCF_RESKEY_user"
	fi
	if [ -n "$OCF_RESKEY_group" ]; then
		DHCPD_ARGS="$DHCPD_ARGS -group $OCF_RESKEY_group"
	fi

	# If there is a pid file containing a pid, the machine might have crashed. pid files in
	# /var/run are always cleaned up at boot time, but this is not the case for the pid file in
	# the chroot jail. Therefore, an old pid file may exist. This is only a problem if it
	# incidentally contains the pid of a running process. If this process is not a 'dhcpd',
	# we remove the pid. (dhcpd itself only checks whether the pid is alive or not.)
	PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid"
	if ocf_is_true $OCF_RESKEY_chrooted ; then
		ocf_log info "Starting dhcpd [chroot] service."
		DHCPD_ARGS="$DHCPD_ARGS -pf $OCF_RESKEY_pid"
	else
		ocf_log info "Starting dhcpd [non-chroot] service."
		PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid`
		DHCPD_ARGS="$DHCPD_ARGS -pf $PIDF"
	fi
	test -e "$PIDF" && rm -f $PIDF

	ocf_run $OCF_RESKEY_binary -cf $OCF_RESKEY_config $DHCPD_ARGS $OCF_RESKEY_interface ||
		return $OCF_ERR_INSTALLED

	# Wait until the monitor really sees the daemon. Success must not be
	# reported before that, so the loop has no early return; if dhcpd never
	# comes up, the cluster's start timeout ends the action.
	if ! dhcpd_monitor; then
		ocf_log info "waiting for dhcpd to start"
		while ! dhcpd_monitor; do
			sleep .1
		done
	fi

	if ocf_is_true $OCF_RESKEY_chrooted ; then
		ocf_log info "dhcpd [chrooted] has started."
	else
		ocf_log info "dhcpd [non-chrooted] has started."
	fi
	return $OCF_SUCCESS
}
# Stop
#
# Stop the dhcpd server: send the default kill signal to the pid from the pid
# file, wait for up to 2/3 of the action timeout, then fall back to KILL.
# In chrooted mode with dhcpd >= 4 the jail's proc mount is released too.
#
# Globals:  OCF_RESKEY_chrooted, OCF_RESKEY_chrooted_path, OCF_RESKEY_pid,
#           OCF_RESKEY_CRM_meta_timeout, DHCP_VERSION_MAJOR (read);
#           PIDF (written)
# Returns:  OCF_SUCCESS
dhcpd_stop () {
	local timeout
	local timewait
	local rc

	dhcpd_monitor
	rc=$?
	case "$rc" in
		"$OCF_SUCCESS")
			# Currently running, and is expected behaviour.
			;;
		"$OCF_NOT_RUNNING")
			# Currently not running, therefore nothing to do.
			ocf_log info "dhcpd already stopped"
			return $OCF_SUCCESS
			;;
	esac

	PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid"
	if ! ocf_is_true $OCF_RESKEY_chrooted ; then
		PIDF=$(dirname $OCF_RESKEY_pid)/dhcpd/$(basename $OCF_RESKEY_pid)
	fi

	kill $(cat $PIDF)

	# Allow 2/3 of the action timeout for the orderly shutdown
	# (The origin unit is ms, hence the conversion)
	timewait=$((OCF_RESKEY_CRM_meta_timeout/1500))
	sleep 0.1; timeout=0 # Sleep here for .1 sec to let dhcpd finish.
	while dhcpd_monitor ; do
		if [ $timeout -ge $timewait ]; then
			break
		else
			sleep 1
			timeout=$((timeout + 1))
		fi
	done

	#If still up
	if dhcpd_monitor 2>&1; then
- ocf_log err "dhcpd is still up! Trying kill -s KILL"
+ ocf_log notice "dhcpd is still up! Trying kill -s KILL"
		# POSIX signal names carry no SIG prefix; "SIGKILL" is rejected by
		# some /bin/sh implementations, which would leave dhcpd running.
		kill -s KILL $(cat $PIDF)
	fi

	# If we are running a dhcp server v4 or higher, unmount the proc partition.
	if [ $DHCP_VERSION_MAJOR -ge 4 ] ; then
		# We only want to unmount proc in a chrooted environment, else we could
		# cause other issues.
		if ocf_is_true $OCF_RESKEY_chrooted ; then
			umount $OCF_RESKEY_chrooted_path/proc > /dev/null 2>&1
		fi
	fi

	rm -f $PIDF
	ocf_log info "dhcpd stopped"
	return $OCF_SUCCESS
}
# Make sure meta-data and usage always succeed
# (validate-all is handled here as well, but reports its real result).
case $__OCF_ACTION in
	meta-data)
		dhcpd_meta_data
		exit $OCF_SUCCESS
		;;
	validate-all)
		dhcpd_validate_all
		# Propagate the validation status instead of always claiming success.
		exit $?
		;;
	usage|help)
		# The usage function of this agent is named "usage".
		usage
		exit $OCF_SUCCESS
		;;
esac

# Translate each action into the appropriate function call
# (the agent exits with the status of the function that ran).
case $__OCF_ACTION in
	start)
		dhcpd_start
		;;
	stop)
		dhcpd_stop
		;;
	monitor)
		dhcpd_monitor
		;;
	*)
		usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
diff --git a/heartbeat/galera b/heartbeat/galera
index a1d925baf..e7ceb147f 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -1,691 +1,691 @@
#!/bin/sh
#
# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
##
# README.
#
# This agent only supports being configured as a multistate Master
# resource.
#
# Slave vs Master role:
#
# During the 'Slave' role, galera instances are in read-only mode and
# will not attempt to connect to the cluster. This role exists only as
# a means to determine which galera instance is the most up-to-date. The
# most up-to-date node will be used to bootstrap a galera cluster that
# has no current members.
#
# The galera instances will only begin to be promoted to the Master role
# once all the nodes in the 'wsrep_cluster_address' connection address
# have entered read-only mode. At that point the node containing the
# database that is most current will be promoted to Master. Once the first
# Master instance bootstraps the galera cluster, the other nodes will be
# promoted to Master as well.
#
# Example: Create a galera cluster using nodes rhel7-auto1 rhel7-auto2 rhel7-auto3
#
# pcs resource create db galera enable_creation=true \
# wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
#
# By setting the 'enable_creation' option, the database will be automatically
# generated at startup. The meta attribute 'master-max=3' means that all 3
# nodes listed in the wsrep_cluster_address list will be allowed to connect
# to the galera cluster and perform replication.
#
# NOTE: If you have more nodes in the pacemaker cluster than you wish
# to have in the galera cluster, make sure to use location constraints to prevent
# pacemaker from attempting to place a galera instance on a node that is
# not in the 'wsrep_cluster_address' list.
#
##
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/mysql-common.sh
# It is common for some galera instances to store
# check user that can be used to query status
# in this file
if [ -f "/etc/sysconfig/clustercheck" ]; then
. /etc/sysconfig/clustercheck
fi
#######################################################################
# Print a short usage summary for this agent on stdout.
# The action list includes 'status', which the dispatcher at the bottom of
# this script handles and which is described in the text below.
usage() {
cat <<UEND
usage: $0 (start|stop|status|validate-all|meta-data|monitor|promote|demote)
$0 manages a galera Database as an HA resource.
The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
The 'promote' operation makes this mysql server run as master
The 'demote' operation makes this mysql server run as slave
The 'validate-all' operation reports whether the parameters are valid
UEND
}
# Print the OCF resource-agent metadata (XML) for the galera agent on stdout.
# Parameter defaults are interpolated from the OCF_RESKEY_*_default variables
# that are in scope when the function runs.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="galera">
<version>1.0</version>
<longdesc lang="en">
Resource script for managing galera database.
</longdesc>
<shortdesc lang="en">Manages a galera instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">MySQL datadir</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="log" unique="0" required="0">
<longdesc lang="en">
The logfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="enable_creation" unique="0" required="0">
<longdesc lang="en">
If the MySQL database does not exist, it will be created
</longdesc>
<shortdesc lang="en">Create the database if it does not exist</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="wsrep_cluster_address" unique="0" required="1">
<longdesc lang="en">
The galera cluster address. This takes the form of:
gcomm://node,node,node
Only nodes present in this node list will be allowed to start a galera instance.
It is expected that the galera node names listed in this address match valid
pacemaker node names.
</longdesc>
<shortdesc lang="en">Galera cluster address</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="check_user" unique="0" required="0">
<longdesc lang="en">
Cluster check user.
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="root" />
</parameter>
<parameter name="check_passwd" unique="0" required="0">
<longdesc lang="en">
Cluster check user password
</longdesc>
<shortdesc lang="en">check password</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="20" />
<action name="monitor" role="Master" depth="0" timeout="30" interval="10" />
<action name="monitor" role="Slave" depth="0" timeout="30" interval="30" />
<action name="promote" timeout="300" />
<action name="demote" timeout="120" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Run "SHOW VARIABLES like '<name>'" through the mysql client and print the
# last line of the client's output.
#   $1 - variable name (LIKE pattern)
get_option_variable()
{
    local name=$1
    local query="SHOW VARIABLES like '$name';"

    $MYSQL $MYSQL_OPTIONS_CHECK -e "$query" | tail -1
}
# Run "show status like '<name>'" through the mysql client and print the
# last line of the client's output.
#   $1 - status variable name (LIKE pattern)
get_status_variable()
{
    local name=$1
    local query="show status like '$name';"

    $MYSQL $MYSQL_OPTIONS_CHECK -e "$query" | tail -1
}
# Set the reboot-lifetime "<instance>-bootstrap" node attribute to "true"
# on the given node.
#   $1 - node name
set_bootstrap_node()
{
    local target=$1
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" -v "true"
}
# Delete the reboot-lifetime "<instance>-bootstrap" attribute of the local
# node ($NODENAME).
clear_bootstrap_node()
{
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Query the "<instance>-bootstrap" attribute of the local node and print
# whatever crm_attribute reports; crm_attribute's stderr is discarded.
is_bootstrap()
{
    local attr="${INSTANCE_ATTR_NAME}-bootstrap"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -Q 2>/dev/null
}
# Delete the reboot-lifetime "<instance>-last-committed" attribute of the
# local node ($NODENAME).
clear_last_commit()
{
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
}
# Record a value in the local node's reboot-lifetime
# "<instance>-last-committed" attribute.
#   $1 - value to store
set_last_commit()
{
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -v $1
}
# Print the "<instance>-last-committed" attribute of a node; crm_attribute's
# stderr is discarded.
#   $1 - node name (optional; the local node $NODENAME is used when empty)
get_last_commit()
{
    local target=${1:-$NODENAME}
    local attr="${INSTANCE_ATTR_NAME}-last-committed"

    ${HA_SBIN_DIR}/crm_attribute -N $target -l reboot --name "$attr" -Q 2>/dev/null
}
# Block until the wsrep_local_state status variable reads "4", polling once
# per second. There is no internal time limit in this function.
wait_for_sync()
{
    local state

    state=$(get_status_variable "wsrep_local_state")
    ocf_log info "Waiting for database to sync with the cluster. "
    until [ "$state" = "4" ]; do
        sleep 1
        state=$(get_status_variable "wsrep_local_state")
    done
    ocf_log info "Database synced."
}
# Report whether wsrep_cluster_status is "Primary".
# The queried value is stored in the non-local variable cluster_status.
# Returns 0 when the status is "Primary" and 1 otherwise; an empty status is
# reported as an error, any other value is logged at info level.
is_primary()
{
cluster_status=$(get_status_variable "wsrep_cluster_status")
if [ "$cluster_status" = "Primary" ]; then
return 0
fi
# Empty output means the status query itself produced nothing.
if [ -z "$cluster_status" ]; then
- ocf_log err "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
+ ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
else
ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}"
fi
return 1
}
# Succeed only when the server reports read_only as true AND
# wsrep_cluster_status is "Disconnected".
# The queried status is left in the non-local variable cluster_status; it is
# only queried when read_only is true.
is_readonly()
{
    local ro_flag

    ro_flag=$(get_option_variable "read_only")
    ocf_is_true "$ro_flag" || return 1

    cluster_status=$(get_status_variable "wsrep_cluster_status")
    if [ "$cluster_status" = "Disconnected" ]; then
        return 0
    fi
    return 1
}
# Succeed when the crm_mon XML output contains a line describing this
# resource in role Master that is active, not orphaned and not failed.
master_exists()
{
    # determine if a master instance is already up and is healthy
    local healthy_master="resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\""

    crm_mon --as-xml | grep "$healthy_master" > /dev/null 2>&1
}
# Delete the master score via $CRM_MASTER.
#   $1 - node name (optional; without it no -N option is passed)
clear_master_score()
{
    local target=$1

    $CRM_MASTER -D ${target:+-N $target}
}
# Set the master score to 100 via $CRM_MASTER.
#   $1 - node name (optional; without it no -N option is passed)
set_master_score()
{
    local target=$1

    $CRM_MASTER ${target:+-N $target} -v 100
}
# Give every node named in OCF_RESKEY_wsrep_cluster_address a master score.
# The address is reduced to a space separated node list by dropping the
# "gcomm://" prefix and any blanks and turning commas into separators.
# The loop variable "node" is deliberately not local, as in the original.
promote_everyone()
{
    local members

    members=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
    for node in $members; do
        set_master_score $node
    done
}
# Succeed when $1 >= $2.
# The comparison is delegated to awk because these values can be too large
# for the shell's own -gt/-ge operators.
greater_than_equal_long()
{
    awk -v n1="$1" -v n2="$2" 'BEGIN { exit !(n1 >= n2) }'
}
# Choose the bootstrap node: the member of OCF_RESKEY_wsrep_cluster_address
# with the highest reported last-committed value (on a tie the node listed
# later wins, because the comparison is >=).
# If any member has not reported a value yet, nothing is promoted and the
# function returns 0. Otherwise the chosen node receives a master score and
# the bootstrap attribute.
# The loop variable "node" is deliberately not local, as in the original.
detect_first_master()
{
    local best_commit=0
    local best_node="$NODENAME"
    local last_commit=0
    local missing_nodes=0
    local members

    members=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
    for node in $members; do
        last_commit=$(get_last_commit $node)
        if [ -z "$last_commit" ]; then
            ocf_log info "Waiting on node <${node}> to report database status before Master instances can start."
            missing_nodes=1
            continue
        fi

        # this value means -1, i.e. no commit has occurred yet.
        case "$last_commit" in
            18446744073709551615) last_commit="0" ;;
        esac

        if greater_than_equal_long "$last_commit" "$best_commit"; then
            best_node=$node
            best_commit=$last_commit
        fi
    done

    if [ $missing_nodes -eq 1 ]; then
        return 0
    fi

    ocf_log info "Promoting $best_node to be our bootstrap node"
    set_master_score $best_node
    set_bootstrap_node $best_node
}
# For galera, promote is really start
#
# galera_promote: restart the local mysqld as a full cluster member.
# If a healthy Master already exists the instance joins using the configured
# wsrep_cluster_address; otherwise it may only proceed when this node carries
# the bootstrap attribute, in which case it starts with an empty "gcomm://"
# address. After the start the instance must pass galera_monitor, must not be
# read-only and must report Primary status.
# Returns $OCF_SUCCESS on success, $OCF_ERR_GENERIC for the state checks, or
# the return code of the failing stop/start/monitor step.
galera_promote()
{
local rc
local extra_opts
local bootstrap
master_exists
if [ $? -eq 0 ]; then
# join without bootstrapping
extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}"
else
bootstrap=$(is_bootstrap)
if ocf_is_true $bootstrap; then
ocf_log info "Node <${NODENAME}> is bootstrapping the cluster"
extra_opts="--wsrep-cluster-address=gcomm://"
else
- ocf_log err "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
+ ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
return $OCF_ERR_GENERIC
fi
fi
# make sure the read only instance is stopped
mysql_common_stop
rc=$?
# Both "stopped now" and "was not running" are acceptable outcomes here.
if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
- ocf_log err "Failed to stop read-only galera instance during promotion to Master"
+ ocf_exit_reason "Failed to stop read-only galera instance during promotion to Master"
return $rc
fi
# NOTE(review): fixed 4 second pause between stop and start; its purpose is
# not evident from this code.
sleep 4
mysql_common_prepare_dirs
mysql_common_start "$extra_opts"
rc=$?
if [ $rc != $OCF_SUCCESS ]; then
return $rc
fi
galera_monitor
rc=$?
if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
- ocf_log err "Failed initial monitor action"
+ ocf_exit_reason "Failed initial monitor action"
return $rc
fi
# is_readonly succeeding here means the instance did not leave read-only mode.
is_readonly
if [ $? -eq 0 ]; then
- ocf_log err "Failure. Master instance started in read-only mode, check configuration."
+ ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration."
return $OCF_ERR_GENERIC
fi
is_primary
if [ $? -ne 0 ]; then
- ocf_log err "Failure. Master instance started, but is not in Primary mode."
+ ocf_exit_reason "Failure. Master instance started, but is not in Primary mode."
return $OCF_ERR_GENERIC
fi
# $bootstrap is only set when no Master existed at entry.
if ocf_is_true $bootstrap; then
promote_everyone
clear_bootstrap_node
ocf_log info "Bootstrap complete, promoting the rest of the galera instances."
else
# if this is not the bootstrap node, make sure this instance
# syncs with the rest of the cluster before promotion returns.
wait_for_sync
fi
# last commit is no longer relevant once promoted
clear_last_commit
ocf_log info "Galera started"
return $OCF_SUCCESS
}
# galera_demote: stop the running instance and start it again through
# galera_start (read-only mode).
# Returns the stop return code when the stop fails with anything other than
# $OCF_SUCCESS or $OCF_NOT_RUNNING; otherwise returns whatever galera_start
# returns. Note that rc is not declared local in this function.
galera_demote()
{
mysql_common_stop
rc=$?
if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
# NOTE(review): the message says "demotion to Master"; presumably
# "demotion to Slave" was meant.
- ocf_log err "Failed to stop Master galera instance during demotion to Master"
+ ocf_exit_reason "Failed to stop Master galera instance during demotion to Master"
return $rc
fi
# if this node was previously a bootstrap node, that is no longer the case.
clear_bootstrap_node
# start again in slave mode so the new last commit is recorded
galera_start
}
# galera_start: start mysqld with --read-only=true, publish this node's
# wsrep_last_committed value as a node attribute and update master scores.
# Returns $OCF_ERR_CONFIGURED when $NODENAME does not occur in
# wsrep_cluster_address, $OCF_ERR_GENERIC when the instance does not come up
# read-only, and $OCF_SUCCESS otherwise.
galera_start()
{
local extra_opts='--read-only=true'
local last_commit
# NOTE(review): this is an unanchored grep match, so a node name that is a
# substring of another member's name (node1 vs node10) also passes.
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
if [ $? -ne 0 ]; then
- ocf_log err "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>to start this galera instance"
+ ocf_exit_reason "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>to start this galera instance"
return $OCF_ERR_CONFIGURED
fi
mysql_common_prepare_dirs
# NOTE(review): the return code of mysql_common_start is not checked here;
# a failed start is only noticed through the is_readonly check below.
mysql_common_start "$extra_opts"
is_readonly
if [ $? -ne 0 ]; then
- ocf_log err "Failure. Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set."
+ ocf_exit_reason "Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set."
return $OCF_ERR_GENERIC
fi
ocf_log info "attempting to detect last commit version"
# Poll once per second until the status query returns a value; the loop has
# no internal time limit.
while [ -z "$last_commit" ]; do
last_commit=$(get_status_variable "wsrep_last_committed")
if [ -z "$last_commit" ]; then
sleep 1
fi
done
ocf_log info "Last commit version found: $last_commit"
set_last_commit $last_commit
master_exists
if [ $? -eq 0 ]; then
ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster."
set_master_score $NODENAME
else
# No Master yet: drop any master score and try to elect a bootstrap node.
clear_master_score
detect_first_master
fi
return $OCF_SUCCESS
}
# galera_monitor: report the state of the local instance.
# Returns the mysql_common_status result when that is not $OCF_SUCCESS,
# $OCF_ERR_GENERIC when the node is not listed in wsrep_cluster_address or is
# neither read-only nor Primary, $OCF_RUNNING_MASTER for a Primary instance,
# and $OCF_SUCCESS for a read-only instance.
# As a side effect it maintains master scores (and may elect a bootstrap node
# through detect_first_master).
galera_monitor()
{
local rc
local status_loglevel="err"
# Set loglevel to info during probe
if ocf_is_probe; then
status_loglevel="info"
fi
mysql_common_status $status_loglevel
rc=$?
# If status returned an error, return that immediately
if [ $rc -ne $OCF_SUCCESS ]; then
return $rc
fi
# NOTE(review): unanchored grep match; a node name that is a substring of
# another member's name also passes.
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
if [ $? -ne 0 ]; then
- ocf_log err "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
+ ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
return $OCF_ERR_GENERIC
fi
is_readonly
if [ $? -ne 0 ]; then
# Not read-only: the instance must then be a Primary cluster member.
is_primary
if [ $? -ne 0 ]; then
- ocf_log err "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state."
+ ocf_exit_reason "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state."
return $OCF_ERR_GENERIC
fi
if ocf_is_probe; then
# restore master score during probe
# if we detect this is a master instance
set_master_score
fi
rc=$OCF_RUNNING_MASTER
else
# Read-only instance: rc keeps the $OCF_SUCCESS value from the status check.
master_exists
if [ $? -ne 0 ]; then
detect_first_master
else
# a master instance exists and is healthy, promote this
# local read only instance
# so it can join the master galera cluster.
set_master_score
fi
fi
# TODO look at what is done in the wait script
return $rc
}
# galera_stop: stop mysqld and remove this node's cluster bookkeeping
# (last-committed attribute, master score, bootstrap attribute).
# Returns the return code of mysql_common_stop. The cleanup steps run
# regardless of that result and do not influence the return value.
galera_stop()
{
    local rc

    # make sure the process is stopped
    mysql_common_stop
    # Capture the stop result itself. The previous "rc=\$1" read the (empty)
    # first function argument, so a failed stop was never reported.
    rc=$?

    clear_last_commit
    clear_master_score
    clear_bootstrap_node
    return $rc
}
# galera_validate: check the galera specific configuration.
# Returns $OCF_ERR_CONFIGURED when the resource is not multistate (ocf_is_ms
# fails) or when wsrep_cluster_address is empty; otherwise returns the result
# of mysql_common_validate.
galera_validate()
{
if ! ocf_is_ms; then
- ocf_log err "Galera must be configured as a multistate Master/Slave resource."
+ ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
return $OCF_ERR_CONFIGURED
fi
if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then
- ocf_log err "Galera must be configured with a wsrep_cluster_address value."
+ ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value."
return $OCF_ERR_CONFIGURED
fi
# Remaining checks are delegated to the shared mysql validation.
mysql_common_validate
}
# meta-data and usage/help are answered before any validation takes place.
if [ "$1" = "meta-data" ]; then
    meta_data
    exit $OCF_SUCCESS
elif [ "$1" = "usage" ] || [ "$1" = "help" ]; then
    usage
    exit $OCF_SUCCESS
fi

# Validate the configuration. When validation fails, stop/monitor/status
# report the resource as cleanly stopped or not running; every other action
# exits with the validation error itself.
galera_validate
rc=$?
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
    case "$1" in
        stop)
            exit $OCF_SUCCESS
            ;;
        monitor)
            exit $OCF_NOT_RUNNING
            ;;
        status)
            exit $LSB_STATUS_STOPPED
            ;;
        *)
            exit $rc
            ;;
    esac
fi
# Resolve the credentials used for status queries.
# Values not supplied as resource parameters fall back to MYSQL_PASSWORD and
# MYSQL_USERNAME, which come from /etc/sysconfig/clustercheck when that file
# exists (it is sourced near the top of this script).
if [ -z "${OCF_RESKEY_check_passwd}" ]; then
    OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
fi
if [ -z "${OCF_RESKEY_check_user}" ]; then
    OCF_RESKEY_check_user=${MYSQL_USERNAME}
fi
# ':=' (not '=') so the documented default "root" also applies when the
# fallback above left the variable set but empty.
: ${OCF_RESKEY_check_user:="root"}

MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
if [ -n "${OCF_RESKEY_check_passwd}" ]; then
    # Use the resolved password. Referring to MYSQL_PASSWORD here would ignore
    # a password supplied through the check_passwd resource parameter.
    # NOTE(review): the password ends up on the mysql command line and is
    # therefore visible in the process list.
    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
fi
# Map the requested action onto its handler. The exit status of the script
# is the status of the handler unless an arm exits explicitly.
case "$1" in
    start)
        galera_start
        ;;
    stop)
        galera_stop
        ;;
    status)
        mysql_common_status err
        ;;
    monitor)
        galera_monitor
        ;;
    promote)
        galera_promote
        ;;
    demote)
        galera_demote
        ;;
    validate-all)
        exit $OCF_SUCCESS
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
# vi:sw=4:ts=4:et:
diff --git a/heartbeat/mysql b/heartbeat/mysql
index e3bdee6a0..dc862f5ac 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -1,1031 +1,1031 @@
#!/bin/sh
#
#
# MySQL
#
# Description: Manages a MySQL database as Linux-HA resource
#
# Authors: Alan Robertson: DB2 Script
# Jakub Janczak: rewrite as MySQL
# Andrew Beekhof: cleanup and import
# Sebastian Reitenbach: add OpenBSD defaults, more cleanup
# Narayan Newton: add Gentoo/Debian defaults
# Marian Marinov, Florian Haas: add replication capability
# Yves Trudeau, Baron Schwartz: add VIP support and improve replication
#
# Support: linux-ha@lists.linux-ha.org
# License: GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
# 2005-2010 Linux-HA contributors
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 mysql
#
# See usage() function below for more details...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_client_binary
# OCF_RESKEY_config
# OCF_RESKEY_datadir
# OCF_RESKEY_user
# OCF_RESKEY_group
# OCF_RESKEY_test_table
# OCF_RESKEY_test_user
# OCF_RESKEY_test_passwd
# OCF_RESKEY_enable_creation
# OCF_RESKEY_additional_parameters
# OCF_RESKEY_log
# OCF_RESKEY_pid
# OCF_RESKEY_socket
# OCF_RESKEY_replication_user
# OCF_RESKEY_replication_passwd
# OCF_RESKEY_replication_port
# OCF_RESKEY_max_slave_lag
# OCF_RESKEY_evict_outdated_slaves
# OCF_RESKEY_reader_attribute
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/mysql-common.sh
#######################################################################
# Print a short usage summary for the mysql agent on stdout.
# NOTE(review): the action list names "notify" but has no description for it,
# while "status" is described without appearing in the action list.
usage() {
cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|monitor|promote|demote|notify)
$0 manages a MySQL Database as an HA resource.
The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
The 'promote' operation makes this mysql server run as master
The 'demote' operation makes this mysql server run as slave
The 'validate-all' operation reports whether the parameters are valid
UEND
}
# Print the OCF resource-agent metadata (XML) for the mysql agent on stdout.
# Parameter defaults are interpolated from the OCF_RESKEY_*_default variables
# that are in scope when the function runs; "\${INSTANCE_ATTR_NAME}" in the
# long description is escaped and therefore printed literally.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql">
<version>1.0</version>
<longdesc lang="en">
Resource script for MySQL.
May manage a standalone MySQL database, a clone set with externally
managed replication, or a complete master/slave replication setup.
While managing replication, the default behavior is to use uname -n
values in the change master to command. Other IPs can be specified
manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP
giving the IP to use for replication. For example, if the mysql primitive
you are using is p_mysql, the attribute to set will be
p_mysql_mysql_master_IP.
</longdesc>
<shortdesc lang="en">Manages a MySQL database instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">MySQL datadir</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="log" unique="0" required="0">
<longdesc lang="en">
The logfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="test_table" unique="0" required="0">
<longdesc lang="en">
Table to be tested in monitor statement (in database.table notation)
</longdesc>
<shortdesc lang="en">MySQL test table</shortdesc>
<content type="string" default="${OCF_RESKEY_test_table_default}" />
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MySQL test user, must have select privilege on test_table
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="${OCF_RESKEY_test_user_default}" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL test user password
</longdesc>
<shortdesc lang="en">MySQL test user password</shortdesc>
<content type="string" default="${OCF_RESKEY_test_passwd_default}" />
</parameter>
<parameter name="enable_creation" unique="0" required="0">
<longdesc lang="en">
If the MySQL database does not exist, it will be created
</longdesc>
<shortdesc lang="en">Create the database if it does not exist</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="replication_user" unique="0" required="0">
<longdesc lang="en">
MySQL replication user. This user is used for starting and stopping
MySQL replication, for setting and resetting the master host, and for
setting and unsetting read-only mode. Because of that, this user must
have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, and PROCESS
privileges on all nodes within the cluster. Mandatory if you define
a master-slave resource.
</longdesc>
<shortdesc lang="en">MySQL replication user</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_user_default}" />
</parameter>
<parameter name="replication_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL replication password. Used for replication client and slave.
Mandatory if you define a master-slave resource.
</longdesc>
<shortdesc lang="en">MySQL replication user password</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_passwd_default}" />
</parameter>
<parameter name="replication_port" unique="0" required="0">
<longdesc lang="en">
The port on which the Master MySQL instance is listening.
</longdesc>
<shortdesc lang="en">MySQL replication port</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_port_default}" />
</parameter>
<parameter name="max_slave_lag" unique="0" required="0">
<longdesc lang="en">
The maximum number of seconds a replication slave is allowed to lag
behind its master. Do not set this to zero. What the cluster manager
does in case a slave exceeds this maximum lag is determined by the
evict_outdated_slaves parameter.
</longdesc>
<shortdesc lang="en">Maximum time (seconds) a MySQL slave is allowed
to lag behind a master</shortdesc>
<content type="integer" default="${OCF_RESKEY_max_slave_lag_default}"/>
</parameter>
<parameter name="evict_outdated_slaves" unique="0" required="0">
<longdesc lang="en">
If set to true, any slave which is more than max_slave_lag seconds
behind the master has its MySQL instance shut down. If this parameter
is set to false in a primitive or clone resource, it is simply
ignored. If set to false in a master/slave resource, then exceeding
the maximum slave lag will merely push down the master preference so
the lagging slave is never promoted to the new master.
</longdesc>
<shortdesc lang="en">Determines whether to shut down badly lagging
slaves</shortdesc>
<content type="boolean" default="${OCF_RESKEY_evict_outdated_slaves_default}" />
</parameter>
<parameter name="reader_attribute" unique="1" required="0">
<longdesc lang="en">
An attribute that the RA can manage to specify whether a node
can be read from. This node attribute will be 1 if it's fine to
read from the node, and 0 otherwise (for example, when a slave
has lagged too far behind the master).
A typical example for the use of this attribute would be to tie
a set of IP addresses to MySQL slaves that can be read from.
This parameter is only meaningful in master/slave set configurations.
</longdesc>
<shortdesc lang="en">Sets the node attribute that determines
whether a node is usable for clients to read from.</shortdesc>
<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="20" />
<action name="monitor" role="Master" depth="0" timeout="30" interval="10" />
<action name="monitor" role="Slave" depth="0" timeout="30" interval="30" />
<action name="promote" timeout="120" />
<action name="demote" timeout="120" />
<action name="notify" timeout="90" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Convenience functions
set_read_only() {
    # Sets or unsets read-only mode. Accepts one boolean as its
    # optional argument. If invoked without any arguments (or with an
    # empty one), defaults to enabling read only mode. Should only be
    # set in master/slave setups.
    # Returns the status of the ocf_run call that issues the SET GLOBAL
    # statement.
    local ro_val

    # Substitute "true" for a missing argument so the documented default
    # holds. Passing the bare, empty argument to ocf_is_true evaluated to
    # false and silently switched read-only mode OFF.
    if ocf_is_true "${1:-true}"; then
        ro_val="on"
    else
        ro_val="off"
    fi
    ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
        -e "SET GLOBAL read_only=${ro_val}"
}
get_read_only() {
    # Check if read-only is set.
    # Returns 0 when the server variable read_only is "ON", 1 otherwise
    # (including when the query produces no usable output).
    local read_only_state

    # Select the row whose name is exactly "read_only". A plain substring
    # match also picks up variables such as innodb_read_only,
    # super_read_only or tx_read_only, which yields several values and makes
    # the comparison below fail even though read_only is ON.
    read_only_state=$($MYSQL $MYSQL_OPTIONS_REPL -e "SHOW VARIABLES" \
        | awk '$1 == "read_only" { print $2 }')

    [ "$read_only_state" = "ON" ]
}
is_slave() {
    # Decide whether this instance currently acts as a replication slave.
    # Returns 0 only when all of the following hold: the resource is
    # multistate, read-only mode is on, get_slave_info succeeds and a
    # master_log_file is known. Returns 1 in every other case.
    local rc
    # Declared here so that the tmpfile assigned inside get_slave_info stays
    # scoped to this call.
    local tmpfile

    # Check whether this machine should be slave
    ocf_is_ms || return 1
    get_read_only || return 1

    get_slave_info
    rc=$?

    # "SHOW SLAVE STATUS" returns an empty set if instance is not a
    # replication slave
    [ $rc -eq 0 ] || return 1

    # show slave status is not empty
    # Is there a master_log_file defined? (master_log_file is deleted
    # by reset slave)
    if [ -n "$master_log_file" ]; then
        return 0
    fi
    return 1
}
parse_slave_info() {
    # Print the value of one field from saved "SHOW SLAVE STATUS\G" output.
    #   $1 - field name, matched as " <name>: " within a line
    #   $2 - file holding the saved output
    local status_file=$2

    sed -n -e "s/^.* $1: \(.*\)$/\1/p" < $status_file
}
get_slave_info() {
# Runs "SHOW SLAVE STATUS\G" and stores selected fields in the variables
# master_host, master_user, master_port, master_log_file, master_log_pos,
# slave_sql, slave_io, last_errno and secs_behind (none of them is declared
# local here).
# Returns $OCF_SUCCESS when the fields were parsed or were already known from
# an earlier call, $OCF_ERR_GENERIC when the status output is empty.
# Warning: this sets $tmpfile and leaves that file behind! You must delete it after use!
# NOTE(review): mysql_options is declared but not used in this function.
local mysql_options
if [ "$master_log_file" -a "$master_host" ]; then
# variables are already defined, get_slave_info has been run before
# (no temporary file is created on this path)
return $OCF_SUCCESS
else
tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
$MYSQL $MYSQL_OPTIONS_REPL \
-e 'SHOW SLAVE STATUS\G' > $tmpfile
# A non-empty file means the server returned a slave status record.
if [ -s $tmpfile ]; then
master_host=`parse_slave_info Master_Host $tmpfile`
master_user=`parse_slave_info Master_User $tmpfile`
master_port=`parse_slave_info Master_Port $tmpfile`
master_log_file=`parse_slave_info Master_Log_File $tmpfile`
master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
last_errno=`parse_slave_info Last_Errno $tmpfile`
secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
ocf_log debug "MySQL instance running as a replication slave"
else
# Instance produced an empty "SHOW SLAVE STATUS" output --
# instance is not a slave
- ocf_log err "check_slave invoked on an instance that is not a replication slave."
+ ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
fi
}
# Evaluates the replication health of this slave and adjusts the reader
# attribute (set_reader_attr) and, in master/slave mode, the master
# preference ($CRM_MASTER) accordingly.
# Note that most failure paths call "exit", which terminates the whole
# agent rather than just returning from this function. The function only
# returns normally when replication looks healthy.
check_slave() {
# Checks slave status
local rc new_master
get_slave_info
rc=$?
if [ $rc -eq 0 ]; then
# Did we receive an error other than max_connections?
if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
# Whoa. Replication ran into an error. This slave has
# diverged from its master. Make sure this resource
# doesn't restart in place.
- ocf_log err "MySQL instance configured for replication, but replication has failed."
+ ocf_exit_reason "MySQL instance configured for replication, but replication has failed."
ocf_log err "See $tmpfile for details"
# Just pull the reader VIP away, killing MySQL here would be pretty evil
# on a loaded server
set_reader_attr 0
exit $OCF_SUCCESS
fi
# If we got max_connections, let's remove the vip
if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
set_reader_attr 0
exit $OCF_SUCCESS
fi
if [ "$slave_io" != 'Yes' ]; then
# Not necessarily a bad thing. The master may have
# temporarily shut down, and the slave may just be
# reconnecting. A warning can't hurt, though.
ocf_log warn "MySQL Slave IO threads currently not running."
# Sanity check, are we at least on the right master
# (first '|'-separated field of the replication info attribute)
new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
if [ "$master_host" != "$new_master" ]; then
# Not pointing to the right master, not good, removing the VIPs
set_reader_attr 0
exit $OCF_SUCCESS
fi
fi
if [ "$slave_sql" != 'Yes' ]; then
# We don't have a replication SQL thread running. Not a
# good thing. Try to recover by restarting the SQL thread
# and remove reader vip. Prevent MySQL restart.
- ocf_log err "MySQL Slave SQL threads currently not running."
+ ocf_exit_reason "MySQL Slave SQL threads currently not running."
ocf_log err "See $tmpfile for details"
# Remove reader vip
set_reader_attr 0
# try to restart slave
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "START SLAVE"
# Return success to prevent a restart
exit $OCF_SUCCESS
fi
if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then
# We're supposed to bail out if we lag too far
# behind. Let's check our lag.
# A lag reported as the string "NULL" is treated like excessive lag.
if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
- ocf_log err "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)."
+ ocf_exit_reason "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)."
ocf_log err "See $tmpfile for details"
# Remove reader vip
set_reader_attr 0
exit $OCF_ERR_INSTALLED
fi
elif ocf_is_ms; then
# Even if we're not set to evict lagging slaves, we can
# still use the seconds behind master value to set our
# master preference.
# NOTE(review): $secs_behind can be the string "NULL" (it is tested
# for elsewhere in this function) -- confirm the arithmetic below
# behaves as intended in that case.
local master_pref
master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
if [ $master_pref -lt 0 ]; then
# Sanitize a below-zero preference to just zero
master_pref=0
fi
$CRM_MASTER -v $master_pref
fi
# is the slave ok to have a VIP on it
if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
set_reader_attr 0
else
set_reader_attr 1
fi
ocf_log debug "MySQL instance running as a replication slave"
# Clean up the status file left behind by get_slave_info
rm -f $tmpfile
else
# Instance produced an empty "SHOW SLAVE STATUS" output --
# instance is not a slave
# TODO: Needs to handle when get_slave_info will return too many connections error
rm -f $tmpfile
- ocf_log err "check_slave invoked on an instance that is not a replication slave."
+ ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
exit $OCF_ERR_GENERIC
fi
}
set_master() {
    # Informs the MySQL server of the master to replicate from.
    # Takes no arguments: the new master and, if needed, the binlog
    # file/position come from the replication info attribute queried via
    # $CRM_ATTR_REPL_INFO ("host|file|position").
    # If the slave already replicates from that master and has a master
    # log file recorded, its current position is kept and nothing is
    # changed. Otherwise a CHANGE MASTER TO statement is issued; the
    # master must either be unchanged from the last master the slave
    # replicated from, or freshly reset with RESET MASTER.
    # Returns $OCF_SUCCESS when the position was kept, otherwise the
    # status of the CHANGE MASTER TO command, so that callers checking $?
    # actually see a failure.
    local new_master master_log_file master_log_pos
    local master_params repl_info rc
    master_params=""

    # Read the published replication info once, so that host, file and
    # position are guaranteed to come from the same snapshot.
    repl_info=`$CRM_ATTR_REPL_INFO --query -q`
    new_master=`printf '%s\n' "$repl_info" | cut -d'|' -f1`

    # Keep replication position
    get_slave_info
    if [ -n "$master_log_file" ] && [ "$new_master" = "$master_host" ]; then
        ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
        rm -f $tmpfile
        return $OCF_SUCCESS
    fi

    master_log_file=`printf '%s\n' "$repl_info" | cut -d'|' -f2`
    master_log_pos=`printf '%s\n' "$repl_info" | cut -d'|' -f3`
    if [ -n "$master_log_file" ] && [ -n "$master_log_pos" ]; then
        master_params=", MASTER_LOG_FILE='$master_log_file', MASTER_LOG_POS=$master_log_pos"
        ocf_log info "Restored master pos for $new_master : $master_log_file:$master_log_pos"
    fi

    ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
        -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
            MASTER_USER='$OCF_RESKEY_replication_user', \
            MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
    rc=$?

    # Remove the status file left behind by get_slave_info, then report
    # the result of CHANGE MASTER rather than that of rm.
    rm -f $tmpfile
    return $rc
}
# Stops replication on this instance and clears its master configuration.
# Sequence: wait until the slave I/O thread has nothing more to read,
# stop the I/O thread, wait until the relay log has been processed, then
# STOP SLAVE and RESET SLAVE.
# Returns $OCF_SUCCESS without doing anything when is_slave reports that
# this instance is not a slave. Failures of the STOP/RESET statements
# terminate the agent via "exit $OCF_ERR_GENERIC".
unset_master(){
# Instructs the MySQL server to stop replicating from a master
# host.
# If we're currently not configured to be replicating from any
# host, then there's nothing to do. But we do log a warning as
# no-one but the CRM should be touching the MySQL master/slave
# configuration.
if ! is_slave; then
ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave"
return $OCF_SUCCESS
fi
local tmpfile
tmpfile=`mktemp ${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX`
# At this point, the master is read only so there should not be much binlogs to transfer
# Let's wait for the last bits
# The loop polls SHOW PROCESSLIST once per second and has no timeout of
# its own.
while true; do
$MYSQL $MYSQL_OPTIONS_REPL \
-e 'SHOW PROCESSLIST\G' > $tmpfile
if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then
ocf_log info "MySQL slave has finished reading master binary log"
break
fi
if grep -i 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
ocf_log info "Master is down, no more binary logs to come"
break
fi
if grep -i 'Connecting to master' $tmpfile >/dev/null; then
ocf_log info "Master is down, no more binary logs to come"
break
fi
# No "system user" entry in the process list: no slave thread to wait for
if ! grep 'system user' $tmpfile >/dev/null; then
ocf_log info "Slave is not running - not waiting to finish"
break
fi
sleep 1
done
# Now, stop the slave I/O thread and wait for relay log
# processing to complete
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "STOP SLAVE IO_THREAD"
if [ $? -gt 0 ]; then
- ocf_log err "Error stopping slave IO thread"
+ ocf_exit_reason "Error stopping slave IO thread"
exit $OCF_ERR_GENERIC
fi
# Second polling loop: wait for the SQL thread to drain the relay log
while true; do
$MYSQL $MYSQL_OPTIONS_REPL \
-e 'SHOW PROCESSLIST\G' > $tmpfile
if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
ocf_log info "MySQL slave has finished processing relay log"
break
fi
if ! grep -q 'system user' $tmpfile; then
ocf_log info "Slave not runnig - not waiting to finish"
break
fi
ocf_log info "Waiting for MySQL slave to finish processing relay log"
sleep 1
done
rm -f $tmpfile
# Now, stop all slave activity and unset the master host
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "STOP SLAVE"
if [ $? -gt 0 ]; then
- ocf_log err "Error stopping rest slave threads"
+ ocf_exit_reason "Error stopping rest slave threads"
exit $OCF_ERR_GENERIC
fi
# The /*!50516 ALL */ part is a MySQL version-conditional comment
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "RESET SLAVE /*!50516 ALL */;"
if [ $? -gt 0 ]; then
- ocf_log err "Failed to reset slave"
+ ocf_exit_reason "Failed to reset slave"
exit $OCF_ERR_GENERIC
fi
}
# Starts the replication threads on this instance.
# Returns the status of the ocf_run call issuing START SLAVE.
start_slave() {
    local stmt="START SLAVE"

    ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "$stmt"
}
# Sets the node attribute controlling the readers VIP.
# Arguments:
#   $1 - new value for the attribute
# The attribute is only written when it differs from the current value
# reported by get_reader_attr. The comparison is done on strings so that
# an empty or non-numeric current value leads to an update instead of a
# failing numeric test (which would silently skip the update).
set_reader_attr() {
    local curr_attr_value

    curr_attr_value=$(get_reader_attr)
    if [ "$curr_attr_value" != "$1" ]; then
        $CRM_ATTR -l reboot --name "${OCF_RESKEY_reader_attribute}" -v "$1"
    fi
}
# Prints the current value of the node attribute controlling the readers
# VIP, or -1 when the attribute query fails.
get_reader_attr() {
    local current

    if current=`$CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q`; then
        echo $current
    else
        echo -1
    fi
}
# Stores the output of "SHOW MASTER STATUS\G" in a per-resource file under
# $HA_RSCTMP and records the file name in the global $master_status_file,
# which get_master_status reads.
# The redirect target is quoted so a path containing whitespace does not
# break the redirection.
update_data_master_status() {
    master_status_file="${HA_RSCTMP}/master_status.${OCF_RESOURCE_INSTANCE}"
    $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW MASTER STATUS\G" > "$master_status_file"
}
# Prints one value from the stored copy of SHOW MASTER STATUS.
# Must be called after update_data_master_status, which writes the file
# named by $master_status_file.
# Arguments:
#   $1  Name of the field to print (e.g. File, Position).
get_master_status() {
    local field="$1"

    awk -v var="$field" '
        $1 == (var ":") {
            colon = index($0, ":")
            print substr($0, colon + 2)
        }
    ' "$master_status_file"
}
# Prints the address other nodes should use to reach this node in the
# CHANGE MASTER TO command.
# The value is read from the permanent node attribute
# ${INSTANCE_ATTR_NAME}_mysql_master_IP. When that query fails, the
# output of "uname -n" is printed instead.
get_local_ip() {
    local IP

    if IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G`; then
        echo $IP
    else
        uname -n
    fi
}
#######################################################################
# Functions invoked by resource manager actions
# Monitor action.
# Returns the status from mysql_common_status when that is not
# $OCF_SUCCESS. Otherwise, at OCF_CHECK_LEVEL > 0 with a test table
# configured, checks replication (on slaves) and runs a SELECT COUNT(*)
# against the test table, returning $OCF_ERR_GENERIC if that fails.
# On success returns $OCF_RUNNING_MASTER for a master/slave resource that
# is not read-only, and $OCF_SUCCESS otherwise.
mysql_monitor() {
local rc
local status_loglevel="err"
# Set loglevel to info during probe
if ocf_is_probe; then
status_loglevel="info"
fi
mysql_common_status $status_loglevel
rc=$?
# TODO: check max connections error
# If status returned an error, return that immediately
if [ $rc -ne $OCF_SUCCESS ]; then
return $rc
fi
if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then
# Check if this instance is configured as a slave, and if so
# check slave status
# (check_slave may terminate the agent via exit)
if is_slave; then
check_slave
fi
# Check for test table
ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
-e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table"
rc=$?
if [ $rc -ne 0 ]; then
# NOTE(review): the message uses $test_table, while the query above
# uses $OCF_RESKEY_test_table; $test_table is not set in this
# function -- confirm which name was intended.
- ocf_log err "Failed to select from $test_table";
+ ocf_exit_reason "Failed to select from $test_table";
return $OCF_ERR_GENERIC;
fi
fi
if ocf_is_ms && ! get_read_only; then
ocf_log debug "MySQL monitor succeeded (master)";
return $OCF_RUNNING_MASTER
else
ocf_log debug "MySQL monitor succeeded";
return $OCF_SUCCESS
fi
}
# Start action.
# Returns $OCF_SUCCESS if MySQL is already running or was started and
# passed the initial monitor. Otherwise returns the failing status from
# mysql_common_start or mysql_monitor, or $OCF_ERR_GENERIC when the slave
# threads cannot be started.
# In master/slave mode the instance always comes up read-only, with
# --skip-slave-start, and is pointed at the current master if the
# notification environment names one other than this node.
mysql_start() {
local rc
if ocf_is_ms; then
# Initialize the ReaderVIP attribute, monitor will enable it
set_reader_attr 0
fi
mysql_common_status info
if [ $? = $OCF_SUCCESS ]; then
ocf_log info "MySQL already running"
return $OCF_SUCCESS
fi
mysql_common_prepare_dirs
# Uncomment to perform permission cleansing
# - not convinced this should be enabled by default
#
#chmod 0755 $OCF_RESKEY_datadir
#chown -R $OCF_RESKEY_user $OCF_RESKEY_datadir
#chgrp -R $OCF_RESKEY_group $OCF_RESKEY_datadir
mysql_extra_params=
if ocf_is_ms; then
mysql_extra_params="--skip-slave-start"
fi
mysql_common_start $mysql_extra_params
rc=$?
if [ $rc != $OCF_SUCCESS ]; then
return $rc
fi
if ocf_is_ms; then
# We're configured as a stateful resource. We must start as
# slave by default. At this point we don't know if the CRM has
# already promoted a master. So, we simply start in read only
# mode.
set_read_only on
# Now, let's see whether there is a master. We might be a new
# node that is just joining the cluster, and the CRM may have
# promoted a master before.
# (tr strips all blanks from the notify variable)
master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "`
if [ "$master_host" -a "$master_host" != ${NODENAME} ]; then
ocf_log info "Changing MySQL configuration to replicate from $master_host."
set_master
start_slave
if [ $? -ne 0 ]; then
- ocf_log err "Failed to start slave"
+ ocf_exit_reason "Failed to start slave"
return $OCF_ERR_GENERIC
fi
else
ocf_log info "No MySQL master present - clearing replication state"
unset_master
fi
# We also need to set a master preference, otherwise Pacemaker
# won't ever promote us in the absence of any explicit
# preference set by the administrator. We choose a low
# greater-than-zero preference.
$CRM_MASTER -v 1
fi
# Initial monitor action
# Raise the check level so the monitor below also queries the test table
# when test table, user and password are all configured.
if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then
OCF_CHECK_LEVEL=10
fi
mysql_monitor
rc=$?
if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
- ocf_log err "Failed initial monitor action"
+ ocf_exit_reason "Failed initial monitor action"
return $rc
fi
ocf_log info "MySQL started"
return $OCF_SUCCESS
}
# Stop action.
# In master/slave mode the master preference is deleted and the reader
# attribute is set to 0 before the server is stopped.
# Returns the status of mysql_common_stop.
mysql_stop() {
    ocf_is_ms && {
        # clear preference for becoming master
        $CRM_MASTER -D
        # Remove VIP capability
        set_reader_attr 0
    }

    mysql_common_stop
}
# Promote action.
# Stops the slave threads, publishes "ip|binlog file|binlog position" of
# this node through $CRM_ATTR_REPL_INFO, makes the server writable,
# raises the master preference and marks the node as readable.
# Returns $OCF_NOT_RUNNING when MySQL is not running, $OCF_ERR_GENERIC
# when read-only mode cannot be switched off, $OCF_SUCCESS otherwise.
mysql_promote() {
    local master_info
    local local_ip binlog_file binlog_pos

    mysql_common_status err || return $OCF_NOT_RUNNING

    ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
        -e "STOP SLAVE"

    # Set Master Info in CIB, cluster level attribute
    update_data_master_status
    local_ip=$(get_local_ip)
    binlog_file=$(get_master_status File)
    binlog_pos=$(get_master_status Position)
    master_info="${local_ip}|${binlog_file}|${binlog_pos}"
    ${CRM_ATTR_REPL_INFO} -v "$master_info"
    rm -f $tmpfile

    if ! set_read_only off; then
        return $OCF_ERR_GENERIC
    fi

    # Existing master gets a higher-than-default master preference, so
    # the cluster manager does not shuffle the master role around
    # unnecessarily
    $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))

    # A master can accept reads
    set_reader_attr 1

    return $OCF_SUCCESS
}
# Demote action.
# Returns $OCF_NOT_RUNNING when MySQL is not running; otherwise resets
# the master preference to 1, so the cluster manager gets a chance to
# select a new master, and returns the status of that command.
mysql_demote() {
    mysql_common_status err || return $OCF_NOT_RUNNING

    $CRM_MASTER -v 1
}
# Notify action.
# Dispatches on "<notify type>-<notify operation>" taken from the
# OCF_RESKEY_CRM_meta_notify_* environment:
#   post-promote: every node except the promoted one re-points its
#                 replication at the new master
#   pre-demote:   the node being demoted goes read-only and kills the
#                 remaining client threads
#   post-demote:  every node except the demoted one clears its
#                 replication configuration
# All other notifications, and any notification when not running in
# master/slave mode, return $OCF_SUCCESS without doing anything.
mysql_notify() {
# If not configured as a Stateful resource, we make no sense of
# notifications.
if ! ocf_is_ms; then
ocf_log info "This agent makes no use of notifications unless running in master/slave mode."
return $OCF_SUCCESS
fi
local type_op
type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
ocf_log debug "Received $type_op notification."
case "$type_op" in
'pre-promote')
# Nothing to do now here, new replication info not yet published
;;
'post-promote')
# The master has completed its promotion. Now is a good
# time to check whether our replication slave is working
# correctly.
master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "`
if [ "$master_host" = ${NODENAME} ]; then
ocf_log info "This will be the new master, ignoring post-promote notification."
else
ocf_log info "Resetting replication"
unset_master
if [ $? -ne 0 ]; then
return $OCF_ERR_GENERIC
fi
ocf_log info "Changing MySQL configuration to replicate from $master_host"
set_master
if [ $? -ne 0 ]; then
return $OCF_ERR_GENERIC
fi
start_slave
if [ $? -ne 0 ]; then
- ocf_log err "Failed to start slave"
+ ocf_exit_reason "Failed to start slave"
return $OCF_ERR_GENERIC
fi
fi
return $OCF_SUCCESS
;;
'pre-demote')
demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
# NOTE(review): $demote_host is unquoted in the test below; an empty
# value would make the test expression malformed -- confirm it is
# always set for demote notifications.
if [ $demote_host = ${NODENAME} ]; then
# NOTE(review): this is the pre-demote branch, but the log messages in
# it say "post-demote" -- confirm the wording.
ocf_log info "post-demote notification for $demote_host"
set_read_only on
if [ $? -ne 0 ]; then
- ocf_log err "Failed to set read-only";
+ ocf_exit_reason "Failed to set read-only";
return $OCF_ERR_GENERIC;
fi
# Must kill all existing user threads because they are still Read/write
# in order for the slaves to complete the read of binlogs
local tmpfile
tmpfile=`mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX`
$MYSQL $MYSQL_OPTIONS_REPL \
-e "SHOW PROCESSLIST" > $tmpfile
# Select the ids (first column) of all rows that start with a digit and
# are not replication, scheduler or our own SHOW PROCESSLIST threads
for thread in `awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' $tmpfile`
do
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "KILL ${thread}"
done
# NOTE(review): $tmpfile is not removed in this branch.
else
ocf_log info "Ignoring post-demote notification execpt for my own demotion."
fi
return $OCF_SUCCESS
;;
'post-demote')
demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
if [ $demote_host = ${NODENAME} ]; then
ocf_log info "Ignoring post-demote notification for my own demotion."
return $OCF_SUCCESS
fi
ocf_log info "post-demote notification for $demote_host."
# The former master has just been gracefully demoted.
# The function's status on this path is whatever unset_master returns.
unset_master
;;
*)
return $OCF_SUCCESS
;;
esac
}
#######################################################################
##########################################################################
# If DEBUG_LOG is set, make this resource agent easy to debug: set up the
# debug log and direct all output to it.
# The log directory must be a directory owned by root, with permissions 0700,
# and the log must be writable and not a symlink.
# NOTE(review): the code below checks that the log is writable, is not a
# symlink and that its directory exists; it does not check the ownership
# or permissions of the directory. When the log is not writable nothing
# is redirected and file descriptor 9 is not opened at all.
##########################################################################
DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
# Directory part of the log path (everything before the last '/')
DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
if [ -d "${DEBUG_LOG_DIR}" ]; then
# fd 9 appends to the log; stderr (and thus the set -x trace) follows it
exec 9>>"$DEBUG_LOG"
exec 2>&9
# Record a timestamp, the agent's arguments and the OCF_ environment
date >&9
echo "$*" >&9
env | grep OCF_ | sort >&9
set -x
else
exec 9>/dev/null
fi
fi
# Entry point: $1 is the action requested by the resource manager.
# meta-data and usage/help are answered before validation, so they work
# even when the installation is incomplete.
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
mysql_common_validate
rc=$?
# Exit code used for the "status" action when validation fails
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
# Validation failed: stop, monitor and status report the resource as
# not running; every other action fails with the validation error.
case "$1" in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
*) exit $rc;;
esac
fi
# What kind of method was invoked?
# For actions handled by a function, the script's exit status is the
# return status of that function (it is the last command executed).
case "$1" in
start) mysql_start;;
stop) mysql_stop;;
status) mysql_common_status err;;
monitor) mysql_monitor;;
promote) mysql_promote;;
demote) mysql_demote;;
notify) mysql_notify;;
validate-all) exit $OCF_SUCCESS;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
# vi:sw=4:ts=4:et:
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index 5b6a99158..a02f8cde1 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -1,279 +1,279 @@
#!/bin/sh
#######################################################################
# Attempt to detect a default binary
# (mysqld_safe is tried first, then safe_mysqld; both lookups use the
# current PATH and leave the default empty when nothing is found)
OCF_RESKEY_binary_default=$(which mysqld_safe 2> /dev/null)
if [ "$OCF_RESKEY_binary_default" = "" ]; then
OCF_RESKEY_binary_default=$(which safe_mysqld 2> /dev/null)
fi
# Fill in some defaults if no values are specified
# The defaults differ between OpenBSD and every other platform.
HOSTOS=`uname`
if [ "X${HOSTOS}" = "XOpenBSD" ];then
# OpenBSD: fallback binary path and _mysql user/group
if [ "$OCF_RESKEY_binary_default" = "" ]; then
OCF_RESKEY_binary_default="/usr/local/bin/mysqld_safe"
fi
OCF_RESKEY_config_default="/etc/my.cnf"
OCF_RESKEY_datadir_default="/var/mysql"
OCF_RESKEY_user_default="_mysql"
OCF_RESKEY_group_default="_mysql"
OCF_RESKEY_log_default="/var/log/mysqld.log"
OCF_RESKEY_pid_default="/var/mysql/mysqld.pid"
OCF_RESKEY_socket_default="/var/run/mysql/mysql.sock"
else
# All other platforms
if [ "$OCF_RESKEY_binary_default" = "" ]; then
OCF_RESKEY_binary_default="/usr/bin/safe_mysqld"
fi
OCF_RESKEY_config_default="/etc/my.cnf"
OCF_RESKEY_datadir_default="/var/lib/mysql"
OCF_RESKEY_user_default="mysql"
OCF_RESKEY_group_default="mysql"
OCF_RESKEY_log_default="/var/log/mysqld.log"
OCF_RESKEY_pid_default="/var/run/mysql/mysqld.pid"
OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock"
fi
# Platform-independent defaults
OCF_RESKEY_client_binary_default="mysql"
OCF_RESKEY_test_user_default="root"
OCF_RESKEY_test_table_default="mysql.user"
OCF_RESKEY_test_passwd_default=""
OCF_RESKEY_enable_creation_default=0
OCF_RESKEY_additional_parameters_default=""
OCF_RESKEY_replication_port_default="3306"
# Compared against Seconds_Behind_Master by the mysql agent
OCF_RESKEY_max_slave_lag_default="3600"
OCF_RESKEY_evict_outdated_slaves_default="false"
OCF_RESKEY_reader_attribute_default="readable"
# Apply the defaults: ": ${VAR=default}" assigns the default only when the
# parameter is unset, so values passed in by the cluster manager win.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
# Directory of the server binary; used to locate mysql_install_db
MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
: ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}
: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}}
: ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}}
: ${OCF_RESKEY_test_table=${OCF_RESKEY_test_table_default}}
: ${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}}
: ${OCF_RESKEY_enable_creation=${OCF_RESKEY_enable_creation_default}}
: ${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}}
# NOTE(review): no *_default is assigned for replication_user and
# replication_passwd in the defaults visible in this file, so these two
# presumably fall back to an empty string -- confirm.
: ${OCF_RESKEY_replication_user=${OCF_RESKEY_replication_user_default}}
: ${OCF_RESKEY_replication_passwd=${OCF_RESKEY_replication_passwd_default}}
: ${OCF_RESKEY_replication_port=${OCF_RESKEY_replication_port_default}}
: ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
: ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
#######################################################################
# Convenience variables
# MYSQL is the client binary; the MYSQL_OPTIONS_* strings are expanded
# unquoted by the callers so that each option becomes its own argument.
MYSQL=$OCF_RESKEY_client_binary
# Connect through the local socket with a 10 second connect timeout
MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
# Same connection, authenticated as the replication user
MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
# Same connection, authenticated as the test user
MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
# Error number compared against Last_Errno to detect "too many connections"
MYSQL_TOO_MANY_CONN_ERR=1040
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
NODENAME=$(ocf_local_nodename)
CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $NODENAME "
# Resource instance name with everything from the first ':' removed
INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
# Cluster-wide (crm_config) attribute holding the replication info
CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
#######################################################################
# Validates the installation and the configured parameters.
# Checks, in order: the server and client binaries (check_binary), that
# the config file and data directory exist, and that the configured user
# and group are known to getent.
# Returns $OCF_SUCCESS when all checks pass and $OCF_ERR_INSTALLED on the
# first failing file/user/group check.
mysql_common_validate()
{
check_binary $OCF_RESKEY_binary
check_binary $OCF_RESKEY_client_binary
if [ ! -f $OCF_RESKEY_config ]; then
- ocf_log err "Config $OCF_RESKEY_config doesn't exist";
+ ocf_exit_reason "Config $OCF_RESKEY_config doesn't exist";
return $OCF_ERR_INSTALLED;
fi
if [ ! -d $OCF_RESKEY_datadir ]; then
- ocf_log err "Datadir $OCF_RESKEY_datadir doesn't exist";
+ ocf_exit_reason "Datadir $OCF_RESKEY_datadir doesn't exist";
return $OCF_ERR_INSTALLED;
fi
# The user must be resolvable through the passwd database
getent passwd $OCF_RESKEY_user >/dev/null 2>&1
if [ ! $? -eq 0 ]; then
- ocf_log err "User $OCF_RESKEY_user doesn't exit";
+ ocf_exit_reason "User $OCF_RESKEY_user doesn't exit";
return $OCF_ERR_INSTALLED;
fi
# The group must be resolvable through the group database
getent group $OCF_RESKEY_group >/dev/null 2>&1
if [ ! $? -eq 0 ]; then
- ocf_log err "Group $OCF_RESKEY_group doesn't exist";
+ ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist";
return $OCF_ERR_INSTALLED;
fi
return $OCF_SUCCESS
}
# Reports whether the MySQL server process is alive.
# Arguments:
#   $1 - log level used for the "not running" messages
#   $2 - optional pid to check; when omitted the pid is read from
#        $OCF_RESKEY_pid
# Returns $OCF_SUCCESS when the process exists, $OCF_NOT_RUNNING when the
# pid file is missing or the process is gone. In the latter case the pid
# file is removed.
mysql_common_status() {
    local loglevel=$1
    local pid=$2
    local alive=1

    if [ -z "$pid" ]; then
        if [ ! -e $OCF_RESKEY_pid ]; then
            ocf_log $loglevel "MySQL is not running"
            return $OCF_NOT_RUNNING
        fi
        pid=`cat $OCF_RESKEY_pid`
    fi

    # Prefer /proc when it is mounted, fall back to signal 0 otherwise
    if [ -d /proc ] && [ -d /proc/1 ]; then
        if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
            alive=0
        fi
    elif kill -s 0 $pid >/dev/null 2>&1; then
        alive=0
    fi

    if [ $alive -ne 0 ]; then
        ocf_log $loglevel "MySQL not running: removing old PID file"
        rm -f $OCF_RESKEY_pid
        return $OCF_NOT_RUNNING
    fi

    return $OCF_SUCCESS
}
# Prepares the files and directories the server needs before it starts:
# creates the log file and sets its ownership and mode, optionally
# initializes the data directory, creates missing pid and socket
# directories, and verifies that both are writable by the configured
# user.
# Terminates the agent with $OCF_ERR_GENERIC when database initialization
# fails and with $OCF_ERR_PERM when a directory is not writable.
mysql_common_prepare_dirs()
{
local rc
touch $OCF_RESKEY_log
chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log
chmod 0640 $OCF_RESKEY_log
# Restore the SELinux context of the log when restorecon is available
[ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log
# Initialize the database only when creation is enabled and the "mysql"
# directory does not exist yet in the data directory
if ocf_is_true "$OCF_RESKEY_enable_creation" && [ ! -d $OCF_RESKEY_datadir/mysql ] ; then
ocf_log info "Initializing MySQL database: "
$MYSQL_BINDIR/mysql_install_db --datadir=$OCF_RESKEY_datadir
rc=$?
if [ $rc -ne 0 ] ; then
- ocf_log err "Initialization failed: $rc";
+ ocf_exit_reason "Initialization failed: $rc";
exit $OCF_ERR_GENERIC
fi
chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_datadir
fi
# NOTE(review): pid_dir, socket_dir and dir are not declared local.
pid_dir=`dirname $OCF_RESKEY_pid`
if [ ! -d $pid_dir ] ; then
ocf_log info "Creating PID dir: $pid_dir"
mkdir -p $pid_dir
chown $OCF_RESKEY_user:$OCF_RESKEY_group $pid_dir
fi
socket_dir=`dirname $OCF_RESKEY_socket`
if [ ! -d $socket_dir ] ; then
ocf_log info "Creating socket dir: $socket_dir"
mkdir -p $socket_dir
chown $OCF_RESKEY_user:$OCF_RESKEY_group $socket_dir
fi
# Regardless of whether we just created the directory or it
# already existed, check whether it is writable by the configured
# user
for dir in $pid_dir $socket_dir; do
# Run the write test as the configured user through su
if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
- ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user"
+ ocf_exit_reason "Directory $dir is not writable by $OCF_RESKEY_user"
exit $OCF_ERR_PERM;
fi
done
}
# Launches the server binary in the background and waits until
# mysql_common_status reports it as running.
# Arguments:
#   $1 - extra parameters appended to the server command line
# Returns $OCF_SUCCESS once the server is up, $OCF_ERR_GENERIC when the
# launched process disappears before that, or the status from
# mysql_common_status when it is neither success nor "not running".
# The loop has no timeout of its own.
mysql_common_start()
{
local mysql_extra_params="$1"
local pid
${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
--pid-file=$OCF_RESKEY_pid \
--socket=$OCF_RESKEY_socket \
--datadir=$OCF_RESKEY_datadir \
--log-error=$OCF_RESKEY_log \
--user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \
$mysql_extra_params >/dev/null 2>&1 &
# pid of the background launcher process started above
pid=$!
# Spin waiting for the server to come up.
# Let the CRM/LRM time us out if required.
# NOTE(review): start_wait and rc are not declared local.
start_wait=1
while [ $start_wait = 1 ]; do
# The launcher is gone: collect its exit status and give up
if ! ps $pid > /dev/null 2>&1; then
wait $pid
# $? in the message below is the status returned by wait
- ocf_log err "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation"
+ ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation"
return $OCF_ERR_GENERIC
fi
mysql_common_status info
rc=$?
if [ $rc = $OCF_SUCCESS ]; then
start_wait=0
elif [ $rc != $OCF_NOT_RUNNING ]; then
ocf_log info "MySQL start failed: $rc"
return $rc
fi
sleep 2
done
return $OCF_SUCCESS
}
# Stops the server: sends the default kill signal to the pid from the pid
# file, polls once per second until the process is gone or the shutdown
# timeout expires, then sends SIGKILL if it is still there.
# Returns $OCF_SUCCESS when no pid file exists or after the stop sequence
# has run, and $OCF_ERR_GENERIC when the first kill fails.
mysql_common_stop()
{
local pid
local rc
if [ ! -f $OCF_RESKEY_pid ]; then
ocf_log info "MySQL is not running"
return $OCF_SUCCESS
fi
pid=`cat $OCF_RESKEY_pid 2> /dev/null `
/bin/kill $pid > /dev/null
rc=$?
if [ $rc != 0 ]; then
- ocf_log err "MySQL couldn't be stopped"
+ ocf_exit_reason "MySQL couldn't be stopped"
return $OCF_ERR_GENERIC
fi
# stop waiting
# Default wait is 15 seconds; when the action timeout is known, wait
# (timeout / 1000) - 5 seconds instead -- presumably the timeout is given
# in milliseconds; confirm.
# NOTE(review): shutdown_timeout and count are not declared local.
shutdown_timeout=15
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
fi
count=0
while [ $count -lt $shutdown_timeout ]
do
# The pid is passed explicitly so the check does not depend on the pid file
mysql_common_status info $pid
rc=$?
if [ $rc = $OCF_NOT_RUNNING ]; then
break
fi
count=`expr $count + 1`
sleep 1
ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
done
mysql_common_status info $pid
if [ $? != $OCF_NOT_RUNNING ]; then
ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
/bin/kill -KILL $pid > /dev/null
fi
# NOTE(review): the result of the SIGKILL is not checked; "MySQL stopped"
# is logged and success is returned regardless.
ocf_log info "MySQL stopped";
rm -f /var/lock/subsys/mysqld
rm -f $OCF_RESKEY_socket
return $OCF_SUCCESS
}
diff --git a/heartbeat/named b/heartbeat/named
index ede22df1a..2c34a15c2 100755
--- a/heartbeat/named
+++ b/heartbeat/named
@@ -1,489 +1,489 @@
#!/bin/sh
#
# Description: Manages a named (Bind) server as an OCF High-Availability
# resource
#
# Authors: Serge Dubrouski (sergeyfd@gmail.com)
#
# Copyright: 2011 Serge Dubrouski <sergeyfd@gmail.com>
#
# License: GNU General Public License (GPL)
#
###############################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Parameter defaults.
# Each built-in default is declared and then applied to its OCF_RESKEY_*
# parameter. The "=" form of the expansion assigns only when the parameter
# is unset, so a value supplied by the cluster (even an empty one) wins.
OCF_RESKEY_named_default='/usr/sbin/named'
: "${OCF_RESKEY_named=${OCF_RESKEY_named_default}}"

OCF_RESKEY_rndc_default='/usr/sbin/rndc'
: "${OCF_RESKEY_rndc=${OCF_RESKEY_rndc_default}}"

OCF_RESKEY_host_default='/usr/bin/host'
: "${OCF_RESKEY_host=${OCF_RESKEY_host_default}}"

OCF_RESKEY_named_user_default='named'
: "${OCF_RESKEY_named_user=${OCF_RESKEY_named_user_default}}"

OCF_RESKEY_named_config_default=''
: "${OCF_RESKEY_named_config=${OCF_RESKEY_named_config_default}}"

OCF_RESKEY_named_pidfile_default='/var/run/named/named.pid'
: "${OCF_RESKEY_named_pidfile=${OCF_RESKEY_named_pidfile_default}}"

OCF_RESKEY_named_rootdir_default=''
: "${OCF_RESKEY_named_rootdir=${OCF_RESKEY_named_rootdir_default}}"

OCF_RESKEY_named_options_default=''
: "${OCF_RESKEY_named_options=${OCF_RESKEY_named_options_default}}"

OCF_RESKEY_named_keytab_file_default=''
: "${OCF_RESKEY_named_keytab_file=${OCF_RESKEY_named_keytab_file_default}}"

OCF_RESKEY_monitor_request_default='localhost'
: "${OCF_RESKEY_monitor_request=${OCF_RESKEY_monitor_request_default}}"

OCF_RESKEY_monitor_response_default='127.0.0.1'
: "${OCF_RESKEY_monitor_response=${OCF_RESKEY_monitor_response_default}}"

OCF_RESKEY_monitor_ip_default='127.0.0.1'
: "${OCF_RESKEY_monitor_ip=${OCF_RESKEY_monitor_ip_default}}"
# usage: print the command synopsis and a one-line summary of every
# operation to stdout, then hand $OCF_ERR_ARGS back to the caller.
usage() {
    cat <<USAGE_TEXT
usage: $0 start|stop|reload|status|monitor|meta-data|validate-all|methods
$0 manages named (Bind) server as an HA resource.
The 'start' operation starts named server.
The 'stop' operation stops named server.
The 'reload' operation reload named configuration.
The 'status' operation reports whether named is up.
The 'monitor' operation reports whether named is running.
The 'validate-all' operation reports whether parameters are valid.
The 'methods' operation reports on the methods $0 supports.
USAGE_TEXT
    return $OCF_ERR_ARGS
}
# named_meta_data: write the resource agent's XML metadata to stdout.
# The advertised parameter defaults are interpolated from the
# OCF_RESKEY_*_default variables at call time.
named_meta_data() {
    cat <<META_XML
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="named">
<version>1.0</version>
<longdesc lang="en">
Resource script for named (Bind) server. It manages named as an HA resource.
</longdesc>
<shortdesc lang="en">Manages a named server</shortdesc>
<parameters>
<parameter name="named" unique="0" required="0">
<longdesc lang="en">
Path to the named command.
</longdesc>
<shortdesc lang="en">named</shortdesc>
<content type="string" default="${OCF_RESKEY_named_default}" />
</parameter>
<parameter name="rndc" unique="0" required="0">
<longdesc lang="en">
Path to the rndc command.
</longdesc>
<shortdesc lang="en">rndc</shortdesc>
<content type="string" default="${OCF_RESKEY_rndc_default}" />
</parameter>
<parameter name="host" unique="0" required="0">
<longdesc lang="en">
Path to the host command.
</longdesc>
<shortdesc lang="en">host</shortdesc>
<content type="string" default="${OCF_RESKEY_host_default}" />
</parameter>
<parameter name="named_user" unique="0" required="0">
<longdesc lang="en">
User that should own named process.
</longdesc>
<shortdesc lang="en">named_user</shortdesc>
<content type="string" default="${OCF_RESKEY_named_user_default}" />
</parameter>
<parameter name="named_config" unique="1" required="0">
<longdesc lang="en">
Configuration file for named.
</longdesc>
<shortdesc lang="en">named_config</shortdesc>
<content type="string" default="${OCF_RESKEY_named_config_default}" />
</parameter>
<parameter name="named_pidfile" unique="1" required="0">
<longdesc lang="en">
PIDFILE file for named.
</longdesc>
<shortdesc lang="en">named_pidfile</shortdesc>
<content type="string" default="${OCF_RESKEY_named_pidfile_default}" />
</parameter>
<parameter name="named_rootdir" unique="1" required="0">
<longdesc lang="en">
Directory that named should use for chroot if any.
</longdesc>
<shortdesc lang="en">named_rootdir</shortdesc>
<content type="string" default="${OCF_RESKEY_named_rootdir_default}" />
</parameter>
<parameter name="named_options" unique="0" required="0">
<longdesc lang="en">
Options for named process if any.
</longdesc>
<shortdesc lang="en">named_options</shortdesc>
<content type="string" default="${OCF_RESKEY_named_options_default}" />
</parameter>
<parameter name="named_keytab_file" unique="0" required="0">
<longdesc lang="en">
named service keytab file (for GSS-TSIG).
</longdesc>
<shortdesc lang="en">named_keytab_file</shortdesc>
<content type="string" default="${OCF_RESKEY_named_keytab_file_default}" />
</parameter>
<parameter name="monitor_request" unique="0" required="0">
<longdesc lang="en">
Request that shall be sent to named for monitoring. Usually an A record in DNS.
</longdesc>
<shortdesc lang="en">monitor_request</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_request_default}" />
</parameter>
<parameter name="monitor_response" unique="0" required="0">
<longdesc lang="en">
Expected response from named server.
</longdesc>
<shortdesc lang="en">monitor_response</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_response_default}" />
</parameter>
<parameter name="monitor_ip" unique="0" required="0">
<longdesc lang="en">
IP Address where named listens.
</longdesc>
<shortdesc lang="en">monitor_ip</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_ip_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="reload" timeout="60" />
<action name="status" timeout="10" />
<action name="monitor" depth="0" timeout="30" interval="30"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
META_XML
}
#
# named_methods: list the operations this agent supports, one per line.
# 'reload' is included because the agent dispatches it and advertises it
# in both the usage text and the metadata.
#
named_methods() {
    cat <<EOF
start
stop
reload
status
monitor
methods
meta-data
validate-all
EOF
}
# named_validate_all: validate the most critical parameters.
# Returns $OCF_SUCCESS when every check passes, $OCF_ERR_INSTALLED when the
# configuration file is unreadable (outside a probe) or the run-as user is
# unknown, and $OCF_ERR_CONFIGURED when a monitor parameter is empty.
named_validate_all() {
# The three helper binaries are checked first.
# NOTE(review): check_binary is defined outside this file; presumably it
# aborts the agent when the binary is missing - confirm.
check_binary $OCF_RESKEY_named
check_binary $OCF_RESKEY_rndc
check_binary $OCF_RESKEY_host
# A configured named_config must be readable relative to named_rootdir.
# During a probe this is only logged, not treated as an error.
if [ -n "$OCF_RESKEY_named_config" -a \
! -r "${OCF_RESKEY_named_rootdir}/${OCF_RESKEY_named_config}" ]; then
if ocf_is_probe; then
ocf_log info "Configuration file ${OCF_RESKEY_named_rootdir}/${OCF_RESKEY_named_config} not readable during probe."
else
- ocf_log err "Configuration file ${OCF_RESKEY_named_rootdir}/${OCF_RESKEY_named_config} doesn't exist"
+ ocf_exit_reason "Configuration file ${OCF_RESKEY_named_rootdir}/${OCF_RESKEY_named_config} doesn't exist"
return $OCF_ERR_INSTALLED
fi
fi
# The account named is started under must be resolvable via getent.
getent passwd $OCF_RESKEY_named_user >/dev/null 2>&1
if [ ! $? -eq 0 ]; then
- ocf_log err "User $OCF_RESKEY_named_user doesn't exist";
+ ocf_exit_reason "User $OCF_RESKEY_named_user doesn't exist";
return $OCF_ERR_INSTALLED;
fi
# named_monitor needs all three monitor_* values, so none may be empty.
if [ -z "$OCF_RESKEY_monitor_request" -o \
-z "$OCF_RESKEY_monitor_response" -o \
-z "$OCF_RESKEY_monitor_ip" ]; then
- ocf_log err "None of monitor_request, monitor_response, and monitor_ip can be empty"
+ ocf_exit_reason "None of monitor_request, monitor_response, and monitor_ip can be empty"
return $OCF_ERR_CONFIGURED
fi
return $OCF_SUCCESS
}
##
# rndc_key_generator: attempt to generate /etc/rndc.key if one is not
# present.
#
# Best effort only: nothing is done when the key file already exists and is
# non-empty, or when rndc-confgen is not installed (the key may be configured
# in named.conf instead). Failures are logged at info level and never abort
# the caller.
#
# Reads:   OCF_RESKEY_named_user, OCF_RESKEY_named_rootdir
# Returns: the status of the last command executed (callers ignore it).
##
rndc_key_generator()
{
    local rndc_options="-a -r /dev/urandom -u $OCF_RESKEY_named_user"

    if [ -s /etc/rndc.key ]; then
        # file already exists
        return
    fi

    if ! have_binary "rndc-confgen"; then
        # can't autogen key... Report this, but not as a warning or error.
        # It is possible that the user configured the key in named.conf
        ocf_log info "rndc-confgen tool not present, unable to autogen /etc/rndc.key."
        return
    fi

    # The chroot parameter of this agent is named_rootdir; reading the
    # non-existent "rootdir" parameter meant -t was never passed.
    # A rootdir of "/" is treated as "no chroot", matching named_start.
    if [ -n "$OCF_RESKEY_named_rootdir" ] && [ "x${OCF_RESKEY_named_rootdir}" != "x/" ]; then
        rndc_options="$rndc_options -t $OCF_RESKEY_named_rootdir"
    fi

    # $rndc_options is left unquoted on purpose: it is a list of options.
    if rndc-confgen $rndc_options > /dev/null 2>&1; then
        if have_binary "restorecon"; then
            restorecon /etc/rndc.key
        fi
    else
        ocf_log info "failed to auto-generate /etc/rndc.key file."
    fi
}
#
# named_getpid: print the pid(s) of the named process started with the
# configured parameters.
#
# The pgrep pattern is built from the named binary path, plus "-t <rootdir>"
# when a chroot other than "/" is configured, plus "-c <config>" when a
# configuration file is configured. Prints nothing when no process matches.
#
named_getpid () {
    local pattern="$OCF_RESKEY_named"
    # Kept local so a direct call cannot clobber a caller's "pid" variable.
    local pid

    if [ -n "$OCF_RESKEY_named_rootdir" ] && [ "x${OCF_RESKEY_named_rootdir}" != "x/" ]; then
        pattern="$pattern.*-t $OCF_RESKEY_named_rootdir"
    fi

    if [ -n "$OCF_RESKEY_named_config" ]; then
        pattern="$pattern.*-c $OCF_RESKEY_named_config"
    fi

    pid=`pgrep -f "$pattern"`
    # Unquoted on purpose: multiple matches are flattened onto one line.
    echo $pid
}
#
# named_status: quiet pidfile-based liveness check.
# Hands the configured pidfile to ocf_pidfile_status, discards everything it
# prints, and returns its exit status unchanged.
#
named_status () {
    local status_rc

    ocf_pidfile_status ${OCF_RESKEY_named_pidfile} >/dev/null 2>&1
    status_rc=$?
    return $status_rc
}
#
# named_monitor: check that named is running and answers a DNS query.
# Returns $OCF_NOT_RUNNING when the pidfile check fails, $OCF_ERR_GENERIC
# when the lookup fails or the answer does not contain the expected
# response, and $OCF_SUCCESS otherwise.
#
named_monitor() {
local output
# Cheap pidfile check first; no query is sent if the process is gone.
if ! named_status
then
ocf_log info "named is down"
return $OCF_NOT_RUNNING
fi
# Look up monitor_request against the server at monitor_ip with the
# configured host binary.
output=`$OCF_RESKEY_host $OCF_RESKEY_monitor_request $OCF_RESKEY_monitor_ip`
# Fail if the lookup itself failed or its output has no
# "... has ...address <monitor_response>" line.
if [ $? -ne 0 ] || ! echo $output | grep -q '.* has .*address '"$OCF_RESKEY_monitor_response"
then
- ocf_log err "named didn't answer properly for $OCF_RESKEY_monitor_request."
+ ocf_exit_reason "named didn't answer properly for $OCF_RESKEY_monitor_request."
ocf_log err "Expected: $OCF_RESKEY_monitor_response."
ocf_log err "Got: $output"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
#
# named_reload: ask the running server to reload via "rndc reload".
# rndc's stdout is discarded. Returns $OCF_SUCCESS when rndc succeeds and
# $OCF_ERR_GENERIC when it fails.
#
named_reload() {
    if ! $OCF_RESKEY_rndc reload >/dev/null; then
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
#
# named_start: start named and wait until it answers the monitor query.
# Returns $OCF_SUCCESS when named is already running or has come up, and
# $OCF_ERR_GENERIC when the daemon cannot be launched or no matching process
# is found afterwards.
#
named_start() {
local root_dir_opt
local pid
root_dir_opt=""
# Already running according to the pidfile: nothing to do.
named_status && return $OCF_SUCCESS
# Remove pidfile if exists
rm -f ${OCF_RESKEY_named_pidfile}
# With a chroot other than "/", pass it via -t and copy the host's
# /etc/localtime into the chroot.
if [ -n "${OCF_RESKEY_named_rootdir}" -a "x${OCF_RESKEY_named_rootdir}" != "x/" ]
then
root_dir_opt="-t ${OCF_RESKEY_named_rootdir}"
[ -s /etc/localtime ] && cp -fp /etc/localtime ${OCF_RESKEY_named_rootdir}/etc/localtime
fi
# Prepend "-c <config>" to the user-supplied options when a config file is set.
if [ -n "$OCF_RESKEY_named_config" ]; then
OCF_RESKEY_named_options="-c $OCF_RESKEY_named_config $OCF_RESKEY_named_options"
fi
rndc_key_generator
if ! ${OCF_RESKEY_named} -u ${OCF_RESKEY_named_user} $root_dir_opt ${OCF_RESKEY_named_options}
then
- ocf_log err "named failed to start."
+ ocf_exit_reason "named failed to start."
return $OCF_ERR_GENERIC
fi
# Find the started process; write its pid to the pidfile only if named did
# not create the file itself.
pid=`named_getpid`
if [ -n "$pid" ]; then
if [ ! -e ${OCF_RESKEY_named_pidfile} ]; then
echo $pid > ${OCF_RESKEY_named_pidfile}
fi
else
- ocf_log err "named failed to start. Probably error in configuration."
+ ocf_exit_reason "named failed to start. Probably error in configuration."
return $OCF_ERR_GENERIC
fi
# Poll once per second until the monitor succeeds. The loop has no time
# limit of its own.
# NOTE(review): presumably the cluster's operation timeout is what ends a
# start that never becomes healthy - confirm.
while :
do
named_monitor && break
sleep 1
ocf_log debug "named hasn't started yet."
done
ocf_log info "named has started."
return $OCF_SUCCESS
}
#
# named_stop: stop named, escalating from "rndc stop" to kill to kill -9.
# Always returns $OCF_SUCCESS; the result of the final kill -9 is not
# checked.
#
named_stop () {
local timeout
local timewait
# Not running according to the pidfile: nothing to stop.
named_status || return $OCF_SUCCESS
# Orderly shutdown first; if rndc fails, signal the pid from the pidfile.
$OCF_RESKEY_rndc stop >/dev/null
if [ $? -ne 0 ]; then
ocf_log info "rndc stop failed. Killing named."
kill `cat ${OCF_RESKEY_named_pidfile}`
fi
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
# Allow 2/3 of the action timeout for the orderly shutdown
# (The origin unit is ms, hence the conversion)
timewait=$((OCF_RESKEY_CRM_meta_timeout/1500))
else
timewait=20
fi
sleep 1; timeout=0 # Sleep here for 1 sec to let rndc finish.
# Wait up to $timewait seconds, one second per iteration, for the process
# to disappear.
while named_status ; do
if [ $timeout -ge $timewait ]; then
break
else
sleep 1
timeout=`expr $timeout + 1`
ocf_log debug "named appears to hung, waiting ..."
fi
done
#If still up
if named_status 2>&1; then
- ocf_log err "named is still up! Killing"
+ ocf_exit_reason "named is still up! Killing"
kill -9 `cat ${OCF_RESKEY_named_pidfile}`
fi
# The pidfile is removed whether or not the process was force-killed.
rm -f ${OCF_RESKEY_named_pidfile}
return $OCF_SUCCESS
}
# Main part
# Exactly one argument (the operation name) is required.
if [ $# -ne 1 ]; then
usage
exit $OCF_ERR_GENERIC
fi
# methods and meta-data are answered before any validation so they work
# even when the parameters are not usable.
case "$1" in
methods) named_methods
exit $?;;
meta-data) named_meta_data
exit $OCF_SUCCESS;;
esac
named_validate_all
rc=$?
[ "$1" = "validate-all" ] && exit $rc
# With invalid parameters: stop reports success, monitor/status report
# not-running, and every other operation fails with the validation code.
if [ $rc -ne 0 ]
then
case "$1" in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $OCF_NOT_RUNNING;;
*) exit $rc;;
esac
fi
# All remaining operations require root.
if [ `id -u` -ne 0 ]; then
- ocf_log err "$0 must be run as root"
+ ocf_exit_reason "$0 must be run as root"
exit $OCF_ERR_GENERIC
fi
# Dispatch the requested operation; unknown names are reported as
# unimplemented.
case "$1" in
status) if named_status
then
ocf_log info "named is up"
exit $OCF_SUCCESS
else
ocf_log info "named is down"
exit $OCF_NOT_RUNNING
fi;;
monitor) named_monitor
exit $?;;
start) named_start
exit $?;;
stop) named_stop
exit $?;;
reload) named_reload
exit $?;;
*)
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index 9421d8383..110eebfb0 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -1,1873 +1,1873 @@
#!/bin/sh
#
# Description: Manages a PostgreSQL Server as an OCF High-Availability
# resource
#
# Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA
# Florian Haas (florian@linbit.com) -- makeover
# Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication
# David Corlette (dcorlette@netiq.com) -- add support for non-standard library locations and non-standard port
#
# Copyright: 2006-2012 Serge Dubrouski <sergeyfd@gmail.com>
# and other Linux-HA contributors
# License: GNU General Public License (GPL)
#
###############################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#
# get_pgsql_param: print the value of a PostgreSQL configuration parameter.
#
# Arguments: $1 - parameter name as written in the configuration file
# Reads:     OCF_RESKEY_config - path to the configuration file
# Outputs:   the value of each matching "name = value" / "name value" line,
#            with any trailing "# comment" removed and one pair of
#            surrounding single quotes stripped; no trailing newline.
#            Prints nothing when the parameter is not set in the file.
#
get_pgsql_param() {
    local param_name
    local perl_code

    param_name=$1
    # \$ keeps perl's own variables out of the shell's hands; only
    # ${param_name} is expanded by the shell.
    perl_code="if (/^\s*${param_name}[\s=]+\s*(.*)$/) {
        \$dir=\$1;
        \$dir =~ s/\s*\#.*//;
        \$dir =~ s/^'(\S*)'/\$1/;
        print \$dir;}"

    # Quoted so that a configuration path containing whitespace still works.
    perl -ne "$perl_code" < "$OCF_RESKEY_config"
}
# Parameter defaults.
# Each built-in default is declared and then applied to its OCF_RESKEY_*
# parameter. The "=" form of the expansion assigns only when the parameter
# is unset, so a value supplied by the cluster (even an empty one) wins.
OCF_RESKEY_pgctl_default='/usr/bin/pg_ctl'
: "${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}}"

OCF_RESKEY_psql_default='/usr/bin/psql'
: "${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}}"

OCF_RESKEY_pgdata_default='/var/lib/pgsql/data'
: "${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}}"

OCF_RESKEY_pgdba_default='postgres'
: "${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}}"

OCF_RESKEY_pghost_default=''
: "${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}}"

OCF_RESKEY_pgport_default='5432'
: "${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}}"

OCF_RESKEY_pglibs_default='/usr/lib'
: "${OCF_RESKEY_pglibs=${OCF_RESKEY_pglibs_default}}"

# The configuration file defaults to postgresql.conf inside the (already
# resolved) data directory, so this must come after pgdata.
: "${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf}"

OCF_RESKEY_start_opt_default=''
: "${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}}"

OCF_RESKEY_pgdb_default='template1'
: "${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}}"

OCF_RESKEY_logfile_default='/dev/null'
: "${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}}"

OCF_RESKEY_stop_escalate_default='30'
: "${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}}"

OCF_RESKEY_monitor_user_default=''
: "${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}}"

OCF_RESKEY_monitor_password_default=''
: "${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}}"

OCF_RESKEY_monitor_sql_default='select now();'
: "${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}}"

OCF_RESKEY_check_wal_receiver_default='false'
: "${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}}"

# Replication parameters.
OCF_RESKEY_rep_mode_default='none'
: "${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}}"

OCF_RESKEY_node_list_default=''
: "${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}}"

OCF_RESKEY_restore_command_default=''
: "${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}}"

OCF_RESKEY_archive_cleanup_command_default=''
: "${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}}"

OCF_RESKEY_recovery_end_command_default=''
: "${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}}"

OCF_RESKEY_master_ip_default=''
: "${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}}"

OCF_RESKEY_repuser_default='postgres'
: "${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}}"

OCF_RESKEY_primary_conninfo_opt_default=''
: "${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}}"

OCF_RESKEY_restart_on_promote_default='false'
: "${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}}"

OCF_RESKEY_tmpdir_default='/var/lib/pgsql/tmp'
: "${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}}"

OCF_RESKEY_xlog_check_count_default='3'
: "${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}}"

OCF_RESKEY_crm_attr_timeout_default='5'
: "${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}}"

OCF_RESKEY_stop_escalate_in_slave_default='30'
: "${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}}"
# usage: print the command synopsis and a one-line summary of the
# operations to stdout, then hand $OCF_ERR_ARGS back to the caller.
usage() {
    cat <<USAGE_TEXT
usage: $0 start|stop|status|monitor|promote|demote|notify|meta-data|validate-all|methods
$0 manages a PostgreSQL Server as an HA resource.
The 'start' operation starts the PostgreSQL server.
The 'stop' operation stops the PostgreSQL server.
The 'status' operation reports whether the PostgreSQL is up.
The 'monitor' operation reports whether the PostgreSQL is running.
The 'promote' operation promotes the PostgreSQL server.
The 'demote' operation demotes the PostgreSQL server.
The 'validate-all' operation reports whether the parameters are valid.
The 'methods' operation reports on the methods $0 supports.
USAGE_TEXT
    return $OCF_ERR_ARGS
}
# meta_data: write the resource agent's XML metadata to stdout.
# The advertised parameter defaults are interpolated from the
# OCF_RESKEY_*_default variables at call time. xlog_check_count reads
# OCF_RESKEY_xlog_check_count_default, the name the defaults section
# actually assigns.
# NOTE(review): OCF_RESKEY_ctl_opt_default is not assigned in the defaults
# visible in this file, so ctl_opt is advertised with an empty default.
meta_data() {
    cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="pgsql">
<version>1.0</version>
<longdesc lang="en">
Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource.
</longdesc>
<shortdesc lang="en">Manages a PostgreSQL database instance</shortdesc>
<parameters>
<parameter name="pgctl" unique="0" required="0">
<longdesc lang="en">
Path to pg_ctl command.
</longdesc>
<shortdesc lang="en">pgctl</shortdesc>
<content type="string" default="${OCF_RESKEY_pgctl_default}" />
</parameter>
<parameter name="start_opt" unique="0" required="0">
<longdesc lang="en">
Start options (-o start_opt in pg_ctl). "-i -p 5432" for example.
</longdesc>
<shortdesc lang="en">start_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_start_opt_default}" />
</parameter>
<parameter name="ctl_opt" unique="0" required="0">
<longdesc lang="en">
Additional pg_ctl options (-w, -W etc..).
</longdesc>
<shortdesc lang="en">ctl_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_ctl_opt_default}" />
</parameter>
<parameter name="psql" unique="0" required="0">
<longdesc lang="en">
Path to psql command.
</longdesc>
<shortdesc lang="en">psql</shortdesc>
<content type="string" default="${OCF_RESKEY_psql_default}" />
</parameter>
<parameter name="pgdata" unique="0" required="0">
<longdesc lang="en">
Path to PostgreSQL data directory.
</longdesc>
<shortdesc lang="en">pgdata</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdata_default}" />
</parameter>
<parameter name="pgdba" unique="0" required="0">
<longdesc lang="en">
User that owns PostgreSQL.
</longdesc>
<shortdesc lang="en">pgdba</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdba_default}" />
</parameter>
<parameter name="pghost" unique="0" required="0">
<longdesc lang="en">
Hostname/IP address where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pghost</shortdesc>
<content type="string" default="${OCF_RESKEY_pghost_default}" />
</parameter>
<parameter name="pgport" unique="0" required="0">
<longdesc lang="en">
Port where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pgport</shortdesc>
<content type="integer" default="${OCF_RESKEY_pgport_default}" />
</parameter>
<parameter name="pglibs" unique="0" required="0">
<longdesc lang="en">
Custom location of the Postgres libraries. If not set, the standard location
will be used.
</longdesc>
<shortdesc lang="en">pglibs</shortdesc>
<content type="string" default="${OCF_RESKEY_pglibs_default}" />
</parameter>
<parameter name="monitor_user" unique="0" required="0">
<longdesc lang="en">
PostgreSQL user that pgsql RA will user for monitor operations. If it's not set
pgdba user will be used.
</longdesc>
<shortdesc lang="en">monitor_user</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_user_default}" />
</parameter>
<parameter name="monitor_password" unique="0" required="0">
<longdesc lang="en">
Password for monitor user.
</longdesc>
<shortdesc lang="en">monitor_password</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_password_default}" />
</parameter>
<parameter name="monitor_sql" unique="0" required="0">
<longdesc lang="en">
SQL script that will be used for monitor operations.
</longdesc>
<shortdesc lang="en">monitor_sql</shortdesc>
<content type="string" default="${OCF_RESKEY_monitor_sql_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Path to the PostgreSQL configuration file for the instance.
</longdesc>
<shortdesc lang="en">Configuration file</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdata}/postgresql.conf" />
</parameter>
<parameter name="pgdb" unique="0" required="0">
<longdesc lang="en">
Database that will be used for monitoring.
</longdesc>
<shortdesc lang="en">pgdb</shortdesc>
<content type="string" default="${OCF_RESKEY_pgdb_default}" />
</parameter>
<parameter name="logfile" unique="0" required="0">
<longdesc lang="en">
Path to PostgreSQL server log output file.
</longdesc>
<shortdesc lang="en">logfile</shortdesc>
<content type="string" default="${OCF_RESKEY_logfile_default}" />
</parameter>
<parameter name="socketdir" unique="0" required="0">
<longdesc lang="en">
Unix socket directory for PostgreSQL
</longdesc>
<shortdesc lang="en">socketdir</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="stop_escalate" unique="0" required="0">
<longdesc lang="en">
Number of shutdown retries (using -m fast) before resorting to -m immediate
</longdesc>
<shortdesc lang="en">stop escalation</shortdesc>
<content type="integer" default="${OCF_RESKEY_stop_escalate_default}" />
</parameter>
<parameter name="rep_mode" unique="0" required="0">
<longdesc lang="en">
Replication mode may be set to "async" or "sync" or "slave".
They require PostgreSQL 9.1 or later.
Once set, "async" and "sync" require node_list, master_ip, and
restore_command parameters,as well as configuring PostgreSQL
for replication (in postgresql.conf and pg_hba.conf).
"slave" means that RA only makes recovery.conf before starting
to connect to primary which is running somewhere.
It dosen't need master/slave setting.
It requires master_ip restore_command parameters.
</longdesc>
<shortdesc lang="en">rep_mode</shortdesc>
<content type="string" default="${OCF_RESKEY_rep_mode_default}" />
</parameter>
<parameter name="node_list" unique="0" required="0">
<longdesc lang="en">
All node names. Please separate each node name with a space.
This is required for replication.
</longdesc>
<shortdesc lang="en">node list</shortdesc>
<content type="string" default="${OCF_RESKEY_node_list_default}" />
</parameter>
<parameter name="restore_command" unique="0" required="0">
<longdesc lang="en">
restore_command for recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">restore_command</shortdesc>
<content type="string" default="${OCF_RESKEY_restore_command_default}" />
</parameter>
<parameter name="archive_cleanup_command" unique="0" required="0">
<longdesc lang="en">
archive_cleanup_command for recovery.conf.
This is used for replication and is optional.
</longdesc>
<shortdesc lang="en">archive_cleanup_command</shortdesc>
<content type="string" default="${OCF_RESKEY_archive_cleanup_command_default}" />
</parameter>
<parameter name="recovery_end_command" unique="0" required="0">
<longdesc lang="en">
recovery_end_command for recovery.conf.
This is used for replication and is optional.
</longdesc>
<shortdesc lang="en">recovery_end_command</shortdesc>
<content type="string" default="${OCF_RESKEY_recovery_end_command_default}" />
</parameter>
<parameter name="master_ip" unique="0" required="0">
<longdesc lang="en">
Master's floating IP address to be connected from hot standby.
This parameter is used for "primary_conninfo" in recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">master ip</shortdesc>
<content type="string" default="${OCF_RESKEY_master_ip_default}" />
</parameter>
<parameter name="repuser" unique="0" required="0">
<longdesc lang="en">
User used to connect to the master server.
This parameter is used for "primary_conninfo" in recovery.conf.
This is required for replication.
</longdesc>
<shortdesc lang="en">repuser</shortdesc>
<content type="string" default="${OCF_RESKEY_repuser_default}" />
</parameter>
<parameter name="primary_conninfo_opt" unique="0" required="0">
<longdesc lang="en">
primary_conninfo options of recovery.conf except host, port, user and application_name.
This is optional for replication.
</longdesc>
<shortdesc lang="en">primary_conninfo_opt</shortdesc>
<content type="string" default="${OCF_RESKEY_primary_conninfo_opt_default}" />
</parameter>
<parameter name="restart_on_promote" unique="0" required="0">
<longdesc lang="en">
If this is true, RA deletes recovery.conf and restarts PostgreSQL
on promote to keep Timeline ID. It probably makes fail-over slower.
It's recommended to set on-fail of promote up as fence.
This is optional for replication.
</longdesc>
<shortdesc lang="en">restart_on_promote</shortdesc>
<content type="boolean" default="${OCF_RESKEY_restart_on_promote_default}" />
</parameter>
<parameter name="tmpdir" unique="0" required="0">
<longdesc lang="en">
Path to temporary directory.
This is optional for replication.
</longdesc>
<shortdesc lang="en">tmpdir</shortdesc>
<content type="string" default="${OCF_RESKEY_tmpdir_default}" />
</parameter>
<parameter name="xlog_check_count" unique="0" required="0">
<longdesc lang="en">
Number of checks of xlog on monitor before promote.
This is optional for replication.
</longdesc>
<shortdesc lang="en">xlog check count</shortdesc>
<content type="integer" default="${OCF_RESKEY_xlog_check_count_default}" />
</parameter>
<parameter name="crm_attr_timeout" unique="0" required="0">
<longdesc lang="en">
The timeout of crm_attribute forever update command.
Default value is 5 seconds.
This is optional for replication.
</longdesc>
<shortdesc lang="en">The timeout of crm_attribute forever update command.</shortdesc>
<content type="integer" default="${OCF_RESKEY_crm_attr_timeout_default}" />
</parameter>
<parameter name="stop_escalate_in_slave" unique="0" required="0">
<longdesc lang="en">
Number of shutdown retries (using -m fast) before resorting to -m immediate
in slave state.
This is optional for replication.
</longdesc>
<shortdesc lang="en">stop escalation_in_slave</shortdesc>
<content type="integer" default="${OCF_RESKEY_stop_escalate_in_slave_default}" />
</parameter>
<parameter name="check_wal_receiver" unique="0" required="0">
<longdesc lang="en">
If this is true, RA checks wal_receiver process on monitor
and notifies its status using "(resource name)-receiver-status" attribute.
It's useful for checking whether PostgreSQL (hot standby) connects to primary.
The attribute shows status as "normal" or "ERROR".
</longdesc>
<shortdesc lang="en">check_wal_receiver</shortdesc>
<content type="boolean" default="${OCF_RESKEY_check_wal_receiver_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="30"/>
<action name="monitor" depth="0" timeout="30" interval="29" role="Master" />
<action name="promote" timeout="120" />
<action name="demote" timeout="120" />
<action name="notify" timeout="90" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
EOF
}
#
# runasowner: run a command as the PostgreSQL owner from the data directory.
#
# Up to two leading control words are consumed, in either order:
#   -q         passed through to ocf_run as -q
#   warn|err   passed through to ocf_run as -warn / -err (default: -err)
# The remaining arguments are joined into one command string that "su"
# executes for $OCF_RESKEY_pgdba after changing into $OCF_RESKEY_pgdata.
# Returns whatever ocf_run returns.
#
runasowner() {
    local quiet_flag=""
    local level_flag="-err"
    local pass=0

    # Two passes at most, so only the first two words can be control words.
    while [ $pass -lt 2 ]; do
        case "$1" in
            "-q")
                quiet_flag="-q"
                shift
                ;;
            "warn"|"err")
                level_flag="-$1"
                shift
                ;;
        esac
        pass=$((pass + 1))
    done

    ocf_run $quiet_flag $level_flag su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*"
}
#
# escape_string: make a string safe for embedding inside single quotes.
#
# Arguments: the words to escape (joined with single spaces)
# Outputs:   the joined string with every ' replaced by '\''
#
# printf is used instead of echo so that input such as "-n" or text
# containing backslashes is emitted verbatim rather than being interpreted
# by echo.
#
escape_string() {
    printf '%s\n' "$*" | sed -e "s/'/'\\\\''/g"
}
#
# pgsql_methods: list the operations this agent supports, one per line.
#
pgsql_methods() {
    printf '%s\n' \
        start \
        stop \
        status \
        monitor \
        promote \
        demote \
        notify \
        methods \
        meta-data \
        validate-all
}
#pgsql_real_start: Starts PostgreSQL
# Returns the result of the last monitor call once the server is up
# ($OCF_SUCCESS or $OCF_RUNNING_MASTER), $OCF_ERR_PERM when the log file is
# not writable, and $OCF_ERR_GENERIC when recovery.conf cannot be created or
# pg_ctl fails. If the server is already running it returns $OCF_SUCCESS,
# except in replication mode where that is reported as $OCF_ERR_GENERIC.
pgsql_real_start() {
local pgctl_options
local postgres_options
local rc
if pgsql_status; then
ocf_log info "PostgreSQL is already running. PID=`cat $PIDFILE`"
if is_replication; then
return $OCF_ERR_GENERIC
else
return $OCF_SUCCESS
fi
fi
# Remove postmaster.pid if it exists
rm -f $PIDFILE
# Remove backup_label if it exists
# (only outside replication mode; the file is kept when replicating)
if [ -f $BACKUPLABEL ] && ! is_replication; then
ocf_log info "Removing $BACKUPLABEL. The previous backup might have failed."
rm -f $BACKUPLABEL
fi
# Check if we need to create a log file
if ! check_log_file $OCF_RESKEY_logfile
then
- ocf_log err "PostgreSQL can't write to the log file: $OCF_RESKEY_logfile"
+ ocf_exit_reason "PostgreSQL can't write to the log file: $OCF_RESKEY_logfile"
return $OCF_ERR_PERM
fi
# Check socket directory
if [ -n "$OCF_RESKEY_socketdir" ]
then
check_socket_dir
fi
# In "slave" mode a fresh recovery.conf is generated before every start.
if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
rm -f $RECOVERY_CONF
make_recovery_conf || return $OCF_ERR_GENERIC
fi
# Set options passed to pg_ctl
pgctl_options="$OCF_RESKEY_ctl_opt -D $OCF_RESKEY_pgdata -l $OCF_RESKEY_logfile"
# Set options passed to the PostgreSQL server process
postgres_options="-c config_file=${OCF_RESKEY_config}"
if [ -n "$OCF_RESKEY_pghost" ]; then
postgres_options="$postgres_options -h $OCF_RESKEY_pghost"
fi
if [ -n "$OCF_RESKEY_start_opt" ]; then
postgres_options="$postgres_options $OCF_RESKEY_start_opt"
fi
# Tack pass-through options onto pg_ctl options
pgctl_options="$pgctl_options -o '$postgres_options'"
# Invoke pg_ctl
# (PGUSER and PGPASSWORD are unset in the owner's shell first)
runasowner "unset PGUSER; unset PGPASSWORD; $OCF_RESKEY_pgctl $pgctl_options start"
if [ $? -eq 0 ]; then
# Probably started.....
ocf_log info "PostgreSQL start command sent."
else
- ocf_log err "Can't start PostgreSQL."
+ ocf_exit_reason "Can't start PostgreSQL."
return $OCF_ERR_GENERIC
fi
# Poll the monitor once per second until it reports the server as running
# (as slave or master). The loop has no time limit of its own.
while :
do
pgsql_real_monitor warn
rc=$?
if [ $rc -eq $OCF_SUCCESS -o $rc -eq $OCF_RUNNING_MASTER ]; then
break;
fi
sleep 1
ocf_log debug "PostgreSQL still hasn't started yet. Waiting..."
done
ocf_log info "PostgreSQL is started."
return $rc
}
# pgsql_replication_start: start PostgreSQL in replication mode.
# Resets this node's replication state, regenerates recovery.conf and then
# starts the server through pgsql_real_start.
# Returns $OCF_SUCCESS only when pgsql_real_start returns $OCF_SUCCESS;
# every failure, including the presence of $PGSQL_LOCK, yields
# $OCF_ERR_GENERIC.
pgsql_replication_start() {
local rc
# initializing for replication
change_pgsql_status "$NODENAME" "STOP"
delete_master_baseline
exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE
# Remove leftover xlog notes, the rep-mode config and any old recovery.conf.
rm -f ${XLOG_NOTE_FILE}.* $REP_MODE_CONF $RECOVERY_CONF
if ! make_recovery_conf || ! delete_xlog_location || ! set_async_mode_all; then
return $OCF_ERR_GENERIC
fi
# The lock file is created by pgsql_promote; while it exists the start is
# refused because the local data may be inconsistent.
if [ -f $PGSQL_LOCK ]; then
- ocf_log err "My data may be inconsistent. You have to remove $PGSQL_LOCK file to force start."
+ ocf_exit_reason "My data may be inconsistent. You have to remove $PGSQL_LOCK file to force start."
return $OCF_ERR_GENERIC
fi
# start
pgsql_real_start
if [ $? -ne $OCF_SUCCESS ]; then
return $OCF_ERR_GENERIC
fi
change_pgsql_status "$NODENAME" "HS:alone"
return $OCF_SUCCESS
}
#pgsql_start: entry point for the start operation.
# Delegates to pgsql_replication_start when replication is configured and
# to pgsql_real_start otherwise, passing the delegate's exit status through.
pgsql_start() {
    local start_rc

    if is_replication; then
        pgsql_replication_start
        start_rc=$?
    else
        pgsql_real_start
        start_rc=$?
    fi
    return $start_rc
}
#pgsql_promote: Promote PostgreSQL
# Returns $OCF_SUCCESS once the server runs as master, $OCF_ERR_CONFIGURED
# when replication is not configured, and $OCF_ERR_GENERIC when the restart
# or the promote command fails.
pgsql_promote() {
local target
local rc
if ! is_replication; then
- ocf_log err "Not in a replication mode."
+ ocf_exit_reason "Not in a replication mode."
return $OCF_ERR_CONFIGURED
fi
rm -f ${XLOG_NOTE_FILE}.*
# Mark every other node as disconnected and not promotable.
for target in $NODE_LIST; do
[ "$target" = "$NODENAME" ] && continue
change_data_status "$target" "DISCONNECT"
change_master_score "$target" "$CAN_NOT_PROMOTE"
done
# The lock file is created before promoting; pgsql_replication_start
# refuses to start while it exists.
ocf_log info "Creating $PGSQL_LOCK."
touch $PGSQL_LOCK
show_master_baseline
if ocf_is_true ${OCF_RESKEY_restart_on_promote}; then
# Restart path: stop, drop recovery.conf and start again; the start must
# come back reporting a running master.
ocf_log info "Restarting PostgreSQL instead of promote."
#stop : this function returns $OCF_SUCCESS only.
pgsql_real_stop slave
rm -f $RECOVERY_CONF
pgsql_real_start
rc=$?
if [ $rc -ne $OCF_RUNNING_MASTER ]; then
- ocf_log err "Can't start PostgreSQL as primary on promote."
+ ocf_exit_reason "Can't start PostgreSQL as primary on promote."
if [ $rc -ne $OCF_SUCCESS ]; then
change_pgsql_status "$NODENAME" "STOP"
fi
return $OCF_ERR_GENERIC
fi
else
# Promote path: send "pg_ctl promote", then poll the monitor once per
# second until it reports a running master or a generic error.
runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata promote"
if [ $? -eq 0 ]; then
ocf_log info "PostgreSQL promote command sent."
else
- ocf_log err "Can't promote PostgreSQL."
+ ocf_exit_reason "Can't promote PostgreSQL."
return $OCF_ERR_GENERIC
fi
while :
do
pgsql_real_monitor warn
rc=$?
if [ $rc -eq $OCF_RUNNING_MASTER ]; then
break;
elif [ $rc -eq $OCF_ERR_GENERIC ]; then
- ocf_log err "Can't promote PostgreSQL."
+ ocf_exit_reason "Can't promote PostgreSQL."
return $rc
fi
sleep 1
ocf_log debug "PostgreSQL still hasn't promoted yet. Waiting..."
done
ocf_log info "PostgreSQL is promoted."
fi
# Record this node as holding the latest data and as the primary.
change_data_status "$NODENAME" "LATEST"
exec_with_retry 0 $CRM_MASTER -v $PROMOTE_ME
change_pgsql_status "$NODENAME" "PRI"
return $OCF_SUCCESS
}
#pgsql_demote: Demote PostgreSQL
# Only valid in replication mode (otherwise returns $OCF_ERR_CONFIGURED).
# Demotion is implemented by stopping the running instance: the master score
# is set to $CAN_NOT_PROMOTE, the master baseline is deleted and PostgreSQL
# is stopped with pgsql_real_stop master.
# Returns $OCF_SUCCESS, or the status of pgsql_real_stop if the stop failed.
pgsql_demote() {
local rc
if ! is_replication; then
- ocf_log err "Not in a replication mode."
+ ocf_exit_reason "Not in a replication mode."
return $OCF_ERR_CONFIGURED
fi
exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE
delete_master_baseline
if ! pgsql_status; then
ocf_log info "PostgreSQL is already stopped on demote."
else
ocf_log info "Stopping PostgreSQL on demote."
pgsql_real_stop master
rc=$?
if [ "$rc" -ne "$OCF_SUCCESS" ]; then
# The stop failed, so the real state of the instance is not known.
change_pgsql_status "$NODENAME" "UNKNOWN"
return $rc
fi
fi
change_pgsql_status "$NODENAME" "STOP"
return $OCF_SUCCESS
}
#pgsql_real_stop: Stop PostgreSQL
# arg1 (optional): "slave" selects OCF_RESKEY_stop_escalate_in_slave as the
#                  fast-shutdown timeout instead of OCF_RESKEY_stop_escalate;
#                  "master" additionally allows removal of $PGSQL_LOCK.
# First tries "pg_ctl stop -m fast" and waits up to the selected timeout
# (in seconds); if PostgreSQL is still up it escalates to "-m immediate" and
# then waits until the monitor reports $OCF_NOT_RUNNING.
# Always returns $OCF_SUCCESS.
pgsql_real_stop() {
    local rc
    local count
    local stop_escalate

    if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
        attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -D -q
    fi

    if ! pgsql_status
    then
        #Already stopped
        return $OCF_SUCCESS
    fi

    stop_escalate=$OCF_RESKEY_stop_escalate
    if [ "$1" = "slave" ]; then
        stop_escalate="$OCF_RESKEY_stop_escalate_in_slave"
    fi

    # Stop PostgreSQL, do not wait for clients to disconnect
    if [ $stop_escalate -gt 0 ]; then
        runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m fast"
    fi

    # stop waiting
    count=0
    while [ $count -lt $stop_escalate ]
    do
        if ! pgsql_status
        then
            #PostgreSQL stopped
            break;
        fi
        count=`expr $count + 1`
        sleep 1
    done

    if pgsql_status
    then
        #PostgreSQL is still up. Use another shutdown mode.
        # Report the timeout that was actually applied; for a slave this is
        # stop_escalate_in_slave, not stop_escalate.
        ocf_log info "PostgreSQL failed to stop after ${stop_escalate}s using -m fast. Trying -m immediate..."
        runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m immediate"
    fi

    # Wait (without a timeout of its own) until the monitor sees it down.
    while :
    do
        pgsql_real_monitor
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]; then
            # An unnecessary debug log is prevented.
            break;
        fi
        sleep 1
        ocf_log debug "PostgreSQL still hasn't stopped yet. Waiting..."
    done

    # Remove postmaster.pid if it exists
    rm -f $PIDFILE

    # NOTE(review): presumably an empty slave list is delivered as a single
    # space, meaning no slave is left when the master stops -- confirm.
    if [ "$1" = "master" -a "$OCF_RESKEY_CRM_meta_notify_slave_uname" = " " ]; then
        ocf_log info "Removing $PGSQL_LOCK."
        rm -f $PGSQL_LOCK
    fi
    return $OCF_SUCCESS
}
# pgsql_replication_stop: "stop" action for replication mode.
# Withdraws the master score and this node's xlog location, stops PostgreSQL
# as a slave and updates the pgsql status attribute accordingly.
# Returns $OCF_SUCCESS, or the status of pgsql_real_stop if the stop failed.
pgsql_replication_stop() {
    local rc

    exec_with_retry 5 $CRM_MASTER -v $CAN_NOT_PROMOTE
    delete_xlog_location

    if pgsql_status; then
        pgsql_real_stop slave
        rc=$?
        if [ $rc -ne $OCF_SUCCESS ]; then
            # The stop failed: the instance state is not known.
            change_pgsql_status "$NODENAME" "UNKNOWN"
            return $rc
        fi
        change_pgsql_status "$NODENAME" "STOP"
        set_async_mode_all
        delete_master_baseline
    else
        ocf_log info "PostgreSQL is already stopped."
        change_pgsql_status "$NODENAME" "STOP"
    fi

    return $OCF_SUCCESS
}
# pgsql_stop: entry point of the "stop" action.
# With replication enabled the work is delegated to pgsql_replication_stop,
# otherwise to pgsql_real_stop. The delegate's exit status is returned as-is.
pgsql_stop() {
    if is_replication; then
        pgsql_replication_stop
    else
        pgsql_real_stop
    fi
    return $?
}
#
# pgsql_status: is PostgreSQL up?
# Reads the first line of $PIDFILE into the global PID and probes that
# process with "kill -s 0" through runasowner.
# Returns the probe's status, or 1 when there is no PID file.
#
pgsql_status() {
    # No PID file
    [ -f $PIDFILE ] || return 1

    PID=$(head -n 1 $PIDFILE)
    runasowner "kill -s 0 $PID >/dev/null 2>&1"
}
# pgsql_wal_receiver_status: check for a "wal receiver process" whose parent
# PID is the PID stored on the first line of $PIDFILE, and publish the result
# in $PGSQL_WAL_RECEIVER_STATUS_ATTR ("normal" or "ERROR").
# Returns 0 when such a process exists, 1 otherwise.
pgsql_wal_receiver_status() {
    local PID
    local receiver_parent_pids

    PID=$(head -n 1 $PIDFILE)
    # Third column of "ps -ef" is the parent PID.
    receiver_parent_pids=$(ps -ef | tr -s " " | grep "[w]al receiver process" | cut -d " " -f 3)

    if ! echo "$receiver_parent_pids" | grep -q -w "$PID" ; then
        attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" -q
        ocf_log warn "wal receiver process is not running"
        return 1
    fi

    attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" -q
    return 0
}
#
# pgsql_real_monitor
# arg1 (optional): log level used for error reports (default: err).
# Returns:
#   $OCF_NOT_RUNNING    - pgsql_status reports PostgreSQL as down
#   $OCF_RUNNING_MASTER - replication mode and the instance is a primary
#   $OCF_SUCCESS        - running (hot standby, or non-replication mode)
#   $OCF_ERR_GENERIC    - a psql query failed or gave an unexpected answer
#
pgsql_real_monitor() {
local loglevel
local rc
local output
# Set the log level of the error message
loglevel=${1:-err}
if ! pgsql_status
then
ocf_log info "PostgreSQL is down"
return $OCF_NOT_RUNNING
fi
# The result of the wal receiver check is not used for the return value.
if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
pgsql_wal_receiver_status
fi
if is_replication; then
#Check replication state
output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
$OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
-Atc \"${CHECK_MS_SQL}\""`
rc=$?
if [ $rc -ne 0 ]; then
report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status."
return $OCF_ERR_GENERIC
fi
# $CHECK_MS_SQL is "select pg_is_in_recovery()": f = primary, t = standby.
case "$output" in
f) ocf_log debug "PostgreSQL is running as a primary."
# With the default monitor_sql the extra query below is skipped;
# a custom monitor_sql is still executed on the primary.
if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then
return $OCF_RUNNING_MASTER
fi
;;
t) ocf_log debug "PostgreSQL is running as a hot standby."
return $OCF_SUCCESS;;
- *) ocf_log err "$CHECK_MS_SQL output is $output"
+ *) ocf_exit_reason "$CHECK_MS_SQL output is $output"
return $OCF_ERR_GENERIC;;
esac
fi
# Run the configured monitor query as the database owner.
OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"`
runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \
-c '$OCF_RESKEY_monitor_sql'"
rc=$?
if [ $rc -ne 0 ]; then
report_psql_error $rc $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running."
return $OCF_ERR_GENERIC
fi
# In replication mode only a primary reaches this point.
if is_replication; then
return $OCF_RUNNING_MASTER
fi
return $OCF_SUCCESS
}
# pgsql_replication_monitor: replication-specific part of the monitor action.
# arg1: result of pgsql_real_monitor.
# On a master it refreshes this node's attributes and the slaves' state; on a
# slave it sets "HS:alone" when no master exists or when this node's data
# status is DISCONNECT.
# Returns arg1, or $OCF_ERR_GENERIC when control_slave_status fails.
pgsql_replication_monitor() {
    local rc

    rc=$1
    # Neither a running slave nor a running master: nothing to maintain.
    if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne "$OCF_RUNNING_MASTER" ]; then
        return $rc
    fi

    # If I am Master
    if [ $rc -eq $OCF_RUNNING_MASTER ]; then
        change_data_status "$NODENAME" "LATEST"
        change_pgsql_status "$NODENAME" "PRI"
        if ! control_slave_status; then
            return $OCF_ERR_GENERIC
        fi
        # set_sync_mode asks for one more pass after switching a slave to sync.
        if [ "$RE_CONTROL_SLAVE" = "true" ]; then
            sleep 2
            ocf_log info "re-controlling slave status."
            RE_CONTROL_SLAVE="none"
            if ! control_slave_status; then
                return $OCF_ERR_GENERIC
            fi
        fi
        return $rc
    fi

    # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor,
    # so I will get master node name using crm_mon -n
    if print_crm_mon | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):]Master"; then
        # A master exists: check whether it has marked me as disconnected.
        output=$(exec_with_retry 0 $CRM_ATTR_FOREVER -N "$NODENAME" \
            -n "$PGSQL_DATA_STATUS_ATTR" -G -q)
        if [ "$output" = "DISCONNECT" ]; then
            change_pgsql_status "$NODENAME" "HS:alone"
        fi
    else
        # I am a slave and no master exists
        ocf_log info "Master does not exist."
        change_pgsql_status "$NODENAME" "HS:alone"
        if have_master_right; then
            rm -f ${XLOG_NOTE_FILE}.*
        fi
    fi
    return $rc
}
# pgsql_monitor: entry point of the "monitor" action.
# Runs pgsql_real_monitor and, in replication mode, passes its result through
# pgsql_replication_monitor; that function's status is then returned.
pgsql_monitor() {
    local rc

    pgsql_real_monitor
    rc=$?

    if is_replication; then
        pgsql_replication_monitor $rc
        return $?
    fi
    return $rc
}
# pgsql_post_demote: handler for the post-demote notification.
# When another node was demoted and this node is not listed among the masters,
# record the master baseline and mark this node "HS:alone".
# Sets the global DEMOTE_NODE. Always returns $OCF_SUCCESS.
pgsql_post_demote() {
    DEMOTE_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
    ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE"

    # Nothing to do when this node is the one that was demoted.
    if [ "$DEMOTE_NODE" = "$NODENAME" ]; then
        return $OCF_SUCCESS
    fi

    # Nothing to do either when this node is listed as a master.
    if echo $OCF_RESKEY_CRM_meta_notify_master_uname | tr '[A-Z]' '[a-z]' | grep $NODENAME; then
        return $OCF_SUCCESS
    fi

    show_master_baseline
    change_pgsql_status "$NODENAME" "HS:alone"
    return $OCF_SUCCESS
}
# pgsql_pre_promote: handler for the pre-promote notification.
# With three or more nodes in sync mode, a node that is not being promoted
# compares its own master baseline with the one of the node to be promoted.
# If its own baseline is not the older of the two it sets its failcount to
# INFINITY and returns $OCF_ERR_GENERIC; otherwise returns $OCF_SUCCESS.
# Sets the global PROMOTE_NODE.
pgsql_pre_promote() {
local master_baseline
local my_master_baseline
local cmp_location
local number_of_nodes
# If my data is newer than new master's one, I fail my resource.
PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
number_of_nodes=`echo $NODE_LIST | wc -w`
if [ $number_of_nodes -ge 3 -a \
"$OCF_RESKEY_rep_mode" = "sync" -a \
"$PROMOTE_NODE" != "$NODENAME" ]; then
master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \
"$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null`
# The comparison is skipped when the new master has no baseline attribute.
if [ $? -eq 0 ]; then
my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \
"$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null`
# get older location
cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\
sort | head -1`
if [ "$cmp_location" != "$my_master_baseline" ]; then
- ocf_log err "My data is newer than new master's one. New master's location : $master_baseline"
+ ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline"
exec_with_retry 0 $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY
return $OCF_ERR_GENERIC
fi
fi
fi
return $OCF_SUCCESS
}
# pgsql_notify: entry point of the "notify" action.
# Dispatches on the notification type (pre/post) and operation. Outside
# replication mode, and for every combination not listed below, it simply
# returns $OCF_SUCCESS. Sets the globals PROMOTE_NODE / MASTER_NODE.
pgsql_notify() {
    local type="${OCF_RESKEY_CRM_meta_notify_type}"
    local op="${OCF_RESKEY_CRM_meta_notify_operation}"
    local rc

    is_replication || return $OCF_SUCCESS

    ocf_log debug "notify: ${type} for ${op}"

    case "${type}:${op}" in
        pre:promote)
            pgsql_pre_promote
            return $?
            ;;
        post:promote)
            delete_xlog_location
            PROMOTE_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
                sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
            # Nodes that were not promoted drop their master baseline.
            if [ "$PROMOTE_NODE" != "$NODENAME" ]; then
                delete_master_baseline
            fi
            ;;
        post:demote)
            pgsql_post_demote
            return $?
            ;;
        post:start|post:stop)
            MASTER_NODE=$(echo $OCF_RESKEY_CRM_meta_notify_master_uname | \
                sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]')
            # Only the master re-evaluates the slaves; its result is ignored.
            if [ "$NODENAME" = "$MASTER_NODE" ]; then
                control_slave_status
            fi
            ;;
    esac

    return $OCF_SUCCESS
}
# control_slave_status: run on the master to publish the state of the slaves.
# Queries $CHECK_REPLICATION_STATE_SQL and, for every other node in
# $NODE_LIST, updates its data-status attribute, master score and pgsql
# status according to the replication state reported for it; a node that is
# not reported is treated as "DISCONNECT".
# Returns 0 on success, 1 if the replication state query failed.
control_slave_status() {
local rc
local data_status
local target
local all_data_status
local tmp_data_status
local number_of_nodes
all_data_status=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
$OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
-Atc \"${CHECK_REPLICATION_STATE_SQL}\""`
rc=$?
if [ $rc -eq 0 ]; then
if [ -n "$all_data_status" ]; then
# The unquoted echo already joins the rows into one space-separated line.
all_data_status=`echo $all_data_status | sed "s/\n/ /g"`
fi
else
report_psql_error $rc err "Can't get PostgreSQL replication status."
return 1
fi
number_of_nodes=`echo $NODE_LIST | wc -w`
for target in $NODE_LIST; do
if [ "$target" = "$NODENAME" ]; then
continue
fi
data_status="DISCONNECT"
if [ -n "$all_data_status" ]; then
# Each row has the form application_name|STATE|SYNC_STATE; pick the
# first row whose application_name is the target node.
for tmp_data_status in $all_data_status; do
if ! echo $tmp_data_status | grep -q "^${target}|"; then
continue
fi
data_status=`echo $tmp_data_status | cut -d "|" -f 2,3`
ocf_log debug "node_name and data_status is $tmp_data_status"
break
done
fi
case "$data_status" in
"STREAMING|SYNC")
change_data_status "$target" "$data_status"
change_master_score "$target" "$CAN_PROMOTE"
change_pgsql_status "$target" "HS:sync"
;;
"STREAMING|ASYNC")
change_data_status "$target" "$data_status"
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
# In sync mode an async slave is not promotable; request that it
# becomes the synchronous standby.
change_master_score "$target" "$CAN_NOT_PROMOTE"
set_sync_mode "$target"
else
if [ $number_of_nodes -le 2 ]; then
change_master_score "$target" "$CAN_PROMOTE"
else
# I can't determine which slave's data is newest in async mode.
change_master_score "$target" "$CAN_NOT_PROMOTE"
fi
fi
change_pgsql_status "$target" "HS:async"
;;
"STREAMING|POTENTIAL")
change_data_status "$target" "$data_status"
change_master_score "$target" "$CAN_NOT_PROMOTE"
change_pgsql_status "$target" "HS:potential"
;;
"DISCONNECT")
# The pgsql status attribute of a disconnected node is left untouched.
change_data_status "$target" "$data_status"
change_master_score "$target" "$CAN_NOT_PROMOTE"
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
set_async_mode "$target"
fi
;;
*)
# Any other state: connected, but not streaming.
change_data_status "$target" "$data_status"
change_master_score "$target" "$CAN_NOT_PROMOTE"
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
set_async_mode "$target"
fi
change_pgsql_status "$target" "HS:connected"
;;
esac
done
return 0
}
# have_master_right: decide whether this slave may become the master.
# The xlog locations of all nodes are written to ${XLOG_NOTE_FILE}.<n> on
# every call. Once the snapshot is identical to the previous one and the
# counter has reached xlog_check_count, the node holding the newest location
# gets the right to be promoted.
# Returns 0 when this node set its master score to $PROMOTE_ME, 1 otherwise.
# Exits the agent with $OCF_ERR_GENERIC if its own xlog location cannot be
# published.
have_master_right() {
local old
local new
local output
local data_status
local node
local mylocation
local count
local newestXlog
local oldfile
local newfile
ocf_log debug "Checking if I have a master right."
data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \
"$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null`
# An empty data status is accepted; otherwise it must be one of the
# up-to-date states (STREAMING|ASYNC is accepted only outside sync mode).
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
"$data_status" != "LATEST" ]; then
ocf_log warn "My data is out-of-date. status=$data_status"
return 1
fi
else
if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
"$data_status" != "STREAMING|ASYNC" -a \
"$data_status" != "LATEST" ]; then
ocf_log warn "My data is out-of-date. status=$data_status"
return 1
fi
fi
ocf_log info "My data status=$data_status."
show_xlog_location
if [ $? -ne 0 ]; then
- ocf_log err "Failed to show my xlog location."
+ ocf_exit_reason "Failed to show my xlog location."
exit $OCF_ERR_GENERIC
fi
# Find the highest consecutive note file number, counting from 1.
old=0
for count in `seq $OCF_RESKEY_xlog_check_count`; do
if [ -f ${XLOG_NOTE_FILE}.$count ]; then
old=$count
continue
fi
break
done
new=`expr $old + 1`
# get xlog locations of all nodes
for node in ${NODE_LIST}; do
output=`$CRM_ATTR_REBOOT -N "$node" -n \
"$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null`
if [ $? -ne 0 ]; then
ocf_log warn "Can't get $node xlog location."
continue
else
ocf_log info "$node xlog location : $output"
echo "$node $output" >> ${XLOG_NOTE_FILE}.${new}
if [ "$node" = "$NODENAME" ]; then
mylocation=$output
fi
fi
done
oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null`
newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null`
# A changed snapshot restarts the count with the new snapshot stored as .0
if [ "$oldfile" != "$newfile" ]; then
# reset counter
rm -f ${XLOG_NOTE_FILE}.*
printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
return 1
fi
if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then
# Highest location among all recorded nodes (second column of the notes).
newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \
head -1 | cut -d " " -f 2`
if [ "$newestXlog" = "$mylocation" ]; then
ocf_log info "I have a master right."
exec_with_retry 5 $CRM_MASTER -v $PROMOTE_ME
return 0
fi
change_data_status "$NODENAME" "DISCONNECT"
ocf_log info "I don't have correct master data."
# reset counter
rm -f ${XLOG_NOTE_FILE}.*
printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
fi
return 1
}
# is_replication: returns 0 unless rep_mode is "none" or "slave",
# in which case it returns 1.
is_replication() {
    case "$OCF_RESKEY_rep_mode" in
        none|slave)
            return 1
            ;;
    esac
    return 0
}
# get_my_location: print this node's newest xlog location.
# Runs $CHECK_XLOG_LOC_SQL, which yields "replay|receive" locations of the
# form "X/Y". Each half is left-padded with zeros to 8 characters and the
# halves are concatenated; the greater of the two resulting strings is
# written to stdout.
# Returns 0 on success, 1 if the query failed.
get_my_location() {
    local rc
    local output
    local replay_loc
    local receive_loc
    local hi
    local lo
    local newer_location

    output=$(su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba -Atc \"${CHECK_XLOG_LOC_SQL}\"")
    rc=$?
    if [ $rc -ne 0 ]; then
        report_psql_error $rc err "Can't get my xlog location."
        return 1
    fi

    replay_loc=$(echo $output | cut -d "|" -f 1)
    receive_loc=$(echo $output | cut -d "|" -f 2)

    # Normalise the replay location (spaces from %08s become zeros).
    hi=$(echo "$replay_loc" | cut -d "/" -f 1)
    lo=$(echo "$replay_loc" | cut -d "/" -f 2)
    hi=$(printf "%08s\n" $hi | sed "s/ /0/g")
    lo=$(printf "%08s\n" $lo | sed "s/ /0/g")
    replay_loc="${hi}${lo}"

    # Normalise the receive location the same way.
    hi=$(echo "$receive_loc" | cut -d "/" -f 1)
    lo=$(echo "$receive_loc" | cut -d "/" -f 2)
    hi=$(printf "%08s\n" $hi | sed "s/ /0/g")
    lo=$(printf "%08s\n" $lo | sed "s/ /0/g")
    receive_loc="${hi}${lo}"

    newer_location=$(printf "$replay_loc\n$receive_loc" | sort -r | head -1)
    echo "$newer_location"
    return 0
}
# show_xlog_location: publish this node's xlog location in the
# $PGSQL_XLOG_LOC_NAME attribute.
# Returns 1 if the location cannot be determined, otherwise the status of the
# attribute update.
show_xlog_location() {
    local location

    if ! location=$(get_my_location); then
        return 1
    fi
    exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location"
}
# delete_xlog_location: delete this node's $PGSQL_XLOG_LOC_NAME attribute,
# retrying up to 5 times. Returns the status of exec_with_retry.
delete_xlog_location() {
    exec_with_retry 5 $CRM_ATTR_REBOOT \
        -N "$NODENAME" \
        -n "$PGSQL_XLOG_LOC_NAME" \
        -D
}
# show_master_baseline: log this node's current xlog location and store it in
# the $PGSQL_MASTER_BASELINE attribute.
# The status of get_my_location is not checked; the function returns the
# status of the attribute update.
show_master_baseline() {
    local rc
    local location

    location=$(get_my_location)
    ocf_log info "My master baseline : $location."
    exec_with_retry 0 $CRM_ATTR_REBOOT \
        -N "$NODENAME" \
        -n "$PGSQL_MASTER_BASELINE" \
        -v "$location"
}
# delete_master_baseline: delete this node's $PGSQL_MASTER_BASELINE attribute,
# retrying up to 5 times. Returns the status of exec_with_retry.
delete_master_baseline() {
    exec_with_retry 5 $CRM_ATTR_REBOOT \
        -N "$NODENAME" \
        -n "$PGSQL_MASTER_BASELINE" \
        -D
}
# set_async_mode_all: in sync mode, rewrite $REP_MODE_CONF so that
# synchronous_standby_names is empty. Does nothing in other modes.
# The configuration is only written here, not reloaded.
# Returns 0 on success (or when not in sync mode), 1 if the file can't be written.
set_async_mode_all() {
[ "$OCF_RESKEY_rep_mode" = "sync" ] || return 0
ocf_log info "Set all nodes into async mode."
runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""
if [ $? -ne 0 ]; then
- ocf_log err "Can't set all nodes into async mode."
+ ocf_exit_reason "Can't set all nodes into async mode."
return 1
fi
return 0
}
# set_async_mode: take node arg1 out of synchronous replication.
# If arg1 is named in $REP_MODE_CONF, the file is rewritten with an empty
# synchronous_standby_names and the configuration is reloaded.
# Returns 0 when nothing had to change, otherwise the status of the reload.
set_async_mode() {
    if ! cat $REP_MODE_CONF | grep -q -e "[,' ]$1[,' ]"; then
        ocf_log debug "$1 is already in async mode."
        return 0
    fi

    ocf_log info "Setup $1 into async mode."
    runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""
    exec_with_retry 0 reload_conf
}
# set_sync_mode: make node arg1 the synchronous standby.
# Nothing is changed when $REP_MODE_CONF already names a standby. Otherwise
# the file is rewritten with arg1, RE_CONTROL_SLAVE is switched from "false"
# to "true" and the configuration is reloaded.
set_sync_mode() {
    local sync_node_in_conf

    sync_node_in_conf=$(cat $REP_MODE_CONF | cut -d "'" -f 2)
    if [ -z "$sync_node_in_conf" ]; then
        ocf_log info "Setup $1 into sync mode."
        runasowner -q err "echo \"synchronous_standby_names = '$1'\" > \"$REP_MODE_CONF\""
        if [ "$RE_CONTROL_SLAVE" = "false" ]; then
            RE_CONTROL_SLAVE="true"
        fi
        exec_with_retry 0 reload_conf
    else
        ocf_log debug "$sync_node_in_conf is already sync mode."
    fi
}
# reload_conf: ask PostgreSQL to reload its configuration via "pg_ctl reload".
# Returns 0 on success, 1 if pg_ctl failed.
reload_conf() {
# Invoke pg_ctl
runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload"
if [ $? -eq 0 ]; then
ocf_log info "Reload configuration file."
else
- ocf_log err "Can't reload configuration file."
+ ocf_exit_reason "Can't reload configuration file."
return 1
fi
return 0
}
# user_recovery_conf: print the optional recovery.conf settings.
# archive_cleanup_command and recovery_end_command are written to stdout
# only when the corresponding parameter is non-empty.
user_recovery_conf() {
    if test "x${OCF_RESKEY_archive_cleanup_command}" != "x"; then
        echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'"
    fi

    if test "x${OCF_RESKEY_recovery_end_command}" != "x"; then
        echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'"
    fi
}
# make_recovery_conf: (re)create $RECOVERY_CONF for a standby.
# The file is first touched as the database owner and then overwritten with
# the standby settings, followed by the optional lines of user_recovery_conf.
# Returns 0 on success, 1 if the file can't be created.
make_recovery_conf() {
runasowner "touch $RECOVERY_CONF"
if [ $? -ne 0 ]; then
- ocf_log err "Can't create recovery.conf."
+ ocf_exit_reason "Can't create recovery.conf."
return 1
fi
# The results of the two writes below are not checked.
cat > $RECOVERY_CONF <<END
standby_mode = 'on'
primary_conninfo = 'host=${OCF_RESKEY_master_ip} port=${OCF_RESKEY_pgport} user=${OCF_RESKEY_repuser} application_name=${NODENAME} ${OCF_RESKEY_primary_conninfo_opt}'
restore_command = '${OCF_RESKEY_restore_command}'
recovery_target_timeline = 'latest'
END
user_recovery_conf >> $RECOVERY_CONF
ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}"
return 0
}
# change pgsql-status.
# arg1:node, arg2: value
# The attribute is written only when the node is online and the value
# differs from the current one. Always returns 0.
change_pgsql_status() {
    local output

    is_node_online $1 || return 0

    output=$($CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null)
    if [ "$output" = "$2" ]; then
        return 0
    fi

    # If slave's disk is broken, RA cannot read PID file
    # and misjudges the PostgreSQL as down while it is running.
    # It causes overwriting of pgsql-status by Master because replication is still connected.
    case "$output" in
        STOP|UNKNOWN)
            if [ "$1" != "$NODENAME" ]; then
                ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited."
                return 0
            fi
            ;;
    esac

    ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2."
    exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2"
    return 0
}
# change pgsql-data-status.
# arg1:node, arg2: value
# Does nothing for a node unknown to the cluster. Otherwise the attribute is
# written and read back until it holds the requested value. Always returns 0.
change_data_status() {
    local output

    node_exist $1 || return 0

    while :
    do
        output=$($CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null)
        if [ "$output" = "$2" ]; then
            break
        fi
        ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2."
        exec_with_retry 0 exec_with_timeout 0 "$CRM_ATTR_FOREVER" -N $1 -n $PGSQL_DATA_STATUS_ATTR -v "$2"
    done
    return 0
}
# set master-score
# arg1:node, arg2: score, arg3: resource
# The score is written only when the node already has a "master-<resource>"
# attribute and its value differs from arg2. Always returns 0.
set_master_score() {
    local current_score

    current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null`
    if [ -n "$current_score" -a "$current_score" != "$2" ]; then
        ocf_log info "Changing $3 master score on $1 : $current_score->$2."
        # Address the node given as arg1 rather than a variable that only
        # exists in the caller's scope.
        exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2"
    fi
    return 0
}
# change master-score
# arg1:node, arg2: score
# Does nothing for an offline node. When $OCF_RESOURCE_INSTANCE carries an
# instance number ("name:N") the score is set for every other instance up to
# clone_max, otherwise for $RESOURCE_NAME itself.
# Returns 0, or 1 if set_master_score fails.
change_master_score() {
    local instance

    is_node_online $1 || return 0

    case "$OCF_RESOURCE_INSTANCE" in
        *:*)
            # If Pacemaker version is 1.0.x
            instance=0
            until [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]
            do
                # Skip the instance this agent is running as.
                if [ "${RESOURCE_NAME}:${instance}" != "$OCF_RESOURCE_INSTANCE" ]; then
                    set_master_score $1 $2 "${RESOURCE_NAME}:${instance}" || return 1
                fi
                instance=$((instance + 1))
            done
            ;;
        *)
            # If globally-unique=false and Pacemaker version is 1.1.8 or higher
            # Master/Slave resource has no instance number
            set_master_score $1 $2 ${RESOURCE_NAME} || return 1
            ;;
    esac
    return 0
}
# report_psql_error: log a failed psql call.
# arg1: exit status of psql
# arg2: log level for the summary line (default: err)
# arg3: message; " rc=<arg1>" is appended to it
# Exit statuses 1 and 3 are additionally reported as errors regardless of
# arg2; exit status 2 (connection error) is logged at level arg2.
report_psql_error()
{
local rc
local loglevel
local message
rc=$1
loglevel=${2:-err}
message="$3"
ocf_log $loglevel "$message rc=$rc"
if [ $rc -eq 1 ]; then
- ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command."
+ ocf_exit_reason "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command."
elif [ $rc -eq 2 ]; then
ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command."
elif [ $rc -eq 3 ]; then
- ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command."
+ ocf_exit_reason "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command."
fi
}
#
# timeout management function
# arg1 timeout >= 0 (if arg1 is 0, OCF_RESKEY_crm_attr_timeout is used.)
# arg2 : command
# arg3 : command's args
# The command runs in the background and is polled once per second. When the
# timeout is used up it is killed with signal 9 and 1 is returned; otherwise
# the exit status of the command is returned.
exec_with_timeout() {
local func_pid
local count=$OCF_RESKEY_crm_attr_timeout
local rc
if [ "$1" -ne 0 ]; then
count=$1
fi
shift
# Unquoted $* : the command and its arguments are re-split on whitespace.
$* &
func_pid=$!
# Give short-lived commands a chance to finish before the first poll.
sleep .1
while kill -s 0 $func_pid >/dev/null 2>&1; do
sleep 1
count=`expr $count - 1`
if [ $count -le 0 ]; then
- ocf_log err "\"$*\" (pid=$func_pid) timed out."
+ ocf_exit_reason "\"$*\" (pid=$func_pid) timed out."
kill -s 9 $func_pid >/dev/null 2>&1
return 1
fi
ocf_log info "Waiting($count). \"$*\" (pid=$func_pid)."
done
# Collect the exit status of the finished background command.
wait $func_pid
}
# retry command when command doesn't return 0
# arg1 : count >= 0 (if arg1 is 0, the command is retried up to 86400 times
#        with a one second pause between attempts, i.e. for about one day)
# arg2..argN : command and args
# On success the captured stdout of the command is printed and 0 is returned.
# When all attempts fail, the exit status of the last attempt is returned.
exec_with_retry() {
local count="86400"
local output
local rc
if [ "$1" -ne 0 ]; then
count=$1
fi
shift
while [ $count -gt 0 ]; do
# Unquoted $* : the command and its arguments are re-split on whitespace.
output=`$*`
rc=$?
if [ $rc -ne 0 ]; then
ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"."
count=`expr $count - 1`
sleep 1
else
# NOTE(review): the output is used as the printf format string, so any
# '%' or backslash in it is interpreted -- confirm this is intended.
printf "${output}"
return 0
fi
done
- ocf_log err "giving up executing \"$*\""
+ ocf_exit_reason "giving up executing \"$*\""
return $rc
}
# is_node_online: succeeds when the crm_mon output (lower-cased) has a
# "node <arg1>" line, with the name followed by a space or a colon, that does
# not contain "offline".
is_node_online() {
    print_crm_mon |
        tr '[A-Z]' '[a-z]' |
        grep "^node $1[ :]" |
        grep -q -v "offline"
}
# node_exist: succeeds when the crm_mon output (lower-cased) has a line that
# starts with "node <arg1>". The match is a prefix match on the node name.
node_exist() {
    print_crm_mon |
        tr '[A-Z]' '[a-z]' |
        grep -q "^node $1"
}
# check_binary2: verify that command arg1 is available (via have_binary).
# Returns 0 if it is, 1 after reporting the problem if it is not.
check_binary2() {
if ! have_binary "$1"; then
- ocf_log err "Setup problem: couldn't find command: $1"
+ ocf_exit_reason "Setup problem: couldn't find command: $1"
return 1
fi
return 0
}
# check_config: check that configuration file arg1 exists as a regular file.
# Returns 0 if it exists, 1 if it is missing during a probe (logged at info
# level only), 2 if it is missing otherwise.
check_config() {
local rc=0
if [ ! -f "$1" ]; then
if ocf_is_probe; then
ocf_log info "Configuration file is $1 not readable during probe."
rc=1
else
- ocf_log err "Configuration file $1 doesn't exist"
+ ocf_exit_reason "Configuration file $1 doesn't exist"
rc=2
fi
fi
return $rc
}
# Validate most critical parameters
# Checks the binaries, the configuration file, the database owner, the data
# directory permissions, the monitor credentials and the replication
# settings. Besides validating it also maintains the include directive for
# $REP_MODE_CONF in the configuration file and creates $OCF_RESKEY_tmpdir.
# Returns $OCF_SUCCESS, or one of $OCF_ERR_INSTALLED, $OCF_ERR_PERM,
# $OCF_ERR_CONFIGURED for the first check that fails.
pgsql_validate_all() {
local version
local check_config_rc
local rep_mode_string
if ! check_binary2 "$OCF_RESKEY_pgctl" ||
! check_binary2 "$OCF_RESKEY_psql"; then
return $OCF_ERR_INSTALLED
fi
check_config "$OCF_RESKEY_config"
check_config_rc=$?
[ $check_config_rc -eq 2 ] && return $OCF_ERR_INSTALLED
# Default the socket directory from the configuration file when it was not set.
[ $check_config_rc -eq 0 ] && : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`}
getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1
if [ ! $? -eq 0 ]; then
- ocf_log err "User $OCF_RESKEY_pgdba doesn't exist";
+ ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist";
return $OCF_ERR_INSTALLED;
fi
if ocf_is_probe; then
ocf_log info "Don't check $OCF_RESKEY_pgdata during probe"
else
if ! runasowner "test -w $OCF_RESKEY_pgdata"; then
- ocf_log err "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba"
+ ocf_exit_reason "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba"
return $OCF_ERR_PERM;
fi
fi
# monitor_user and monitor_password must be given together.
if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ]
then
- ocf_log err "monitor password can't be empty"
+ ocf_exit_reason "monitor password can't be empty"
return $OCF_ERR_CONFIGURED
fi
if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ]
then
- ocf_log err "monitor_user has to be set if monitor_password is set"
+ ocf_exit_reason "monitor_user has to be set if monitor_password is set"
return $OCF_ERR_CONFIGURED
fi
# Checks common to every rep_mode other than "none".
if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
version=`cat $OCF_RESKEY_pgdata/PG_VERSION`
# The numerically smaller of $version and 9.1 must be 9.1.
if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then
- ocf_log err "Replication mode needs PostgreSQL 9.1 or higher."
+ ocf_exit_reason "Replication mode needs PostgreSQL 9.1 or higher."
return $OCF_ERR_INSTALLED
fi
if [ ! -n "$OCF_RESKEY_master_ip" ]; then
- ocf_log err "master_ip can't be empty."
+ ocf_exit_reason "master_ip can't be empty."
return $OCF_ERR_CONFIGURED
fi
fi
if is_replication; then
if ! ocf_is_ms; then
- ocf_log err "Replication(rep_mode=async or sync) requires Master/Slave configuration."
+ ocf_exit_reason "Replication(rep_mode=async or sync) requires Master/Slave configuration."
return $OCF_ERR_CONFIGURED
fi
if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then
- ocf_log err "Invalid rep_mode : $OCF_RESKEY_rep_mode"
+ ocf_exit_reason "Invalid rep_mode : $OCF_RESKEY_rep_mode"
return $OCF_ERR_CONFIGURED
fi
if [ ! -n "$NODE_LIST" ]; then
- ocf_log err "node_list can't be empty."
+ ocf_exit_reason "node_list can't be empty."
return $OCF_ERR_CONFIGURED
fi
# Keep the include directive for $REP_MODE_CONF in step with rep_mode:
# present in sync mode, absent otherwise.
if [ $check_config_rc -eq 0 ]; then
rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA"
if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
if ! grep -q "$rep_mode_string" $OCF_RESKEY_config; then
ocf_log info "adding include directive into $OCF_RESKEY_config"
echo "$rep_mode_string" >> $OCF_RESKEY_config
fi
else
if grep -q "$rep_mode_string" $OCF_RESKEY_config; then
ocf_log info "deleting include directive from $OCF_RESKEY_config"
# Escape the slashes so the string can be used as a sed address.
rep_mode_string=`echo $rep_mode_string | sed -e 's|/|\\\\/|g'`
sed -i "/$rep_mode_string/d" $OCF_RESKEY_config
fi
fi
fi
if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then
- ocf_log err "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba"
+ ocf_exit_reason "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba"
return $OCF_ERR_PERM
fi
fi
if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
if ocf_is_ms; then
- ocf_log err "Replication(rep_mode=slave) does not support Master/Slave configuration."
+ ocf_exit_reason "Replication(rep_mode=slave) does not support Master/Slave configuration."
return $OCF_ERR_CONFIGURED
fi
fi
return $OCF_SUCCESS
}
#
# Check if we need to create a log file
# arg1: path of the log file. A missing file is created and handed over to
# $OCF_RESKEY_pgdba and its primary group.
# Returns 0 if $OCF_RESKEY_pgdba can write to the file, 1 otherwise.
#
check_log_file() {
    if [ ! -f "$1" ]; then
        touch $1 > /dev/null 2>&1
        chown $OCF_RESKEY_pgdba:$(getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4) $1
    fi

    #Check if $OCF_RESKEY_pgdba can write to the log file
    runasowner "test -w $1" || return 1
    return 0
}
#
# Check socket directory
# A missing $OCF_RESKEY_socketdir is created, handed over to
# $OCF_RESKEY_pgdba and its primary group, and set to mode 2775. An existing
# directory is only tested for writability by creating and removing a
# scratch file as the database owner.
# Exits the agent with $OCF_ERR_PERM on any failure.
#
check_socket_dir() {
if [ ! -d "$OCF_RESKEY_socketdir" ]; then
if ! mkdir "$OCF_RESKEY_socketdir"; then
- ocf_log err "Can't create directory $OCF_RESKEY_socketdir"
+ ocf_exit_reason "Can't create directory $OCF_RESKEY_socketdir"
exit $OCF_ERR_PERM
fi
if ! chown $OCF_RESKEY_pgdba:`getent passwd \
$OCF_RESKEY_pgdba | cut -d ":" -f 4` "$OCF_RESKEY_socketdir"
then
- ocf_log err "Can't change ownership for $OCF_RESKEY_socketdir"
+ ocf_exit_reason "Can't change ownership for $OCF_RESKEY_socketdir"
exit $OCF_ERR_PERM
fi
if ! chmod 2775 "$OCF_RESKEY_socketdir"; then
- ocf_log err "Can't change permissions for $OCF_RESKEY_socketdir"
+ ocf_exit_reason "Can't change permissions for $OCF_RESKEY_socketdir"
exit $OCF_ERR_PERM
fi
else
# The scratch file name carries the PID of this shell ($$).
if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then
- ocf_log err "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir"
+ ocf_exit_reason "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir"
exit $OCF_ERR_PERM
fi
rm $OCF_RESKEY_socketdir/test.$$
fi
}
# print_crm_mon: print the output of "crm_mon -n1".
# The output is fetched once and cached in the global CRM_MON_OUTPUT; later
# calls in the same shell print the cached text. Note that a call made inside
# a pipeline or command substitution cannot update the cache of the parent
# shell.
print_crm_mon() {
    if [ -z "$CRM_MON_OUTPUT" ]; then
        CRM_MON_OUTPUT=`exec_with_retry 0 crm_mon -n1`
    fi
    # Print the text as data, never as a printf format: a '%' or backslash
    # in the crm_mon output must not be interpreted.
    printf '%s\n' "${CRM_MON_OUTPUT}"
}
#
# 'main' starts here...
#
# Exactly one argument, the action name, is expected.
if [ $# -ne 1 ]
then
usage
exit $OCF_ERR_GENERIC
fi
# Paths and names derived from the resource parameters.
PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid
BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label
# Resource name without the ":<instance>" suffix.
RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1`
PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status"
RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf
NODENAME=$(ocf_local_nodename | tr '[A-Z]' '[a-z]')
# State files, cluster commands, scores, SQL and attribute names that are
# only needed in replication mode.
if is_replication; then
REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf
PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock
XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount"
CAN_NOT_PROMOTE="-INFINITY"
CAN_PROMOTE="100"
PROMOTE_ME="1000"
CHECK_MS_SQL="select pg_is_in_recovery()"
CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()"
CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication"
PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status"
PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status"
PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc"
PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline"
NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'`
RE_CONTROL_SLAVE="false"
fi
# These two actions are answered without validating the configuration.
case "$1" in
methods) pgsql_methods
exit $?;;
meta-data) meta_data
exit $OCF_SUCCESS;;
esac
pgsql_validate_all
rc=$?
[ "$1" = "validate-all" ] && exit $rc
# With an invalid configuration, stop still reports success and
# monitor/status report "not running"; every other action fails with the
# validation result.
if [ $rc -ne 0 ]
then
case "$1" in
stop) if is_replication; then
change_pgsql_status "$NODENAME" "UNKNOWN"
fi
exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $OCF_NOT_RUNNING;;
*) exit $rc;;
esac
fi
US=`id -u -n`
if [ $US != root -a $US != $OCF_RESKEY_pgdba ]
then
- ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba"
+ ocf_exit_reason "$0 must be run as root or $OCF_RESKEY_pgdba"
exit $OCF_ERR_GENERIC
fi
# make psql command options
# With a monitor user the credentials are exported as PGUSER/PGPASSWORD
# instead of passing -U.
if [ -n "$OCF_RESKEY_monitor_user" ]; then
PGUSER=$OCF_RESKEY_monitor_user; export PGUSER
PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD
psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb"
else
psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb"
fi
# An explicit host takes precedence over the socket directory.
if [ -n "$OCF_RESKEY_pghost" ]; then
psql_options="$psql_options -h $OCF_RESKEY_pghost"
else
if [ -n "$OCF_RESKEY_socketdir" ]; then
psql_options="$psql_options -h $OCF_RESKEY_socketdir"
fi
fi
if [ -n "$OCF_RESKEY_pgport" ]; then
export PGPORT=$OCF_RESKEY_pgport
fi
# Append the PostgreSQL library directory to LD_LIBRARY_PATH.
if [ -n "$OCF_RESKEY_pglibs" ]; then
if [ -n "$LD_LIBRARY_PATH" ]; then
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$OCF_RESKEY_pglibs
else
export LD_LIBRARY_PATH=$OCF_RESKEY_pglibs
fi
fi
# What kind of method was invoked?
case "$1" in
status) if pgsql_status
then
ocf_log info "PostgreSQL is up"
exit $OCF_SUCCESS
else
ocf_log info "PostgreSQL is down"
exit $OCF_NOT_RUNNING
fi;;
monitor) pgsql_monitor
exit $?;;
start) pgsql_start
exit $?;;
promote) pgsql_promote
exit $?;;
demote) pgsql_demote
exit $?;;
notify) pgsql_notify
exit $?;;
stop) pgsql_stop
exit $?;;
*)
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/postfix b/heartbeat/postfix
index 8619af60d..72fc3710d 100755
--- a/heartbeat/postfix
+++ b/heartbeat/postfix
@@ -1,415 +1,415 @@
#!/bin/sh
#
# Resource script for Postfix
#
# Description: Manages Postfix as an OCF resource in
# a high-availability setup.
#
# Author: Raoul Bhatia <r.bhatia@ipax.at> : Original Author
# License: GNU General Public License (GPL)
# Note: If you want to run multiple Postfix instances, please see
# http://amd.co.at/adminwiki/Postfix#Adding_a_Second_Postfix_Instance_on_one_Server
# http://www.postfix.org/postconf.5.html
#
#
# usage: $0 {start|stop|reload|monitor|validate-all|meta-data}
#
# The "start" arg starts a Postfix instance
#
# The "stop" arg stops it.
#
# OCF parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config_dir
# OCF_RESKEY_parameters
#
##########################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
: ${OCF_RESKEY_binary="/usr/sbin/postfix"}
: ${OCF_RESKEY_config_dir=""}
: ${OCF_RESKEY_parameters=""}
USAGE="Usage: $0 {start|stop|reload|monitor|validate-all|meta-data}";
##########################################################################
# Print the usage string ($USAGE) to stderr.
usage() {
    # Quoted so the text is emitted verbatim: no word splitting and no
    # pathname expansion, even if $0 contains blanks or glob characters.
    printf '%s\n' "$USAGE" >&2
}
# Print the OCF meta-data (XML) describing this agent's parameters and
# actions on stdout.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="postfix">
<version>0.1</version>
<longdesc lang="en">
This script manages Postfix as an OCF resource in a high-availability setup.
</longdesc>
<shortdesc lang="en">Manages a highly available Postfix mail server instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Full path to the Postfix binary.
For example, "/usr/sbin/postfix".
</longdesc>
<shortdesc lang="en">Full path to Postfix binary</shortdesc>
<content type="string" default="/usr/sbin/postfix" />
</parameter>
<parameter name="config_dir" unique="1" required="0">
<longdesc lang="en">
Full path to a Postfix configuration directory.
For example, "/etc/postfix".
</longdesc>
<shortdesc lang="en">Full path to configuration directory</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="parameters" unique="0" required="0">
<longdesc lang="en">
The Postfix daemon may be called with additional parameters.
Specify any of them here.
</longdesc>
<shortdesc lang="en">Any additional parameters to Postfix.</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="reload" timeout="20s" />
<action name="monitor" depth="0" timeout="20s" interval="60s" />
<action name="validate-all" timeout="20s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# Check whether the Postfix master process is running.
#
# Arguments: $1 - log level used when "postfix status" fails (default: err)
# Globals:   status_support, binary, OPTION_CONFIG_DIR, queue_dir (read)
#            ret (written when status_support is true)
# Returns:   0 when running. With status support, the exit code of
#            "postfix status"; otherwise non-zero when the pid file is
#            missing, empty, or does not point at a live "master" process.
postfix_running() {
    local loglevel
    local pidfile
    local pid

    loglevel=${1:-err}

    # run Postfix status if available
    if ocf_is_true $status_support; then
        $binary $OPTION_CONFIG_DIR status 2>&1
        # ret is deliberately left global: postfix_start logs $ret after
        # calling this function.
        ret=$?
        if [ $ret -ne 0 ]; then
            ocf_log $loglevel "Postfix status: " $ret
        fi
        return $ret
    fi

    # manually check Postfix's pid
    # (path is quoted so a queue directory containing blanks still works)
    pidfile="${queue_dir}/pid/master.pid"
    if [ -f "$pidfile" ]; then
        # first line only, with any surrounding whitespace removed
        pid=$(head -n 1 "$pidfile" | tr -d '[:space:]')
        [ -n "$pid" ] || return 1
        kill -s 0 "$pid" >/dev/null 2>&1 && ps -p "$pid" | grep -q master
        return $?
    fi

    # Postfix is not running
    false
}
# Start Postfix and wait until postfix_running reports it as up.
# Returns $OCF_SUCCESS when Postfix is (already) running, or
# $OCF_ERR_GENERIC when "$binary $OPTIONS start" exits non-zero.
# The "-"/"+" line pair below is patch markup: the change reports the
# failure through ocf_exit_reason instead of ocf_log err.
postfix_start()
{
# if Postfix is running return success
if postfix_running info; then
ocf_log info "Postfix already running."
return $OCF_SUCCESS
fi
# start Postfix
$binary $OPTIONS start >/dev/null 2>&1
ret=$?
if [ $ret -ne 0 ]; then
- ocf_log err "Postfix returned error: " $ret
+ ocf_exit_reason "Postfix returned error: " $ret
return $OCF_ERR_GENERIC
fi
# grant some time for startup/forking the sub processes
# and loop initial monitoring until success or timeout
# NOTE(review): the loop itself has no timeout; presumably the cluster's
# start operation timeout is what ends it -- confirm.
while true; do
sleep 1
# break if postfix is up and running; log failure otherwise
postfix_running info && break
# NOTE(review): $ret is not reassigned by this loop; it still holds the
# start command's status unless postfix_running overwrote it -- confirm
# which value is meant to be logged.
ocf_log info "Postfix failed initial monitor action: " $ret
done
ocf_log info "Postfix started."
return $OCF_SUCCESS
}
# Stop Postfix: run "$binary $OPTIONS stop", re-check up to five times at
# one-second intervals, then escalate to "$binary $OPTIONS abort".
# Returns $OCF_SUCCESS when Postfix is (already) stopped, or
# $OCF_ERR_GENERIC when the stop command fails or Postfix survives abort.
# The "-"/"+" line pairs below are patch markup: the change reports
# failures through ocf_exit_reason instead of ocf_log err.
postfix_stop()
{
# if Postfix is not running return success
if ! postfix_running info; then
ocf_log info "Postfix already stopped."
return $OCF_SUCCESS
fi
# stop Postfix
$binary $OPTIONS stop >/dev/null 2>&1
ret=$?
if [ $ret -ne 0 ]; then
- ocf_log err "Postfix returned an error while stopping: " $ret
+ ocf_exit_reason "Postfix returned an error while stopping: " $ret
return $OCF_ERR_GENERIC
fi
# grant some time for shutdown and recheck 5 times
for i in 1 2 3 4 5; do
if postfix_running info; then
sleep 1
else
break
fi
done
# escalate to abort if we did not stop by now
# @TODO shall we loop here too?
if postfix_running info; then
- ocf_log err "Postfix failed to stop. Escalating to 'abort'."
+ ocf_exit_reason "Postfix failed to stop. Escalating to 'abort'."
# the abort command's status is stored in ret but not examined below;
# success is judged solely by the postfix_running re-check
$binary $OPTIONS abort >/dev/null 2>&1; ret=$?
sleep 5
# postfix abort did not succeed
if postfix_running; then
- ocf_log err "Postfix failed to abort."
+ ocf_exit_reason "Postfix failed to abort."
return $OCF_ERR_GENERIC
fi
fi
ocf_log info "Postfix stopped."
return $OCF_SUCCESS
}
# Ask a running Postfix to reload its configuration.
# Returns the exit code of "$binary $OPTIONS reload", or 0 without doing
# anything when Postfix is not running.
postfix_reload()
{
    # nothing to reload when Postfix is down
    postfix_running || return 0

    ocf_log info "Reloading Postfix."
    $binary $OPTIONS reload
}
# Monitor action: map postfix_running onto OCF return codes.
# During a probe the status check logs at "info" level, otherwise at "err".
# Returns $OCF_SUCCESS when running, $OCF_NOT_RUNNING otherwise.
postfix_monitor()
{
    local level

    if ocf_is_probe; then
        level="info"
    else
        level="err"
    fi

    postfix_running $level || return $OCF_NOT_RUNNING
    return $OCF_SUCCESS
}
# Validate the Postfix installation and configuration.
# Checks, in order: required binaries, the configured config_dir and its
# presence in alternate_config_directories, the queue and data
# directories, write access of the mail owner to the data directory, and
# finally "$binary $OPTIONS check" (skipped during a probe).
# During a probe, missing directories are only logged at info level.
# Returns $OCF_SUCCESS, $OCF_ERR_INSTALLED, $OCF_ERR_PERM or
# $OCF_ERR_GENERIC.
# The "-"/"+" line pairs below are patch markup: the change reports
# failures through ocf_exit_reason instead of ocf_log err.
postfix_validate_all()
{
# check that the Postfix binaries exist and can be executed
check_binary "$binary"
check_binary "postconf"
# if true, run in-depth directory checks
dir_check=true
# check config_dir and alternate_config_directories parameter
if [ "x$config_dir" != "x" ]; then
if [ ! -d "$config_dir" ]; then
if ocf_is_probe; then
ocf_log info "Postfix configuration directory '$config_dir' not readable during probe."
# skip in-depth directory checks if config file isn't readable during probe
dir_check=false
else
- ocf_log err "Postfix configuration directory '$config_dir' does not exist or is not readable."
+ ocf_exit_reason "Postfix configuration directory '$config_dir' does not exist or is not readable."
return $OCF_ERR_INSTALLED
fi
fi
# the default instance's alternate_config_directories must mention config_dir
# ($config_dir is used as a grep pattern here, optionally followed by "/")
alternate_config_directories=`postconf -h alternate_config_directories 2>/dev/null | grep "$config_dir/\?"`
if [ "x$alternate_config_directories" = "x" ]; then
- ocf_log err "Postfix main configuration must contain correct 'alternate_config_directories' parameter."
+ ocf_exit_reason "Postfix main configuration must contain correct 'alternate_config_directories' parameter."
return $OCF_ERR_INSTALLED
fi
fi
# check spool/queue and data directories (if applicable)
# this is required because "postfix check" does not catch all errors
if ocf_is_true $dir_check; then
if [ ! -d "$queue_dir" ]; then
if ocf_is_probe; then
ocf_log info "Postfix queue directory '$queue_dir' not readable during probe."
else
- ocf_log err "Postfix queue directory '$queue_dir' does not exist or is not readable."
+ ocf_exit_reason "Postfix queue directory '$queue_dir' does not exist or is not readable."
return $OCF_ERR_INSTALLED
fi
fi
if ocf_is_true $status_support; then
data_dir=`postconf $OPTION_CONFIG_DIR -h data_directory 2>/dev/null`
# count comma- or blank-separated entries; more than one is rejected
data_dir_count=`echo "$data_dir" | tr ',' ' ' | wc -w`
if [ $data_dir_count -gt 1 ]; then
# NOTE(review): $orig_data_dir is not assigned anywhere in this function;
# the message presumably meant $data_dir -- confirm.
- ocf_log err "Postfix data directory '$orig_data_dir' cannot be set to multiple directories."
+ ocf_exit_reason "Postfix data directory '$orig_data_dir' cannot be set to multiple directories."
return $OCF_ERR_INSTALLED
fi
if [ ! -d "$data_dir" ]; then
if ocf_is_probe; then
ocf_log info "Postfix data directory '$data_dir' not readable during probe."
else
- ocf_log err "Postfix data directory '$data_dir' does not exist or is not readable."
+ ocf_exit_reason "Postfix data directory '$data_dir' does not exist or is not readable."
return $OCF_ERR_INSTALLED
fi
fi
fi
# check directory permissions
if ocf_is_true $status_support; then
user=`postconf $OPTION_CONFIG_DIR -h mail_owner 2>/dev/null`
# single-element loop: only the data directory is checked
for dir in "$data_dir"; do
# run the write test as the mail owner
if ! su -s /bin/sh - $user -c "test -w $dir"; then
if ocf_is_probe; then
ocf_log info "Directory '$dir' is not writable by user '$user' during probe."
else
- ocf_log err "Directory '$dir' is not writable by user '$user'."
+ ocf_exit_reason "Directory '$dir' is not writable by user '$user'."
return $OCF_ERR_PERM;
fi
fi
done
fi
fi
# run Postfix internal check, if not probing
if ! ocf_is_probe; then
$binary $OPTIONS check >/dev/null 2>&1
ret=$?
if [ $ret -ne 0 ]; then
- ocf_log err "Postfix 'check' failed: " $ret
+ ocf_exit_reason "Postfix 'check' failed: " $ret
return $OCF_ERR_GENERIC
fi
fi
return $OCF_SUCCESS
}
#
# Main
#
# Entry point: exactly one argument (the action) is required.
if [ $# -ne 1 ]; then
usage
exit $OCF_ERR_ARGS
fi
# short aliases for the resource parameters
binary=$OCF_RESKEY_binary
config_dir=$OCF_RESKEY_config_dir
parameters=$OCF_RESKEY_parameters
# handle parameters
# (meta-data/usage/help are answered before any postconf lookups)
case $1 in
meta-data) meta_data
exit $OCF_SUCCESS
;;
usage|help) usage
exit $OCF_SUCCESS
;;
esac
# build Postfix options string *outside* to access from each method
OPTIONS=''
OPTION_CONFIG_DIR=''
# check for Postfix's postconf binary
check_binary "postconf"
# check if the Postfix config_dir exist
if [ "x$config_dir" != "x" ]; then
# remove all trailing slashes to ease "postconf alternate_config_directories" match
config_dir=`echo $config_dir | sed 's/\/*$//'`
# reset config_dir if it equals Postfix's default config_directory
postconf -h config_directory 2>/dev/null | grep -q "^$config_dir/\?$"
if [ $? -eq 0 ]; then
config_dir=""
fi
# set OPTIONS if config_dir is still set
# save OPTION_CONFIG_DIR separately
if [ "x$config_dir" != "x" ]; then
OPTION_CONFIG_DIR="-c $config_dir"
OPTIONS=$OPTION_CONFIG_DIR
fi
fi
# add all additional parameters to options string
if [ "x$parameters" != "x" ]; then
OPTIONS="$OPTIONS $parameters"
fi
# important directories, used in different methods
queue_dir=`postconf $OPTION_CONFIG_DIR -h queue_directory 2>/dev/null`
# check Postfix version and status support
status_support=false
postfix_version=`postconf -h mail_version 2>/dev/null`
ocf_version_cmp "$postfix_version" "2.5.0"
ret=$?
# we need Postfix 2.5.0 or greater for status/data_directory support
# NOTE(review): return values 1 and 2 of ocf_version_cmp are taken to mean
# "equal" and "greater" -- confirm against ocf-shellfuncs.
if [ $ret -eq 1 -o $ret -eq 2 ]; then
status_support=true
fi
postfix_validate_all
ret=$?
# NOTE(review): LSB_STATUS_STOPPED is assigned but not referenced in the
# rest of this script as shown.
LSB_STATUS_STOPPED=3
# a failed validation still lets "stop" succeed; every other action
# exits with the validation error
if [ $ret -ne $OCF_SUCCESS ]; then
case $1 in
stop) exit $OCF_SUCCESS ;;
*) exit $ret;;
esac
fi
# dispatch the requested action
case $1 in
monitor) postfix_monitor
exit $?
;;
start) postfix_start
exit $?
;;
stop) postfix_stop
exit $?
;;
reload) postfix_reload
exit $?
;;
validate-all) exit $OCF_SUCCESS
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
diff --git a/heartbeat/rsyncd b/heartbeat/rsyncd
index e830bcb2e..86c771e32 100755
--- a/heartbeat/rsyncd
+++ b/heartbeat/rsyncd
@@ -1,270 +1,270 @@
#!/bin/sh
#
# Resource script for rsync daemon
#
# Description: Manages rsync daemon as an OCF resource in
# a High Availability setup.
#
# Author: Dhairesh Oza <odhairesh@novell.com>
# License: GNU General Public License (GPL)
#
#
# usage: $0 {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg starts rsyncd.
#
# The "stop" arg stops it.
#
# OCF parameters:
# OCF_RESKEY_binpath
# OCF_RESKEY_conffile
# OCF_RESKEY_bwlimit
#
# Note: This RA requires that the rsyncd config file has a "pid file"
# entry so that it is able to act on the correct process.
##########################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
##########################################################################
# Print the usage string ($USAGE) to stderr.
usage()
{
    # Quoted so the text is emitted verbatim: no word splitting and no
    # pathname expansion, even if $0 contains blanks or glob characters.
    printf '%s\n' "$USAGE" >&2
}
# Print the OCF meta-data (XML) describing this agent's parameters and
# actions on stdout, then terminate the script with $OCF_SUCCESS.
# Note: this function exits; it never returns to its caller.
meta_data()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="rsyncd">
<version>1.0</version>
<longdesc lang="en">
This script manages rsync daemon
</longdesc>
<shortdesc lang="en">Manages an rsync daemon</shortdesc>
<parameters>
<parameter name="binpath">
<longdesc lang="en">
The rsync binary path.
For example, "/usr/bin/rsync"
</longdesc>
<shortdesc lang="en">Full path to the rsync binary</shortdesc>
<content type="string" default="rsync"/>
</parameter>
<parameter name="conffile">
<longdesc lang="en">
The rsync daemon configuration file name with full path.
For example, "/etc/rsyncd.conf"
</longdesc>
<shortdesc lang="en">Configuration file name with full path</shortdesc>
<content type="string" default="/etc/rsyncd.conf" />
</parameter>
<parameter name="bwlimit">
<longdesc lang="en">
This option allows you to specify a maximum transfer
rate in kilobytes per second. This option is
most effective when using rsync with large files
(several megabytes and up). Due to the nature of
rsync transfers, blocks of data are sent, then if
rsync determines the transfer was too fast, it will
wait before sending the next data block. The result
is an average transfer rate equaling the specified
limit. A value of zero specifies no limit.
</longdesc>
<shortdesc lang="en">limit I/O bandwidth, KBytes per second</shortdesc>
<content type="string" default=""/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s"/>
<action name="stop" timeout="20s"/>
<action name="monitor" depth="0" timeout="20s" interval="60s" />
<action name="validate-all" timeout="20s"/>
<action name="meta-data" timeout="5s"/>
</actions>
</resource-agent>
END
exit $OCF_SUCCESS
}
# Determine the rsyncd configuration file and the pid file it declares.
#
# Globals: OCF_RESKEY_conffile (read)
#          CONF_FILE (written) - configured file, or /etc/rsyncd.conf
#          PIDFILE   (written) - value of the "pid file" entry, with
#                                surrounding blanks removed; left
#                                untouched when no such entry exists
# Lines starting with "#" are ignored.
get_pid_and_conf_file()
{
    local pid_entry

    CONF_FILE=${OCF_RESKEY_conffile:-/etc/rsyncd.conf}

    # scan the configuration once and keep the matching line(s)
    pid_entry=$(grep -v "^#" "$CONF_FILE" | grep "pid file")
    if [ -n "$pid_entry" ]; then
        # take the text after the first "=" and trim it, so that
        # "pid file = /path" yields "/path" rather than " /path"
        PIDFILE=$(printf '%s\n' "$pid_entry" |
            awk -F "=" '{ v = $2; sub(/^[ \t]+/, "", v); sub(/[ \t]+$/, "", v); print v }')
    fi
}
# Report whether the rsync daemon named by $PIDFILE is running.
# Returns $OCF_SUCCESS when the pid from $PIDFILE belongs to an rsync
# process, $OCF_ERR_GENERIC when the pid file exists but is empty or
# names no rsync process, and $OCF_NOT_RUNNING when $PIDFILE is unset or
# the file does not exist.
# The "-"/"+" line pair below is patch markup: the change reports the
# failure through ocf_exit_reason instead of ocf_log err.
rsyncd_status()
{
if [ -n "$PIDFILE" -a -f $PIDFILE ]; then
# rsync is probably running
PID=`cat $PIDFILE`
if [ -n "$PID" ]; then
# the pid must exist and its ps line must mention "rsync"
if ps -p $PID | grep rsync >/dev/null ; then
ocf_log info "rsync daemon running"
return $OCF_SUCCESS
else
ocf_log info "rsync daemon is not running but pid file exists"
return $OCF_ERR_GENERIC
fi
else
- ocf_log err "PID file empty!"
+ ocf_exit_reason "PID file empty!"
return $OCF_ERR_GENERIC
fi
fi
# rsyncd is not running
ocf_log info "rsync daemon is not running"
return $OCF_NOT_RUNNING
}
# Start the rsync daemon unless it is already running.
# Builds the command line from OCF_RESKEY_binpath, OCF_RESKEY_conffile and
# OCF_RESKEY_bwlimit and requires a "pid file" entry in $CONF_FILE.
# Most paths end the script with exit ($OCF_SUCCESS or $OCF_ERR_GENERIC);
# only the missing-"pid file" path returns $OCF_ERR_GENERIC to the caller.
# The "-"/"+" line pairs below are patch markup: the change reports
# failures through ocf_exit_reason instead of ocf_log err.
rsyncd_start()
{
# if rsyncd is running return success
rsyncd_status
retVal=$?
if [ $retVal -eq $OCF_SUCCESS ]; then
exit $OCF_SUCCESS
elif [ $retVal -ne $OCF_NOT_RUNNING ]; then
- ocf_log err "Error. Unknown status."
+ ocf_exit_reason "Error. Unknown status."
exit $OCF_ERR_GENERIC
fi
# assemble the daemon command line
if [ -n "$OCF_RESKEY_binpath" ]; then
COMMAND="$OCF_RESKEY_binpath --daemon"
else
COMMAND="rsync --daemon"
fi
if [ -n "$OCF_RESKEY_conffile" ]; then
COMMAND="$COMMAND --config $OCF_RESKEY_conffile"
fi
if [ -n "$OCF_RESKEY_bwlimit" ]; then
COMMAND="$COMMAND --bwlimit $OCF_RESKEY_bwlimit"
fi
# only start when the configuration declares a pid file
if grep -v "^#" "$CONF_FILE" | grep "pid file" > /dev/null ; then
$COMMAND;
if [ $? -ne 0 ]; then
# note: $? in the message below is the status of the "[" test above
# (i.e. 0), not the exit code of $COMMAND
- ocf_log err "Error. rsync daemon returned error $?."
+ ocf_exit_reason "Error. rsync daemon returned error $?."
exit $OCF_ERR_GENERIC
fi
else
- ocf_log err "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA."
+ ocf_exit_reason "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA."
return $OCF_ERR_GENERIC
fi
ocf_log info "Started rsync daemon."
exit $OCF_SUCCESS
}
# Stop the rsync daemon if rsyncd_status reports it as running.
# Sends the default kill signal first and KILL only if that kill command
# itself fails, then removes $PIDFILE.
# Exits the script with $OCF_SUCCESS; returns $OCF_ERR_GENERIC when both
# kill attempts fail.
# The "-"/"+" line pair below is patch markup: the change reports the
# failure through ocf_exit_reason instead of ocf_log err.
rsyncd_stop()
{
if rsyncd_status ; then
PID=`cat $PIDFILE`
if [ -n "$PID" ] ; then
kill $PID
if [ $? -ne 0 ]; then
kill -s KILL $PID
if [ $? -ne 0 ]; then
- ocf_log err "Error. Could not stop rsync daemon."
+ ocf_exit_reason "Error. Could not stop rsync daemon."
return $OCF_ERR_GENERIC
fi
fi
# NOTE(review): there is no wait for the process to disappear before the
# pid file is removed and success is reported -- confirm this is intended.
rm $PIDFILE 2>/dev/null
fi
fi
ocf_log info "Stopped rsync daemon."
exit $OCF_SUCCESS
}
# Monitor action: delegates to rsyncd_status and hands back its result
# unchanged.
rsyncd_monitor()
{
    rsyncd_status
    return $?
}
# Validate the resource parameters.
# Exits the script with $OCF_ERR_ARGS when a configured binpath is not
# executable or a configured conffile is not a regular file; returns
# $OCF_ERR_GENERIC when $CONF_FILE has no "pid file" entry, otherwise
# returns $OCF_SUCCESS. OCF_RESKEY_bwlimit is not checked.
# The "-"/"+" line pairs below are patch markup: the change reports
# failures through ocf_exit_reason instead of ocf_log err.
rsyncd_validate_all()
{
if [ -n "$OCF_RESKEY_binpath" -a ! -x "$OCF_RESKEY_binpath" ]; then
- ocf_log err "Binary path $OCF_RESKEY_binpath does not exist."
+ ocf_exit_reason "Binary path $OCF_RESKEY_binpath does not exist."
exit $OCF_ERR_ARGS
fi
if [ -n "$OCF_RESKEY_conffile" -a ! -f "$OCF_RESKEY_conffile" ]; then
- ocf_log err "Config file $OCF_RESKEY_conffile does not exist."
+ ocf_exit_reason "Config file $OCF_RESKEY_conffile does not exist."
exit $OCF_ERR_ARGS
fi
# $CONF_FILE is not set here; it must already have been set by the caller
if grep -v "^#" "$CONF_FILE" | grep "pid file" > /dev/null ; then
:
else
- ocf_log err "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA."
+ ocf_exit_reason "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA."
return $OCF_ERR_GENERIC
fi
#Not checking "$OCF_RESKEY_bwlimit"
return $OCF_SUCCESS
}
#
# Main
#
# Exactly one argument (the action) is required.
[ $# -eq 1 ] || {
    usage
    exit $OCF_ERR_ARGS
}

# Dispatch the action. Every action that inspects the daemon first
# resolves CONF_FILE/PIDFILE; the script's exit status is that of the
# handler unless the handler exits on its own.
case $1 in
    start|stop|status|monitor)
        get_pid_and_conf_file
        rsyncd_$1
        ;;
    validate-all)
        get_pid_and_conf_file
        rsyncd_validate_all
        ;;
    meta-data)
        meta_data
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
diff --git a/heartbeat/slapd b/heartbeat/slapd
index ffb40e845..c26b16f32 100755
--- a/heartbeat/slapd
+++ b/heartbeat/slapd
@@ -1,591 +1,591 @@
#!/bin/bash
#
# Stand-alone LDAP Daemon (slapd)
#
# Description: Manages Stand-alone LDAP Daemon (slapd) as an OCF resource in
# a high-availability setup.
#
# Authors: Jeroen Koekkoek
# nozawat@gmail.com
# John Keith Hohm
#
# License: GNU General Public License (GPL)
# Copyright: (C) 2011 Pagelink B.V.
#
# The OCF code was inspired by the Postfix resource script written by
# Raoul Bhatia <r.bhatia@ipax.at>.
#
# The code for managing the slapd instance is based on the slapd init
# script found in Debian GNU/Linux 6.0.
#
# OCF parameters:
# OCF_RESKEY_slapd
# OCF_RESKEY_ldapsearch
# OCF_RESKEY_config
# OCF_RESKEY_pidfile
# OCF_RESKEY_user
# OCF_RESKEY_group
# OCF_RESKEY_services
# OCF_RESKEY_watch_suffix
# OCF_RESKEY_ignore_suffix
# OCF_RESKEY_bind_dn
# OCF_RESKEY_password
# OCF_RESKEY_parameters
# OCF_RESKEY_stop_escalate
#
################################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
: ${OCF_RESKEY_slapd="/usr/sbin/slapd"}
: ${OCF_RESKEY_ldapsearch="ldapsearch"}
: ${OCF_RESKEY_config=""}
: ${OCF_RESKEY_pidfile=""}
: ${OCF_RESKEY_user=""}
: ${OCF_RESKEY_group=""}
: ${OCF_RESKEY_services="ldap:///"}
: ${OCF_RESKEY_watch_suffix=""}
: ${OCF_RESKEY_ignore_suffix=""}
: ${OCF_RESKEY_bind_dn=""}
: ${OCF_RESKEY_password=""}
: ${OCF_RESKEY_parameters=""}
: ${OCF_RESKEY_stop_escalate=15}
USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"
ORIG_IFS=$IFS
NEWLINE='
'
################################################################################
# Print the usage string ($USAGE) to stderr.
usage() {
    # Quoted so the text is emitted verbatim: no word splitting and no
    # pathname expansion, even if $0 contains blanks or glob characters.
    printf '%s\n' "$USAGE" >&2
}
# Print the OCF meta-data (XML) describing this agent's parameters and
# actions on stdout.
meta_data()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="slapd">
<version>0.1</version>
<longdesc lang="en">
Resource script for Stand-alone LDAP Daemon (slapd). It manages a slapd instance as an OCF resource.
</longdesc>
<shortdesc lang="en">Manages a Stand-alone LDAP Daemon (slapd) instance</shortdesc>
<parameters>
<parameter name="slapd" unique="0" required="0">
<longdesc lang="en">
Full path to the slapd binary.
For example, "/usr/sbin/slapd".
</longdesc>
<shortdesc lang="en">Full path to slapd binary</shortdesc>
<content type="string" default="/usr/sbin/slapd" />
</parameter>
<parameter name="ldapsearch" unique="0" required="0">
<longdesc lang="en">
Full path to the ldapsearch binary.
For example, "/usr/bin/ldapsearch".
</longdesc>
<shortdesc lang="en">Full path to ldapsearch binary</shortdesc>
<content type="string" default="ldapsearch" />
</parameter>
<parameter name="config" required="0" unique="1">
<longdesc lang="en">
Full path to a slapd configuration directory or a slapd configuration file.
For example, "/etc/ldap/slapd.d" or "/etc/ldap/slapd.conf".
</longdesc>
<shortdesc lang="en">Full path to configuration directory or file</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="pidfile" required="0" unique="0">
<longdesc lang="en">
File to read the PID from; read from olcPidFile/pidfile in config if not set.
</longdesc>
<shortdesc lang="en">File to read PID from</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User name or id slapd will run with. The group id is also changed to this
user's gid, unless the group parameter is used to override.
</longdesc>
<shortdesc lang="en">User name or id slapd will run with</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group name or id slapd will run with.
</longdesc>
<shortdesc lang="en">Group name or id slapd will run with</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="services" required="0" unique="1">
<longdesc lang="en">
LDAP (and other scheme) URLs slapd will serve.
For example, "ldap://127.0.0.1:389 ldaps:/// ldapi:///"
</longdesc>
<shortdesc lang="en">LDAP (and other scheme) URLs to serve</shortdesc>
<content type="string" default="ldap:///"/>
</parameter>
<parameter name="watch_suffix" required="0" unique="0">
<longdesc lang="en">
Suffix (database backend) that will be monitored for availability. Multiple
suffixes can be specified by providing a space separated list. By providing one
or more suffixes here, the ignore_suffix parameter is discarded. All suffixes
will be monitored if left blank.
</longdesc>
<shortdesc lang="en">Suffix that will be monitored for availability.</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="ignore_suffix" required="0" unique="0">
<longdesc lang="en">
Suffix (database backend) that will not be monitored for availability. Multiple
suffixes can be specified by providing a space separated list. No suffix will
be excluded if left blank.
</longdesc>
<shortdesc lang="en">Suffix that will not be monitored for availability.</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="bind_dn" required="0" unique="0">
<longdesc lang="en">
Distinguished Name used to bind to the LDAP directory for testing. Leave blank
to bind to the LDAP directory anonymously.
</longdesc>
<shortdesc lang="en">Distinguished Name used to bind to the LDAP directory for testing.</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="password" required="0" unique="0">
<longdesc lang="en">
Password used to bind to the LDAP directory for testing.
</longdesc>
<shortdesc lang="en">Password used to bind to the LDAP directory for testing.</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="parameters" unique="0" required="0">
<longdesc lang="en">
slapd may be called with additional parameters.
Specify any of them here.
</longdesc>
<shortdesc lang="en">Any additional parameters to slapd.</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="stop_escalate" unique="0" required="0">
<longdesc lang="en">
Number of seconds to wait for shutdown (using SIGTERM) before resorting to
SIGKILL
</longdesc>
<shortdesc lang="en">Seconds before stop escalation to KILL</shortdesc>
<content type="integer" default="15" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" depth="0" timeout="20s" interval="60s" />
<action name="validate-all" timeout="20s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}
# Send a signal to a process and wait for it to disappear.
#
# Arguments: $1 - process id
#            $2 - signal name or number (passed to kill as -$2)
#            $3 - maximum number of one-second checks; 0 or omitted
#                 means check until the process is gone
# Returns:   0 when the signal could not be delivered or the process is
#            gone, 1 when it is still present after the last check.
terminate()
{
    local target=$1
    local sig=$2
    local limit=${3-0}
    local checks=0

    # a kill that fails is treated the same as "process already gone"
    if ! kill -$sig $target >/dev/null 2>&1; then
        return 0
    fi

    while [ $limit -eq 0 ] || [ $checks -lt $limit ]; do
        checks=$((checks + 1))
        if ! kill -0 $target >/dev/null 2>&1; then
            return 0
        fi
        sleep 1
    done

    return 1
}
# Succeed when suffix $2 is named in the suffix list $1.
# Accepted list forms: a single suffix, a blank-separated list of
# suffixes, or a list whose entries are each wrapped in single quotes.
# All comparisons are literal (no regular-expression matching).
_slapd_suffix_listed()
{
    local list=$1
    local item=$2
    local padded

    # historic forms: the whole value is the suffix, or the entry appears
    # in the list wrapped in single quotes
    if printf '%s\n' "'$list'" | grep -F -- "'$item'" >/dev/null 2>&1; then
        return 0
    fi

    [ -n "$item" ] || return 1

    # plain blank-separated list: compare whole words only
    padded=" $(printf '%s' "$list" | tr '\t\n' '  ') "
    case "$padded" in
        *" $item "*) return 0 ;;
    esac
    return 1
}

# Decide whether the database suffix $1 should be monitored.
# When OCF_RESKEY_watch_suffix is set, only suffixes listed there are
# watched and OCF_RESKEY_ignore_suffix is disregarded; otherwise every
# suffix is watched except those listed in OCF_RESKEY_ignore_suffix.
# Returns 0 to watch the suffix, 1 to skip it.
watch_suffix()
{
    if [ -n "$OCF_RESKEY_watch_suffix" ]; then
        _slapd_suffix_listed "$OCF_RESKEY_watch_suffix" "$1"
        return $?
    fi

    if _slapd_suffix_listed "$OCF_RESKEY_ignore_suffix" "$1"; then
        return 1
    fi
    return 0
}
# Print the pid stored on the first line of $pid_file.
# Returns $OCF_SUCCESS (pid written to stdout), $OCF_ERR_GENERIC when the
# file exists but its first line is empty, or $OCF_NOT_RUNNING when the
# file does not exist. Nothing is printed on stdout in the failure cases.
# The "-"/"+" line pair below is patch markup: the change reports the
# failure through ocf_exit_reason instead of ocf_log err.
slapd_pid()
{
local pid
if [ -f "$pid_file" ]; then
pid=`head -n 1 "$pid_file" 2>/dev/null`
if [ "X$pid" != "X" ]; then
echo "$pid"
return $OCF_SUCCESS
fi
- ocf_log err "slapd pid file '$pid_file' empty."
+ ocf_exit_reason "slapd pid file '$pid_file' empty."
return $OCF_ERR_GENERIC
fi
ocf_log info "slapd pid file '$pid_file' does not exist."
return $OCF_NOT_RUNNING
}
# Report whether the slapd process with pid $1 is alive.
#
# Calling convention: invoke directly after slapd_pid, either as
#   slapd_status `slapd_pid`
# or
#   pid=`slapd_pid`; slapd_status $pid
# so that $? on entry carries slapd_pid's return code.
#
# Returns: slapd_pid's code unchanged when it was not $OCF_SUCCESS;
#          otherwise $OCF_SUCCESS when the pid answers signal 0 and
#          $OCF_NOT_RUNNING when it does not.
slapd_status()
{
    # Must be the very first statement: any earlier command (including
    # another "local") would overwrite $? and the pid lookup result
    # would always read as success.
    local state=$?
    local pid=$1

    [ $state -eq $OCF_SUCCESS ] || return $state

    if kill -0 "$pid" >/dev/null 2>&1; then
        return $OCF_SUCCESS
    fi
    return $OCF_NOT_RUNNING
}
# Start slapd unless it is already running, then poll slapd_monitor once
# per second until it reports success.
# Returns $OCF_SUCCESS, $OCF_ERR_INSTALLED when $config is neither a
# directory nor a file, or $OCF_ERR_GENERIC when the status check or the
# slapd command reports an error.
# The "-"/"+" line pairs below are patch markup: the change reports
# failures through ocf_exit_reason instead of ocf_log err.
slapd_start()
{
local options
local reason
local rc
local state
slapd_status `slapd_pid`; state=$?
if [ $state -eq $OCF_SUCCESS ]; then
ocf_log info "slapd already running."
return $state
elif [ $state -eq $OCF_ERR_GENERIC ]; then
return $state
fi
options="-u $user -g $group"
# a configuration directory is passed with -F, a configuration file with -f
if [ -d "$config" ]; then
options="$options -F $config"
elif [ -f "$config" ]; then
options="$options -f $config"
else
- ocf_log err "slapd configuration '$config' does not exist."
+ ocf_exit_reason "slapd configuration '$config' does not exist."
return $OCF_ERR_INSTALLED
fi
if [ -n "$parameters" ]; then
options="$options $parameters"
fi
# $options is intentionally unquoted so it splits into separate arguments
if [ -n "$services" ]; then
$slapd -h "$services" $options 2>&1; rc=$?
else
$slapd $options 2>&1; rc=$?
fi
if [ $rc -ne 0 ]; then
- ocf_log err "slapd returned error."
+ ocf_exit_reason "slapd returned error."
return $OCF_ERR_GENERIC
fi
# NOTE(review): this loop has no timeout of its own; presumably the
# cluster's start operation timeout is what ends it -- confirm.
while true; do
# the "start" argument makes slapd_monitor less noisy while starting up
slapd_monitor start
if [ $? = "$OCF_SUCCESS" ]; then
break
fi
sleep 1
done
ocf_log info "slapd started."
return $OCF_SUCCESS
}
# Stop slapd: send TERM and wait up to $OCF_RESKEY_stop_escalate seconds,
# then escalate to KILL; finally remove $pid_file.
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when the status check reports
# that code.
# The "-"/"+" line pair below is patch markup: the change reports the
# failure through ocf_exit_reason instead of ocf_log err.
slapd_stop()
{
local pid
local rc
local state
pid=`slapd_pid`; slapd_status $pid; state=$?
if [ $state -eq $OCF_NOT_RUNNING ]; then
ocf_log info "slapd already stopped."
return $OCF_SUCCESS
elif [ $state -eq $OCF_ERR_GENERIC ]; then
return $state
fi
terminate $pid TERM $OCF_RESKEY_stop_escalate; rc=$?
if [ $rc -ne 0 ]; then
- ocf_log err "slapd failed to stop. Escalating to KILL."
+ ocf_exit_reason "slapd failed to stop. Escalating to KILL."
# no recheck limit is passed, so terminate waits until the process is gone;
# the resulting rc is not examined afterwards
terminate $pid KILL; rc=$?
fi
if [ -f "$pid_file" ]; then
rm -f "$pid_file" >/dev/null 2>&1
fi
ocf_log info "slapd stopped."
return $OCF_SUCCESS
}
# Monitor slapd: check the process, then run an ldapsearch base query
# against every configured suffix that watch_suffix selects.
#
# Arguments: $1 - optional; any non-empty value (slapd_start passes
#                 "start") suppresses some error reporting
# Returns:   the process state when slapd is not running or the pid
#            lookup failed, $OCF_ERR_INSTALLED when $config is missing
#            (outside a probe), $OCF_ERR_CONFIGURED on ldapsearch exit
#            code 49, $OCF_ERR_GENERIC when any suffix is unreachable,
#            otherwise $OCF_SUCCESS.
# The "-"/"+" line pairs below are patch markup: the change reports
# failures through ocf_exit_reason instead of ocf_log err.
slapd_monitor()
{
local options
local rc
local state
local suffix
local suffixes
local err_option="-info"
slapd_status `slapd_pid`; state=$?
if [ $state -eq $OCF_NOT_RUNNING ]; then
# only complain for a regular monitor: not while starting, not on a probe
if [ -z "$1" ];then
if ! ocf_is_probe; then
- ocf_log err "slapd process not found."
+ ocf_exit_reason "slapd process not found."
fi
fi
return $state
elif [ $state -ne $OCF_SUCCESS ]; then
- ocf_log err "slapd returned error."
+ ocf_exit_reason "slapd returned error."
return $state
fi
# collect the suffixes to test: olcSuffix values from a configuration
# directory, or "suffix" lines from a configuration file
if [ -d "$config" ]; then
for suffix in `find "$config"/'cn=config' -type f -name olcDatabase* -exec \
sed -ne 's/^[[:space:]]*olcSuffix:[[:space:]]\+\(.\+\)/\1/p' {} \;`
do
# strip one leading and one trailing double quote, if present
suffix=${suffix#\"*}
suffix=${suffix%\"*}
if watch_suffix $suffix; then
suffixes="$suffixes $suffix"
fi
done
elif [ -f "$config" ]; then
for suffix in `sed -ne 's/^[[:space:]]*suffix[[:space:]]\+\(.\+\)/\1/p' "$config"`
do
# strip one leading and one trailing double quote, if present
suffix=${suffix#\"*}
suffix=${suffix%\"*}
if watch_suffix $suffix; then
suffixes="$suffixes $suffix"
fi
done
else
if ocf_is_probe; then
ocf_log info "slapd configuration '$config' does not exist during probe."
else
- ocf_log err "slapd configuration '$config' does not exist."
+ ocf_exit_reason "slapd configuration '$config' does not exist."
return $OCF_ERR_INSTALLED
fi
fi
options="-LLL -s base -x"
if [ -n "$bind_dn" ]; then
options="$options -D $bind_dn -w $password"
fi
# "-info" is passed to ocf_run only when an argument was given
[ -z "$1" ] && err_option=""
for suffix in $suffixes; do
ocf_run -q $err_option "$ldapsearch" -H "$services" -b "$suffix" $options >/dev/null 2>&1; rc=$?
case "$rc" in
"0")
ocf_log debug "slapd database with suffix '$suffix' reachable"
;;
"49")
- ocf_log err "slapd database with suffix '$suffix' unreachable. Invalid credentials."
+ ocf_exit_reason "slapd database with suffix '$suffix' unreachable. Invalid credentials."
return $OCF_ERR_CONFIGURED
;;
*)
# with an argument given, exit code 1 is not reported, but the state is
# still marked as failed below
if [ -z "$1" ] || [ -n "$1" -a $rc -ne 1 ]; then
- ocf_log err "slapd database with suffix '$suffix' unreachable. exit code ($rc)"
+ ocf_exit_reason "slapd database with suffix '$suffix' unreachable. exit code ($rc)"
fi
state=$OCF_ERR_GENERIC
;;
esac
done
return $state
}
# Validate binaries and parameters, and fill in defaults.
# - derives $pid_file from the configuration when it is not set
# - defaults $user/$group to the invoking user's name/group, or verifies
#   that the configured ones exist
# - creates the pid file's directory (owned by $user:$group) if missing
# Returns $OCF_SUCCESS or $OCF_ERR_INSTALLED.
# The "-"/"+" line pairs below are patch markup: the change reports
# failures through ocf_exit_reason instead of ocf_log err.
slapd_validate_all()
{
check_binary "$slapd"
check_binary "$ldapsearch"
if [ -z "$pid_file" ]; then
# read olcPidFile from cn=config.ldif (directory) or pidfile (file)
if [ -d "$config" ]; then
pid_file=`sed -ne \
's/^olcPidFile:[[:space:]]\+\(.\+\)[[:space:]]*/\1/p' \
"$config"/'cn=config.ldif' 2>/dev/null`
elif [ -f "$config" ]; then
pid_file=`sed -ne \
's/^pidfile[[:space:]]\+\(.\+\)/\1/p' \
"$config" 2>/dev/null`
else
if ocf_is_probe; then
ocf_log info "slapd configuration '$config' does not exist during probe."
else
- ocf_log err "slapd configuration '$config' does not exist."
+ ocf_exit_reason "slapd configuration '$config' does not exist."
return $OCF_ERR_INSTALLED
fi
fi
fi
if [ -z "$user" ]; then
user=`id -nu 2>/dev/null`
elif ! id "$user" >/dev/null 2>&1; then
- ocf_log err "slapd user '$user' does not exist"
+ ocf_exit_reason "slapd user '$user' does not exist"
return $OCF_ERR_INSTALLED
fi
if [ -z "$group" ]; then
group=`id -ng 2>/dev/null`
# the group is looked up by name in /etc/group only
elif ! grep "^$group:" /etc/group >/dev/null 2>&1; then
- ocf_log err "slapd group '$group' does not exist"
+ ocf_exit_reason "slapd group '$group' does not exist"
return $OCF_ERR_INSTALLED
fi
# NOTE(review): $pid_file may still be empty here (e.g. during a probe
# with a missing configuration); dirname then yields "." -- confirm this
# is acceptable.
pid_dir=`dirname "$pid_file"`
if [ ! -d "$pid_dir" ]; then
mkdir -p "$pid_dir"
chown -R "$user" "$pid_dir"
chgrp -R "$group" "$pid_dir"
fi
return $OCF_SUCCESS
}
#
# Main
#
# short aliases for the resource parameters
slapd=$OCF_RESKEY_slapd
ldapsearch=$OCF_RESKEY_ldapsearch
config=$OCF_RESKEY_config
user=$OCF_RESKEY_user
group=$OCF_RESKEY_group
services=$OCF_RESKEY_services
bind_dn=$OCF_RESKEY_bind_dn
password=$OCF_RESKEY_password
parameters=$OCF_RESKEY_parameters
pid_file=$OCF_RESKEY_pidfile
# No configuration given: look under /etc/openldap if it exists, else
# /etc/ldap, and prefer a slapd.d directory over slapd.conf.
if [ -z "$config" ]; then
config_dirname="/etc/ldap"
if [ -e "/etc/openldap" ]; then
config_dirname="/etc/openldap"
fi
config="$config_dirname/slapd.conf"
if [ -e "$config_dirname/slapd.d" ]; then
config="$config_dirname/slapd.d"
fi
fi
# exactly one argument (the action) is required
if [ $# -ne 1 ]; then
usage
exit $OCF_ERR_ARGS
fi
# meta-data/usage/help are answered without validating the configuration
case $1 in
meta-data)
meta_data
exit $OCF_SUCCESS
;;
usage|help)
usage
exit $OCF_SUCCESS
;;
esac
# every remaining action, including stop, requires a valid configuration
slapd_validate_all
rc=$?
[ $rc -eq $OCF_SUCCESS ] || exit $rc
# dispatch the requested action
case $1 in
status)
slapd_status `slapd_pid`; state=$?
if [ $state -eq $OCF_SUCCESS ]; then
ocf_log debug "slapd is running."
elif [ $state -eq $OCF_NOT_RUNNING ]; then
ocf_log debug "slapd is stopped."
fi
exit $state
;;
start)
slapd_start
exit $?
;;
stop)
slapd_stop
exit $?
;;
monitor)
slapd_monitor; state=$?
exit $state
;;
validate-all)
exit $OCF_SUCCESS
;;
*)
usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
diff --git a/heartbeat/symlink b/heartbeat/symlink
index 214092d0e..1e36a9c74 100755
--- a/heartbeat/symlink
+++ b/heartbeat/symlink
@@ -1,245 +1,245 @@
#!/bin/sh
#
#
# An OCF RA that manages a symlink
#
# Copyright (c) 2011 Dominik Klein
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the resource agent metadata (an XML document) to stdout.
# The document declares the parameters "link" and "target" (both marked
# required) and the optional "backup_suffix", plus the supported actions
# with their timeouts.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="symlink">
<version>1.1</version>
<longdesc lang="en">
This resource agent that manages a symbolic link (symlink).
It is primarily intended to manage configuration files which should be
enabled or disabled based on where the resource is running, such as
cron job definitions and the like.
</longdesc>
<shortdesc lang="en">Manages a symbolic link</shortdesc>
<parameters>
<parameter name="link" required="1">
<longdesc lang="en">
Full path of the symbolic link to be managed. This must obviously be
in a filesystem that supports symbolic links.
</longdesc>
<shortdesc lang="en">Full path of the symlink</shortdesc>
<content type="string"/>
</parameter>
<parameter name="target" required="1">
<longdesc lang="en">
Full path to the link target (the file or directory which the symlink points to).
</longdesc>
<shortdesc lang="en">Full path to the link target</shortdesc>
<content type="string" />
</parameter>
<parameter name="backup_suffix">
<longdesc lang="en">
A suffix to append to any files that the resource agent moves out of
the way because they clash with "link".
If this is unset (the default), then the resource agent will simply
refuse to create a symlink if it clashes with an existing file.
</longdesc>
<shortdesc lang="en">Suffix to append to backup files</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="15" />
<action name="stop" timeout="15" />
<action name="monitor" depth="0" timeout="15" interval="60"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="10" />
</actions>
</resource-agent>
END
}
# Report the state of the managed symlink.
#
# Returns:
#   $OCF_SUCCESS      - $OCF_RESKEY_link is a symlink whose resolved path
#                       matches $OCF_RESKEY_target
#   $OCF_NOT_RUNNING  - the link does not exist, or something else is in its
#                       place and backup_suffix is set
# Exits the agent with $OCF_ERR_INSTALLED when something else is in the
# link's place and backup_suffix is empty.
symlink_monitor() {
# This applies the following logic:
#
# * If $OCF_RESKEY_link does not exist, then the resource is
# definitely stopped.
#
# * If $OCF_RESKEY_link exists and is a symlink that points to
# ${OCF_RESKEY_target}, then the resource is definitely started.
#
# * If $OCF_RESKEY_link exists, but is anything other than a
# symlink to ${OCF_RESKEY_target}, then the status depends on whether
# ${OCF_RESKEY_backup_suffix} is set:
#
# - if ${OCF_RESKEY_backup_suffix} is set, then the resource is
# simply not running. The existing file will be moved out of
# the way, to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix},
# when the resource starts.
#
# - if ${OCF_RESKEY_backup_suffix} is not set, then an existing
# file ${OCF_RESKEY_link} is an error condition, and the
# resource can't start here.
# Default result; every branch below either overrides it or exits.
rc=$OCF_ERR_GENERIC
# Using ls here instead of "test -e", as "test -e" returns false
# if the file does exist, but is a symlink to a file that doesn't
if ! ls "$OCF_RESKEY_link" >/dev/null 2>&1; then
ocf_log debug "$OCF_RESKEY_link does not exist"
rc=$OCF_NOT_RUNNING
elif [ ! -L "$OCF_RESKEY_link" ]; then
# Something that is not a symlink occupies the link path.
if [ -z "$OCF_RESKEY_backup_suffix" ]; then
- ocf_log err "$OCF_RESKEY_link exists but is not a symbolic link!"
+ ocf_exit_reason "$OCF_RESKEY_link exists but is not a symbolic link!"
exit $OCF_ERR_INSTALLED
else
ocf_log debug "$OCF_RESKEY_link exists but is not a symbolic link, will be moved to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} on start"
rc=$OCF_NOT_RUNNING
fi
# NOTE(review): the target is interpolated into an extended regular
# expression here, so regex metacharacters in the path (for example ".")
# are not matched literally - confirm whether a plain string comparison
# was intended.
elif readlink -f "$OCF_RESKEY_link" | egrep -q "^${OCF_RESKEY_target}$"; then
ocf_log debug "$OCF_RESKEY_link exists and is a symbolic link to ${OCF_RESKEY_target}."
rc=$OCF_SUCCESS
else
# A symlink exists but resolves to something other than the target.
if [ -z "$OCF_RESKEY_backup_suffix" ]; then
- ocf_log err "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}!"
+ ocf_exit_reason "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}!"
exit $OCF_ERR_INSTALLED
else
ocf_log debug "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}, will be moved to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} on start"
rc=$OCF_NOT_RUNNING
fi
fi
return $rc
}
# Create the symlink unless symlink_monitor already reports success.
#
# An existing file at the link path is moved to
# ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} when backup_suffix is set;
# otherwise the agent exits with $OCF_ERR_GENERIC. After creating the link
# the function returns whatever symlink_monitor reports.
symlink_start() {
if ! symlink_monitor; then
if [ -e "$OCF_RESKEY_link" ]; then
if [ -z "$OCF_RESKEY_backup_suffix" ]; then
# Shouldn't happen, because symlink_monitor should
# have errored out. But there is a chance that
# something else put that file there after
# symlink_monitor ran.
- ocf_log err "$OCF_RESKEY_link exists and no backup_suffix is set, won't overwrite."
+ ocf_exit_reason "$OCF_RESKEY_link exists and no backup_suffix is set, won't overwrite."
exit $OCF_ERR_GENERIC
else
ocf_log debug "Found $OCF_RESKEY_link, moving to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}"
# A failed move aborts the start with $OCF_ERR_GENERIC.
ocf_run mv -v "$OCF_RESKEY_link" "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" \
|| exit $OCF_ERR_GENERIC
fi
fi
# The result of ln is not checked directly; the monitor call below
# decides the outcome.
ocf_run ln -sv "$OCF_RESKEY_target" "$OCF_RESKEY_link"
symlink_monitor
return $?
else
# Already in the desired state.
return $OCF_SUCCESS
fi
}
# Remove the symlink when symlink_monitor reports it as present.
#
# Returns $OCF_SUCCESS when the link is gone afterwards (or was not there to
# begin with) and $OCF_ERR_GENERIC when the monitor still succeeds after the
# removal. Exits with $OCF_ERR_GENERIC when the rm command itself fails.
symlink_stop() {
if symlink_monitor; then
ocf_run rm -vf "$OCF_RESKEY_link" || exit $OCF_ERR_GENERIC
if ! symlink_monitor; then
# NOTE(review): with an empty backup_suffix this path is the link path
# itself - confirm that the restore step is meant to be skipped only
# because the link no longer exists at this point.
if [ -e "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" ]; then
ocf_log debug "Found backup ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}, moving to $OCF_RESKEY_link"
# if restoring the backup fails then still return with
# $OCF_SUCCESS, but log a warning
ocf_run -warn mv "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" "$OCF_RESKEY_link"
fi
return $OCF_SUCCESS
else
- ocf_log err "Removing $OCF_RESKEY_link failed."
+ ocf_exit_reason "Removing $OCF_RESKEY_link failed."
return $OCF_ERR_GENERIC
fi
else
# Nothing to remove.
return $OCF_SUCCESS
fi
}
# Check the mandatory parameters.
#
# Exits with $OCF_ERR_CONFIGURED when "link" or "target" is empty. A missing
# target only produces a warning.
symlink_validate_all() {
if [ "x${OCF_RESKEY_link}" = "x" ]; then
- ocf_log err "Mandatory parameter link is unset"
+ ocf_exit_reason "Mandatory parameter link is unset"
exit $OCF_ERR_CONFIGURED
fi
if [ "x${OCF_RESKEY_target}" = "x" ]; then
- ocf_log err "Mandatory parameter target is unset"
+ ocf_exit_reason "Mandatory parameter target is unset"
exit $OCF_ERR_CONFIGURED
fi
# Having a non-existent target is technically not an error, as
# symlinks are allowed to point to non-existent paths. But it
# still doesn't hurt to warn people if the target does not exist.
# NOTE(review): the original comment said the warning is issued only
# during non-probes, but no probe check is visible in this function.
if [ ! -e "${OCF_RESKEY_target}" ]; then
ocf_log warn "${OCF_RESKEY_target} does not exist!"
fi
}
# Print a short usage message listing the supported actions to stdout.
symlink_usage() {
cat <<EOF
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
EOF
}
# Exactly one argument (the action) is expected.
[ $# -eq 1 ] || {
	symlink_usage
	exit $OCF_ERR_ARGS
}

# meta-data and usage are answered without validating any parameter.
if [ "$__OCF_ACTION" = "meta-data" ]; then
	meta_data
	exit $OCF_SUCCESS
elif [ "$__OCF_ACTION" = "usage" ]; then
	symlink_usage
	exit $OCF_SUCCESS
fi

# Everything except usage and meta-data must pass the validate test
symlink_validate_all || exit

# Dispatch. The status of the function called here becomes the status of
# the case statement, and validate-all falls through with status 0.
case "$__OCF_ACTION" in
	start)
		symlink_start
		;;
	stop)
		symlink_stop
		;;
	monitor|status)
		symlink_monitor
		;;
	validate-all)
		;;
	*)
		symlink_usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
# exit code is the exit code (return code) of the last command (shell function)
diff --git a/heartbeat/tomcat b/heartbeat/tomcat
index f9baa0af3..8b7fe31ab 100755
--- a/heartbeat/tomcat
+++ b/heartbeat/tomcat
@@ -1,723 +1,723 @@
#!/bin/sh
#
# Description: Manages a Tomcat Server as an OCF High-Availability
# resource under Heartbeat/LinuxHA control
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
# Copyright (c) 2007 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
#
#######################################################################
# OCF parameters:
# OCF_RESKEY_tomcat_name - The name of the resource. Default is tomcat
# OCF_RESKEY_script_log - A destination of the log of this script. Default /var/log/OCF_RESKEY_tomcat_name.log
# OCF_RESKEY_tomcat_stop_timeout - Time-out at the time of the stop. Default is 5. DEPRECATED
# OCF_RESKEY_tomcat_suspend_trialcount - The re-try number of times awaiting a stop. Default is 10. DEPRECATED
# OCF_RESKEY_tomcat_user - A user name to start a resource.
# OCF_RESKEY_statusurl - URL for state confirmation. Default is http://127.0.0.1:8080
# OCF_RESKEY_max_stop_time - The max time it should take for proper shutdown. Restrictions, only Tomcat6.
# OCF_RESKEY_java_home - Home directory of Java. Default is none
# OCF_RESKEY_java_opts - Options to pass to Java JVM for start and stop. Default is none
# OCF_RESKEY_catalina_home - Home directory of Tomcat. Default is none
# OCF_RESKEY_catalina_base - Base directory of Tomcat. Default is OCF_RESKEY_catalina_home
# OCF_RESKEY_catalina_out - Log file name of Tomcat. Default is OCF_RESKEY_catalina_base/logs/catalina.out
# OCF_RESKEY_catalina_pid - A PID file name of Tomcat. Default is OCF_RESKEY_catalina_base/logs/catalina.pid
# OCF_RESKEY_tomcat_start_opts - Start options of Tomcat. Default is none.
# OCF_RESKEY_catalina_opts - CATALINA_OPTS environment variable. Default is none.
# OCF_RESKEY_catalina_tmpdir - CATALINA_TMPDIR environment variable. Default is none.
# OCF_RESKEY_catalina_rotate_log - Control catalina.out logrotation flag. Default is NO.
# OCF_RESKEY_catalina_rotatetime - catalina.out logrotation time span(seconds). Default is 86400.
# OCF_RESKEY_java_endorsed_dirs - JAVA_ENDORSED_DIRS environment variable. Default is none.
# OCF_RESKEY_logging_config - LOGGING_CONFIG environment variable. Default is none.
# OCF_RESKEY_logging_manager - LOGGING_MANAGER environment variable. Default is none.
###############################################################################
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
############################################################################
# Usage
#
# Print the list of supported actions to stdout. The text expands $0 and
# $WGETNAME at the time the function is called.
usage()
{
cat <<-!
usage: $0 action
action:
start start Tomcat
stop stop Tomcat
status return the status of Tomcat, up or down
monitor return TRUE if Tomcat appears to be working.
You have to have installed $WGETNAME for this to work.
meta-data show meta data message
validate-all validate the instance parameters
!
}
############################################################################
# Check tomcat service availability
#
# Requests $RESOURCE_STATUSURL with $WGET (up to 20 tries), discarding the
# downloaded document and all output.
# Returns: the exit status of the $WGET command.
isrunning_tomcat()
{
	# $WGET is left unquoted so that a value carrying extra options keeps
	# working; the URL is quoted so that it always arrives as one argument
	# and is never subject to pathname expansion (e.g. a "?" in the URL).
	$WGET --tries=20 -O /dev/null "$RESOURCE_STATUSURL" >/dev/null 2>&1
}
############################################################################
# Check whether the Tomcat process exists.
#
# The PID is read from the first line of $CATALINA_PID when that file
# exists; otherwise the value stored in $rememberedPID is used.
# Returns: the status of "kill -s 0" for that PID, or non-zero when no
#          usable PID is available.
isalive_tomcat()
{
	# As the server stops, the PID file disappears. To avoid race conditions,
	# fall back to the PID remembered in $rememberedPID.
	local pid="$rememberedPID"
	local tmp

	# If there is a PID file, attempt to use that.
	# The path is quoted so that a location containing whitespace still works.
	if [ -f "$CATALINA_PID" ]; then
		ocf_log debug "Reading pid from $CATALINA_PID"
		tmp=`head -n 1 "$CATALINA_PID"`
		if [ $? -eq 0 ]; then
			pid=$tmp
		fi
	fi

	# A non-numeric value makes the comparison fail; its diagnostic is
	# suppressed and the function reports "not alive".
	if [ -n "$pid" ] && [ "$pid" -gt 0 ] 2>/dev/null; then
		# Retry message for restraint
		ocf_log debug "Sending noop signal to $pid"
		kill -s 0 "$pid" >/dev/null 2>&1
		return $?
	fi

	# No PID file
	false
}
############################################################################
# Make sure a rotatelogs process for this instance's catalina log exists,
# starting a new one through start_rotatelogs when none is found.
# The outcome of the restart is only logged.
monitor_rotatelogs()
{
	# A matching process is present: nothing to do.
	if pgrep -f "$ROTATELOGS.*$CATALINA_BASE/logs/catalina_%F.log" > /dev/null 2>&1; then
		return 0
	fi

	ocf_log warn "A rotatelogs command for $CATALINA_BASE/logs/catalina_%F.log is not running. Restarting it."
	if start_rotatelogs; then
		ocf_log info "Restart rotatelogs process succeeded."
	else
		ocf_log warn "Restart rotatelogs process failed."
	fi
}
############################################################################
# Check tomcat process and service availability
#
# Returns:
#   $OCF_NOT_RUNNING  - isalive_tomcat fails
#   $OCF_ERR_GENERIC  - the process is there but isrunning_tomcat fails
#   $OCF_SUCCESS      - both checks pass
monitor_tomcat()
{
	if ! isalive_tomcat; then
		return $OCF_NOT_RUNNING
	fi

	if ! isrunning_tomcat; then
		return $OCF_ERR_GENERIC
	fi

	# Monitor rotatelogs process and restart it if it is stopped.
	# And never consider rotatelogs process failure to be a monitor failure
	# as long as Tomcat process works fine.
	ocf_is_true ${CATALINA_ROTATE_LOG} && monitor_rotatelogs

	return $OCF_SUCCESS
}
############################################################################
# Launch a background rotatelogs process as $RESOURCE_TOMCAT_USER.
# Its standard input is $CATALINA_OUT and it writes to
# $CATALINA_BASE/logs/catalina_%F.log, rotating every $CATALINA_ROTATETIME.
start_rotatelogs()
{
	local rotate_cmd

	rotate_cmd="$ROTATELOGS -l \"$CATALINA_BASE/logs/catalina_%F.log\" $CATALINA_ROTATETIME"

	# -s is required because tomcat5.5's login shell is /bin/false
	su - -s /bin/sh $RESOURCE_TOMCAT_USER -c "$rotate_cmd" \
		< "$CATALINA_OUT" > /dev/null 2>&1 &
}
############################################################################
# Execute catalina.out log rotation
#
# Prepares $CATALINA_OUT as a named pipe and starts rotatelogs on it.
# Returns $OCF_ERR_GENERIC when today's rotated log file cannot be touched
# as $RESOURCE_TOMCAT_USER; otherwise the status of start_rotatelogs.
rotate_catalina_out()
{
# Check catalina_%F.log is writable or not.
CURRENT_ROTATELOG_SUFFIX=`date +"%F"`
su - -s /bin/sh $RESOURCE_TOMCAT_USER \
-c "touch \"$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log\"" > /dev/null 2>&1
if [ $? -ne 0 ]; then
- ocf_log err "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable."
+ ocf_exit_reason "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable."
return $OCF_ERR_GENERIC
fi
# Clean up and set permissions on required files
rm -rf "$CATALINA_BASE"/temp/*
# An existing named pipe is simply removed; any other existing file is
# kept under a timestamped name.
if [ -p "$CATALINA_OUT" ]; then
rm -f "$CATALINA_OUT"
elif [ -e "$CATALINA_OUT" ]; then
DATE=`date +"%F-%H%M%S"`
ocf_log warn "$CATALINA_OUT already exists. It is saved as $CATALINA_OUT-$DATE"
mv "$CATALINA_OUT" "$CATALINA_OUT-$DATE"
fi
# Recreate catalina.out as a FIFO with mode 700.
mkfifo -m700 "$CATALINA_OUT"
# A failing chown is deliberately ignored.
chown --dereference "$RESOURCE_TOMCAT_USER" "$CATALINA_OUT" || true
start_rotatelogs
}
############################################################################
# Tomcat Command
#
# Print a shell snippet to stdout that exports the Tomcat related
# environment variables and then invokes $TOMCAT_START_SCRIPT with the
# arguments given to this function. All variables are expanded when the
# snippet is generated. attemptTomcatCommand pipes the snippet into a shell
# started through su.
tomcatCommand()
{
cat<<-END_TOMCAT_COMMAND
export JAVA_HOME=${JAVA_HOME}
export JAVA_OPTS="${JAVA_OPTS}"
export CATALINA_HOME=${CATALINA_HOME}
export CATALINA_BASE=${CATALINA_BASE}
export CATALINA_OUT=${CATALINA_OUT}
export CATALINA_PID=${CATALINA_PID}
export CATALINA_OPTS="${CATALINA_OPTS}"
export CATALINA_TMPDIR="${CATALINA_TMPDIR}"
export JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}"
export LOGGING_CONFIG="${LOGGING_CONFIG}"
export LOGGING_MANAGER="${LOGGING_MANAGER}"
export TOMCAT_CFG=${TOMCAT_CFG}
$TOMCAT_START_SCRIPT $@
END_TOMCAT_COMMAND
}
# Run $TOMCAT_START_SCRIPT with the given arguments, appending all of its
# output to $TOMCAT_CONSOLE.
#
# When $RESOURCE_TOMCAT_USER is root the script is executed directly;
# otherwise the snippet printed by tomcatCommand is piped into a shell
# started through su for that user.
# When $REDIRECT_DEFAULT_CONFIG is set, a temporary file is created, exported
# as TOMCAT_CFG for the duration of the command and removed afterwards.
#
# Arguments: the action and options to pass to the start script.
# Returns:   the status of the final cleanup step, not of the Tomcat script.
attemptTomcatCommand()
{
	if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then
		export TOMCAT_CFG="$(mktemp "${HA_RSCTMP}/tomcat-tmp-XXXXX.cfg")"
	fi

	# "$@" keeps every argument exactly as the caller passed it; an unquoted
	# $@ would split arguments on whitespace and expand glob characters.
	if [ "$RESOURCE_TOMCAT_USER" = root ]; then
		"$TOMCAT_START_SCRIPT" "$@" >> "$TOMCAT_CONSOLE" 2>&1
	else
		tomcatCommand "$@" | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1
	fi

	if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then
		rm -f "$TOMCAT_CFG"
	fi
}
############################################################################
# Start Tomcat
#
# Validates the configuration (exiting with the validation status on
# failure), returns $OCF_SUCCESS right away when monitor_tomcat already
# succeeds, optionally prepares catalina.out rotation, launches the start
# command in the background and then polls monitor_tomcat until it succeeds.
# Returns $OCF_ERR_GENERIC when log rotation cannot be set up.
start_tomcat()
{
cd "$CATALINA_HOME/bin"
validate_all_tomcat || exit $?
monitor_tomcat
if [ $? -eq $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
# Remove $CATALINA_PID if it exists
rm -f $CATALINA_PID
#ocf_log debug "catalina.out rotation FLG = ${CATALINA_ROTATE_LOG}"
if ocf_is_true ${CATALINA_ROTATE_LOG}; then
rotate_catalina_out
if [ $? -eq 0 ]; then
ocf_log debug "Rotate catalina.out succeeded."
else
- ocf_log err "Rotate catalina.out failed. Avoid starting tomcat without catalina.out rotation."
+ ocf_exit_reason "Rotate catalina.out failed. Avoid starting tomcat without catalina.out rotation."
return $OCF_ERR_GENERIC
fi
fi
echo "`date "+%Y/%m/%d %T"`: start ===========================" >> "$TOMCAT_CONSOLE"
ocf_log debug "CATALINA_OPTS value = ${CATALINA_OPTS}"
# The start command runs in the background; its result is not collected.
attemptTomcatCommand start ${TOMCAT_START_OPTS} &
# Poll every 3 seconds until the monitor succeeds. The loop has no limit
# of its own - presumably the cluster's start timeout ends a start that
# never succeeds; verify against the cluster configuration.
while true; do
monitor_tomcat
if [ $? -eq $OCF_SUCCESS ]; then
break
fi
ocf_log debug "start_tomcat[$TOMCAT_NAME]: retry monitor_tomcat"
sleep 3
done
return $OCF_SUCCESS
}
############################################################################
# Stop Tomcat
#
# Runs the stop command and waits until isalive_tomcat reports the process
# gone, sending SIGKILL to the remembered PID once the stop timeout has
# elapsed. Afterwards the PID file (and, with log rotation enabled,
# $CATALINA_OUT) is removed.
#
# The stop timeout (global STOP_TIMEOUT, in seconds) is:
#   - $MAX_STOP_TIME when that is set and not 0
#   - the operation timeout when $MAX_STOP_TIME is 0
#   - otherwise the operation timeout minus 5
# where the operation timeout is $OCF_RESKEY_CRM_meta_timeout / 1000, or 20
# when that variable is empty or unset.
#
# Returns: $OCF_SUCCESS
stop_tomcat()
{
	local stop_time
	local RA_TIMEOUT=20
	local TOMCAT_STOP_OPTS=""

	# The expansion must be quoted: unquoted and empty, the test collapses to
	# "[ -n ]", which is always true, and the timeout would be computed as 0.
	if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
		RA_TIMEOUT=$((OCF_RESKEY_CRM_meta_timeout/1000))
	fi
	STOP_TIMEOUT=$((RA_TIMEOUT-5))

	if [ -n "$MAX_STOP_TIME" ]; then
		if [ "$MAX_STOP_TIME" -gt "$RA_TIMEOUT" ]; then
			ocf_log warn "max_stop_timeout must be shorter than the timeout of stop operation."
		fi
		if [ "$MAX_STOP_TIME" -eq 0 ]; then
			STOP_TIMEOUT=$RA_TIMEOUT
		else
			STOP_TIMEOUT=$MAX_STOP_TIME
		fi
	fi

	cd "$CATALINA_HOME/bin"
	memorize_pid # This lets monitoring continue to work reliably

	echo "`date "+%Y/%m/%d %T"`: stop ###########################" >> "$TOMCAT_CONSOLE"

	# Only catalina.sh is given the timeout and --force arguments.
	if [ "$TOMCAT_START_SCRIPT" = "$CATALINA_HOME/bin/catalina.sh" ]; then
		TOMCAT_STOP_OPTS="$STOP_TIMEOUT --force"
	fi

	stop_time=$(date +%s)
	# TOMCAT_STOP_OPTS is intentionally unquoted so that it splits into
	# separate arguments.
	attemptTomcatCommand stop $TOMCAT_STOP_OPTS

	lapse_sec=0
	while isalive_tomcat; do
		sleep 1
		lapse_sec=$(( $(date +%s) - stop_time ))
		if [ $lapse_sec -ge $STOP_TIMEOUT ]; then
			ocf_log debug "stop_tomcat[$TOMCAT_NAME]: stop failed, killing with SIGKILL ($lapse_sec)"
			kill -s KILL $rememberedPID > /dev/null 2>&1
		fi
	done

	if ocf_is_true ${CATALINA_ROTATE_LOG}; then
		rm -f "$CATALINA_PID" "${CATALINA_OUT}"
	else
		rm -f "$CATALINA_PID"
	fi
	return $OCF_SUCCESS
}
# Print the resource agent metadata (an XML document) to stdout and return
# $OCF_SUCCESS. The here-document is expanded by the shell, so the default
# shown for tomcat_start_script is the current value of $TOMCAT_START_SCRIPT.
metadata_tomcat()
{
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="tomcat">
<version>1.0</version>
<longdesc lang="en">
Resource script for Tomcat. It manages a Tomcat instance as a cluster resource.
</longdesc>
<shortdesc lang="en">Manages a Tomcat servlet environment instance</shortdesc>
<parameters>
<parameter name="tomcat_name" unique="1" >
<longdesc lang="en"><![CDATA[
The name of the resource, added as a Java parameter in JAVA_OPTS:
-Dname=<tomcat_name> to Tomcat process on start. Used to ensure
process is still running and must be unique.
]]></longdesc>
<shortdesc>The name of the resource</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="script_log" unique="1">
<longdesc lang="en">
Log file, used during start and stop operations.
</longdesc>
<shortdesc>Log file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="tomcat_stop_timeout" unique="0">
<longdesc lang="en">
Time-out for stop operation. DEPRECATED
</longdesc>
<shortdesc>Time-out for the stop operation. DEPRECATED</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="tomcat_suspend_trialcount" unique="0">
<longdesc lang="en">
Maximum number of times to retry stop operation before suspending
and killing Tomcat. DEPRECATED. Does not retry.
</longdesc>
<shortdesc>Max retry count for stop operation. DEPRECATED</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="tomcat_user" unique="0">
<longdesc lang="en">
The user who starts Tomcat.
</longdesc>
<shortdesc>The user who starts Tomcat</shortdesc>
<content type="string" default="root" />
</parameter>
<parameter name="statusurl" unique="0">
<longdesc lang="en">
URL for state confirmation.
</longdesc>
<shortdesc>URL for state confirmation</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="max_stop_time" unique="0">
<longdesc lang="en">
Number of seconds to wait during a stop before drastic measures
(force kill) are used on the tomcat process.
This number MUST be less than your cluster stop timeout for the resource.
The default value is five seconds before the timeout value of stop operation.
When it is over this value, it stops a process in kill commands.
This parameter is only effective on Tomcat 6 or later.
</longdesc>
<shortdesc>The max time it should take for proper shutdown.</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="java_home" unique="0" required="1">
<longdesc lang="en">
Home directory of Java.
</longdesc>
<shortdesc>Home directory of Java</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="java_opts" unique="0">
<longdesc lang="en">
Java JVM options used on start and stop.
</longdesc>
<shortdesc>Java options parsed to JVM, used on start and stop.</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_home" unique="0" required="1">
<longdesc lang="en">
Home directory of Tomcat.
</longdesc>
<shortdesc>Home directory of Tomcat</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_base" unique="1">
<longdesc lang="en">
Instance directory of Tomcat
</longdesc>
<shortdesc>Instance directory of Tomcat, defaults to catalina_home</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_out" unique="1">
<longdesc lang="en">
Log file name of Tomcat
</longdesc>
<shortdesc>Log file name of Tomcat, defaults to catalina_base/logs/catalina.out</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_pid" unique="1">
<longdesc lang="en">
A PID file name for Tomcat.
</longdesc>
<shortdesc>A PID file name for Tomcat</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="tomcat_start_script" unique="0" required="0">
<longdesc lang="en">
Absolute path to the custom tomcat start script to use.
</longdesc>
<shortdesc>Tomcat start script location</shortdesc>
<content type="string" default="$TOMCAT_START_SCRIPT" />
</parameter>
<parameter name="tomcat_start_opts" unique="0">
<longdesc lang="en">
Tomcat start options.
</longdesc>
<shortdesc>Tomcat start options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_opts" unique="0">
<longdesc lang="en">
Catalina options, for the start operation only.
</longdesc>
<shortdesc>Catalina options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_tmpdir" unique="1">
<longdesc lang="en">
Temporary directory of Tomcat
</longdesc>
<shortdesc>Temporary directory of Tomcat, defaults to none</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="catalina_rotate_log" unique="0">
<longdesc lang="en">
Rotate catalina.out flag.
</longdesc>
<shortdesc>Rotate catalina.out flag</shortdesc>
<content type="boolean" default="NO" />
</parameter>
<parameter name="catalina_rotatetime" unique="0">
<longdesc lang="en">
catalina.out rotation interval (seconds).
</longdesc>
<shortdesc>catalina.out rotation interval (seconds)</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="java_endorsed_dirs" unique="1">
<longdesc lang="en">
Java_endorsed_dirs of tomcat
</longdesc>
<shortdesc>Java_endorsed_dirs of Tomcat, defaults to none</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="logging_config" unique="1">
<longdesc lang="en">
Logging_config of tomcat
</longdesc>
<shortdesc>Logging_config of Tomcat, defaults to none</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="logging_manager" unique="1">
<longdesc lang="en">
Logging_manager of tomcat
</longdesc>
<shortdesc>Logging_manager of Tomcat, defaults to none.</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5"/>
</actions>
</resource-agent>
END
return $OCF_SUCCESS
}
# Validate the instance configuration.
#
# Checks are not short-circuited: each failing check overwrites rc, so the
# status of the last failing check is returned.
# Returns:
#   $OCF_SUCCESS         - every check passed
#   $OCF_ERR_CONFIGURED  - no start script is known, or max_stop_time is
#                          negative
#   $OCF_ERR_INSTALLED   - the status URL port is not found in server.xml,
#                          or log rotation is enabled without an executable
#                          rotatelogs command
validate_all_tomcat()
{
local port
local rc=$OCF_SUCCESS
ocf_log info "validate_all_tomcat[$TOMCAT_NAME]"
check_binary $WGET
if [ -z "${TOMCAT_START_SCRIPT}" ]; then
- ocf_log err "No default tomcat start script detected. Please specify start script location using the 'tomcat_start_script' option"
+ ocf_exit_reason "No default tomcat start script detected. Please specify start script location using the 'tomcat_start_script' option"
rc=$OCF_ERR_CONFIGURED
fi
# NOTE(review): only negative values are rejected here, although the
# message below says the value must be greater than 0.
if [ -n "$MAX_STOP_TIME" ] && [ "$MAX_STOP_TIME" -lt 0 ]; then
- ocf_log err "max_stop_time must be set to a value greater than 0."
+ ocf_exit_reason "max_stop_time must be set to a value greater than 0."
rc=$OCF_ERR_CONFIGURED
fi
# When the status URL carries an explicit port, take the text after the
# last ":" up to the first "/" as the port and look for a matching
# port="..." attribute in server.xml.
if echo "$RESOURCE_STATUSURL" | grep -q ":[0-9][0-9]*" ; then
port=${RESOURCE_STATUSURL##*:}
port=${port%%/*}
ocf_log debug "Tomcat port is $port"
ocf_log debug "grep port=\"$port\" $CATALINA_BASE/conf/server.xml"
grep "port=\"$port\"" $CATALINA_BASE/conf/server.xml > /dev/null 2>&1
if [ $? -ne 0 ]; then
- ocf_log err "Your configured status URL specifies a port ($port), but the server does not have a connector listening to that port in $CATALINA_BASE/conf/server.xml"
+ ocf_exit_reason "Your configured status URL specifies a port ($port), but the server does not have a connector listening to that port in $CATALINA_BASE/conf/server.xml"
rc=$OCF_ERR_INSTALLED
fi
fi
if ocf_is_true ${CATALINA_ROTATE_LOG}; then
if [ ! -x "$ROTATELOGS" ]; then
- ocf_log err "rotatelogs command does not exist."
+ ocf_exit_reason "rotatelogs command does not exist."
rc=$OCF_ERR_INSTALLED
fi
fi
return $rc
}
# As we stop tomcat, it removes its own pid file...we still want to know
# what it was.
#
# Stores the content of $CATALINA_PID in the global rememberedPID when the
# file exists; otherwise rememberedPID is left untouched.
memorize_pid()
{
	# Quoted so that a path containing whitespace works and an empty
	# CATALINA_PID can never turn this into a bare "cat" reading stdin.
	if [ -f "$CATALINA_PID" ]; then
		rememberedPID=$(cat "$CATALINA_PID")
	fi
}
#
### tomcat RA environment variables
#
# The requested action.
COMMAND=$1

# Instance parameters that have a default. The "-" form of the expansion
# applies the default only when the parameter is unset, so an explicitly
# empty value is kept as is.
TOMCAT_NAME="${OCF_RESKEY_tomcat_name-tomcat}"
RESOURCE_TOMCAT_USER="${OCF_RESKEY_tomcat_user-root}"
RESOURCE_STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}"
TOMCAT_CONSOLE="${OCF_RESKEY_script_log-/var/log/$TOMCAT_NAME.log}"
CATALINA_ROTATE_LOG="${OCF_RESKEY_catalina_rotate_log-NO}"
CATALINA_ROTATETIME="${OCF_RESKEY_catalina_rotatetime-86400}"

# Java and Catalina locations.
JAVA_HOME="${OCF_RESKEY_java_home}"
JAVA_OPTS="${OCF_RESKEY_java_opts}"
JAVA_ENDORSED_DIRS="${OCF_RESKEY_java_endorsed_dirs}"
CATALINA_HOME="${OCF_RESKEY_catalina_home}"
CATALINA_BASE="${OCF_RESKEY_catalina_base-${OCF_RESKEY_catalina_home}}"
CATALINA_OUT="${OCF_RESKEY_catalina_out-$CATALINA_BASE/logs/catalina.out}"
CATALINA_TMPDIR="${OCF_RESKEY_catalina_tmpdir}"
CATALINA_OPTS="-Dname=$TOMCAT_NAME ${OCF_RESKEY_catalina_opts}"

# Remaining pass-through parameters.
MAX_STOP_TIME="${OCF_RESKEY_max_stop_time}"
TOMCAT_START_OPTS="${OCF_RESKEY_tomcat_start_opts}"
TOMCAT_START_SCRIPT="${OCF_RESKEY_tomcat_start_script}"
LOGGING_CONFIG="${OCF_RESKEY_logging_config}"
LOGGING_MANAGER="${OCF_RESKEY_logging_manager}"

# PID file: without an explicit setting it lives in a per-instance state
# directory below $HA_RSCTMP, handed over to the Tomcat user when that user
# is not root.
CATALINA_PID="${OCF_RESKEY_catalina_pid}"
if [ -z "$CATALINA_PID" ]; then
	tomcat_state_dir="${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/"
	mkdir -p "$tomcat_state_dir"
	[ "${RESOURCE_TOMCAT_USER}" = "root" ] || chown ${RESOURCE_TOMCAT_USER} "$tomcat_state_dir"
	CATALINA_PID="${tomcat_state_dir}catalina.pid"
	unset tomcat_state_dir
fi

# Start script: without an explicit setting prefer catalina.sh from
# CATALINA_HOME, then the packaged /usr/sbin wrappers. The wrappers also
# switch on REDIRECT_DEFAULT_CONFIG.
if [ -z "${TOMCAT_START_SCRIPT}" ]; then
	if [ -e "$CATALINA_HOME/bin/catalina.sh" ]; then
		TOMCAT_START_SCRIPT="$CATALINA_HOME/bin/catalina.sh"
	else
		for tomcat_wrapper in /usr/sbin/tomcat /usr/sbin/tomcat6; do
			if [ -e "$tomcat_wrapper" ]; then
				REDIRECT_DEFAULT_CONFIG=1
				TOMCAT_START_SCRIPT="$tomcat_wrapper"
				break
			fi
		done
		unset tomcat_wrapper
	fi
fi
# Exit status used for the "status" action when the resource is stopped.
LSB_STATUS_STOPPED=3

# Exactly one argument (the action) is accepted.
[ $# -eq 1 ] || {
	usage
	exit $OCF_ERR_ARGS
}

# meta-data and help are answered before any installation check.
if [ "$COMMAND" = "meta-data" ]; then
	metadata_tomcat
	exit $OCF_SUCCESS
elif [ "$COMMAND" = "help" ] || [ "$COMMAND" = "usage" ]; then
	usage
	exit $OCF_SUCCESS
fi
# Installation sanity checks. When a required directory or the java binary
# is missing, stop/monitor/status report the resource as stopped (with
# their respective status codes); every other action fails with
# $OCF_ERR_INSTALLED.
if [ ! -d "$JAVA_HOME" -o ! -d "$CATALINA_HOME" -o ! -d "$CATALINA_BASE" ]; then
case $COMMAND in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
esac
- ocf_log err "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist."
+ ocf_exit_reason "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist."
exit $OCF_ERR_INSTALLED
fi
# Make the Tomcat environment visible to child processes.
export JAVA_HOME JAVA_OPTS CATALINA_HOME CATALINA_BASE CATALINA_OUT CATALINA_PID CATALINA_OPTS CATALINA_TMPDIR JAVA_ENDORSED_DIRS LOGGING_CONFIG LOGGING_MANAGER
JAVA=${JAVA_HOME}/bin/java
if [ ! -x "$JAVA" ]; then
case $COMMAND in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
esac
- ocf_log err "java command does not exist."
+ ocf_exit_reason "java command does not exist."
exit $OCF_ERR_INSTALLED
fi
# Locate the rotatelogs binary, but only when log rotation is enabled.
# ROTATELOGS stays empty when rotation is off or no candidate is executable.
ROTATELOGS=""
if ocf_is_true ${CATALINA_ROTATE_LOG}; then
	# Look for rotatelogs/rotatelogs2
	for rotatelogs_candidate in /usr/sbin/rotatelogs /usr/sbin/rotatelogs2; do
		if [ -x "$rotatelogs_candidate" ]; then
			ROTATELOGS=$rotatelogs_candidate
			break
		fi
	done
	unset rotatelogs_candidate
fi
#
# ------------------
# the main script
# ------------------
#
# Dispatch the requested action; every branch ends the script.
case "$COMMAND" in
	start|stop)
		# start and stop share one flow: log, call start_tomcat or
		# stop_tomcat, log the result and exit with it.
		ocf_log debug "[$TOMCAT_NAME] Enter tomcat $COMMAND"
		"${COMMAND}_tomcat"
		func_status=$?
		ocf_log debug "[$TOMCAT_NAME] Leave tomcat $COMMAND $func_status"
		exit $func_status
		;;
	status)
		if ! monitor_tomcat; then
			echo tomcat instance $TOMCAT_NAME is stopped
			exit $OCF_NOT_RUNNING
		fi
		echo tomcat instance $TOMCAT_NAME is running
		exit $OCF_SUCCESS
		;;
	monitor)
		#ocf_log debug "[$TOMCAT_NAME] Enter tomcat monitor"
		monitor_tomcat
		func_status=$?
		#ocf_log debug "[$TOMCAT_NAME] Leave tomcat monitor $func_status"
		exit $func_status
		;;
	meta-data)
		metadata_tomcat
		exit $?
		;;
	validate-all)
		validate_all_tomcat
		exit $?
		;;
	usage|help)
		usage
		exit $OCF_SUCCESS
		;;
	*)
		usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jan 25, 11:21 AM (1 d, 11 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1322340
Default Alt Text
(395 KB)

Event Timeline