diff --git a/heartbeat/CTDB b/heartbeat/CTDB index 9a6f96cde..3419b544b 100755 --- a/heartbeat/CTDB +++ b/heartbeat/CTDB @@ -1,757 +1,757 @@ #!/bin/sh # # OCF Resource Agent for managing CTDB # # Copyright (c) 2009-2010 Novell Inc., Tim Serong # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # OVERVIEW # # When run by itself, CTDB can handle IP failover and includes scripts # to manage various services (Samba, Winbind, HTTP, etc.). When run as # a resource in a Pacemaker cluster, this additional functionality # should not be used; instead one should define separate resources for # CTDB, Samba, Winbind, IP addresses, etc. # # As of 2010-11-17, there is no separate OCF Samba or Winbind RA, so # it is still possible to configure CTDB so that it manages these # resources itself. In future, once Samba and Winbind RAs are # available, this ability will be deprecated and ultimately removed. # # This RA intentionally provides no ability to configure CTDB such that # it manages IP failover, HTTP, NFS, etc. # # # TODO: # - ctdb_stop doesn't really support multiple independent CTDB instances, # unless they're running from distinct ctdbd binaries (it uses pkill # $OCF_RESKEY_ctdbd_binary if "ctdb stop" doesn't work, which it might # not under heavy load - this will kill all ctdbd instances on the # system). OTOH, running multiple CTDB instances per node is, well, # AFAIK, completely crazy. Can't run more than one in a vanilla CTDB # cluster, with the CTDB init script. So it might be nice to address # this for complete semantic correctness of the RA, but shouldn't # actually cause any trouble in real life. # - As much as possible, get rid of auto config generation # - Especially smb.conf # - Verify timeouts are sane # - Monitor differentiate between error and not running? # - Do we need to verify globally unique setting? # - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on # current nodes) # - Look at enabling set_ctdb_variables() if necessary. # - Probably possible for sysconfig file to not be restored if # CTDB dies unexpectedly. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Default parameter values: # Some distro's ctdb package stores the persistent db in /var/lib/ctdb, # others store in /var/ctdb. This attempts to detect the correct default # directory. var_prefix="/var/lib/ctdb" if [ ! -d "$var_prefix" ] && [ -d "/var/ctdb" ]; then var_prefix="/var/ctdb" fi run_prefix="/run" if [ ! -d "$var_prefix" ] && [ -d "/var/run" ]; then var_prefix="/var/run" fi : ${OCF_RESKEY_ctdb_manages_samba:=no} : ${OCF_RESKEY_ctdb_manages_winbind:=no} : ${OCF_RESKEY_ctdb_service_smb:=""} : ${OCF_RESKEY_ctdb_service_nmb:=""} : ${OCF_RESKEY_ctdb_service_winbind:=""} : ${OCF_RESKEY_ctdb_samba_skip_share_check:=yes} : ${OCF_RESKEY_ctdb_monitor_free_memory:=100} : ${OCF_RESKEY_ctdb_start_as_disabled:=no} : ${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb} : ${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb} : ${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd} : ${OCF_RESKEY_ctdb_socket:=${var_prefix}/ctdb.socket} : ${OCF_RESKEY_ctdb_dbdir:=${var_prefix}} : ${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb} : ${OCF_RESKEY_ctdb_rundir:=${run_prefix}/ctdb} : ${OCF_RESKEY_ctdb_debuglevel:=2} : ${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf} : ${OCF_RESKEY_smb_passdb_backend:=tdbsam} : ${OCF_RESKEY_smb_idmap_backend:=tdb2} ####################################################################### meta_data() { cat < 1.0 This resource agent manages CTDB, allowing one to use Clustered Samba in a Linux-HA/Pacemaker cluster. You need a shared filesystem (e.g. OCFS2 or GFS2) on which the CTDB lock will be stored. Create /etc/ctdb/nodes containing a list of private IP addresses of each node in the cluster, then configure this RA as a clone. This agent expects the samba and windbind resources to be managed outside of CTDB's control as a separate set of resources controlled by the cluster manager. The optional support for enabling CTDB management of these daemons will be depreciated. For more information see http://linux-ha.org/wiki/CTDB_(resource_agent) CTDB Resource Agent The location of a shared lock file, common across all nodes. This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock CTDB shared lock file Should CTDB manage starting/stopping the Samba service for you? This will be deprecated in future, in favor of configuring a separate Samba resource. Should CTDB manage Samba? Should CTDB manage starting/stopping the Winbind service for you? This will be deprecated in future, in favor of configuring a separate Winbind resource. Should CTDB manage Winbind? Name of smb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of smb init script Name of nmb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of nmb init script Name of winbind init script. Only necessary if CTDB is managing Winbind directly. Will usually be auto-detected. Name of winbind init script If there are very many shares it may not be feasible to check that all of them are available during each monitoring interval. In that case this check can be disabled. Skip share check during monitor? If the amount of free memory drops below this value the node will become unhealthy and ctdb and all managed services will be shutdown. Once this occurs, the administrator needs to find the reason for the OOM situation, rectify it and restart ctdb with "service ctdb start". Minimum amount of free memory (MB) When set to yes, the CTDB node will start in DISABLED mode and not host any public ip addresses. Start CTDB disabled? The directory containing various CTDB configuration files. The "nodes" and "notify.sh" scripts are expected to be in this directory, as is the "events.d" subdirectory. CTDB config file directory Full path to the CTDB binary. CTDB binary path Full path to the CTDB cluster daemon binary. CTDB Daemon binary path Full path to the domain socket that ctdbd will create, used for local clients to attach and communicate with the ctdb daemon. CTDB socket location The directory to put the local CTDB database files in. Persistent database files will be put in ctdb_dbdir/persistent. CTDB database directory Full path to log file. To log to syslog instead, use the value "syslog". CTDB log file location Full path to ctdb runtime directory, used for storage of socket lock state. CTDB runtime directory location What debug level to run at (0-10). Higher means more verbose. CTDB debug level Path to default samba config file. Only necessary if CTDB is managing Samba. Path to smb.conf The directory for smbd to use for storing such files as smbpasswd and secrets.tdb. Old versions of CTBD (prior to 1.0.50) required this to be on shared storage. This parameter should not be set for current versions of CTDB, and only remains in the RA for backwards compatibility. Samba private dir (deprecated) Which backend to use for storing user and possibly group information. Only necessary if CTDB is managing Samba. Samba passdb backend Which backend to use for SID/uid/gid mapping. Only necessary if CTDB is managing Samba. Samba idmap backend Which fileid:algorithm to use with vfs_fileid. The correct value depends on which clustered filesystem is in use, e.g.: for OCFS2, this should be set to "fsid". Only necessary if CTDB is managing Samba. Samba VFS fileid algorithm END } ####################################################################### # Figure out path to /etc/sysconfig/ctdb (same logic as # loadconfig() from /etc/ctdb/functions if [ -f /etc/sysconfig/ctdb ]; then CTDB_SYSCONFIG=/etc/sysconfig/ctdb elif [ -f /etc/default/ctdb ]; then CTDB_SYSCONFIG=/etc/default/ctdb elif [ -f $OCF_RESKEY_ctdb_config_dir/ctdb ]; then CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdb fi # Backup paths CTDB_SYSCONFIG_BACKUP=${CTDB_SYSCONFIG}.ctdb-ra-orig invoke_ctdb() { # CTDB's defaults are: local timeout=3 local timelimit=120 # ...but we override with the timeout for the current op: if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then timeout=$((OCF_RESKEY_CRM_meta_timeout/1000)) timelimit=$((OCF_RESKEY_CRM_meta_timeout/1000)) fi $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket \ -t $timeout -T $timelimit \ "$@" } # Enable any event scripts that are explicitly required. # Any others will ultimately be invoked or not based on how they ship # with CTDB, but will generally have no effect, beacuase the relevant # CTDB_MANAGES_* options won't be set in /etc/sysconfig/ctdb. enable_event_scripts() { local event_dir=$OCF_RESKEY_ctdb_config_dir/events.d if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then chmod u+x $event_dir/10.interface else chmod a-x $event_dir/10.interface fi if [ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ]; then chmod u+x $event_dir/11.routing else chmod a-x $event_dir/11.routing fi if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || \ ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then chmod u+x $event_dir/50.samba else chmod a-x $event_dir/50.samba fi } # This function has no effect (currently no way to set CTDB_SET_*) # but remains here in case we need it in future. set_ctdb_variables() { rv=$OCF_SUCCESS set | grep ^CTDB_SET_ | cut -d_ -f3- | while read v; do varname=`echo $v | cut -d= -f1` value=`echo $v | cut -d= -f2` invoke_ctdb setvar $varname $value || rv=$OCF_ERR_GENERIC done || rv=$OCF_ERR_GENERIC return $rv } # Add necessary settings to /etc/samba/smb.conf. In a perfect world, # we'd be able to generate a new, temporary, smb.conf file somewhere, # something like: # include = /etc/samba/smb.conf # [global] # clustering = yes # # ...etc... # Unfortunately, we can't do this, because there's no way to tell the # smb init script where the temporary config is, so we just edit # the default config file. init_smb_conf() { # Don't screw around with the config if CTDB isn't managing Samba! ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0 # replace these things in smb.conf local repl='# CTDB-RA:|passdb backend|clustering|idmap backend|private dir|ctdbd socket' local private_dir [ -n "$OCF_RESKEY_smb_private_dir" ] && private_dir="\tprivate dir = $OCF_RESKEY_smb_private_dir\n" local vfs_fileid local do_vfs=0 if [ -n "$OCF_RESKEY_smb_fileid_algorithm" ]; then repl="${repl}|fileid:algorithm|fileid:mapping" vfs_fileid="\tfileid:algorithm = $OCF_RESKEY_smb_fileid_algorithm\n" if sed -n '/^[[:space:]]*\[global\]/,/^[[:space:]]*\[/p' $OCF_RESKEY_smb_conf | \ grep -Eq '^[[:space:]]*vfs objects'; then # vfs objects already specified, will append fileid to existing line do_vfs=1 else vfs_fileid="$vfs_fileid\tvfs objects = fileid\n" fi fi awk ' /^[[:space:]]*\[/ { global = 0 } /^[[:space:]]*\[global\]/ { global = 1 } { if(global) { if ('$do_vfs' && $0 ~ /^[[:space:]]vfs objects/ && $0 !~ /fileid/) { print $0" fileid" } else if ($0 !~ /^[[:space:]]*('"$repl"')/) { print } } else { print } }' $OCF_RESKEY_smb_conf | sed "/^[[:space:]]*\[global\]/ a\\ \t# CTDB-RA: Begin auto-generated section (do not change below)\n\ \tpassdb backend = $OCF_RESKEY_smb_passdb_backend\n\ \tclustering = yes\n\ \tidmap backend = $OCF_RESKEY_smb_idmap_backend\n\ \tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir$vfs_fileid\ \t# CTDB-RA: End auto-generated section (do not change above)" > $OCF_RESKEY_smb_conf.$$ mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf } # Get rid of that section we added cleanup_smb_conf() { ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0 sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' $OCF_RESKEY_smb_conf > $OCF_RESKEY_smb_conf.$$ mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf } append_ctdb_sysconfig() { [ -n "$2" ] && echo "$1=$2" >> $CTDB_SYSCONFIG } # Generate a new, minimal CTDB config file that's just enough # to get CTDB running as configured by the RA parameters. generate_ctdb_sysconfig() { # Backup existing sysconfig if we're not already using an auto-generated one grep -qa '# CTDB-RA: Auto-generated' $CTDB_SYSCONFIG || cp -p $CTDB_SYSCONFIG $CTDB_SYSCONFIG_BACKUP if [ $? -ne 0 ]; then ocf_log warn "Unable to backup $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP" fi ocf_log info "Generating new $CTDB_SYSCONFIG" # Note to maintainers and other random hackers: # Parameters may need to be set here, for CTDB event # scripts to pick up, or may need to be passed to ctdbd # when starting, or both. Be careful. The CTDB source # tree and manpages are your friends. As a concrete # example, setting CTDB_START_AS_DISABLED here is # completely useless, as this is actually a command line # argument for ctdbd; it's not used anywhere else. cat >$CTDB_SYSCONFIG </dev/null for pdbase in $(ls $persistent_db_dir/*.tdb.[0-9] 2>/dev/null$) ; do /usr/bin/tdbdump $pdbase >/dev/null 2>/dev/null || { - ocf_log err "Persistent database $pdbase is corrupted! CTDB will not start." + ocf_exit_reason "Persistent database $pdbase is corrupted! CTDB will not start." return $OCF_ERR_GENERIC } done # Add necessary configuration to smb.conf init_smb_conf if [ $? -ne 0 ]; then - ocf_log err "Failed to update $OCF_RESKEY_smb_conf." + ocf_exit_reason "Failed to update $OCF_RESKEY_smb_conf." return $OCF_ERR_GENERIC fi # Generate new CTDB sysconfig generate_ctdb_sysconfig enable_event_scripts # Use logfile by default, or syslog if asked for local log_option="--logfile=$OCF_RESKEY_ctdb_logfile" if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then log_option="--syslog" elif [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then # ensure the logfile's directory exists, otherwise ctdb will fail to start mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile) fi # ensure ctdb's rundir exists, otherwise it will fail to start mkdir -p $OCF_RESKEY_ctdb_rundir 2>/dev/null # public addresses file (should not be present, but need to set for correctness if it is) local pub_addr_option="" [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \ pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses" # start as disabled local start_as_disabled="--start-as-disabled" ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled="" # Start her up $OCF_RESKEY_ctdbd_binary \ --reclock=$OCF_RESKEY_ctdb_recovery_lock \ --nlist=$OCF_RESKEY_ctdb_config_dir/nodes \ --socket=$OCF_RESKEY_ctdb_socket \ --dbdir=$OCF_RESKEY_ctdb_dbdir \ --dbdir-persistent=$OCF_RESKEY_ctdb_dbdir/persistent \ --event-script-dir=$OCF_RESKEY_ctdb_config_dir/events.d \ --notification-script=$OCF_RESKEY_ctdb_config_dir/notify.sh \ --transport=tcp \ $start_as_disabled $log_option $pub_addr_option \ -d $OCF_RESKEY_ctdb_debuglevel if [ $? -ne 0 ]; then # cleanup smb.conf cleanup_smb_conf - ocf_log err "Failed to execute $OCF_RESKEY_ctdbd_binary." + ocf_exit_reason "Failed to execute $OCF_RESKEY_ctdbd_binary." return $OCF_ERR_GENERIC else # Wait a bit for CTDB to stabilize # (until start times out if necessary) while true; do # Initial sleep is intentional (ctdb init script # has sleep after ctdbd start, but before invoking # ctdb to talk to it) sleep 1 status=$(invoke_ctdb status 2>/dev/null) if [ $? -ne 0 ]; then # CTDB will be running, kill it before returning ctdb_stop - ocf_log err "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status" + ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status" return $OCF_ERR_GENERIC fi if ! echo $status | grep -qs 'UNHEALTHY (THIS'; then # Status does not say this node is unhealthy, # so we're good to go. Do a bit of final # setup and (hopefully) return success. set_ctdb_variables return $? fi done fi # ctdbd will (or can) actually still be running at this point, so kill it ctdb_stop - ocf_log err "Timeout waiting for CTDB to stabilize" + ocf_exit_reason "Timeout waiting for CTDB to stabilize" return $OCF_ERR_GENERIC } ctdb_stop() { # Do nothing if already stopped pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS # Tell it to die nicely invoke_ctdb shutdown >/dev/null 2>&1 rv=$? # No more Mr. Nice Guy count=0 while pkill -0 -f $OCF_RESKEY_ctdbd_binary ; do sleep 1 count=$(($count + 1)) [ $count -gt 10 ] && { ocf_log info "killing ctdbd " pkill -9 -f $OCF_RESKEY_ctdbd_binary pkill -9 -f ${OCF_RESKEY_ctdb_config_dir}/events.d/ } done # Cleanup smb.conf cleanup_smb_conf # It was a clean shutdown, return success [ $rv -eq $OCF_SUCCESS ] && return $OCF_SUCCESS # Unclean shutdown, return success if there's no ctdbds left (we # killed them forcibly, but at least they're good and dead). pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS # Problem: ctdb shutdown didn't work and neither did some vigorous # kill -9ing. Only thing to do is report failure. return $OCF_ERR_GENERIC } ctdb_monitor() { local status # "ctdb status" exits non-zero if CTDB isn't running. # It can also exit non-zero if there's a timeout (ctdbd blocked, # stalled, massive load, or otherwise wedged). If it's actually # not running, STDERR will say "Errno:Connection refused(111)", # whereas if it's wedged, it'll say various other unpleasant things. status=$(invoke_ctdb status 2>&1) if [ $? -ne 0 ]; then if echo $status | grep -qs 'Connection refused'; then return $OCF_NOT_RUNNING elif echo $status | grep -qs 'No such file or directory'; then return $OCF_NOT_RUNNING else - ocf_log err "CTDB status call failed: $status" + ocf_exit_reason "CTDB status call failed: $status" return $OCF_ERR_GENERIC fi fi if echo $status | grep -Eqs '(OK|DISABLED) \(THIS'; then return $OCF_SUCCESS fi - ocf_log err "CTDB status is bad: $status" + ocf_exit_reason "CTDB status is bad: $status" return $OCF_ERR_GENERIC } ctdb_validate() { # Required binaries (full path to tdbdump is intentional, as that's # what's used in ctdb_start, which was lifted from the init script) for binary in pkill /usr/bin/tdbdump; do check_binary $binary done if [ -z "$CTDB_SYSCONFIG" ]; then - ocf_log err "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)" + ocf_exit_reason "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)" return $OCF_ERR_INSTALLED fi if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && [ ! -f "$OCF_RESKEY_smb_conf" ]; then - ocf_log err "Samba config file '$OCF_RESKEY_smb_conf' does not exist." + ocf_exit_reason "Samba config file '$OCF_RESKEY_smb_conf' does not exist." return $OCF_ERR_INSTALLED fi if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!" fi if [ ! -f "$OCF_RESKEY_ctdb_config_dir/nodes" ]; then - ocf_log err "$OCF_RESKEY_ctdb_config_dir/nodes does not exist." + ocf_exit_reason "$OCF_RESKEY_ctdb_config_dir/nodes does not exist." return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_ctdb_recovery_lock" ]; then - ocf_log err "ctdb_recovery_lock not specified." + ocf_exit_reason "ctdb_recovery_lock not specified." return $OCF_ERR_CONFIGURED fi lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock") touch "$lock_dir/$$" 2>/dev/null if [ $? != 0 ]; then - ocf_log err "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." + ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." return $OCF_ERR_ARGS fi rm "$lock_dir/$$" return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ctdb_start;; stop) ctdb_stop;; monitor) ctdb_monitor;; validate-all) ctdb_validate;; usage|help) ctdb_usage exit $OCF_SUCCESS ;; *) ctdb_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/Delay b/heartbeat/Delay index f50539110..9cfa939d6 100755 --- a/heartbeat/Delay +++ b/heartbeat/Delay @@ -1,223 +1,223 @@ #!/bin/sh # # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # This script is a test resource for introducing delay. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_startdelay # OCF_RESKEY_stopdelay # OCF_RESKEY_mondelay # # # OCF_RESKEY_startdelay defaults to 30 (seconds) # OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay # OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay # # # This is really a test resource script. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-! usage: $0 {start|stop|status|monitor|meta-data|validate-all} ! } meta_data() { cat < 1.0 This script is a test resource for introducing delay. Waits for a defined timespan How long in seconds to delay on start operation. Start delay How long in seconds to delay on stop operation. Defaults to "startdelay" if unspecified. Stop delay How long in seconds to delay on monitor operation. Defaults to "startdelay" if unspecified. Monitor delay END } Delay_stat() { ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} monitor } Delay_Status() { if Delay_stat then ocf_log info "Delay is running OK" return $OCF_SUCCESS else ocf_log info "Delay is stopped" return $OCF_NOT_RUNNING fi } Delay_Monitor() { Delay_Validate_All -q sleep $OCF_RESKEY_mondelay Delay_Status } Delay_Start() { if Delay_stat then ocf_log info "Delay already running." return $OCF_SUCCESS else Delay_Validate_All -q ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start rc=$? sleep $OCF_RESKEY_startdelay if [ $rc -ne 0 ] then return $OCF_ERR_PERM fi return $OCF_SUCCESS fi } Delay_Stop() { if Delay_stat then Delay_Validate_All -q ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop rc=$? sleep $OCF_RESKEY_stopdelay if [ $rc -ne 0 ] then return $OCF_ERR_PERM fi return $OCF_SUCCESS else ocf_log info "Delay already stopped." return $OCF_SUCCESS fi } # Check if all the arguments are valid numbers, a string is considered valid if: # 1. It does not contain any character but digits and period "."; # 2. The period "." does not occur more than once Are_Valid_Numbers() { for i in "$@"; do echo $i |grep -v [^0-9.] |grep -q -v [.].*[.] if test $? -ne 0; then return $OCF_ERR_ARGS fi done return $OCF_SUCCESS } Delay_Validate_All() { # Be quiet when specified -q option _and_ validation succeded getopts "q" option if test $option = "q"; then quiet=yes else quiet=no fi shift $(($OPTIND -1)) if Are_Valid_Numbers $OCF_RESKEY_startdelay $OCF_RESKEY_stopdelay \ $OCF_RESKEY_mondelay; then if test $quiet = "no"; then echo "Validate OK" fi # _Return_ on validation success return $OCF_SUCCESS else - echo "Some of the instance parameters are invalid" + ocf_exit_reason "Some of the instance parameters are invalid" # _Exit_ on validation failure exit $OCF_ERR_ARGS fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_startdelay=30} : ${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay} : ${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay} case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; start) Delay_Start ;; stop) Delay_Stop ;; monitor) Delay_Monitor ;; status) Delay_Status ;; validate-all) Delay_Validate_All ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_ARGS ;; esac exit $? diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem index 00ba454c8..cefe52bb9 100755 --- a/heartbeat/Filesystem +++ b/heartbeat/Filesystem @@ -1,883 +1,883 @@ #!/bin/sh # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Filesystem # Description: Manages a Filesystem on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # # usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data} # # OCF parameters are as below: # OCF_RESKEY_device # OCF_RESKEY_directory # OCF_RESKEY_fstype # OCF_RESKEY_options # OCF_RESKEY_statusfile_prefix # OCF_RESKEY_run_fsck # OCF_RESKEY_fast_stop # OCF_RESKEY_force_clones # #OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0 # Or a -U or -L option for mount, or an NFS mount specification #OCF_RESKEY_directory : the mount point for the filesystem #OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2 #OCF_RESKEY_options : options to be given to the mount command via -o #OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring #OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no #OCF_RESKEY_fast_stop : fast stop: yes(default)/no #OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts # for each brick in a glusterfs setup # # # This assumes you want to manage a filesystem on a shared (SCSI) bus, # on a replicated device (such as DRBD), or a network filesystem (such # as NFS or Samba). # # Do not put this filesystem in /etc/fstab. This script manages all of # that for you. # # NOTE: If 2 or more nodes mount the same file system read-write, and # that file system is not designed for that specific purpose # (such as GFS or OCFS2), and is not a network file system like # NFS or Samba, then the filesystem is going to become # corrupted. # # As a result, you should use this together with the stonith # option and redundant, independent communications paths. # # If you don't do this, don't blame us when you scramble your # disk. ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults DFLT_STATUSDIR=".Filesystem_status/" # Variables used by multiple methods HOSTOS=`uname` # The status file is going to an extra directory, by default # prefix=${OCF_RESKEY_statusfile_prefix} : ${prefix:=$DFLT_STATUSDIR} suffix="${OCF_RESOURCE_INSTANCE}" [ "$OCF_RESKEY_CRM_meta_clone" ] && suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone" suffix="${suffix}_`uname -n`" STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|meta-data} EOT } meta_data() { cat < 1.1 Resource script for Filesystem. It manages a Filesystem on a shared storage medium. The standard monitor operation of depth 0 (also known as probe) checks if the filesystem is mounted. If you want deeper tests, set OCF_CHECK_LEVEL to one of the following values: 10: read first 16 blocks of the device (raw read) This doesn't exercise the filesystem at all, but the device on which the filesystem lives. This is noop for non-block devices such as NFS, SMBFS, or bind mounts. 20: test if a status file can be written and read The status file must be writable by root. This is not always the case with an NFS mount, as NFS exports usually have the "root_squash" option set. In such a setup, you must either use read-only monitoring (depth=10), export with "no_root_squash" on your NFS server, or grant world write permissions on the directory where the status file is to be placed. Manages filesystem mounts The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. block device The mount point for the filesystem. mount point The type of filesystem to be mounted. filesystem type Any extra options to be given as -o options to mount. For bind mounts, add "bind" here and set fstype to "none". We will do the right thing for options such as "bind,ro". options The prefix to be used for a status file for resource monitoring with depth 20. If you don't specify this parameter, all status files will be created in a separate directory. status file prefix Specify how to decide whether to run fsck or not. "auto" : decide to run fsck depending on the fstype(default) "force" : always run fsck regardless of the fstype "no" : do not run fsck ever. run_fsck Normally, we expect no users of the filesystem and the stop operation to finish quickly. If you cannot control the filesystem users easily and want to prevent the stop action from failing, then set this parameter to "no" and add an appropriate timeout for the stop operation. fast stop The use of a clone setup for local filesystems is forbidden by default. For special setups like glusterfs, cloning a mount of a local device with a filesystem like ext4 or xfs independently on several nodes is a valid use case. Only set this to "true" if you know what you are doing! allow running as a clone, regardless of filesystem type This option allows specifying how to handle processes that are currently accessing the mount directory. "true" : Default value, kill processes accessing mount point "safe" : Kill processes accessing mount point using methods that avoid functions that could potentially block during process detection "false" : Do not kill any processes. The 'safe' option uses shell logic to walk the /procs/ directory for pids using the mount point while the default option uses the fuser cli tool. fuser is known to perform operations that can potentially block if unresponsive nfs mounts are in use on the system. Kill processes before unmount END } # # Make sure the kernel does the right thing with the FS buffers # This function should be called after unmounting and before mounting # It may not be necessary in 2.4 and later kernels, but it shouldn't hurt # anything either... # # It's really a bug that you have to do this at all... # flushbufs() { if have_binary $BLOCKDEV ; then if [ "$blockdevice" = "yes" ] ; then $BLOCKDEV --flushbufs $1 return $? fi fi return 0 } # Take advantage of /etc/mtab if present, use portable mount command # otherwise. Normalize format to "dev mountpoint fstype". is_bind_mount() { echo "$options" | grep -w bind >/dev/null 2>&1 } list_mounts() { local inpf="" if [ -e "/proc/mounts" ] && ! is_bind_mount; then inpf=/proc/mounts elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then inpf=/etc/mtab fi if [ "$inpf" ]; then cut -d' ' -f1,2,3 < $inpf else $MOUNT | cut -d' ' -f1,3,5 fi } determine_blockdevice() { if [ $blockdevice = "yes" ]; then return fi # Get the current real device name, if possible. # (specified devname could be -L or -U...) case "$FSTYPE" in nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|none) : ;; *) DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1` if [ -b "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Lists all filesystems potentially mounted under a given path, # excluding the path itself. list_submounts() { list_mounts | grep " $1/" | cut -d' ' -f2 | sort -r } # kernels < 2.6.26 can't handle bind remounts bind_kernel_check() { echo "$options" | grep -w ro >/dev/null 2>&1 || return uname -r | awk -F. ' $1==2 && $2==6 { sub("[^0-9].*","",$3); if ($3<26) exit(1); }' [ $? -ne 0 ] && ocf_log warn "kernel `uname -r` cannot handle read only bind mounts" } bind_mount() { if is_bind_mount && [ "$options" != "-o bind" ] then bind_kernel_check bind_opts=`echo $options | sed 's/bind/remount/'` $MOUNT $bind_opts $MOUNTPOINT else true # make sure to return OK fi } is_option() { echo $OCF_RESKEY_options | grep -w "$1" >/dev/null 2>&1 } is_fsck_needed() { case $OCF_RESKEY_run_fsck in force) true;; no) false;; ""|auto) case $FSTYPE in ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs) false;; *) true;; esac;; *) ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'" OCF_RESKEY_run_fsck="auto" is_fsck_needed;; esac } fstype_supported() { local support="$FSTYPE" local rc if [ "X${HOSTOS}" != "XOpenBSD" ];then # skip checking /proc/filesystems for obsd return $OCF_SUCCESS fi if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then : No FSTYPE specified, rely on the system has the right file-system support already return $OCF_SUCCESS fi # support fuse-filesystems (e.g. GlusterFS) case $FSTYPE in glusterfs) support="fuse";; esac grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -eq 0 ]; then # found the fs type return $OCF_SUCCESS fi # if here, we should attempt to load the module and then # check the if the filesystem support exists again. $MODPROBE $support >/dev/null if [ $? -ne 0 ]; then - ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernal module" + ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernal module" return $OCF_ERR_INSTALLED fi # It is possible for the module to load and not be complete initialized # before we check /proc/filesystems again. Give this a few trys before # giving up entirely. for try in $(seq 5); do grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -eq 0 ] ; then # yes. found the filesystem after doing the modprobe return $OCF_SUCCESS fi ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again" sleep 1 done - ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems" + ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems" return $OCF_ERR_INSTALLED } # # START: Start up the filesystem # Filesystem_start() { # See if the device is already mounted. if Filesystem_status >/dev/null 2>&1 ; then ocf_log info "Filesystem $MOUNTPOINT is already mounted." return $OCF_SUCCESS fi fstype_supported || exit $OCF_ERR_INSTALLED # Check the filesystem & auto repair. # NOTE: Some filesystem types don't need this step... Please modify # accordingly if [ $blockdevice = "yes" ]; then if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then - ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" + ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" exit $OCF_ERR_INSTALLED fi if is_fsck_needed; then ocf_log info "Starting filesystem check on $DEVICE" if [ -z "$FSTYPE" ]; then $FSCK -p $DEVICE else $FSCK -t $FSTYPE -p $DEVICE fi # NOTE: if any errors at all are detected, it returns non-zero # if the error is >= 4 then there is a big problem if [ $? -ge 4 ]; then - ocf_log err "Couldn't sucessfully fsck filesystem for $DEVICE" + ocf_exit_reason "Couldn't sucessfully fsck filesystem for $DEVICE" return $OCF_ERR_GENERIC fi fi fi [ -d "$MOUNTPOINT" ] || ocf_run mkdir -p $MOUNTPOINT if [ ! -d "$MOUNTPOINT" ] ; then - ocf_log err "Couldn't find directory [$MOUNTPOINT] to use as a mount point" + ocf_exit_reason "Couldn't find directory [$MOUNTPOINT] to use as a mount point" exit $OCF_ERR_INSTALLED fi flushbufs $DEVICE # Mount the filesystem. case "$FSTYPE" in none) $MOUNT $options $DEVICE $MOUNTPOINT && bind_mount ;; "") $MOUNT $options $DEVICE $MOUNTPOINT ;; *) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;; esac if [ $? -ne 0 ]; then - ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT" + ocf_exit_reason "Couldn't mount filesystem $DEVICE on $MOUNTPOINT" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # end of Filesystem_start get_pids() { local dir=$1 local procs local mmap_procs if ocf_is_true "$FORCE_UNMOUNT"; then if [ "X${HOSTOS}" = "XOpenBSD" ];then fstat | grep $dir | awk '{print $3}' else $FUSER -m $dir 2>/dev/null fi elif [ "$FORCE_UNMOUNT" = "safe" ]; then procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}') mmap_procs=$(grep " ${dir}" /proc/[0-9]*/maps | awk -F/ '{print $3}') printf "${procs}\n${mmap_procs}" | sort | uniq fi } signal_processes() { local dir=$1 local sig=$2 local pids pid # fuser returns a non-zero return code if none of the # specified files is accessed or in case of a fatal # error. pids=$(get_pids "$dir") if [ -z "$pids" ]; then ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'" return fi for pid in $pids; do ocf_log info "sending signal $sig to: `ps -f $pid | tail -1`" kill -s $sig $pid done } try_umount() { local SUB=$1 $UMOUNT $umount_force $SUB list_mounts | grep -q " $SUB " >/dev/null 2>&1 || { ocf_log info "unmounted $SUB successfully" return $OCF_SUCCESS } return $OCF_ERR_GENERIC } fs_stop() { local SUB=$1 timeout=$2 sig cnt for sig in TERM KILL; do cnt=$((timeout/2)) # try half time with TERM while [ $cnt -gt 0 ]; do try_umount $SUB && return $OCF_SUCCESS - ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig" + ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig" signal_processes $SUB $sig cnt=$((cnt-1)) sleep 1 done done return $OCF_ERR_GENERIC } # # STOP: Unmount the filesystem # Filesystem_stop() { # See if the device is currently mounted Filesystem_status >/dev/null 2>&1 if [ $? -eq $OCF_NOT_RUNNING ]; then # Already unmounted, wonderful. rc=$OCF_SUCCESS else # Wipe the status file, but continue with a warning if # removal fails -- the file system might be read only if [ $OCF_CHECK_LEVEL -eq 20 ]; then rm -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log warn "Failed to remove status file ${STATUSFILE}." fi fi # Determine the real blockdevice this is mounted on (if # possible) prior to unmounting. determine_blockdevice # For networked filesystems, there's merit in trying -f: case "$FSTYPE" in nfs4|nfs|cifs|smbfs) umount_force="-f" ;; esac # Umount all sub-filesystems mounted under $MOUNTPOINT/ too. local timeout for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do ocf_log info "Trying to unmount $SUB" if ocf_is_true "$FAST_STOP"; then timeout=6 else timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"} timeout=$((timeout/1000)) fi fs_stop $SUB $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then - ocf_log err "Couldn't unmount $SUB, giving up!" + ocf_exit_reason "Couldn't unmount $SUB, giving up!" fi done fi flushbufs $DEVICE return $rc } # end of Filesystem_stop # # STATUS: is the filesystem mounted or not? # Filesystem_status() { if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then rc=$OCF_SUCCESS msg="$MOUNTPOINT is mounted (running)" else rc=$OCF_NOT_RUNNING msg="$MOUNTPOINT is unmounted (stopped)" fi # Special case "monitor" to check whether the UUID cached and # on-disk still match? case "$OP" in status) ocf_log info "$msg";; esac return $rc } # end of Filesystem_status # Note: the read/write tests below will stall in case the # underlying block device (or in the case of a NAS mount, the # NAS server) has gone away. In that case, if I/O does not # return to normal in time, the operation hits its timeout # and it is up to the CRM to initiate appropriate recovery # actions (such as fencing the node). # # MONITOR 10: read the device # Filesystem_monitor_10() { if [ "$blockdevice" = "no" ] ; then ocf_log warn "$DEVICE is not a block device, monitor 10 is noop" return $OCF_SUCCESS fi dd_opts="iflag=direct bs=4k count=1" err_output=`dd if=$DEVICE $dd_opts 2>&1 >/dev/null` if [ $? -ne 0 ]; then - ocf_log err "Failed to read device $DEVICE" + ocf_exit_reason "Failed to read device $DEVICE" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # MONITOR 20: write and read a status file # Filesystem_monitor_20() { if [ "$blockdevice" = "no" ] ; then # O_DIRECT not supported on cifs/smbfs dd_opts="oflag=sync bs=4k conv=fsync,sync" else # Writing to the device in O_DIRECT mode is imperative # to bypass caches. dd_opts="oflag=direct,sync bs=4k conv=fsync,sync" fi status_dir=`dirname $STATUSFILE` [ -d "$status_dir" ] || mkdir -p "$status_dir" err_output=`echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1` if [ $? -ne 0 ]; then - ocf_log err "Failed to write status file ${STATUSFILE}" + ocf_exit_reason "Failed to write status file ${STATUSFILE}" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi test -f ${STATUSFILE} if [ $? -ne 0 ]; then - ocf_log err "Cannot stat the status file ${STATUSFILE}" + ocf_exit_reason "Cannot stat the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi cat ${STATUSFILE} > /dev/null if [ $? -ne 0 ]; then - ocf_log err "Cannot read the status file ${STATUSFILE}" + ocf_exit_reason "Cannot read the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } Filesystem_monitor() { Filesystem_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then case "$OCF_CHECK_LEVEL" in 10) Filesystem_monitor_10; rc=$?;; 20) Filesystem_monitor_20; rc=$?;; *) - ocf_log err "unsupported monitor level $OCF_CHECK_LEVEL" + ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL" rc=$OCF_ERR_CONFIGURED ;; esac fi return $rc } # end of Filesystem_monitor # # VALIDATE_ALL: Are the instance parameters valid? # FIXME!! The only part that's useful is the return code. # This code always returns $OCF_SUCCESS (!) # Filesystem_validate_all() { if [ -n $MOUNTPOINT -a ! -d $MOUNTPOINT ]; then ocf_log warn "Mountpoint $MOUNTPOINT does not exist" fi # Check if the $FSTYPE is workable # NOTE: Without inserting the $FSTYPE module, this step may be imprecise # TODO: This is Linux specific crap. if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then cut -f2 /proc/filesystems |grep -q ^$FSTYPE$ if [ $? -ne 0 ]; then modpath=/lib/modules/`uname -r` moddep=$modpath/modules.dep # Do we have $FSTYPE in modules.dep? cut -d' ' -f1 $moddep |grep -q "^$modpath.*$FSTYPE\.k\?o:$" if [ $? -ne 0 ]; then ocf_log info "It seems we do not have $FSTYPE support" fi fi fi # If we are supposed to do monitoring with status files, then # we need a utility to write in O_DIRECT mode. if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary dd # Note: really old coreutils version do not support # the "oflag" option for dd. We don't check for that # here. In case dd does not support oflag, monitor is # bound to fail, with dd spewing an error message to # the logs. On such systems, we must do without status # file monitoring. fi #TODO: How to check the $options ? return $OCF_SUCCESS } # # set the blockdevice variable to "no" or "yes" # set_blockdevice_var() { blockdevice=no # these are definitely not block devices case $FSTYPE in nfs4|nfs|smbfs|cifs|none|glusterfs|ceph) return;; esac if `is_option "loop"`; then return fi case $DEVICE in -*) # Oh... An option to mount instead... Typically -U or -L ;; /dev/null) # Special case for BSC blockdevice=yes ;; *) if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" fi if [ ! -d "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Check the arguments passed to this script if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # Check the OCF_RESKEY_ environment variables... FORCE_UNMOUNT="yes" if [ -n "${OCF_RESKEY_force_unmount}" ]; then FORCE_UNMOUNT=$OCF_RESKEY_force_unmount fi DEVICE=$OCF_RESKEY_device FSTYPE=$OCF_RESKEY_fstype if [ ! -z "$OCF_RESKEY_options" ]; then options="-o $OCF_RESKEY_options" fi FAST_STOP=${OCF_RESKEY_fast_stop:="yes"} OP=$1 # These operations do not require instance parameters case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac if [ x = x"$DEVICE" ]; then - ocf_log err "Please set OCF_RESKEY_device to the device to be managed" + ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed" exit $OCF_ERR_CONFIGURED fi set_blockdevice_var # Normalize instance parameters: # It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". # But the output of `mount` and /proc/mounts do not. if [ -z "$OCF_RESKEY_directory" ]; then if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then - ocf_log err "Please specify the directory" + ocf_exit_reason "Please specify the directory" exit $OCF_ERR_CONFIGURED fi else MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//') : ${MOUNTPOINT:=/} # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/" # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll # kill the whole system. Is that a good idea? fi # Check to make sure the utilites are found if [ "X${HOSTOS}" != "XOpenBSD" ];then check_binary $MODPROBE check_binary $FUSER fi check_binary $FSCK check_binary $MOUNT check_binary $UMOUNT if [ "$OP" != "monitor" ]; then ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" fi case $OP in status) Filesystem_status exit $? ;; monitor) Filesystem_monitor exit $? ;; validate-all) Filesystem_validate_all exit $? ;; stop) Filesystem_stop exit $? ;; esac CLUSTERSAFE=0 is_option "ro" && CLUSTERSAFE=2 case $FSTYPE in nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2) CLUSTERSAFE=1 # this is kind of safe too ;; # add here CLUSTERSAFE=0 for all filesystems which are not # cluster aware and which, even if when mounted read-only, # could still modify parts of it such as journal/metadata ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) if ocf_is_true "$OCF_RESKEY_force_clones"; then CLUSTERSAFE=2 else CLUSTERSAFE=0 # these are not allowed fi ;; esac if ocf_is_clone; then case $CLUSTERSAFE in 0) - ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" + ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" ocf_log err "DO NOT RUN IT AS A CLONE!" ocf_log err "Politely refusing to proceed to avoid data corruption." exit $OCF_ERR_CONFIGURED ;; 2) ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!" if ocf_is_true "$OCF_RESKEY_force_clones"; then ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so." else ocf_log warn "But we'll let it run because it is mounted read-only." ocf_log warn "Please make sure that it's meta data is read-only too!" fi ;; esac fi case $OP in start) Filesystem_start ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr index e8c0f77d1..8163c0c26 100755 --- a/heartbeat/IPsrcaddr +++ b/heartbeat/IPsrcaddr @@ -1,486 +1,486 @@ #!/bin/sh # # Description: IPsrcaddr - Preferred source address modification # # Author: John Sutton # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: SCL Internet # # Based on the IPaddr script. # # This script manages the preferred source address associated with # packets which originate on the localhost and are routed through the # default route. By default, i.e. without the use of this script or # similar, these packets will carry the IP of the primary i.e. the # non-aliased interface. This can be a nuisance if you need to ensure # that such packets carry the same IP irrespective of which host in # a redundant cluster they actually originate from. # # It can add a preferred source address, or remove one. # # usage: IPsrcaddr {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds a preferred source address. # # Surprisingly, the "stop" arg removes it. :-) # # NOTES: # # 1) There must be one and not more than 1 default route! Mainly because # I can't see why you should have more than one. And if there is more # than one, we would have to box clever to find out which one is to be # modified, or we would have to pass its identity as an argument. # # 2) The script depends on Alexey Kuznetsov's ip utility from the # iproute aka iproute2 package. # # 3) No checking is done to see if the passed in IP address can # reasonably be associated with the interface on which the default # route exists. So unless you want to deliberately spoof your source IP, # check it! Normally, I would expect that your haresources looks # something like: # # nodename ip1 ip2 ... ipN IPsrcaddr::ipX # # where ipX is one of the ip1 to ipN. # # OCF parameters are as below: # OCF_RESKEY_ipaddress ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0" CMDCHANGE="$IP2UTIL route change to " SYSTYPE="`uname -s`" usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 Resource script for IPsrcaddr. It manages the preferred source address modification. Manages the preferred source address for outgoing IP packets The IP address. IP address The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). Netmask END } errorexit() { - ocf_log err "$*" + ocf_exit_reason "$*" exit $OCF_ERR_GENERIC } # # We can distinguish 3 cases: no preferred source address, a # preferred source address exists which matches that specified, and one # exists but doesn't match that specified. srca_read() returns 1,0,2 # respectively. # # The output of route show is something along the lines of: # # default via X.X.X.X dev eth1 src Y.Y.Y.Y # # where the src clause "src Y.Y.Y.Y" may or may not be present WS="[`echo -en ' \t'`]" OCTET="[0-9]\{1,3\}" IPADDR="\($OCTET\.\)\{3\}$OCTET" SRCCLAUSE="src$WS$WS*\($IPADDR\)" MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)" FINDIF=$HA_BIN/findif # findif needs that to be set export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress srca_read() { # Capture the default route - doublequotes prevent word splitting... DEFROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed" # ... so we can make sure there is only 1 default route [ 1 -eq `echo "$DEFROUTE" | wc -l` ] || \ errorexit "more than 1 default route exists" # But there might still be no default route [ -z "$DEFROUTE" ] && errorexit "no default route exists" # Sed out the source ip address if it exists SRCIP=`echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p"` # and what remains after stripping out the source ip address clause ROUTE_WO_SRC=`echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/"` [ -z "$SRCIP" ] && return 1 [ $SRCIP = $1 ] && return 0 return 2 } # # Add (or change if it already exists) the preferred source address # The exit code should conform to LSB exit codes. # srca_start() { srca_read $1 rc=$? if [ $rc = 0 ]; then rc=$OCF_SUCCESS ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)" else ip route replace $NETWORK dev $INTERFACE src $1 || \ errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed" $CMDCHANGE $ROUTE_WO_SRC src $1 || \ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed" rc=$? fi return $rc } # # Remove (if it exists) the preferred source address. # If one exists but it's not the same as the one specified, that's # an error. Maybe that's the wrong behaviour because if this fails # then when IPaddr releases the associated interface (if there is one) # your default route will also get dropped ;-( # The exit code should conform to LSB exit codes. # srca_stop() { srca_read $1 rc=$? if [ $rc = 1 ]; then # We do not have a preferred source address for now ocf_log info "No preferred source address defined, nothing to stop" exit $OCF_SUCCESS fi [ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address" ip route replace $NETWORK dev $INTERFACE || \ errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed" $CMDCHANGE $ROUTE_WO_SRC || \ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed" return $? } srca_status() { srca_read $1 case $? in 0) echo "OK" return $OCF_SUCCESS;; 1) echo "No preferred source address defined" return $OCF_NOT_RUNNING;; 2) echo "Preferred source address has incorrect value" return $OCF_ERR_GENERIC;; esac } # A not reliable IP address checking function, which only picks up those _obvious_ violations... # # It accepts IPv4 address in dotted quad notation, for example "192.168.1.1" # # 100% confidence whenever it reports "negative", # but may get false "positive" answer. # CheckIP() { ip="$1" case $ip in *[!0-9.]*) #got invalid char false;; .*|*.) #begin or end by ".", which is invalid false;; *..*) #consecutive ".", which is invalid false;; *.*.*.*.*) #four decimal dots, which is too many false;; *.*.*.*) #exactly three decimal dots, candidate, evaluate each field local IFS=. set -- $ip if ( [ $1 -le 254 ] && [ $2 -le 254 ] && [ $3 -le 254 ] && [ $4 -le 254 ] ) then if [ $1 -eq 127 ]; then - ocf_log err "IP address [$ip] is a loopback address, thus can not be preferred source address" + ocf_exit_reason "IP address [$ip] is a loopback address, thus can not be preferred source address" exit $OCF_ERR_CONFIGURED fi else true fi ;; *) #less than three decimal dots false;; esac return $? # This return is unnecessary, this comment too :) } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface_solaris() { $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | while read ifname linkstuff do : ifname = $ifname read inet addr junk : inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done # This doesn't look right for a box with multiple NICs. # It looks like it always selects the first interface on # a machine. Yet, we appear to use the results for this case too... ifname=`echo "$ifname" | sed s'%:*$%%'` case $addr in addr:$BASEIP) echo $ifname; return $OCF_SUCCESS;; $BASEIP) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface_generic() { local iface=`$IP2UTIL -o -f inet addr show | grep "\ $BASEIP" \ | cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$'` if [ -z "$iface" ]; then return $OCF_ERR_GENERIC else echo $iface return $OCF_SUCCESS fi } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface() { case "$SYSTYPE" in SunOS) IF=`find_interface_solaris $BASEIP` ;; *) IF=`find_interface_generic $BASEIP` ;; esac echo $IF return $OCF_SUCCESS; } ip_status() { BASEIP="$1" case "$SYSTYPE" in Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac case "$SYSTYPE" in *BSD) $IFCONFIG $IFCONFIG_A_OPT | grep "inet.*[: ]$BASEIP " >/dev/null 2>&1 if [ $? = 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi;; Linux|SunOS) IF=`find_interface "$BASEIP"` if [ -z "$IF" ]; then return $OCF_NOT_RUNNING fi case $IF in lo*) - ocf_log err "IP address [$BASEIP] is served by loopback, thus can not be preferred source address" + ocf_exit_reason "IP address [$BASEIP] is served by loopback, thus can not be preferred source address" exit $OCF_ERR_CONFIGURED ;; *)return $OCF_SUCCESS;; esac ;; *) if [ -z "$IF" ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi;; esac } srca_validate_all() { check_binary $AWK check_binary $IFCONFIG # The IP address should be in good shape if CheckIP "$ipaddress"; then : else - ocf_log err "Invalid IP address [$ipaddress]" + ocf_exit_reason "Invalid IP address [$ipaddress]" exit $OCF_ERR_CONFIGURED fi if ocf_is_probe; then return $OCF_SUCCESS fi # We should serve this IP address of course if ip_status "$ipaddress"; then : else - ocf_log err "We are not serving [$ipaddress], hence can not make it a preferred source address" + ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address" exit $OCF_ERR_INSTALLED fi } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations do not require the OCF instance parameters to be set case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac if [ -z "$OCF_RESKEY_ipaddress" ] then # usage - ocf_log err "Please set OCF_RESKEY_ipaddress to the preferred source IP address!" + ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!" exit $OCF_ERR_CONFIGURED fi ipaddress="$OCF_RESKEY_ipaddress" if [ "x$SYSTYPE" = "xLinux" ]; then srca_validate_all fi findif_out=`$FINDIF -C` rc=$? [ $rc -ne 0 ] && { - ocf_log err "[$FINDIF -C] failed" + ocf_exit_reason "[$FINDIF -C] failed" exit $rc } INTERFACE=`echo $findif_out | awk '{print $1}'` NETWORK=`ip route list dev $INTERFACE scope link match $ipaddress|grep -o '^[^ ]*'` case $1 in start) srca_start $ipaddress ;; stop) srca_stop $ipaddress ;; status) srca_status $ipaddress ;; monitor) srca_status $ipaddress ;; validate-all) srca_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # # Version 0.3 2002/11/04 17:00:00 John Sutton # Name changed from IPsrcroute to IPsrcaddr and now reports errors # using ha_log rather than on stderr. # # Version 0.2 2002/11/02 17:00:00 John Sutton # Changed status output to "OK" to satisfy ResourceManager's # we_own_resource() function. # # Version 0.1 2002/11/01 17:00:00 John Sutton # First effort but does the job? # diff --git a/heartbeat/LVM b/heartbeat/LVM index 7d86fb417..b466fa34e 100755 --- a/heartbeat/LVM +++ b/heartbeat/LVM @@ -1,691 +1,691 @@ #!/bin/sh # # # LVM # # Description: Manages an LVM volume as an HA resource # # # Author: Alan Robertson # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # This code significantly inspired by the LVM resource # in FailSafe by Lars Marowsky-Bree # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_volgrpname # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { methods=`LVM_methods` methods=`echo $methods | tr ' ' '|'` cat < 1.0 Resource script for LVM. It manages an Linux Volume Manager volume (LVM) as an HA resource. Controls the availability of an LVM Volume Group The name of volume group. Volume group name If set, the volume group will be activated exclusively. This option works one of two ways. If the volume group has the cluster attribute set, then the volume group will be activated exclusively using clvmd across the cluster. If the cluster attribute is not set, the volume group will be activated exclusively using a tag and the volume_list filter. When the tag option is in use, the volume_list in lvm.con must be initialized. This can be as simple as setting 'volume_list = []' depending on your setup. Exclusive activation If "exclusive" is set on a non clustered volume group, this overrides the tag to be used. Exclusive activation tag If set, the volume group will be activated even only partial of the physical volumes available. It helps to set to true, when you are using mirroring logical volumes. Activate VG even with partial PV only EOF } # # methods: What methods/operations do we support? # LVM_methods() { cat < /dev/null 2>&1 if [ $? -ne 0 ]; then return fi ## # Now check to see if the initrd has been updated. # If not, the machine could boot and activate the VG outside # the control of pacemaker ## if [ "$(find /boot -name *.img -newer /etc/lvm/lvm.conf)" = "" ]; then ocf_log warn "LVM: Improper setup detected" ocf_log warn "* initrd image needs to be newer than lvm.conf" # While dangerous if not done the first time, there are many # cases where we don't simply want to fail here. Instead, # keep warning until the user remakes the initrd - or has # it done for them by upgrading the kernel. # # initrd can be updated using this command. # dracut -H -f /boot/initramfs-$(uname -r).img $(uname -r) # fi } ## # does this vg have our tag ## check_tags() { local owner=`vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' '` if [ -z "$owner" ]; then # No-one owns this VG yet return 1 fi if [ "$OUR_TAG" = "$owner" ]; then # yep, this is ours return 0 fi # some other tag is set on this vg return 2 } strip_tags() { local i for i in `vgs --noheadings -o tags $OCF_RESKEY_volgrpname | sed s/","/" "/g`; do ocf_log info "Stripping tag, $i" # LVM version 2.02.98 allows changing tags if PARTIAL vgchange --deltag $i $OCF_RESKEY_volgrpname done if [ ! -z `vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' '` ]; then - ocf_log err "Failed to remove ownership tags from $OCF_RESKEY_volgrpname" + ocf_exit_reason "Failed to remove ownership tags from $OCF_RESKEY_volgrpname" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } set_tags() { check_tags case $? in 0) # we already own it. return $OCF_SUCCESS ;; 2) # other tags are set, strip them before setting if ! strip_tags; then return $OCF_ERR_GENERIC fi ;; *) : ;; esac vgchange --addtag $OUR_TAG $OCF_RESKEY_volgrpname if [ $? -ne 0 ]; then - ocf_log err "Failed to add ownership tag to $OCF_RESKEY_volgrpname" + ocf_exit_reason "Failed to add ownership tag to $OCF_RESKEY_volgrpname" return $OCF_ERR_GENERIC fi ocf_log info "New tag \"$OUR_TAG\" added to $OCF_RESKEY_volgrpname" return $OCF_SUCCESS } # # Return LVM status (silently) # LVM_status() { local rc=1 loglevel="debug" # Set the log level of the error message if [ "X${2}" = "X" ]; then loglevel="err" if ocf_is_probe; then loglevel="warn" else if [ ${OP_METHOD} = "stop" ]; then loglevel="info" fi fi fi if [ -d /dev/$1 ]; then test "`cd /dev/$1 && ls`" != "" rc=$? if [ $rc -ne 0 ]; then - ocf_log err "VG $1 with no logical volumes is not supported by this RA!" + ocf_exit_reason "VG $1 with no logical volumes is not supported by this RA!" fi fi if [ $rc -ne 0 ]; then ocf_log $loglevel "LVM Volume $1 is not available (stopped)" rc=$OCF_NOT_RUNNING else case $(get_vg_mode) in 1) # exclusive with tagging. # If vg is running, make sure the correct tag is present. Otherwise we # can not guarantee exclusive activation. if ! check_tags; then - ocf_log err "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\"" + ocf_exit_reason "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\"" rc=$OCF_ERR_GENERIC fi # make sure the environment for tags activation is still valid if ! verify_tags_environment; then rc=$OCF_ERR_GENERIC fi # let the user know if their initrd is older than lvm.conf. check_initrd_warning ;; *) : ;; esac fi if [ "X${2}" = "X" ]; then # status call return return $rc fi # Report on LVM volume status to stdout... if [ $rc -eq 0 ]; then echo "Volume $1 is available (running)" else echo "Volume $1 is not available (stopped)" fi return $rc } get_activate_options() { local options="-a" case $(get_vg_mode) in 0) options="${options}ly";; 1) options="${options}y --config activation{volume_list=[\"@${OUR_TAG}\"]}";; 2) options="${options}ey";; esac if ocf_is_true "$OCF_RESKEY_partial_activation" ; then options="${options} --partial" fi # for clones (clustered volume groups), we'll also have to force # monitoring, even if disabled in lvm.conf. if ocf_is_clone; then options="$options --monitor y" fi echo $options } ## # Attempt to deactivate vg cluster wide and then start the vg exclusively ## retry_exclusive_start() { local vgchange_options=$(get_activate_options) # Deactivate each LV in the group one by one cluster wide set -- $(lvs -o name,attr --noheadings $OCF_RESKEY_volgrpname 2> /dev/null) while [ $# -ge 2 ]; do case $2 in ????ao*) # open LVs cannot be deactivated. return $OCF_ERR_GENERIC;; *) if ! lvchange -an $OCF_RESKEY_volgrpname/$1; then - ocf_log err "Unable to perform required deactivation of $OCF_RESKEY_volgrpname/$1 before starting" + ocf_exit_reason "Unable to perform required deactivation of $OCF_RESKEY_volgrpname/$1 before starting" return $OCF_ERR_GENERIC fi ;; esac shift 2 done ocf_run vgchange $vgchange_options $OCF_RESKEY_volgrpname } # # Enable LVM volume # LVM_start() { local vgchange_options=$(get_activate_options) local vg=$1 local clvmd=0 # TODO: This MUST run vgimport as well ocf_log info "Activating volume group $vg" if [ "$LVM_MAJOR" -eq "1" ]; then ocf_run vgscan $vg else ocf_run vgscan fi case $(get_vg_mode) in 2) clvmd=1 ;; 1) if ! set_tags; then return $OCF_ERR_GENERIC fi ;; *) : ;; esac if ! ocf_run vgchange $vgchange_options $vg; then if [ $clvmd -eq 0 ]; then return $OCF_ERR_GENERIC fi # Failure to exclusively activate cluster vg.: # This could be caused by a remotely active LV, Attempt # to disable volume group cluster wide and try again. # Allow for some settling sleep 5 if ! retry_exclusive_start; then return $OCF_ERR_GENERIC fi fi if LVM_status $vg; then : OK Volume $vg activated just fine! return $OCF_SUCCESS else - ocf_log err "LVM: $vg did not activate correctly" + ocf_exit_reason "LVM: $vg did not activate correctly" return $OCF_NOT_RUNNING fi } # # Disable the LVM volume # LVM_stop() { local res=$OCF_ERR_GENERIC local vgchange_options="-aln" local vg=$1 if ! vgs $vg > /dev/null 2>&1; then ocf_log info "Volume group $vg not found" return $OCF_SUCCESS fi ocf_log info "Deactivating volume group $vg" case $(get_vg_mode) in 1) vgchange_options="-an" ;; esac for i in $(seq 10) do ocf_run vgchange $vgchange_options $vg res=$? if LVM_status $vg; then - ocf_log err "LVM: $vg did not stop correctly" + ocf_exit_reason "LVM: $vg did not stop correctly" res=1 fi if [ $res -eq 0 ]; then break fi res=$OCF_ERR_GENERIC ocf_log warn "$vg still Active" ocf_log info "Retry deactivating volume group $vg" sleep 1 which udevadm > /dev/null 2>&1 && udevadm settle --timeout=5 done case $(get_vg_mode) in 1) if [ $res -eq 0 ]; then strip_tags res=$? fi ;; esac return $res } # # Check whether the OCF instance parameters are valid # LVM_validate_all() { check_binary $AWK ## # lvmetad is a daemon that caches lvm metadata to improve the # performance of LVM commands. This daemon should never be used when # volume groups exist that are being managed by the cluster. The lvmetad # daemon introduces a response lag, where certain LVM commands look like # they have completed (like vg activation) when in fact the command # is still in progress by the lvmetad. This can cause reliability issues # when managing volume groups in the cluster. For Example, if you have a # volume group that is a dependency for another application, it is possible # the cluster will think the volume group is activated and attempt to start # the application before volume group is really accesible... lvmetad is bad. ## lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1 if [ $? -eq 0 ]; then # for now warn users that lvmetad is enabled and that they should disable it. In the # future we may want to consider refusing to start, or killing the lvmetad daemon. ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process" fi ## # Off-the-shelf tests... ## VGOUT=`vgck ${VOLUME} 2>&1` if [ $? -ne 0 ]; then - ocf_log err "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" + ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi ## # Does the Volume Group exist? ## if [ "$LVM_MAJOR" = "1" ]; then VGOUT=`vgdisplay ${VOLUME} 2>&1` else VGOUT=`vgdisplay -v ${VOLUME} 2>&1` fi if [ $? -ne 0 ]; then - ocf_log err "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" + ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi ## # If exclusive activation is not enabled, then # further checking of proper setup is not necessary ## if ! ocf_is_true "$OCF_RESKEY_exclusive"; then return $OCF_SUCCESS; fi ## # Having cloned lvm resources with exclusive vg activation makes no sense at all. ## if ocf_is_clone; then - ocf_log err "cloned lvm resources can not be activated exclusively" + ocf_exit_reason "cloned lvm resources can not be activated exclusively" exit $OCF_ERR_CONFIGURED fi ## # Make sure the cluster attribute is set and clvmd is up when exclusive # activation is enabled. Otherwise we can't exclusively activate the volume group. ## case $(get_vg_mode) in 1) # exclusive activation using tags if ! verify_tags_environment; then exit $OCF_ERR_GENERIC fi ;; 2) # exclusive activation with clvmd ## # verify is clvmd running ## if ! ps -C clvmd > /dev/null 2>&1; then - ocf_log err "$OCF_RESKEY_volgrpname has the cluster attribute set, but 'clvmd' is not running" + ocf_exit_reason "$OCF_RESKEY_volgrpname has the cluster attribute set, but 'clvmd' is not running" exit $OCF_ERR_GENERIC fi ;; *) : ;; esac return $OCF_SUCCESS } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS;; methods) LVM_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if [ -z "$OCF_RESKEY_volgrpname" ] then - ocf_log err "You must identify the volume group name!" + ocf_exit_reason "You must identify the volume group name!" exit $OCF_ERR_CONFIGURED fi # Get the LVM version number, for this to work we assume(thanks to panjiam): # # LVM1 outputs like this # # # vgchange --version # vgchange: Logical Volume Manager 1.0.3 # Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10) # # LVM2 and higher versions output in this format # # # vgchange --version # LVM version: 2.00.15 (2004-04-19) # Library version: 1.00.09-ioctl (2004-03-31) # Driver version: 4.1.0 LVM_VERSION=`vgchange --version 2>&1 | \ $AWK '/Logical Volume Manager/ {print $5"\n"; exit; } /LVM version:/ {printf $3"\n"; exit;}'` rc=$? if ( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] ) then - ocf_log err "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?" + ocf_exit_reason "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?" exit $OCF_ERR_INSTALLED fi LVM_MAJOR="${LVM_VERSION%%.*}" VOLUME=$OCF_RESKEY_volgrpname OP_METHOD=$1 if [ -n "$OCF_RESKEY_tag" ]; then OUR_TAG=$OCF_RESKEY_tag fi # What kind of method was invoked? case "$1" in start) LVM_validate_all LVM_start $VOLUME exit $?;; stop) LVM_stop $VOLUME exit $?;; status) LVM_status $VOLUME $1 exit $?;; monitor) LVM_status $VOLUME exit $?;; validate-all) LVM_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/MailTo b/heartbeat/MailTo index acf673078..3936c39de 100755 --- a/heartbeat/MailTo +++ b/heartbeat/MailTo @@ -1,191 +1,191 @@ #!/bin/sh # # Resource script for MailTo # # Author: Alan Robertson # # Description: sends email to a sysadmin whenever a takeover occurs. # # Note: This command requires an argument, unlike normal init scripts. # # This can be given in the haresources file as: # # You can also give a mail subject line or even multiple addresses # MailTo::alanr@unix.sh::BigImportantWebServer # MailTo::alanr@unix.sh,spoppi@gmx.de::BigImportantWebServer # # This will then be put into the message subject and body. # # OCF parameters are as below: # OCF_RESKEY_email # OCF_RESKEY_subject # # License: GNU General Public License (GPL) # # Copyright: (C) 2005 International Business Machines ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### ARGS="$0 $*" us=`uname -n` usage() { echo "Usage: $0 {start|stop|status|monitor|meta-data|validate-all}" } meta_data() { cat < 1.0 This is a resource agent for MailTo. It sends email to a sysadmin whenever a takeover occurs. Notifies recipients by email in the event of resource takeover The email address of sysadmin. Email address The subject of the email. Subject END } MailProgram() { $MAILCMD -s "$1" "$email" < 1.0 Enables and disables network routes. Supports host and net routes, routes via a gateway address, and routes using specific source addresses. This resource agent is useful if a node's routing table needs to be manipulated based on node role assignment. Consider the following example use case: - One cluster node serves as an IPsec tunnel endpoint. - All other nodes use the IPsec tunnel to reach hosts in a specific remote network. Then, here is how you would implement this scheme making use of the Route resource agent: - Configure an ipsec LSB resource. - Configure a cloned Route OCF resource. - Create an order constraint to ensure that ipsec is started before Route. - Create a colocation constraint between the ipsec and Route resources, to make sure no instance of your cloned Route resource is started on the tunnel endpoint itself. Manages network routes The destination network (or host) to be configured for the route. Specify the netmask suffix in CIDR notation (e.g. "/24"). If no suffix is given, a host route will be created. Specify "0.0.0.0/0" or "default" if you want this resource to set the system default route. Destination network The outgoing network device to use for this route. Outgoing network device The gateway IP address to use for this route. Gateway IP address The source IP address to be configured for the route. Source IP address The routing table to be configured for the route. Routing table END } ####################################################################### create_route_spec() { # Creates a route specification for use by "ip route (add|del|show)" route_spec="to ${OCF_RESKEY_destination}" if [ -n "${OCF_RESKEY_device}" ]; then route_spec="${route_spec} dev ${OCF_RESKEY_device}" fi if [ -n "${OCF_RESKEY_gateway}" ]; then route_spec="${route_spec} via ${OCF_RESKEY_gateway}" fi if [ -n "${OCF_RESKEY_source}" ]; then route_spec="${route_spec} src ${OCF_RESKEY_source}" fi if [ -n "${OCF_RESKEY_table}" ]; then route_spec="${route_spec} table ${OCF_RESKEY_table}" fi echo "$route_spec" } route_usage() { cat </dev/null 2>&1; then - ocf_log error "Network device ${OCF_RESKEY_device} appears not to be available on this system." + ocf_exit_reason "Network device ${OCF_RESKEY_device} appears not to be available on this system." # OCF_ERR_ARGS prevents the resource from running anywhere at all, # maybe another node has the interface? # OCF_ERR_INSTALLED just prevents starting on this particular node. return $OCF_ERR_INSTALLED fi fi # The following tests must return $OCF_ERR_INSTALLED, but only if # the resource is actually running (i.e., not during probes) if ! ocf_is_probe; then # If a source address has been configured, is it available on # this system? if [ -n "${OCF_RESKEY_source}" ]; then if ! ip address show | grep -w ${OCF_RESKEY_source} >/dev/null 2>&1; then - ocf_log error "Source address ${OCF_RESKEY_source} appears not to be available on this system." + ocf_exit_reason "Source address ${OCF_RESKEY_source} appears not to be available on this system." # same reason as with _device: return $OCF_ERR_INSTALLED fi fi # If a gateway address has been configured, is it reachable? if [ -n "${OCF_RESKEY_gateway}" ]; then if ! ip route get ${OCF_RESKEY_gateway} >/dev/null 2>&1; then - ocf_log error "Gateway address ${OCF_RESKEY_gateway} is unreachable." + ocf_exit_reason "Gateway address ${OCF_RESKEY_gateway} is unreachable." # same reason as with _device: return $OCF_ERR_INSTALLED fi fi fi return $OCF_SUCCESS } # These two actions must always succeed case $__OCF_ACTION in meta-data) meta_data # OCF variables are not set when querying meta-data exit 0 ;; usage|help) route_usage exit $OCF_SUCCESS ;; esac # Don't do anything if the necessary utilities aren't present for binary in ip grep; do check_binary $binary done route_validate || exit $? case $OCF_RESKEY_destination in *:*) addr_family="-6" ;; *) addr_family="-4" ;; esac case $__OCF_ACTION in start) route_start;; stop) route_stop;; status|monitor) route_status;; reload) ocf_log info "Reloading..." route_start ;; validate-all) ;; *) route_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" exit $rc diff --git a/heartbeat/SendArp b/heartbeat/SendArp index 675070cbb..b67404f24 100755 --- a/heartbeat/SendArp +++ b/heartbeat/SendArp @@ -1,267 +1,267 @@ #!/bin/sh # # # Copyright (c) 2006, Huang Zhen # Converting original heartbeat RA to OCF RA. # # Copyright (C) 2004 Horms # # Based on IPaddr2: Copyright (C) 2003 Tuomo Soini # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # This script send out gratuitous Arp for an IP address # # It can be used _instead_ of the IPaddr2 or IPaddr resource # to send gratuitous arp for an IP address on a given interface, # without adding the address to that interface. I.e. if for # some reason you want to send gratuitous arp for addresses # managed by IPaddr2 or IPaddr on an additional interface. # # OCF parameters are as below: # OCF_RESKEY_ip # OCF_RESKEY_nic # # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs SENDARP=$HA_BIN/send_arp SENDARPPIDDIR=${HA_RSCTMP} BASEIP="$OCF_RESKEY_ip" INTERFACE="$OCF_RESKEY_nic" RESIDUAL="" SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP" BACKGROUND=${OCF_RESKEY_background:-"yes"} # Set default values : ${ARP_INTERVAL_MS=200} # milliseconds between ARPs : ${ARP_REPEAT=5} # repeat count : ${ARP_BACKGROUND=$BACKGROUND} # no to run in foreground : ${ARP_NETMASK=ffffffffffff} # netmask for ARP ####################################################################### sendarp_meta_data() { cat < 1.0 This RA can be used _instead_ of the IPaddr2 or IPaddr RA to send gratuitous ARP for an IP address on a given interface, without adding the address to that interface. For example, if for some resaon you wanted to send gratuitous ARP for addresses managed by IPaddr2 or IPaddr on an additional interface. Broadcasts unsolicited ARP announcements The IP address for sending ARP packet. IP address The NIC for sending ARP packet. NIC Send ARPs in background. Set to false if you want to test if sending ARPs succeeded. Send ARPs in background END } ####################################################################### sendarp_usage() { cat < 1.0 The resource agent of Squid. This manages a Squid instance as an HA resource. Manages a Squid proxy server instance This is a required parameter. This parameter specifies squid's executable file. Executable file This is a required parameter. This parameter specifies a configuration file for a squid instance managed by this RA. Configuration file This is a required parameter. This parameter specifies a process id file for a squid instance managed by this RA. Pidfile This is a required parameter. This parameter specifies a port number for a squid instance managed by this RA. If plural ports are used, you must specifiy the only one of them. Port number On stop, a squid shutdown is invoked first. If the resource doesn't stop within this timeout, we resort to stopping processes by sending signals and finally KILLing them. how long to wait for squid shutdown to stop the instance before resorting to kill This is an optional parameter. This RA runs in debug mode when this parameter includes 'x' or 'v'. If 'x' is included, both of STDOUT and STDERR redirect to the logfile specified by "debug_log", and then the builtin shell option 'x' is turned on. It is similar about 'v'. Debug mode This is an optional and omittable parameter. This parameter specifies a destination file for debug logs and works only if this RA run in debug mode. Refer to "debug_mode" about debug mode. If no value is given but it's requied, it's made by the following rules: "/var/log/" as a directory part, the basename of the configuration file given by "syslog_ng_conf" as a basename part, ".log" as a suffix. A destination of the debug log END return $OCF_SUCCESS } get_pids() { SQUID_PIDS=( ) # Seek by pattern SQUID_PIDS[0]=$(pgrep -f "$PROCESS_PATTERN") # Seek by pidfile SQUID_PIDS[1]=$(awk '1{print $1}' $SQUID_PIDFILE 2>/dev/null) if [[ -n "${SQUID_PIDS[1]}" ]]; then typeset exe exe=$(ls -l "/proc/${SQUID_PIDS[1]}/exe") if [[ $? = 0 ]]; then exe=${exe##*-> } if ! [[ "$exe" = $SQUID_EXE ]]; then SQUID_PIDS[1]="" fi else SQUID_PIDS[1]="" fi fi # Seek by port SQUID_PIDS[2]=$( netstat -apn | awk '/tcp.*:'$SQUID_PORT' .*LISTEN/ && $7~/^[1-9]/ { sub("\\/.*", "", $7); print $7; exit}') } are_all_pids_found() { if [[ -n "${SQUID_PIDS[0]}" ]] && [[ -n "${SQUID_PIDS[1]}" ]] && [[ -n "${SQUID_PIDS[2]}" ]] then return 0 else return 1 fi } are_pids_sane() { if [[ "${SQUID_PIDS[1]}" = "${SQUID_PIDS[2]}" ]]; then return $OCF_SUCCESS else - ocf_log err "$SQUID_NAME:Pid unmatch" + ocf_exit_reason "$SQUID_NAME:Pid unmatch" return $OCF_ERR_GENERIC fi } is_squid_dead() { if [[ -z "${SQUID_PIDS[0]}" ]] && [[ -z "${SQUID_PIDS[2]}" ]] then return 0 else return 1 fi } monitor_squid() { typeset trialcount=0 while true; do get_pids if are_all_pids_found; then are_pids_sane return $OCF_SUCCESS fi if is_squid_dead; then return $OCF_NOT_RUNNING fi ocf_log info "$SQUID_NAME:Inconsistent processes:" \ "${SQUID_PIDS[0]},${SQUID_PIDS[1]},${SQUID_PIDS[2]}" (( trialcount = trialcount + 1 )) if (( trialcount > SQUID_CONFIRM_TRIALCOUNT )); then - ocf_log err "$SQUID_NAME:Inconsistency of processes remains unsolved" + ocf_exit_reason "$SQUID_NAME:Inconsistency of processes remains unsolved" return $OCF_ERR_GENERIC fi sleep 1 done } start_squid() { typeset status monitor_squid status=$? if [[ $status != $OCF_NOT_RUNNING ]]; then return $status fi set -- "$SQUID_OPTS" ocf_run $SQUID_EXE -f "$SQUID_CONF" "$@" status=$? if [[ $status != $OCF_SUCCESS ]]; then return $OCF_ERR_GENERIC fi while true; do get_pids if are_all_pids_found && are_pids_sane; then return $OCF_SUCCESS fi ocf_log info "$SQUID_NAME:Waiting for squid to be invoked" sleep 1 done return $OCF_ERR_GENERIC } stop_squid() { typeset lapse_sec if ocf_run $SQUID_EXE -f $SQUID_CONF -k shutdown; then lapse_sec=0 while true; do get_pids if is_squid_dead; then rm -f $SQUID_PIDFILE return $OCF_SUCCESS fi (( lapse_sec = lapse_sec + 1 )) if (( lapse_sec > SQUID_STOP_TIMEOUT )); then break fi sleep 1 ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ "stop NORM $lapse_sec/$SQUID_STOP_TIMEOUT" done fi while true; do get_pids ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ "try to stop by SIGKILL:${SQUID_PIDS[0]} ${SQUID_PIDS[2]}" kill -KILL ${SQUID_PIDS[0]} ${SQUID_PIDS[2]} sleep 1 if is_squid_dead; then rm -f $SQUID_PIDFILE return $OCF_SUCCESS fi done return $OCF_ERR_GENERIC } status_squid() { return $OCF_SUCCESS } validate_all_squid() { ocf_log info "validate_all_squid[$SQUID_NAME]" return $OCF_SUCCESS } : === Debug ${0##*/} $1 === if [[ "$1" = "meta-data" ]]; then metadata_squid exit $? fi SQUID_CONF="${OCF_RESKEY_squid_conf}" if [[ -z "$SQUID_CONF" ]]; then - ocf_log err "SQUID_CONF is not defined" + ocf_exit_reason "SQUID_CONF is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_NAME="${SQUID_CONF##*/}" SQUID_NAME="${SQUID_NAME%.*}" DEBUG_LOG="${OCF_RESKEY_debug_log-/var/log/squid_${SQUID_NAME}_debug}.log" DEBUG_MODE="" case $OCF_RESKEY_debug_mode in *x*) DEBUG_MODE="${DEBUG_MODE}x";; esac case $OCF_RESKEY_debug_mode in *v*) DEBUG_MODE="${DEBUG_MODE}v";; esac if [ -n "$DEBUG_MODE" ]; then PS4='\d \t \h '"${1-unknown} " export PS4 exec 1>>$DEBUG_LOG 2>&1 set -$DEBUG_MODE fi SQUID_EXE="${OCF_RESKEY_squid_exe}" if [[ -z "$SQUID_EXE" ]]; then - ocf_log err "SQUID_EXE is not defined" + ocf_exit_reason "SQUID_EXE is not defined" exit $OCF_ERR_CONFIGURED fi if [[ ! -x "$SQUID_EXE" ]]; then - ocf_log err "$SQUID_EXE is not found" + ocf_exit_reason "$SQUID_EXE is not found" exit $OCF_ERR_CONFIGURED fi SQUID_PIDFILE="${OCF_RESKEY_squid_pidfile}" if [[ -z "$SQUID_PIDFILE" ]]; then - ocf_log err "SQUID_PIDFILE is not defined" + ocf_exit_reason "SQUID_PIDFILE is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_PORT="${OCF_RESKEY_squid_port}" if [[ -z "$SQUID_PORT" ]]; then - ocf_log err "SQUID_PORT is not defined" + ocf_exit_reason "SQUID_PORT is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_OPTS="${OCF_RESKEY_squid_opts}" SQUID_PIDS=( ) SQUID_CONFIRM_TRIALCOUNT="${OCF_RESKEY_squid_confirm_trialcount-3}" SQUID_STOP_TIMEOUT="${OCF_RESKEY_squid_stop_timeout-10}" SQUID_SUSPEND_TRIALCOUNT="${OCF_RESKEY_squid_suspend_trialcount-10}" PROCESS_PATTERN="$SQUID_EXE -f $SQUID_CONF" COMMAND=$1 case "$COMMAND" in start) ocf_log debug "[$SQUID_NAME] Enter squid start" start_squid func_status=$? ocf_log debug "[$SQUID_NAME] Leave squid start $func_status" exit $func_status ;; stop) ocf_log debug "[$SQUID_NAME] Enter squid stop" stop_squid func_status=$? ocf_log debug "[$SQUID_NAME] Leave squid stop $func_status" exit $func_status ;; status) status_squid exit $? ;; monitor) #ocf_log debug "[$SQUID_NAME] Enter squid monitor" monitor_squid func_status=$? #ocf_log debug "[$SQUID_NAME] Leave squid monitor $func_status" exit $func_status ;; validate-all) validate_all_squid exit $? ;; *) usage ;; esac # vim: set sw=4 ts=4 : diff --git a/heartbeat/Xinetd b/heartbeat/Xinetd index 1d1be0b58..b6a7b56e2 100755 --- a/heartbeat/Xinetd +++ b/heartbeat/Xinetd @@ -1,250 +1,250 @@ #!/bin/sh # # Startup/shutdown script for services managed by xinetd. # # Copyright (C) 2003 Charlie Brooks # Copyright (C) 2011 Ulrich Windl # # WARNING: Tested ONLY on SLES11 SP1 at this time. # # Author: Charlie Brooks # Description: given parameters of a service name and start|stop|status, # will enable, disable or report on a specified xinetd service # Config: all services must have a descriptor file in /etc/xinetd.d # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # OCF parameters are as below: # OCF_RESKEY_service ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs service=$OCF_RESKEY_service SVCDEF=/etc/xinetd.d/$service ####################################################################### meta_data() { cat < 1.0 Resource script for Xinetd. It starts/stops services managed by xinetd by enabling or disabling them in the configuration file. The xinetd daemon itself must be running: we are not going to start or stop it ourselves. All services should have a line saying either "disable=yes" or "disable=no". The script just changes those settings before reloading xinetd. Important: in case the services managed by the cluster are the only ones enabled, you should specify the -stayalive option for xinetd or it will exit on Heartbeat stop. Alternatively, you may enable some internal service such as echo. Manages a service of Xinetd The name of the service managed by xinetd. service name END } get_xinetd_pid() { ps -e -o pid,comm | $AWK '$2 == "xinetd" { print $1 }' } # force xinetd to reload the service descriptions hup_inetd () { # don't rely on the pid file, but lookup xinetd in the list of # processes local pid pid=`get_xinetd_pid` if [ "$pid" ]; then if kill -s HUP $pid; then ocf_log info "asked xinetd to reload by sending SIGHUP to process $pid!" else - ocf_log err "could not send SIGHUP to process $pid!" + ocf_exit_reason "could not send SIGHUP to process $pid!" exit $OCF_ERR_GENERIC fi else - ocf_log err "xinetd process not found!" + ocf_exit_reason "xinetd process not found!" exit $OCF_ERR_GENERIC fi } # check "disable = X", printing X check_service() { ocf_log "info" "checking \"disable\" in $1" local result=$(sed -nre 's/^[ ]*disable[ ]*=[ ]*([^ ]+)[# ]*/\1/p' $1) echo "$result" } # change "disable = X" to desired value change_service() { ocf_log "info" "setting \"disable = $1\" in $2" if ! sed -i -re 's/^([ ]*disable[ ]*=[ ]*)([^ ]+)([# ]*)/\1'"$1"'\3/' $2 then ocf_log "err" "could not edit $2" return 1 fi return 0 } xup_status () { local disabled="$(check_service $SVCDEF)" if [ "${disabled:=no}" = no ]; then echo running return $OCF_SUCCESS elif [ "$disabled" = yes ]; then echo stopped return $OCF_NOT_RUNNING else echo unknown return $OCF_ERR_CONFIGURED fi } xup_start () { if [ "running" = "`xup_status`" ]; then ocf_log info "service $service already started" exit $OCF_SUCCESS fi ocf_log "info" "enabling in $SVCDEF" if change_service "no" $SVCDEF; then hup_inetd fi } xup_stop () { if [ "stopped" = "`xup_status`" ]; then ocf_log info "service $service already stopped" exit $OCF_SUCCESS fi ocf_log "info" "disabling in $SVCDEF" if change_service "yes" $SVCDEF; then hup_inetd fi } xup_usage () { echo "Usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data}" return 0 } xup_validate_all () { if [ ! -f "$SVCDEF" ]; then - ocf_log err "service $service missing $SVCDEF" + ocf_exit_reason "service $service missing $SVCDEF" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then xup_usage exit $OCF_ERR_ARGS fi # These operations do not require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) xup_usage exit $OCF_SUCCESS ;; esac if [ -z "$OCF_RESKEY_service" ]; then - ocf_log err "please define \"service\" parameter" + ocf_exit_reason "please define \"service\" parameter" if [ "$1" = "start" ]; then exit $OCF_ERR_CONFIGURED else exit $OCF_NOT_RUNNING fi fi # Is xinetd running at all if [ -z "`get_xinetd_pid`" ]; then case "$1" in stop) exit $OCF_SUCCESS;; start) - ocf_log err "xinetd not running, we manage just xinetd services, not the daemon itself" + ocf_exit_reason "xinetd not running, we manage just xinetd services, not the daemon itself" exit $OCF_ERR_INSTALLED ;; status|monitor) if ocf_is_probe; then exit $OCF_NOT_RUNNING else - ocf_log err "xinetd stopped" + ocf_exit_reason "xinetd stopped" exit $OCF_ERR_GENERIC fi ;; esac fi # Make sure the OCF_RESKEY_service is a valid xinetd service name if [ ! -f $SVCDEF ]; then - ocf_log err "service definition $SVCDEF not found!" + ocf_exit_reason "service definition $SVCDEF not found!" if [ "$1" = "start" ]; then exit $OCF_ERR_INSTALLED else exit $OCF_NOT_RUNNING fi fi # See how we were called. case "$1" in start) xup_start ;; stop) xup_stop ;; restart) $0 stop $0 start ;; status) xup_status ;; monitor) xup_status > /dev/null ;; validate-all) xup_validate_all ;; *) xup_usage exit $OCF_ERR_UNIMPLEMENTED esac exit $? diff --git a/heartbeat/apache b/heartbeat/apache index 1e573375d..ab7c43f9d 100755 --- a/heartbeat/apache +++ b/heartbeat/apache @@ -1,655 +1,655 @@ #!/bin/sh # # High-Availability Apache/IBMhttp control script # # apache (aka IBMhttpd) # # Description: starts/stops apache web servers. # # Author: Alan Robertson # Sun Jiang Dong # # Support: linux-ha@lists.linux-ha.org # # License: GNU General Public License (GPL) # # Copyright: (C) 2002-2005 International Business Machines # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf # node1 10.0.0.170 IBMhttpd # # Our parsing of the Apache config files is very rudimentary. # It'll work with lots of different configurations - but not every # possible configuration. # # Patches are being accepted ;-) # # OCF parameters: # OCF_RESKEY_configfile # OCF_RESKEY_httpd # OCF_RESKEY_port # OCF_RESKEY_statusurl # OCF_RESKEY_options # OCF_RESKEY_testregex # OCF_RESKEY_client # OCF_RESKEY_testurl # OCF_RESKEY_testregex10 # OCF_RESKEY_testconffile # OCF_RESKEY_testname # OCF_RESKEY_envfiles : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/apache-conf.sh . ${OCF_FUNCTIONS_DIR}/http-mon.sh HA_VARRUNDIR=${HA_VARRUN} ####################################################################### # # Configuration options - usually you don't need to change these # ####################################################################### # IBMHTTPD=/opt/IBMHTTPServer/bin/httpd HTTPDLIST="/sbin/httpd2 /usr/sbin/httpd2 /usr/sbin/apache2 /sbin/httpd /usr/sbin/httpd /usr/sbin/apache $IBMHTTPD" MPM=/usr/share/apache2/find_mpm if [ -x $MPM ]; then HTTPDLIST="$HTTPDLIST `$MPM 2>/dev/null`" fi LOCALHOST="http://localhost" HTTPDOPTS="-DSTATUS" DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf DEFAULT_SUSECONFIG="/etc/apache2/httpd.conf" DEFAULT_RHELCONFIG="/etc/httpd/conf/httpd.conf" # # You can also set # HTTPD # PORT # STATUSURL # CONFIGFILE # in this section if what we're doing doesn't work for you... # # End of Configuration options ####################################################################### CMD=`basename $0` # The config-file-pathname is the pathname to the configuration # file for this web server. Various appropriate defaults are # assumed if no config file is specified. If this command is # invoked as *IBM*, then the default config file name is # $DEFAULT_IBMCONFIG, otherwise the default config file # will be either $DEFAULT_RHELCONFIG or $DEFAULT_SUSECONFIG depending # on which is detected. usage() { cat <<-END usage: $0 action action: start start the web server stop stop the web server status return the status of web server, run or down monitor return TRUE if the web server appears to be working. For this to be supported you must configure mod_status and give it a server-status URL. You have to have installed either curl or wget for this to work. meta-data show meta data message validate-all validate the instance parameters END } get_pid() { if [ -f $PidFile ]; then cat $PidFile else false fi } # # return TRUE if a process with given PID is running # ProcessRunning() { local pid=$1 # Use /proc if it looks like it's here... if [ -d /proc -a -d /proc/1 ]; then [ -d /proc/$pid ] else # This assumes we're running as root... kill -s 0 "$pid" >/dev/null 2>&1 fi } silent_status() { local pid pid=`get_pid` if [ -n "$pid" ]; then ProcessRunning $pid else : No pid file false fi } # May be useful to add other distros in future validate_default_config() { if [ -e /etc/SuSE-release ]; then validate_default_suse_config else return 0 fi } # When using the default /etc/apache2/httpd.conf on SUSE, the file # /etc/apache2/sysconfig.d/include.conf is required to be present, # but this is only generated if you run the apache init script # (with contents derived from /etc/sysconfig/apache2). So, here, # if we're using the default system config file and it requires # that include, we run "/etc/init.d/apache2 configtest" to ensure # the relevant config is generated and valid. We're also taking # this opportunity to enable mod_status if it's not present. validate_default_suse_config() { if [ "$CONFIGFILE" = "$DEFAULT_SUSECONFIG" ] && \ grep -Eq '^Include[[:space:]]+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE" then [ -x "/usr/sbin/a2enmod" ] && ocf_run -q /usr/sbin/a2enmod status # init script style, for crusty old SUSE if [ -e "/etc/init.d/apache2" ]; then ocf_run -q /etc/init.d/apache2 configtest || return 1 # systemd style, for shiny new SUSE elif [ -e "/usr/sbin/start_apache2" ]; then ocf_run -q /usr/sbin/start_apache2 -t || return 1 fi fi return 0 } apache_start() { if silent_status then ocf_log info "$CMD already running (pid `get_pid`)" return $OCF_SUCCESS fi validate_default_config || return $OCF_ERR_CONFIGURED # https://bugs.launchpad.net/ubuntu/+source/apache2/+bug/603211 [ -d /var/run/apache2 ] || mkdir /var/run/apache2 if [ -z $PIDFILE_DIRECTIVE ]; then ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE else ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE -c "PidFile $PidFile" fi tries=0 while : # wait until the user set timeout do apache_monitor ec=$? if [ $ec -eq $OCF_NOT_RUNNING ] then tries=`expr $tries + 1` ocf_log info "waiting for apache $CONFIGFILE to come up" sleep 1 else break fi done if [ $ec -ne 0 ] && silent_status; then apache_stop fi return $ec } signal_children() { for sig in SIGTERM SIGHUP SIGKILL ; do if pgrep -f $HTTPD.*$CONFIGFILE >/dev/null ; then pkill -$sig -f $HTTPD.*$CONFIGFILE >/dev/null ocf_log info "signal $sig sent to apache children" sleep 1 else break fi done } graceful_stop() { local tries=10 local pid=$1 # Try graceful stop for half timeout period if timeout period is present if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then tries=$((($OCF_RESKEY_CRM_meta_timeout/1000) / 2)) fi ocf_log info "Attempting graceful stop of apache PID $pid" kill -WINCH $pid >/dev/null while ProcessRunning $pid && [ $tries -gt 0 ] do sleep 1 tries=`expr $tries - 1` done if [ $tries -eq 0 ]; then # graceful stop didn't work, process still up. return 1 fi return 0 } kill_stop() { local tries=0 local pid=$1 ocf_log info "Killing apache PID $pid" while ProcessRunning $pid && [ $tries -lt 10 ] do if [ $tries -ne 0 ]; then # don't sleep on the first try sleep 1 fi kill $pid >/dev/null tries=`expr $tries + 1` done } apache_stop() { local ret=$OCF_SUCCESS local pid if ! silent_status; then ocf_log info "$CMD is not running." signal_children return $ret fi pid=`get_pid` graceful_stop $pid if [ $? -ne 0 ]; then kill_stop $pid if ProcessRunning $pid; then - ocf_log info "$CMD still running ($pid). Killing pid failed." + ocf_exit_reason "$CMD still running ($pid). Killing pid failed." ret=$OCF_ERR_GENERIC fi fi if [ $ret -eq 0 ]; then ocf_log info "$CMD stopped." fi signal_children return $ret } apache_monitor_10() { if [ "$TESTCONFFILE" ]; then readtestconf < $TESTCONFFILE else test_url="$TESTURL" test_regex="$TESTREGEX10" fi whattorun=`gethttpclient` fixtesturl is_testconf_sane || return $OCF_ERR_CONFIGURED if $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null then return $OCF_SUCCESS else if ! ocf_is_probe; then - ocf_log err "Failed to access httpd status page." + ocf_exit_reason "Failed to access httpd status page." fi return $OCF_ERR_GENERIC fi } # If the user has not provided any basic monitoring # information, allow the agent to verify the server is # healthy and capable of processing requests by requesting # the http header of website's index attempt_index_monitor_request() { local indexpage="" if [ -n "$OCF_RESKEY_testregex" ]; then return 1; fi if [ -n "$OCF_RESKEY_testregex10" ]; then return 1; fi if [ -n "$OCF_RESKEY_testurl" ]; then return 1; fi if [ -n "$OCF_RESKEY_statusurl" ]; then return 1; fi if [ -n "$OCF_RESKEY_testconffile" ]; then return 1; fi indexpage=$(buildlocalurl) request_url_header $indexpage if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi ocf_log info "Successfully retrieved http header at $indexpage" return 0 } apache_monitor_basic() { if ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null then return $OCF_SUCCESS fi attempt_index_monitor_request if [ $? -eq 0 ]; then return $OCF_SUCCESS fi if ! ocf_is_probe; then - ocf_log err "Failed to access httpd status page." + ocf_exit_reason "Failed to access httpd status page." fi return $OCF_ERR_GENERIC } apache_monitor() { silent_status if [ $? -ne 0 ]; then ocf_log info "$CMD not running" return $OCF_NOT_RUNNING fi ourhttpclient=`findhttpclient` # we'll need one if [ -z "$ourhttpclient" ]; then - ocf_log err "could not find a http client; make sure that either wget or curl is available" + ocf_exit_reason "could not find a http client; make sure that either wget or curl is available" return $OCF_ERR_INSTALLED fi case `ocf_check_level 10` in 0) apache_monitor_basic;; 10) apache_monitor_10;; esac } detect_default_config() { if [ -f $DEFAULT_SUSECONFIG ]; then echo $DEFAULT_SUSECONFIG else echo $DEFAULT_RHELCONFIG fi } apache_meta_data(){ cat < 1.0 This is the resource agent for the Apache Web server. This resource agent operates both version 1.x and version 2.x Apache servers. The start operation ends with a loop in which monitor is repeatedly called to make sure that the server started and that it is operational. Hence, if the monitor operation does not succeed within the start operation timeout, the apache resource will end with an error status. The monitor operation by default loads the server status page which depends on the mod_status module and the corresponding configuration file (usually /etc/apache2/mod_status.conf). Make sure that the server status page works and that the access is allowed *only* from localhost (address 127.0.0.1). See the statusurl and testregex attributes for more details. See also http://httpd.apache.org/ Manages an Apache Web server instance The full pathname of the Apache configuration file. This file is parsed to provide defaults for various other resource agent parameters. configuration file path The full pathname of the httpd binary (optional). httpd binary path A port number that we can probe for status information using the statusurl. This will default to the port number found in the configuration file, or 80, if none can be found in the configuration file. httpd port The URL to monitor (the apache server status page by default). If left unspecified, it will be inferred from the apache configuration file. If you set this, make sure that it succeeds *only* from the localhost (127.0.0.1). Otherwise, it may happen that the cluster complains about the resource being active on multiple nodes. url name Regular expression to match in the output of statusurl. Case insensitive. monitor regular expression Client to use to query to Apache. If not specified, the RA will try to find one on the system. Currently, wget and curl are supported. For example, you can set this parameter to "curl" if you prefer that to wget. http client URL to test. If it does not start with "http", then it's considered to be relative to the Listen address. test url Regular expression to match in the output of testurl. Case insensitive. extended monitor regular expression A file which contains test configuration. Could be useful if you have to check more than one web application or in case sensitive info should be passed as arguments (passwords). Furthermore, using a config file is the only way to specify certain parameters. Please see README.webapps for examples and file description. test configuration file Name of the test within the test configuration file. test name Extra options to apply when starting apache. See man httpd(8). command line options Files (one or more) which contain extra environment variables. If you want to prevent script from reading the default file, set this parameter to empty string. environment settings files We will try to detect if the URL (for monitor) is IPv6, but if that doesn't work set this to true to enforce IPv6. use ipv6 with http clients END return $OCF_SUCCESS } apache_validate_all() { if [ -z "$HTTPD" ]; then - ocf_log err "apache httpd program not found" + ocf_exit_reason "apache httpd program not found" return $OCF_ERR_INSTALLED fi if [ ! -x "$HTTPD" ]; then - ocf_log err "HTTPD $HTTPD not found or is not an executable!" + ocf_exit_reason "HTTPD $HTTPD not found or is not an executable!" return $OCF_ERR_INSTALLED fi if [ ! -f $CONFIGFILE ]; then - ocf_log err "Configuration file $CONFIGFILE not found!" + ocf_exit_reason "Configuration file $CONFIGFILE not found!" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } find_httpd_prog() { case $0 in *IBM*) HTTPD=$IBMHTTPD DefaultConfig=$DEFAULT_IBMCONFIG;; *) HTTPD= for h in $HTTPDLIST do if [ -f $h -a -x $h ]; then HTTPD=$h break fi done # Let the user know that the $HTTPD used is not the one (s)he specified via $OCF_RESKEY_httpd if [ "X$OCF_RESKEY_httpd" != X -a "X$HTTPD" != X ]; then ocf_log info "Using $HTTPD as HTTPD" fi DefaultConfig=$(detect_default_config) ;; esac } apache_getconfig() { # these variables are global HTTPD="$OCF_RESKEY_httpd" PORT="$OCF_RESKEY_port" STATUSURL="$OCF_RESKEY_statusurl" CONFIGFILE="$OCF_RESKEY_configfile" OPTIONS="$OCF_RESKEY_options" CLIENT=${OCF_RESKEY_client} TESTREGEX=${OCF_RESKEY_testregex:-''} TESTURL="$OCF_RESKEY_testurl" TESTREGEX10=${OCF_RESKEY_testregex10} TESTCONFFILE="$OCF_RESKEY_testconffile" TESTNAME="$OCF_RESKEY_testname" : ${OCF_RESKEY_envfiles="/etc/apache2/envvars"} source_envfiles $OCF_RESKEY_envfiles if [ "X$HTTPD" = X -o ! -f "$HTTPD" -o ! -x "$HTTPD" ]; then find_httpd_prog fi CONFIGFILE=${CONFIGFILE:-$DefaultConfig} if [ -n "$HTTPD" ]; then httpd_basename=`basename $HTTPD` case $httpd_basename in *-*) httpd_basename=`echo "$httpd_basename" | sed -e 's%\-.*%%'`;; esac fi GetParams $CONFIGFILE } OCF_REQUIRED_PARAMS="" OCF_REQUIRED_BINARIES="" ocf_rarun $* diff --git a/heartbeat/clvm b/heartbeat/clvm index ac79655a8..9d312cc27 100755 --- a/heartbeat/clvm +++ b/heartbeat/clvm @@ -1,410 +1,410 @@ #!/bin/bash # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/ocf-directories ####################################################################### meta_data() { cat < 1.0 This agent manages the clvmd daemon. clvmd Start with cmirrord (cluster mirror log daemon). activate cmirrord Options to clvmd. Refer to clvmd.8 for detailed descriptions. Daemon Options END } ####################################################################### : ${OCF_RESKEY_daemon_options:="-d0"} sbindir=$HA_SBIN_DIR if [ -z $sbindir ]; then sbindir=/usr/sbin fi DAEMON="clvmd" CMIRROR="cmirrord" DAEMON_PATH="${sbindir}/clvmd" CMIRROR_PATH="${sbindir}/cmirrord" LVMCONF="${sbindir}/lvmconf" LOCK_FILE="/var/lock/subsys/$DAEMON" # attempt to detect where the vg tools are located # for some reason this isn't consistent with sbindir # in some distros. vgtoolsdir=$(dirname $(which vgchange 2> /dev/null) 2> /dev/null) if [ -z "$vgtoolsdir" ]; then vgtoolsdir="$sbindir" fi LVM_VGCHANGE=${vgtoolsdir}/vgchange LVM_VGDISPLAY=${vgtoolsdir}/vgdisplay LVM_VGSCAN=${vgtoolsdir}/vgscan # Leaving this in for legacy. We do not want to advertize # the abilty to set options in the systconfig exists, we want # to expand the OCF style options as necessary instead. [ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster [ -f /etc/sysconfig/$DAEMON ] && . /etc/sysconfig/$DAEMON CLVMD_TIMEOUT="90" if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then CLVMD_TIMEOUT=$(($OCF_RESKEY_CRM_meta_timeout/1000)) fi clvmd_usage() { cat </dev/null | grep -a "${binary}" > /dev/null 2>&1 if [ $? -eq 0 ];then # shortcut without requiring pgrep to search through all procs return $OCF_SUCCESS fi fi pid=$(pgrep ${binary}) case $? in 0) ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}." echo "$pid" > $pidfile return $OCF_SUCCESS;; 1) rm -f "$pidfile" > /dev/null 2>&1 ocf_log info "$binary is not running" return $OCF_NOT_RUNNING;; *) rm -f "$pidfile" > /dev/null 2>&1 - ocf_log err "Error encountered detecting pid status of $binary" + ocf_exit_reason "Error encountered detecting pid status of $binary" return $OCF_ERR_GENERIC;; esac } clvmd_status() { local rc local mirror_rc clvmd_validate if [ $? -ne $OCF_SUCCESS ]; then - ocf_log error "Unable to monitor, Environment validation failed." + ocf_exit_reason "Unable to monitor, Environment validation failed." return $? fi check_process $DAEMON rc=$? mirror_rc=$rc if ocf_is_true $OCF_RESKEY_with_cmirrord; then check_process $CMIRROR mirror_rc=$? fi # If these ever don't match, return error to force recovery if [ $mirror_rc -ne $rc ]; then return $OCF_ERR_GENERIC fi return $rc } # NOTE: replace this with vgs, once display filter per attr is implemented. clustered_vgs() { ${LVM_VGDISPLAY} 2>/dev/null | awk 'BEGIN {RS="VG Name"} {if (/Clustered/) print $1;}' } wait_for_process() { local binary=$1 local timeout=$2 local count=0 ocf_log info "Waiting for $binary to exit" while [ $count -le $timeout ]; do check_process $binary if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log info "$binary terminated" return $OCF_SUCCESS fi sleep 1 count=$((count+1)) done return $OCF_ERR_GENERIC } time_left() { local end=$1 local default=$2 local now=$SECONDS local result=0 result=$(( $end - $now )) if [ $result -lt $default ]; then return $default fi return $result } clvmd_stop() { local LVM_VGS local rc=$OCF_SUCCESS local end=$(( $SECONDS + $CLVMD_TIMEOUT )) clvmd_status if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi check_process $DAEMON if [ $? -ne $OCF_NOT_RUNNING ]; then LVM_VGS="$(clustered_vgs)" if [ -n "$LVM_VGS" ]; then ocf_log info "Deactivating clustered VG(s):" ocf_run ${LVM_VGCHANGE} -anl $LVM_VGS if [ $? -ne 0 ]; then - ocf_log error "Failed to deactivate volume groups, cluster vglist = $LVM_VGS" + ocf_exit_reason "Failed to deactivate volume groups, cluster vglist = $LVM_VGS" return $OCF_ERR_GENERIC fi fi ocf_log info "Signaling $DAEMON to exit" killall -TERM $DAEMON if [ $? != 0 ]; then - ocf_log error "Failed to signal -TERM to $DAEMON" + ocf_exit_reason "Failed to signal -TERM to $DAEMON" return $OCF_ERR_GENERIC fi wait_for_process $DAEMON $CLVMD_TIMEOUT rc=$? if [ $rc -ne $OCF_SUCCESS ]; then - ocf_log error "$DAEMON failed to exit" + ocf_exit_reason "$DAEMON failed to exit" return $rc fi rm -f $LOCK_FILE fi check_process $CMIRROR if [ $? -ne $OCF_NOT_RUNNING ] && ocf_is_true $OCF_RESKEY_with_cmirrord; then local timeout ocf_log info "Signaling $CMIRROR to exit" killall -INT $CMIRROR time_left $end 10; timeout=$? wait_for_process $CMIRROR $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then killall -KILL $CMIRROR time_left $end 10; timeout=$? wait_for_process $CMIRROR $(time_left $end 10) rc=$? fi fi return $rc } start_process() { local binary_path=$1 local opts=$2 check_process "$(basename $binary_path)" if [ $? -ne $OCF_SUCCESS ]; then ocf_log info "Starting $binary_path: " ocf_run $binary_path $opts rc=$? if [ $rc -ne 0 ]; then - ocf_log error "Failed to launch $binary_path, exit code $rc" + ocf_exit_reason "Failed to launch $binary_path, exit code $rc" exit $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } clvmd_activate_all() { # Activate all volume groups by leaving the # "volume group name" parameter empty ocf_run ${LVM_VGCHANGE} -aay if [ $? -ne 0 ]; then ocf_log info "Failed to activate VG(s):" clvmd_stop return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } clvmd_start() { local rc=0 local CLVMDOPTS="-T${CLVMD_TIMEOUT} $OCF_RESKEY_daemon_options" clvmd_validate if [ $? -ne $OCF_SUCCESS ]; then - ocf_log error "Unable to start, Environment validation failed." + ocf_exit_reason "Unable to start, Environment validation failed." return $? fi clvmd_status if [ $? -eq $OCF_SUCCESS ]; then ocf_log debug "$DAEMON already started" clvmd_activate_all return $?; fi # autoset locking type to clusted when lvmconf tool is available if [ -x "$LVMCONF" ]; then $LVMCONF --enable-cluster > /dev/null 2>&1 fi # if either of these fail, script will exit OCF_ERR_GENERIC if ocf_is_true $OCF_RESKEY_with_cmirrord; then start_process $CMIRROR_PATH fi start_process $DAEMON_PATH $CLVMDOPTS # Refresh local cache. # # It's possible that new PVs were added to this, or other VGs # while this node was down. So we run vgscan here to avoid # any potential "Missing UUID" messages with subsequent # LVM commands. # The following step would be better and more informative to the user: # 'action "Refreshing VG(s) local cache:" ${LVM_VGSCAN}' # but it could show warnings such as: # 'clvmd not running on node x-y-z Unable to obtain global lock.' # and the action would be shown as FAILED when in reality it didn't. # Ideally vgscan should have a startup mode that would not print # unnecessary warnings. ${LVM_VGSCAN} > /dev/null 2>&1 touch $LOCK_FILE clvmd_activate_all clvmd_status return $? } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; start) clvmd_start;; stop) clvmd_stop;; monitor) clvmd_status;; validate-all) clvmd_validate;; usage|help) clvmd_usage;; *) clvmd_usage exit $OCF_ERR_UNIMPLEMENTED;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/conntrackd b/heartbeat/conntrackd index bccb9393e..e81cda3e7 100755 --- a/heartbeat/conntrackd +++ b/heartbeat/conntrackd @@ -1,335 +1,335 @@ #!/bin/bash # # # An OCF RA for conntrackd # http://conntrack-tools.netfilter.org/ # # Copyright (c) 2011 Dominik Klein # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### OCF_RESKEY_binary_default=conntrackd OCF_RESKEY_config_default=/etc/conntrackd/conntrackd.conf # For users of versions prior to 1.2: # Map renamed parameter "conntrackd" to "binary" if in use : ${OCF_RESKEY_binary=${OCF_RESKEY_conntrackd-${OCF_RESKEY_binary_default}}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} meta_data() { cat < 1.2 Master/Slave OCF Resource Agent for conntrackd This resource agent manages conntrackd Name of the conntrackd executable. If conntrackd is installed and available in the default PATH, it is sufficient to configure the name of the binary For example "my-conntrackd-binary-version-0.9.14" If conntrackd is installed somewhere else, you may also give a full path For example "/packages/conntrackd-0.9.14/sbin/conntrackd" Name of the conntrackd executable Full path to the conntrackd.conf file. For example "/packages/conntrackd-0.9.14/etc/conntrackd/conntrackd.conf" Path to conntrackd.conf END } meta_expect() { local what=$1 whatvar=OCF_RESKEY_CRM_meta_${1//-/_} op=$2 expect=$3 local val=${!whatvar} if [[ -n $val ]]; then # [, not [[, or it won't work ;) [ $val $op $expect ] && return fi - ocf_log err "meta parameter misconfigured, expected $what $op $expect, but found ${val:-unset}." + ocf_exit_reason "meta parameter misconfigured, expected $what $op $expect, but found ${val:-unset}." exit $OCF_ERR_CONFIGURED } conntrackd_is_master() { # You can't query conntrackd whether it is master or slave. It can be both at the same time. # This RA creates a statefile during promote and enforces master-max=1 and clone-node-max=1 ha_pseudo_resource $statefile monitor } conntrackd_set_master_score() { ${HA_SBIN_DIR}/crm_master -Q -l reboot -v $1 } conntrackd_monitor() { rc=$OCF_NOT_RUNNING # It does not write a PID file, so check the socket exists after # extracting its path from the configuration file local conntrack_socket=$(awk '/^[ \t]*UNIX[ \t]*{/,/^[ \t]*}/ { if ($1 == "Path") { print $2 } }' $OCF_RESKEY_config) [ -S "$conntrack_socket" ] && rc=$OCF_SUCCESS if [ "$rc" -eq "$OCF_SUCCESS" ]; then # conntrackd is running # now see if it acceppts queries if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -s > /dev/null 2>&1; then rc=$OCF_ERR_GENERIC - ocf_log err "conntrackd is running but not responding to queries" + ocf_exit_reason "conntrackd is running but not responding to queries" fi if conntrackd_is_master; then rc=$OCF_RUNNING_MASTER # Restore master setting on probes if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then conntrackd_set_master_score $master_score fi else # Restore master setting on probes if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then conntrackd_set_master_score $slave_score fi fi fi return $rc } conntrackd_start() { rc=$OCF_ERR_GENERIC # Keep trying to start the resource; # wait for the CRM to time us out if this fails while :; do conntrackd_monitor status=$? case "$status" in $OCF_SUCCESS) conntrackd_set_master_score $slave_score # -n = request resync from the others if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -n; then - ocf_log err "$OCF_RESKEY_binary -C $OCF_RESKEY_config -n failed during start." + ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -n failed during start." rc=$OCF_ERR_GENERIC else rc=$OCF_SUCCESS fi break ;; $OCF_NOT_RUNNING) ocf_log info "Starting conntrackd" $OCF_RESKEY_binary -C $OCF_RESKEY_config -d ;; $OCF_RUNNING_MASTER) ocf_log warn "conntrackd already in master mode, demoting." ha_pseudo_resource $statefile stop ;; $OCF_ERR_GENERIC) - ocf_log err "conntrackd start failed" + ocf_exit_reason "conntrackd start failed" rc=$OCF_ERR_GENERIC break ;; esac done return $rc } conntrackd_stop() { rc=$OCF_ERR_GENERIC # Keep trying to bring down the resource; # wait for the CRM to time us out if this fails while :; do conntrackd_monitor status=$? case "$status" in $OCF_SUCCESS|$OCF_ERR_GENERIC) ocf_log info "Stopping conntrackd" $OCF_RESKEY_binary -C $OCF_RESKEY_config -k ;; $OCF_NOT_RUNNING) rc=$OCF_SUCCESS break ;; $OCF_RUNNING_MASTER) ocf_log warn "conntrackd still master" ;; esac done return $rc } conntrackd_validate_all() { check_binary "$OCF_RESKEY_binary" if ! [ -e "$OCF_RESKEY_config" ]; then - ocf_log err "Config FILE $OCF_RESKEY_config does not exist" + ocf_exit_reason "Config FILE $OCF_RESKEY_config does not exist" return $OCF_ERR_INSTALLED fi meta_expect master-node-max = 1 meta_expect master-max = 1 meta_expect clone-node-max = 1 return $OCF_SUCCESS } conntrackd_promote() { rc=$OCF_SUCCESS if ! conntrackd_is_master; then # -c = Commit the external cache to the kernel # -f = Flush internal and external cache # -R = resync with the kernel table # -B = send a bulk update on the line for parm in c f R B; do if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm; then - ocf_log err "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during promote." + ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during promote." rc=$OCF_ERR_GENERIC break fi done ha_pseudo_resource $statefile start conntrackd_set_master_score $master_score fi return $rc } conntrackd_demote() { rc=$OCF_SUCCESS if conntrackd_is_master; then # -t = shorten kernel timers to remove zombies # -n = request a resync from the others for parm in t n; do if ! $OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm; then - ocf_log err "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during demote." + ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during demote." rc=$OCF_ERR_GENERIC break fi done ha_pseudo_resource $statefile stop conntrackd_set_master_score $slave_score fi return $rc } conntrackd_notify() { hostname=$(hostname) # OCF_RESKEY_CRM_meta_notify_master_uname is a whitespace separated list of master hostnames for master in $OCF_RESKEY_CRM_meta_notify_master_uname; do # if we are the master and an instance was just started on another node: # send a bulk update to allow failback if [ "$hostname" = "$master" -a "$OCF_RESKEY_CRM_meta_notify_type" = "post" -a "$OCF_RESKEY_CRM_meta_notify_operation" = "start" -a "$OCF_RESKEY_CRM_meta_notify_start_uname" != "$hostname" ]; then ocf_log info "Sending bulk update in post start to peers to allow failback" $OCF_RESKEY_binary -C $OCF_RESKEY_config -B fi done for tobepromoted in $OCF_RESKEY_CRM_meta_notify_promote_uname; do # if there is a promote action to be executed on another node: # send a bulk update to allow failback if [ "$hostname" != "$tobepromoted" -a "$OCF_RESKEY_CRM_meta_notify_type" = "pre" -a "$OCF_RESKEY_CRM_meta_notify_operation" = "promote" ]; then ocf_log info "Sending bulk update in pre promote to peers to allow failback" $OCF_RESKEY_binary -C $OCF_RESKEY_config -B fi done } conntrackd_usage() { cat <, 1997-1999 # Peter Poeml , 2000-2006 # Marius Tomaschewski , 2006-2010 # # and Linux-HA contributors # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_binary_default="dhcpd" OCF_RESKEY_pid_default="/var/run/dhcpd.pid" OCF_RESKEY_user_default=dhcpd OCF_RESKEY_group_default=nogroup OCF_RESKEY_config_default="" OCF_RESKEY_chrooted_default="true" OCF_RESKEY_chrooted_path_default="/var/lib/dhcp" OCF_RESKEY_leases_default="/db/dhcpd.leases" OCF_RESKEY_interface_default="" OCF_RESKEY_includes_default="" : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} : ${OCF_RESKEY_chrooted=${OCF_RESKEY_chrooted_default}} : ${OCF_RESKEY_chrooted_path=${OCF_RESKEY_chrooted_path_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_leases=${OCF_RESKEY_leases_default}} : ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}} : ${OCF_RESKEY_includes=${OCF_RESKEY_includes_default}} # To enable support for different versions of dhcp, we need # to know what version we are being run against. DHCP_VERSION_MAJOR=`$OCF_RESKEY_binary --version 2>&1 | awk -F- '{print $3}' | awk -F. '{print $1}' | sed s/^[a-zA-Z]//g` # These files are always copied by default to ensure the chroot environment works. DEFAULT_FILE_LIST="/etc/gai.conf /etc/nsswitch.conf /etc/resolv.conf /etc/host.conf /etc/hosts /etc/localtime /dev/urandom" usage() { cat < 0.1 Manage an ISC DHCP server service in a chroot environment. Chrooted ISC DHCP server resource agent. The absolute path to the DHCP server configuration file. Configuration file Configure the dhcpd service to run in a chrooted or non-chrooted mode. Enable chroot mode The absolute path of the chrooted DHCP environment. The chrooted path The binary for the DHCP server process. An absolute path definition is not required, but can be used to override environment path. dhcpd binary The system user the DHCP server process will run as when it is chrooted. dhcpd owner The system group the DHCP server process will run as when it is chrooted. dhcpd group owner The network interface(s) the DHCP server process will bind to. A blank value will bind the process to all interfaces. Network Interface This parameter provides a means to copy include files into the chrooted environment. If a dhcpd.conf file contains a line similar to this: include "/etc/named.keys"; Then an admin also has to tell the dhcpd RA that this file should be pulled into the chrooted environment. This is a space delimited list. Include files The leases database file, relative to chrooted_path. Leases file The path and filename of the PID file. It is relative to chrooted_path. PID file EOF } # Validate most critical parameters dhcpd_validate_all() { check_binary $OCF_RESKEY_binary if ! ocf_is_probe; then # Test for the appropriate configuration files depending on if # chroot mode is enabled. if ocf_is_true $OCF_RESKEY_chrooted ; then if ! test -e "$OCF_RESKEY_chrooted_path"; then - ocf_log err "Path $OCF_RESKEY_chrooted_path does not exist." + ocf_exit_reason "Path $OCF_RESKEY_chrooted_path does not exist." return $OCF_ERR_INSTALLED fi if test -n "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config"; then - ocf_log err "Configuration file $OCF_RESKEY_chrooted_path/$OCF_RESKEY_config doesn't exist" + ocf_exit_reason "Configuration file $OCF_RESKEY_chrooted_path/$OCF_RESKEY_config doesn't exist" return $OCF_ERR_INSTALLED fi else if test -n "$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_config"; then - ocf_log err "Configuration file $OCF_RESKEY_config doesn't exist" + ocf_exit_reason "Configuration file $OCF_RESKEY_config doesn't exist" return $OCF_ERR_INSTALLED fi fi fi if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then - ocf_log err "User $OCF_RESKEY_user doesn't exist" + ocf_exit_reason "User $OCF_RESKEY_user doesn't exist" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } # dhcpd_monitor. Send a request to dhcpd and check response. dhcpd_monitor() { # Assume chrooted mode is being used, but if not update the PIDF # variable to point to the non-chrooted PID file. PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ! ocf_is_true $OCF_RESKEY_chrooted ; then PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` fi ocf_pidfile_status $PIDF >/dev/null 2>&1 || return $OCF_NOT_RUNNING return $OCF_SUCCESS } # Initialize Chroot dhcpd_initialize_chroot() { # If we are running the initialization for the first time, we need to make # the new chrooted folder, in case we are not using the same default. if ! [ -d $OCF_RESKEY_chrooted_path ] ; then ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use." fi # Make sure all sub-paths are created if something went wrong during # a partial run. for i in db dev etc lib64 var/run; do mkdir -p $OCF_RESKEY_chrooted_path/$i done # If we are running version 4 of the dhcp server, we need to mount a proc partition. if [ $DHCP_VERSION_MAJOR -ge 4 ] ; then mkdir -p $OCF_RESKEY_chrooted_path/proc if ! [ -e $OCF_RESKEY_chrooted_path/proc/net/dev ] ; then mount -t proc -o ro proc $OCF_RESKEY_chrooted_path/proc > /dev/null 2>&1 fi fi # If the folder to store the PID file does not exist, make it. if ! [ -d "$OCF_RESKEY_chrooted_path`dirname $OCF_RESKEY_pid`" ] ; then mkdir -p "$OCF_RESKEY_chrooted_path`dirname $OCF_RESKEY_pid`" fi # Ensure all permissions are in place if the folder was re-created. chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_chrooted_path/`dirname $OCF_RESKEY_leases` chown -R $OCF_RESKEY_user:$OCF_RESKEY_group "$OCF_RESKEY_chrooted_path/`dirname $OCF_RESKEY_pid`" ## If there is no conf file, we can't initialize the chrooted ## environment, return with "program not configured" if ! [ -f $OCF_RESKEY_config ] ; then - ocf_log err "dhcpd has not been configured." + ocf_exit_reason "dhcpd has not been configured." return $OCF_ERR_CONFIGURED fi # If the leases file does not exist, create it, as this is a fresh install. if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases fi # Remove the random device. test -e "$OCF_RESKEY_chrooted_path/dev/urandom" && rm -f $OCF_RESKEY_chrooted_path/dev/urandom # Test for the existance of the defined include files, and append # them to the list of files to be copied. for i in $OCF_RESKEY_includes ; do if [ -e $i ] ; then DEFAULT_FILE_LIST="$DEFAULT_FILE_LIST $i" else - ocf_log err "include file $i does not exist" + ocf_exit_reason "include file $i does not exist" return $OCF_ERR_INSTALLED fi done # Ensure all "modified" non-chrooted configuration files are copied into the chrooted environment. for i in $OCF_RESKEY_config $DEFAULT_FILE_LIST; do # First, lets make sure the directory exists within the chrooted environment. if test -d "$i" ; then mkdir -p $OCF_RESKEY_chrooted_path/$i elif test -e "$i" ; then mkdir -p "`dirname $OCF_RESKEY_chrooted_path/$i`" fi # Next, we copy the configuration file into place. cp -aL "$i" "$OCF_RESKEY_chrooted_path/${i%/*}/" > /dev/null 2>&1 || - { ocf_log err "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } + { ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } done libdir=$(basename $(echo /var/lib/dhcp/lib*)) if test -x /usr/bin/ldd ; then get_ldd_deps() { ldd_wl="\/$libdir\/lib" ldd_bl="\/$libdir\/libc\." /usr/bin/ldd "$1" | while read a b c d ; do [ -n "$c" ] || continue [[ $c =~ $ldd_wl ]] || continue [[ $c =~ $ldd_bl ]] && continue echo $c done } else get_ldd_deps() { :; } fi cplibs=`for i in /$libdir/libresolv.so.* /$libdir/libnss_*.so.* /$libdir/libpthread.so.0 /$libdir/libdl.so.2 do if [ -s "$i" ] ; then echo "$i" get_ldd_deps "$i" fi done | sort -u` for i in $cplibs ; do if [ -s "$i" ]; then cp -pL "$i" "/var/lib/dhcp/$libdir/" || - { ocf_log err "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } + { ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } fi done return $OCF_SUCCESS } # Initialize a non-chroot environment dhcpd_initialize() { ## If there is no conf file, we can't start a dhcp service. if ! [ -f $OCF_RESKEY_config ] ; then - ocf_log err "dhcpd has not been configured." + ocf_exit_reason "dhcpd has not been configured." return $OCF_ERR_CONFIGURED fi # As with the standard DHCP init script, we can still use the # chrooted default path for storing the leases file. This behavior # is consistent with the existing /etc/init.d/dhcpd script. if ! [ -d $OCF_RESKEY_chrooted_path ] ; then ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use." fi # If the leases file does not exist, create it, as this is a fresh install. if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases fi # if the PID storage path does not exist, make it, and setup the permissions. # NOTE: This part of the script has a potential security flaw, in that if someone # puts in /var/run as the path, it will change ownership to the dhcpd user # and group. However, all that would do is allow that user to view the contents # of the files, which they can do now anyway. If this becomes an issue, I can work # in some changes. # We need to append "dhcpd" to the path for the PID file storage folder, because # if /var/run is used, that folders permissions can not be changed, otherwise it affects # more then just one application. if ! [ -d `dirname $OCF_RESKEY_pid`/dhcpd ] ; then mkdir -p `dirname $OCF_RESKEY_pid`/dhcpd if [ -n "$OCF_RESKEY_user" -a "x$OCF_RESKEY_user" != "xroot" ] ; then chown $OCF_RESKEY_user `dirname $OCF_RESKEY_pid`/dhcpd fi if [ -n "$OCF_RESKEY_group" -a "x$OCF_RESKEY_group" != "xwheel" ] ; then chgrp $OCF_RESKEY_group `dirname $OCF_RESKEY_pid`/dhcpd fi fi return $OCF_SUCCESS } # Start dhcpd_start() { # Lets make sure we are not already running. if dhcpd_monitor; then ocf_log info "dhcpd already running" return $OCF_SUCCESS fi # Only initialize the chrooted path(s) if chroot mode is enabled. if ocf_is_true $OCF_RESKEY_chrooted ; then dhcpd_initialize_chroot || - { ocf_log err "Could not fully initialize the chroot environment." ; return $OCF_ERR_INSTALLED; } + { ocf_exit_reason "Could not fully initialize the chroot environment." ; return $OCF_ERR_INSTALLED; } else dhcpd_initialize || - { ocf_log err "Could not fully initialize the runtime environment." ; return $OCF_ERR_INSTALLED; } + { ocf_exit_reason "Could not fully initialize the runtime environment." ; return $OCF_ERR_INSTALLED; } fi dhcpd_validate_all || exit # Define an empty string variable, to ensure it exists when needed. DHCPD_ARGS="" # To ensure consistent behavior with the standard DHCPD init script, # use the chrooted default path for storing a leases file, when not in # a chrooted enviroment. if ocf_is_true $OCF_RESKEY_chrooted ; then DHCPD_ARGS="$DHCPD_ARGS -chroot $OCF_RESKEY_chrooted_path -lf $OCF_RESKEY_leases" else DHCPD_ARGS="$DHCPD_ARGS -lf $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases" fi if [ -n "$OCF_RESKEY_user" ]; then DHCPD_ARGS="$DHCPD_ARGS -user $OCF_RESKEY_user" fi if [ -n "$OCF_RESKEY_group" ]; then DHCPD_ARGS="$DHCPD_ARGS -group $OCF_RESKEY_group" fi # If there is a pid file containing a pid, the machine might have crashed. pid files in # /var/run are always cleaned up at boot time, but this is not the case for the pid file in # the chroot jail. Therefore, an old pid file may exist. This is only a problem if it # incidentally contains the pid of a running process. If this process is not a 'dhcpd', # we remove the pid. (dhcpd itself only checks whether the pid is alive or not.) PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ocf_is_true $OCF_RESKEY_chrooted ; then ocf_log info "Starting dhcpd [chroot] service." DHCPD_ARGS="$DHCPD_ARGS -pf $OCF_RESKEY_pid" else ocf_log info "Starting dhcpd [non-chroot] service." PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` DHCPD_ARGS="$DHCPD_ARGS -pf $PIDF" fi test -e "$PIDF" && rm -f $PIDF ocf_run $OCF_RESKEY_binary -cf $OCF_RESKEY_config $DHCPD_ARGS $OCF_RESKEY_interface || return $OCF_ERR_INSTALLED while ! dhcpd_monitor; do sleep .1 ocf_log info "waiting for dhcpd to start" return $OCF_SUCCESS done if ocf_is_true $OCF_RESKEY_chrooted ; then ocf_log info "dhcpd [chrooted] has started." else ocf_log info "dhcpd [non-chrooted] has started." fi return $OCF_SUCCESS } # Stop dhcpd_stop () { local timeout local timewait local rc dhcpd_monitor rc=$? case "$rc" in "$OCF_SUCCESS") # Currently running, and is expected behaviour. ;; "$OCF_NOT_RUNNING") # Currently not running, therefore nothing to do. ocf_log info "dhcpd already stopped" return $OCF_SUCCESS ;; esac PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ! ocf_is_true $OCF_RESKEY_chrooted ; then PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` fi kill `cat $PIDF` # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timewait=$((OCF_RESKEY_CRM_meta_timeout/1500)) sleep 0.1; timeout=0 # Sleep here for .1 sec to let dhcpd finish. while dhcpd_monitor ; do if [ $timeout -ge $timewait ]; then break else sleep 1 timeout=`expr $timeout + 1` fi done #If still up if dhcpd_monitor 2>&1; then - ocf_log err "dhcpd is still up! Trying kill -s KILL" + ocf_log notice "dhcpd is still up! Trying kill -s KILL" kill -s SIGKILL `cat $PIDF` fi # If we are running a dhcp server v4 or higher, unmount the proc partition. if [ $DHCP_VERSION_MAJOR -ge 4 ] ; then # We only want to unmount proc in a chrooted environment, else we could # cause other issues. if ocf_is_true $OCF_RESKEY_chrooted ; then umount $OCF_RESKEY_chrooted_path/proc > /dev/null 2>&1 fi fi rm -f $PIDF ocf_log info "dhcpd stopped" return $OCF_SUCCESS } # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) dhcpd_meta_data exit $OCF_SUCCESS ;; validate-all) dhcpd_validate_all exit $OCF_SUCCESS ;; usage|help) dhcpd_usage exit $OCF_SUCCESS ;; esac # Translate each action into the appropriate function call case $__OCF_ACTION in start) dhcpd_start;; stop) dhcpd_stop;; monitor) dhcpd_monitor;; *) dhcpd_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/galera b/heartbeat/galera index a1d925baf..e7ceb147f 100755 --- a/heartbeat/galera +++ b/heartbeat/galera @@ -1,691 +1,691 @@ #!/bin/sh # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ## # README. # # This agent only supports being configured as a multistate Master # resource. # # Slave vs Master role: # # During the 'Slave' role, galera instances are in read-only mode and # will not attempt to connect to the cluster. This role exists only as # a means to determine which galera instance is the most up-to-date. The # most up-to-date node will be used to bootstrap a galera cluster that # has no current members. # # The galera instances will only begin to be promoted to the Master role # once all the nodes in the 'wsrep_cluster_address' connection address # have entered read-only mode. At that point the node containing the # database that is most current will be promoted to Master. Once the first # Master instance bootstraps the galera cluster, the other nodes will be # promoted to Master as well. # # Example: Create a galera cluster using nodes rhel7-node1 rhel7-node2 rhel7-node3 # # pcs resource create db galera enable_creation=true \ # wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master # # By setting the 'enable_creation' option, the database will be automatically # generated at startup. The meta attribute 'master-max=3' means that all 3 # nodes listed in the wsrep_cluster_address list will be allowed to connect # to the galera cluster and perform replication. # # NOTE: If you have more nodes in the pacemaker cluster then you wish # to have in the galera cluster, make sure to use location contraints to prevent # pacemaker from attempting to place a galera instance on a node that is # not in the 'wsrep_cluster_address" list. # ## ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/mysql-common.sh # It is common for some galera instances to store # check user that can be used to query status # in this file if [ -f "/etc/sysconfig/clustercheck" ]; then . /etc/sysconfig/clustercheck fi ####################################################################### usage() { cat < 1.0 Resource script for managing galara database. Manages a galara instance Location of the MySQL server binary MySQL server binary Location of the MySQL client binary MySQL client binary Configuration file MySQL config Directory containing databases MySQL datadir User running MySQL daemon MySQL user Group running MySQL daemon (for logfile and directory permissions) MySQL group The logfile to be used for mysqld. MySQL log file The pidfile to be used for mysqld. MySQL pid file The socket to be used for mysqld. MySQL socket If the MySQL database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld The galera cluster address. This takes the form of: gcomm://node,node,node Only nodes present in this node list will be allowed to start a galera instance. It is expected that the galera node names listed in this address match valid pacemaker node names. Galera cluster address Cluster check user. MySQL test user Cluster check user password check password END } get_option_variable() { local key=$1 $MYSQL $MYSQL_OPTIONS_CHECK -e "SHOW VARIABLES like '$key';" | tail -1 } get_status_variable() { local key=$1 $MYSQL $MYSQL_OPTIONS_CHECK -e "show status like '$key';" | tail -1 } set_bootstrap_node() { local node=$1 ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true" } clear_bootstrap_node() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -D } is_bootstrap() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -Q 2>/dev/null } clear_last_commit() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D } set_last_commit() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -v $1 } get_last_commit() { local node=$1 if [ -z "$node" ]; then ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -Q 2>/dev/null else ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -Q 2>/dev/null fi } wait_for_sync() { local state=$(get_status_variable "wsrep_local_state") ocf_log info "Waiting for database to sync with the cluster. " while [ "$state" != "4" ]; do sleep 1 state=$(get_status_variable "wsrep_local_state") done ocf_log info "Database synced." } is_primary() { cluster_status=$(get_status_variable "wsrep_cluster_status") if [ "$cluster_status" = "Primary" ]; then return 0 fi if [ -z "$cluster_status" ]; then - ocf_log err "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status" + ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status" else ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}" fi return 1 } is_readonly() { local res=$(get_option_variable "read_only") if ! ocf_is_true "$res"; then return 1 fi cluster_status=$(get_status_variable "wsrep_cluster_status") if ! [ "$cluster_status" = "Disconnected" ]; then return 1 fi return 0 } master_exists() { # determine if a master instance is already up and is healthy crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1 return $? } clear_master_score() { local node=$1 if [ -z "$node" ]; then $CRM_MASTER -D else $CRM_MASTER -D -N $node fi } set_master_score() { local node=$1 if [ -z "$node" ]; then $CRM_MASTER -v 100 else $CRM_MASTER -N $node -v 100 fi } promote_everyone() { for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do set_master_score $node done } greater_than_equal_long() { # there are values we need to compare in this script # that are too large for shell -gt to process echo | awk -v n1="$1" -v n2="$2" '{if (n1>=n2) printf ("true"); else printf ("false");}' | grep -q "true" } detect_first_master() { local best_commit=0 local best_node="$NODENAME" local last_commit=0 local missing_nodes=0 for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do last_commit=$(get_last_commit $node) if [ -z "$last_commit" ]; then ocf_log info "Waiting on node <${node}> to report database status before Master instances can start." missing_nodes=1 continue fi # this means -1, or that no commit has occured yet. if [ "$last_commit" = "18446744073709551615" ]; then last_commit="0" fi greater_than_equal_long "$last_commit" "$best_commit" if [ $? -eq 0 ]; then best_node=$node best_commit=$last_commit fi done if [ $missing_nodes -eq 1 ]; then return fi ocf_log info "Promoting $best_node to be our bootstrap node" set_master_score $best_node set_bootstrap_node $best_node } # For galera, promote is really start galera_promote() { local rc local extra_opts local bootstrap master_exists if [ $? -eq 0 ]; then # join without bootstrapping extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}" else bootstrap=$(is_bootstrap) if ocf_is_true $bootstrap; then ocf_log info "Node <${NODENAME}> is bootstrapping the cluster" extra_opts="--wsrep-cluster-address=gcomm://" else - ocf_log err "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected." + ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected." return $OCF_ERR_GENERIC fi fi # make sure the read only instance is stopped mysql_common_stop rc=$? if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then - ocf_log err "Failed to stop read-only galera instance during promotion to Master" + ocf_exit_reason "Failed to stop read-only galera instance during promotion to Master" return $rc fi sleep 4 mysql_common_prepare_dirs mysql_common_start "$extra_opts" rc=$? if [ $rc != $OCF_SUCCESS ]; then return $rc fi galera_monitor rc=$? if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then - ocf_log err "Failed initial monitor action" + ocf_exit_reason "Failed initial monitor action" return $rc fi is_readonly if [ $? -eq 0 ]; then - ocf_log err "Failure. Master instance started in read-only mode, check configuration." + ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration." return $OCF_ERR_GENERIC fi is_primary if [ $? -ne 0 ]; then - ocf_log err "Failure. Master instance started, but is not in Primary mode." + ocf_exit_reason "Failure. Master instance started, but is not in Primary mode." return $OCF_ERR_GENERIC fi if ocf_is_true $bootstrap; then promote_everyone clear_bootstrap_node ocf_log info "Bootstrap complete, promoting the rest of the galera instances." else # if this is not the bootstrap node, make sure this instance # syncs with the rest of the cluster before promotion returns. wait_for_sync fi # last commit is no longer relevant once promoted clear_last_commit ocf_log info "Galera started" return $OCF_SUCCESS } galera_demote() { mysql_common_stop rc=$? if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then - ocf_log err "Failed to stop Master galera instance during demotion to Master" + ocf_exit_reason "Failed to stop Master galera instance during demotion to Master" return $rc fi # if this node was previously a bootstrap node, that is no longer the case. clear_bootstrap_node # start again in slave mode so the new last commit is recorded galera_start } galera_start() { local extra_opts='--read-only=true' local last_commit echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME if [ $? -ne 0 ]; then - ocf_log err "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>to start this galera instance" + ocf_exit_reason "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>to start this galera instance" return $OCF_ERR_CONFIGURED fi mysql_common_prepare_dirs mysql_common_start "$extra_opts" is_readonly if [ $? -ne 0 ]; then - ocf_log err "Failure. Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set." + ocf_exit_reason "Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set." return $OCF_ERR_GENERIC fi ocf_log info "attempting to detect last commit version" while [ -z "$last_commit" ]; do last_commit=$(get_status_variable "wsrep_last_committed") if [ -z "$last_commit" ]; then sleep 1 fi done ocf_log info "Last commit version found: $last_commit" set_last_commit $last_commit master_exists if [ $? -eq 0 ]; then ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster." set_master_score $NODENAME else clear_master_score detect_first_master fi return $OCF_SUCCESS } galera_monitor() { local rc local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi mysql_common_status $status_loglevel rc=$? # If status returned an error, return that immediately if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME if [ $? -ne 0 ]; then - ocf_log err "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>" + ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>" return $OCF_ERR_GENERIC fi is_readonly if [ $? -ne 0 ]; then is_primary if [ $? -ne 0 ]; then - ocf_log err "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state." + ocf_exit_reason "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state." return $OCF_ERR_GENERIC fi if ocf_is_probe; then # restore master score during probe # if we detect this is a master instance set_master_score fi rc=$OCF_RUNNING_MASTER else master_exists if [ $? -ne 0 ]; then detect_first_master else # a master instance exists and is healthy, promote this # local read only instance # so it can join the master galera cluster. set_master_score fi fi # TODO look at what is done in the wait script return $rc } galera_stop() { local rc # make sure the process is stopped mysql_common_stop rc=$1 clear_last_commit clear_master_score clear_bootstrap_node return $rc } galera_validate() { if ! ocf_is_ms; then - ocf_log err "Galera must be configured as a multistate Master/Slave resource." + ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource." return $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then - ocf_log err "Galera must be configured with a wsrep_cluster_address value." + ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value." return $OCF_ERR_CONFIGURED fi mysql_common_validate } case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac galera_validate rc=$? LSB_STATUS_STOPPED=3 if [ $rc -ne 0 ]; then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi if [ -z "${OCF_RESKEY_check_passwd}" ]; then # This value is automatically sourced from /etc/sysconfig/checkcluster if available OCF_RESKEY_check_passwd=${MYSQL_PASSWORD} fi if [ -z "${OCF_RESKEY_check_user}" ]; then # This value is automatically sourced from /etc/sysconfig/checkcluster if available OCF_RESKEY_check_user=${MYSQL_USERNAME} fi : ${OCF_RESKEY_check_user="root"} MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}" if [ -n "${OCF_RESKEY_check_passwd}" ]; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${MYSQL_PASSWORD}" fi # What kind of method was invoked? case "$1" in start) galera_start;; stop) galera_stop;; status) mysql_common_status err;; monitor) galera_monitor;; promote) galera_promote;; demote) galera_demote;; validate-all) exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac # vi:sw=4:ts=4:et: diff --git a/heartbeat/mysql b/heartbeat/mysql index e3bdee6a0..dc862f5ac 100755 --- a/heartbeat/mysql +++ b/heartbeat/mysql @@ -1,1031 +1,1031 @@ #!/bin/sh # # # MySQL # # Description: Manages a MySQL database as Linux-HA resource # # Authors: Alan Robertson: DB2 Script # Jakub Janczak: rewrite as MySQL # Andrew Beekhof: cleanup and import # Sebastian Reitenbach: add OpenBSD defaults, more cleanup # Narayan Newton: add Gentoo/Debian defaults # Marian Marinov, Florian Haas: add replication capability # Yves Trudeau, Baron Schwartz: add VIP support and improve replication # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # (c) 2002-2005 International Business Machines, Inc. # 2005-2010 Linux-HA contributors # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 mysql # # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_config # OCF_RESKEY_datadir # OCF_RESKEY_user # OCF_RESKEY_group # OCF_RESKEY_test_table # OCF_RESKEY_test_user # OCF_RESKEY_test_passwd # OCF_RESKEY_enable_creation # OCF_RESKEY_additional_parameters # OCF_RESKEY_log # OCF_RESKEY_pid # OCF_RESKEY_socket # OCF_RESKEY_replication_user # OCF_RESKEY_replication_passwd # OCF_RESKEY_replication_port # OCF_RESKEY_max_slave_lag # OCF_RESKEY_evict_outdated_slaves # OCF_RESKEY_reader_attribute ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/mysql-common.sh ####################################################################### usage() { cat < 1.0 Resource script for MySQL. May manage a standalone MySQL database, a clone set with externally managed replication, or a complete master/slave replication setup. While managing replication, the default behavior is to use uname -n values in the change master to command. Other IPs can be specified manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP giving the IP to use for replication. For example, if the mysql primitive you are using is p_mysql, the attribute to set will be p_mysql_mysql_master_IP. Manages a MySQL database instance Location of the MySQL server binary MySQL server binary Location of the MySQL client binary MySQL client binary Configuration file MySQL config Directory containing databases MySQL datadir User running MySQL daemon MySQL user Group running MySQL daemon (for logfile and directory permissions) MySQL group The logfile to be used for mysqld. MySQL log file The pidfile to be used for mysqld. MySQL pid file The socket to be used for mysqld. MySQL socket Table to be tested in monitor statement (in database.table notation) MySQL test table MySQL test user, must have select privilege on test_table MySQL test user MySQL test user password MySQL test user password If the MySQL database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld MySQL replication user. This user is used for starting and stopping MySQL replication, for setting and resetting the master host, and for setting and unsetting read-only mode. Because of that, this user must have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, and PROCESS privileges on all nodes within the cluster. Mandatory if you define a master-slave resource. MySQL replication user MySQL replication password. Used for replication client and slave. Mandatory if you define a master-slave resource. MySQL replication user password The port on which the Master MySQL instance is listening. MySQL replication port The maximum number of seconds a replication slave is allowed to lag behind its master. Do not set this to zero. What the cluster manager does in case a slave exceeds this maximum lag is determined by the evict_outdated_slaves parameter. Maximum time (seconds) a MySQL slave is allowed to lag behind a master If set to true, any slave which is more than max_slave_lag seconds behind the master has its MySQL instance shut down. If this parameter is set to false in a primitive or clone resource, it is simply ignored. If set to false in a master/slave resource, then exceeding the maximum slave lag will merely push down the master preference so the lagging slave is never promoted to the new master. Determines whether to shut down badly lagging slaves An attribute that the RA can manage to specify whether a node can be read from. This node attribute will be 1 if it's fine to read from the node, and 0 otherwise (for example, when a slave has lagged too far behind the master). A typical example for the use of this attribute would be to tie a set of IP addresses to MySQL slaves that can be read from. This parameter is only meaningful in master/slave set configurations. Sets the node attribute that determines whether a node is usable for clients to read from. END } # Convenience functions set_read_only() { # Sets or unsets read-only mode. Accepts one boolean as its # optional argument. If invoked without any arguments, defaults to # enabling read only mode. Should only be set in master/slave # setups. # Returns $OCF_SUCCESS if the operation succeeds, or # $OCF_ERR_GENERIC if it fails. local ro_val if ocf_is_true $1; then ro_val="on" else ro_val="off" fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "SET GLOBAL read_only=${ro_val}" } get_read_only() { # Check if read-only is set local read_only_state read_only_state=`$MYSQL $MYSQL_OPTIONS_REPL \ -e "SHOW VARIABLES" | grep read_only | awk '{print $2}'` if [ "$read_only_state" = "ON" ]; then return 0 else return 1 fi } is_slave() { # Determine whether the machine is currently running as a MySQL # slave, as determined per SHOW SLAVE STATUS. Returns 1 if SHOW # SLAVE STATUS creates an empty result set, 0 otherwise. local rc local tmpfile # Check whether this machine should be slave if ! ocf_is_ms || ! get_read_only; then return 1 fi get_slave_info rc=$? if [ $rc -eq 0 ]; then # show slave status is not empty # Is there a master_log_file defined? (master_log_file is deleted # by reset slave if [ "$master_log_file" ]; then return 0 else return 1 fi else # "SHOW SLAVE STATUS" returns an empty set if instance is not a # replication slave return 1 fi } parse_slave_info() { # Extracts field $1 from result of "SHOW SLAVE STATUS\G" from file $2 sed -ne "s/^.* $1: \(.*\)$/\1/p" < $2 } get_slave_info() { # Warning: this sets $tmpfile and LEAVE this file! You must delete it after use! local mysql_options if [ "$master_log_file" -a "$master_host" ]; then # variables are already defined, get_slave_info has been run before return $OCF_SUCCESS else tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX` $MYSQL $MYSQL_OPTIONS_REPL \ -e 'SHOW SLAVE STATUS\G' > $tmpfile if [ -s $tmpfile ]; then master_host=`parse_slave_info Master_Host $tmpfile` master_user=`parse_slave_info Master_User $tmpfile` master_port=`parse_slave_info Master_Port $tmpfile` master_log_file=`parse_slave_info Master_Log_File $tmpfile` master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile` slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile` slave_io=`parse_slave_info Slave_IO_Running $tmpfile` last_errno=`parse_slave_info Last_Errno $tmpfile` secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile` ocf_log debug "MySQL instance running as a replication slave" else # Instance produced an empty "SHOW SLAVE STATUS" output -- # instance is not a slave - ocf_log err "check_slave invoked on an instance that is not a replication slave." + ocf_exit_reason "check_slave invoked on an instance that is not a replication slave." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS fi } check_slave() { # Checks slave status local rc new_master get_slave_info rc=$? if [ $rc -eq 0 ]; then # Did we receive an error other than max_connections? if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then # Whoa. Replication ran into an error. This slave has # diverged from its master. Make sure this resource # doesn't restart in place. - ocf_log err "MySQL instance configured for replication, but replication has failed." + ocf_exit_reason "MySQL instance configured for replication, but replication has failed." ocf_log err "See $tmpfile for details" # Just pull the reader VIP away, killing MySQL here would be pretty evil # on a loaded server set_reader_attr 0 exit $OCF_SUCCESS fi # If we got max_connections, let's remove the vip if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then set_reader_attr 0 exit $OCF_SUCCESS fi if [ "$slave_io" != 'Yes' ]; then # Not necessarily a bad thing. The master may have # temporarily shut down, and the slave may just be # reconnecting. A warning can't hurt, though. ocf_log warn "MySQL Slave IO threads currently not running." # Sanity check, are we at least on the right master new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1` if [ "$master_host" != "$new_master" ]; then # Not pointing to the right master, not good, removing the VIPs set_reader_attr 0 exit $OCF_SUCCESS fi fi if [ "$slave_sql" != 'Yes' ]; then # We don't have a replication SQL thread running. Not a # good thing. Try to recoved by restarting the SQL thread # and remove reader vip. Prevent MySQL restart. - ocf_log err "MySQL Slave SQL threads currently not running." + ocf_exit_reason "MySQL Slave SQL threads currently not running." ocf_log err "See $tmpfile for details" # Remove reader vip set_reader_attr 0 # try to restart slave ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "START SLAVE" # Return success to prevent a restart exit $OCF_SUCCESS fi if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then # We're supposed to bail out if we lag too far # behind. Let's check our lag. if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then - ocf_log err "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)." + ocf_exit_reason "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)." ocf_log err "See $tmpfile for details" # Remove reader vip set_reader_attr 0 exit $OCF_ERR_INSTALLED fi elif ocf_is_ms; then # Even if we're not set to evict lagging slaves, we can # still use the seconds behind master value to set our # master preference. local master_pref master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind})) if [ $master_pref -lt 0 ]; then # Sanitize a below-zero preference to just zero master_pref=0 fi $CRM_MASTER -v $master_pref fi # is the slave ok to have a VIP on it if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then set_reader_attr 0 else set_reader_attr 1 fi ocf_log debug "MySQL instance running as a replication slave" rm -f $tmpfile else # Instance produced an empty "SHOW SLAVE STATUS" output -- # instance is not a slave # TODO: Needs to handle when get_slave_info will return too many connections error rm -f $tmpfile - ocf_log err "check_slave invoked on an instance that is not a replication slave." + ocf_exit_reason "check_slave invoked on an instance that is not a replication slave." exit $OCF_ERR_GENERIC fi } set_master() { local new_master master_log_file master_log_pos local master_params new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1` # Keep replication position get_slave_info if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then # master_params=", MASTER_LOG_FILE='$master_log_file', \ # MASTER_LOG_POS=$master_log_pos" ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos" rm -f $tmpfile return else master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2` master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3` if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then master_params=", MASTER_LOG_FILE='$master_log_file', \ MASTER_LOG_POS=$master_log_pos" ocf_log info "Restored master pos for $new_master : $master_log_file:$master_log_pos" fi fi # Informs the MySQL server of the master to replicate # from. Accepts one mandatory argument which must contain the host # name of the new master host. The master must either be unchanged # from the laste master the slave replicated from, or freshly # reset with RESET MASTER. ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "CHANGE MASTER TO MASTER_HOST='$new_master', \ MASTER_USER='$OCF_RESKEY_replication_user', \ MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params" rm -f $tmpfile } unset_master(){ # Instructs the MySQL server to stop replicating from a master # host. # If we're currently not configured to be replicating from any # host, then there's nothing to do. But we do log a warning as # no-one but the CRM should be touching the MySQL master/slave # configuration. if ! is_slave; then ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave" return $OCF_SUCCESS fi local tmpfile tmpfile=`mktemp ${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX` # At this point, the master is read only so there should not be much binlogs to transfer # Let's wait for the last bits while true; do $MYSQL $MYSQL_OPTIONS_REPL \ -e 'SHOW PROCESSLIST\G' > $tmpfile if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then ocf_log info "MySQL slave has finished reading master binary log" break fi if grep -i 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then ocf_log info "Master is down, no more binary logs to come" break fi if grep -i 'Connecting to master' $tmpfile >/dev/null; then ocf_log info "Master is down, no more binary logs to come" break fi if ! grep 'system user' $tmpfile >/dev/null; then ocf_log info "Slave is not running - not waiting to finish" break fi sleep 1 done # Now, stop the slave I/O thread and wait for relay log # processing to complete ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE IO_THREAD" if [ $? -gt 0 ]; then - ocf_log err "Error stopping slave IO thread" + ocf_exit_reason "Error stopping slave IO thread" exit $OCF_ERR_GENERIC fi while true; do $MYSQL $MYSQL_OPTIONS_REPL \ -e 'SHOW PROCESSLIST\G' > $tmpfile if grep -i 'Has read all relay log' $tmpfile >/dev/null; then ocf_log info "MySQL slave has finished processing relay log" break fi if ! grep -q 'system user' $tmpfile; then ocf_log info "Slave not runnig - not waiting to finish" break fi ocf_log info "Waiting for MySQL slave to finish processing relay log" sleep 1 done rm -f $tmpfile # Now, stop all slave activity and unset the master host ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE" if [ $? -gt 0 ]; then - ocf_log err "Error stopping rest slave threads" + ocf_exit_reason "Error stopping rest slave threads" exit $OCF_ERR_GENERIC fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "RESET SLAVE /*!50516 ALL */;" if [ $? -gt 0 ]; then - ocf_log err "Failed to reset slave" + ocf_exit_reason "Failed to reset slave" exit $OCF_ERR_GENERIC fi } # Start replication as slave start_slave() { ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "START SLAVE" } # Set the attribute controlling the readers VIP set_reader_attr() { local curr_attr_value curr_attr_value=$(get_reader_attr) if [ "$curr_attr_value" -ne "$1" ]; then $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $1 fi } # get the attribute controlling the readers VIP get_reader_attr() { local attr_value local rc attr_value=`$CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q` rc=$? if [ "$rc" -eq "0" ]; then echo $attr_value else echo -1 fi } # Stores data for MASTER STATUS from MySQL update_data_master_status() { master_status_file="${HA_RSCTMP}/master_status.${OCF_RESOURCE_INSTANCE}" $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW MASTER STATUS\G" > $master_status_file } # Returns the specified value from the stored copy of SHOW MASTER STATUS. # should be call after update_data_master_status for tmpfile # Arguments: # $1 The value to get. get_master_status() { awk -v var="$1" '$1 == var ":" {print substr($0, index($0, ":") + 2)}' "$master_status_file" } # Determines what IP address is attached to the current host. The output of the # crm_attribute command looks like this: # scope=nodes name=IP value=10.2.2.161 # If the ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP node attribute is not defined, fallback is to uname -n # The ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP is the IP address that will be used for the # change master to command. get_local_ip() { local IP IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G` if [ ! $? -eq 0 ]; then uname -n else echo $IP fi } ####################################################################### # Functions invoked by resource manager actions mysql_monitor() { local rc local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi mysql_common_status $status_loglevel rc=$? # TODO: check max connections error # If status returned an error, return that immediately if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then # Check if this instance is configured as a slave, and if so # check slave status if is_slave; then check_slave fi # Check for test table ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \ -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table" rc=$? if [ $rc -ne 0 ]; then - ocf_log err "Failed to select from $test_table"; + ocf_exit_reason "Failed to select from $test_table"; return $OCF_ERR_GENERIC; fi fi if ocf_is_ms && ! get_read_only; then ocf_log debug "MySQL monitor succeeded (master)"; return $OCF_RUNNING_MASTER else ocf_log debug "MySQL monitor succeeded"; return $OCF_SUCCESS fi } mysql_start() { local rc if ocf_is_ms; then # Initialize the ReaderVIP attribute, monitor will enable it set_reader_attr 0 fi mysql_common_status info if [ $? = $OCF_SUCCESS ]; then ocf_log info "MySQL already running" return $OCF_SUCCESS fi mysql_common_prepare_dirs # Uncomment to perform permission clensing # - not convinced this should be enabled by default # #chmod 0755 $OCF_RESKEY_datadir #chown -R $OCF_RESKEY_user $OCF_RESKEY_datadir #chgrp -R $OCF_RESKEY_group $OCF_RESKEY_datadir mysql_extra_params= if ocf_is_ms; then mysql_extra_params="--skip-slave-start" fi mysql_common_start $mysql_extra_params rc=$? if [ $rc != $OCF_SUCCESS ]; then return $rc fi if ocf_is_ms; then # We're configured as a stateful resource. We must start as # slave by default. At this point we don't know if the CRM has # already promoted a master. So, we simply start in read only # mode. set_read_only on # Now, let's see whether there is a master. We might be a new # node that is just joining the cluster, and the CRM may have # promoted a master before. master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "` if [ "$master_host" -a "$master_host" != ${NODENAME} ]; then ocf_log info "Changing MySQL configuration to replicate from $master_host." set_master start_slave if [ $? -ne 0 ]; then - ocf_log err "Failed to start slave" + ocf_exit_reason "Failed to start slave" return $OCF_ERR_GENERIC fi else ocf_log info "No MySQL master present - clearing replication state" unset_master fi # We also need to set a master preference, otherwise Pacemaker # won't ever promote us in the absence of any explicit # preference set by the administrator. We choose a low # greater-than-zero preference. $CRM_MASTER -v 1 fi # Initial monitor action if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then OCF_CHECK_LEVEL=10 fi mysql_monitor rc=$? if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then - ocf_log err "Failed initial monitor action" + ocf_exit_reason "Failed initial monitor action" return $rc fi ocf_log info "MySQL started" return $OCF_SUCCESS } mysql_stop() { if ocf_is_ms; then # clear preference for becoming master $CRM_MASTER -D # Remove VIP capability set_reader_attr 0 fi mysql_common_stop } mysql_promote() { local master_info if ( ! mysql_common_status err ); then return $OCF_NOT_RUNNING fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE" # Set Master Info in CIB, cluster level attribute update_data_master_status master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)" ${CRM_ATTR_REPL_INFO} -v "$master_info" rm -f $tmpfile set_read_only off || return $OCF_ERR_GENERIC # Existing master gets a higher-than-default master preference, so # the cluster manager does not shuffle the master role around # unnecessarily $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1)) # A master can accept reads set_reader_attr 1 return $OCF_SUCCESS } mysql_demote() { if ! mysql_common_status err; then return $OCF_NOT_RUNNING fi # Return master preference to default, so the cluster manager gets # a chance to select a new master $CRM_MASTER -v 1 } mysql_notify() { # If not configured as a Stateful resource, we make no sense of # notifications. if ! ocf_is_ms; then ocf_log info "This agent makes no use of notifications unless running in master/slave mode." return $OCF_SUCCESS fi local type_op type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" ocf_log debug "Received $type_op notification." case "$type_op" in 'pre-promote') # Nothing to do now here, new replication info not yet published ;; 'post-promote') # The master has completed its promotion. Now is a good # time to check whether our replication slave is working # correctly. master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "` if [ "$master_host" = ${NODENAME} ]; then ocf_log info "This will be the new master, ignoring post-promote notification." else ocf_log info "Resetting replication" unset_master if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi ocf_log info "Changing MySQL configuration to replicate from $master_host" set_master if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi start_slave if [ $? -ne 0 ]; then - ocf_log err "Failed to start slave" + ocf_exit_reason "Failed to start slave" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS ;; 'pre-demote') demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "` if [ $demote_host = ${NODENAME} ]; then ocf_log info "post-demote notification for $demote_host" set_read_only on if [ $? -ne 0 ]; then - ocf_log err "Failed to set read-only"; + ocf_exit_reason "Failed to set read-only"; return $OCF_ERR_GENERIC; fi # Must kill all existing user threads because they are still Read/write # in order for the slaves to complete the read of binlogs local tmpfile tmpfile=`mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX` $MYSQL $MYSQL_OPTIONS_REPL \ -e "SHOW PROCESSLIST" > $tmpfile for thread in `awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' $tmpfile` do ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "KILL ${thread}" done else ocf_log info "Ignoring post-demote notification execpt for my own demotion." fi return $OCF_SUCCESS ;; 'post-demote') demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "` if [ $demote_host = ${NODENAME} ]; then ocf_log info "Ignoring post-demote notification for my own demotion." return $OCF_SUCCESS fi ocf_log info "post-demote notification for $demote_host." # The former master has just been gracefully demoted. unset_master ;; *) return $OCF_SUCCESS ;; esac } ####################################################################### ########################################################################## # If DEBUG_LOG is set, make this resource agent easy to debug: set up the # debug log and direct all output to it. Otherwise, redirect to /dev/null. # The log directory must be a directory owned by root, with permissions 0700, # and the log must be writable and not a symlink. ########################################################################## DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log" if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then DEBUG_LOG_DIR="${DEBUG_LOG%/*}" if [ -d "${DEBUG_LOG_DIR}" ]; then exec 9>>"$DEBUG_LOG" exec 2>&9 date >&9 echo "$*" >&9 env | grep OCF_ | sort >&9 set -x else exec 9>/dev/null fi fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac mysql_common_validate rc=$? LSB_STATUS_STOPPED=3 if [ $rc -ne 0 ]; then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi # What kind of method was invoked? case "$1" in start) mysql_start;; stop) mysql_stop;; status) mysql_common_status err;; monitor) mysql_monitor;; promote) mysql_promote;; demote) mysql_demote;; notify) mysql_notify;; validate-all) exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac # vi:sw=4:ts=4:et: diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh index 5b6a99158..a02f8cde1 100755 --- a/heartbeat/mysql-common.sh +++ b/heartbeat/mysql-common.sh @@ -1,279 +1,279 @@ #!/bin/sh ####################################################################### # Attempt to detect a default binary OCF_RESKEY_binary_default=$(which mysqld_safe 2> /dev/null) if [ "$OCF_RESKEY_binary_default" = "" ]; then OCF_RESKEY_binary_default=$(which safe_mysqld 2> /dev/null) fi # Fill in some defaults if no values are specified HOSTOS=`uname` if [ "X${HOSTOS}" = "XOpenBSD" ];then if [ "$OCF_RESKEY_binary_default" = "" ]; then OCF_RESKEY_binary_default="/usr/local/bin/mysqld_safe" fi OCF_RESKEY_config_default="/etc/my.cnf" OCF_RESKEY_datadir_default="/var/mysql" OCF_RESKEY_user_default="_mysql" OCF_RESKEY_group_default="_mysql" OCF_RESKEY_log_default="/var/log/mysqld.log" OCF_RESKEY_pid_default="/var/mysql/mysqld.pid" OCF_RESKEY_socket_default="/var/run/mysql/mysql.sock" else if [ "$OCF_RESKEY_binary_default" = "" ]; then OCF_RESKEY_binary_default="/usr/bin/safe_mysqld" fi OCF_RESKEY_config_default="/etc/my.cnf" OCF_RESKEY_datadir_default="/var/lib/mysql" OCF_RESKEY_user_default="mysql" OCF_RESKEY_group_default="mysql" OCF_RESKEY_log_default="/var/log/mysqld.log" OCF_RESKEY_pid_default="/var/run/mysql/mysqld.pid" OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock" fi OCF_RESKEY_client_binary_default="mysql" OCF_RESKEY_test_user_default="root" OCF_RESKEY_test_table_default="mysql.user" OCF_RESKEY_test_passwd_default="" OCF_RESKEY_enable_creation_default=0 OCF_RESKEY_additional_parameters_default="" OCF_RESKEY_replication_port_default="3306" OCF_RESKEY_max_slave_lag_default="3600" OCF_RESKEY_evict_outdated_slaves_default="false" OCF_RESKEY_reader_attribute_default="readable" : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}` : ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}} : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}} : ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}} : ${OCF_RESKEY_test_table=${OCF_RESKEY_test_table_default}} : ${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}} : ${OCF_RESKEY_enable_creation=${OCF_RESKEY_enable_creation_default}} : ${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}} : ${OCF_RESKEY_replication_user=${OCF_RESKEY_replication_user_default}} : ${OCF_RESKEY_replication_passwd=${OCF_RESKEY_replication_passwd_default}} : ${OCF_RESKEY_replication_port=${OCF_RESKEY_replication_port_default}} : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}} : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}} : ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}} ####################################################################### # Convenience variables MYSQL=$OCF_RESKEY_client_binary MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10" MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd" MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd" MYSQL_TOO_MANY_CONN_ERR=1040 CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot " NODENAME=$(ocf_local_nodename) CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $NODENAME " INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'` CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication" ####################################################################### mysql_common_validate() { check_binary $OCF_RESKEY_binary check_binary $OCF_RESKEY_client_binary if [ ! -f $OCF_RESKEY_config ]; then - ocf_log err "Config $OCF_RESKEY_config doesn't exist"; + ocf_exit_reason "Config $OCF_RESKEY_config doesn't exist"; return $OCF_ERR_INSTALLED; fi if [ ! -d $OCF_RESKEY_datadir ]; then - ocf_log err "Datadir $OCF_RESKEY_datadir doesn't exist"; + ocf_exit_reason "Datadir $OCF_RESKEY_datadir doesn't exist"; return $OCF_ERR_INSTALLED; fi getent passwd $OCF_RESKEY_user >/dev/null 2>&1 if [ ! $? -eq 0 ]; then - ocf_log err "User $OCF_RESKEY_user doesn't exit"; + ocf_exit_reason "User $OCF_RESKEY_user doesn't exit"; return $OCF_ERR_INSTALLED; fi getent group $OCF_RESKEY_group >/dev/null 2>&1 if [ ! $? -eq 0 ]; then - ocf_log err "Group $OCF_RESKEY_group doesn't exist"; + ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist"; return $OCF_ERR_INSTALLED; fi return $OCF_SUCCESS } mysql_common_status() { local loglevel=$1 local pid=$2 if [ -z "$pid" ]; then if [ ! -e $OCF_RESKEY_pid ]; then ocf_log $loglevel "MySQL is not running" return $OCF_NOT_RUNNING; fi pid=`cat $OCF_RESKEY_pid`; fi if [ -d /proc -a -d /proc/1 ]; then [ "u$pid" != "u" -a -d /proc/$pid ] else kill -s 0 $pid >/dev/null 2>&1 fi if [ $? -eq 0 ]; then return $OCF_SUCCESS; else ocf_log $loglevel "MySQL not running: removing old PID file" rm -f $OCF_RESKEY_pid return $OCF_NOT_RUNNING; fi } mysql_common_prepare_dirs() { local rc touch $OCF_RESKEY_log chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log chmod 0640 $OCF_RESKEY_log [ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log if ocf_is_true "$OCF_RESKEY_enable_creation" && [ ! -d $OCF_RESKEY_datadir/mysql ] ; then ocf_log info "Initializing MySQL database: " $MYSQL_BINDIR/mysql_install_db --datadir=$OCF_RESKEY_datadir rc=$? if [ $rc -ne 0 ] ; then - ocf_log err "Initialization failed: $rc"; + ocf_exit_reason "Initialization failed: $rc"; exit $OCF_ERR_GENERIC fi chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_datadir fi pid_dir=`dirname $OCF_RESKEY_pid` if [ ! -d $pid_dir ] ; then ocf_log info "Creating PID dir: $pid_dir" mkdir -p $pid_dir chown $OCF_RESKEY_user:$OCF_RESKEY_group $pid_dir fi socket_dir=`dirname $OCF_RESKEY_socket` if [ ! -d $socket_dir ] ; then ocf_log info "Creating socket dir: $socket_dir" mkdir -p $socket_dir chown $OCF_RESKEY_user:$OCF_RESKEY_group $socket_dir fi # Regardless of whether we just created the directory or it # already existed, check whether it is writable by the configured # user for dir in $pid_dir $socket_dir; do if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then - ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user" + ocf_exit_reason "Directory $dir is not writable by $OCF_RESKEY_user" exit $OCF_ERR_PERM; fi done } mysql_common_start() { local mysql_extra_params="$1" local pid ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \ --pid-file=$OCF_RESKEY_pid \ --socket=$OCF_RESKEY_socket \ --datadir=$OCF_RESKEY_datadir \ --log-error=$OCF_RESKEY_log \ --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \ $mysql_extra_params >/dev/null 2>&1 & pid=$! # Spin waiting for the server to come up. # Let the CRM/LRM time us out if required. start_wait=1 while [ $start_wait = 1 ]; do if ! ps $pid > /dev/null 2>&1; then wait $pid - ocf_log err "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation" + ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation" return $OCF_ERR_GENERIC fi mysql_common_status info rc=$? if [ $rc = $OCF_SUCCESS ]; then start_wait=0 elif [ $rc != $OCF_NOT_RUNNING ]; then ocf_log info "MySQL start failed: $rc" return $rc fi sleep 2 done return $OCF_SUCCESS } mysql_common_stop() { local pid local rc if [ ! -f $OCF_RESKEY_pid ]; then ocf_log info "MySQL is not running" return $OCF_SUCCESS fi pid=`cat $OCF_RESKEY_pid 2> /dev/null ` /bin/kill $pid > /dev/null rc=$? if [ $rc != 0 ]; then - ocf_log err "MySQL couldn't be stopped" + ocf_exit_reason "MySQL couldn't be stopped" return $OCF_ERR_GENERIC fi # stop waiting shutdown_timeout=15 if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) fi count=0 while [ $count -lt $shutdown_timeout ] do mysql_common_status info $pid rc=$? if [ $rc = $OCF_NOT_RUNNING ]; then break fi count=`expr $count + 1` sleep 1 ocf_log debug "MySQL still hasn't stopped yet. Waiting..." done mysql_common_status info $pid if [ $? != $OCF_NOT_RUNNING ]; then ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..." /bin/kill -KILL $pid > /dev/null fi ocf_log info "MySQL stopped"; rm -f /var/lock/subsys/mysqld rm -f $OCF_RESKEY_socket return $OCF_SUCCESS } diff --git a/heartbeat/named b/heartbeat/named index ede22df1a..2c34a15c2 100755 --- a/heartbeat/named +++ b/heartbeat/named @@ -1,489 +1,489 @@ #!/bin/sh # # Description: Manages a named (Bind) server as an OCF High-Availability # resource # # Authors: Serge Dubrouski (sergeyfd@gmail.com) # # Copyright: 2011 Serge Dubrouski # # License: GNU General Public License (GPL) # ############################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs #Defaults OCF_RESKEY_named_default="/usr/sbin/named" OCF_RESKEY_rndc_default="/usr/sbin/rndc" OCF_RESKEY_host_default="/usr/bin/host" OCF_RESKEY_named_user_default=named OCF_RESKEY_named_config_default="" OCF_RESKEY_named_pidfile_default="/var/run/named/named.pid" OCF_RESKEY_named_rootdir_default="" OCF_RESKEY_named_options_default="" OCF_RESKEY_named_keytab_file_default="" OCF_RESKEY_monitor_request_default="localhost" OCF_RESKEY_monitor_response_default="127.0.0.1" OCF_RESKEY_monitor_ip_default="127.0.0.1" : ${OCF_RESKEY_named=${OCF_RESKEY_named_default}} : ${OCF_RESKEY_rndc=${OCF_RESKEY_rndc_default}} : ${OCF_RESKEY_host=${OCF_RESKEY_host_default}} : ${OCF_RESKEY_named_user=${OCF_RESKEY_named_user_default}} : ${OCF_RESKEY_named_config=${OCF_RESKEY_named_config_default}} : ${OCF_RESKEY_named_pidfile=${OCF_RESKEY_named_pidfile_default}} : ${OCF_RESKEY_named_rootdir=${OCF_RESKEY_named_rootdir_default}} : ${OCF_RESKEY_named_options=${OCF_RESKEY_named_options_default}} : ${OCF_RESKEY_named_keytab_file=${OCF_RESKEY_named_keytab_file_default}} : ${OCF_RESKEY_monitor_request=${OCF_RESKEY_monitor_request_default}} : ${OCF_RESKEY_monitor_response=${OCF_RESKEY_monitor_response_default}} : ${OCF_RESKEY_monitor_ip=${OCF_RESKEY_monitor_ip_default}} usage() { cat < 1.0 Resource script for named (Bind) server. It manages named as an HA resource. Manages a named server Path to the named command. named Path to the rndc command. rndc Path to the host command. host User that should own named process. named_user Configuration file for named. named_config PIDFILE file for named. named_pidfile Directory that named should use for chroot if any. named_rootdir Options for named process if any. named_options named service keytab file (for GSS-TSIG). named_keytab_file Request that shall be sent to named for monitoring. Usually an A record in DNS. monitor_request Expected response from named server. monitor_response IP Address where named listens. monitor_ip EOF } # # methods: What methods/operations do we support? # named_methods() { cat </dev/null 2>&1 if [ ! $? -eq 0 ]; then - ocf_log err "User $OCF_RESKEY_named_user doesn't exist"; + ocf_exit_reason "User $OCF_RESKEY_named_user doesn't exist"; return $OCF_ERR_INSTALLED; fi if [ -z "$OCF_RESKEY_monitor_request" -o \ -z "$OCF_RESKEY_monitor_response" -o \ -z "$OCF_RESKEY_monitor_ip" ]; then - ocf_log err "None of monitor_request, monitor_response, and monitor_ip can be empty" + ocf_exit_reason "None of monitor_request, monitor_response, and monitor_ip can be empty" return $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } ## # Attempt to generate a /etc/rndc.key if one is not present ## rndc_key_generator() { local rndc_options="-a -r /dev/urandom -u $OCF_RESKEY_named_user" if [ -s /etc/rndc.key ]; then # file already exists return fi if ! have_binary "rndc-confgen"; then # can't autogen key... Report this, but not as a warning or error. # It is possible that the user configured the key in named.conf ocf_log info "rndc-confgen tool not present, unable to autogen /etc/rndc.key." return fi if [ -n "$OCF_RESKEY_rootdir" ]; then rndc_options="$rndc_options -t $OCF_RESKEY_rootdir" fi rndc-confgen $rndc_options > /dev/null 2>&1; if [ $? -eq 0 ]; then if have_binary "restorecon"; then restorecon /etc/rndc.key fi else ocf_log info "failed to auto-generate /etc/rndc.key file." fi } # # named_getpid. Get pid of named process with a given parameters. # named_getpid () { local pattern="$OCF_RESKEY_named" if [ -n "$OCF_RESKEY_named_rootdir" -a "x${OCF_RESKEY_named_rootdir}" != "x/" ]; then pattern="$pattern.*-t $OCF_RESKEY_named_rootdir" fi if [ -n "$OCF_RESKEY_named_config" ]; then pattern="$pattern.*-c $OCF_RESKEY_named_config" fi pid=`pgrep -f "$pattern"` echo $pid } # # named_status. Simple check of the status of named process by pidfile. # named_status () { ocf_pidfile_status ${OCF_RESKEY_named_pidfile} >/dev/null 2>&1 } # # named_monitor. Send a request to named and check response. # named_monitor() { local output if ! named_status then ocf_log info "named is down" return $OCF_NOT_RUNNING fi output=`$OCF_RESKEY_host $OCF_RESKEY_monitor_request $OCF_RESKEY_monitor_ip` if [ $? -ne 0 ] || ! echo $output | grep -q '.* has .*address '"$OCF_RESKEY_monitor_response" then - ocf_log err "named didn't answer properly for $OCF_RESKEY_monitor_request." + ocf_exit_reason "named didn't answer properly for $OCF_RESKEY_monitor_request." ocf_log err "Expected: $OCF_RESKEY_monitor_response." ocf_log err "Got: $output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # Reload # named_reload() { $OCF_RESKEY_rndc reload >/dev/null || return $OCF_ERR_GENERIC return $OCF_SUCCESS } # # Start # named_start() { local root_dir_opt local pid root_dir_opt="" named_status && return $OCF_SUCCESS # Remove pidfile if exists rm -f ${OCF_RESKEY_named_pidfile} if [ -n "${OCF_RESKEY_named_rootdir}" -a "x${OCF_RESKEY_named_rootdir}" != "x/" ] then root_dir_opt="-t ${OCF_RESKEY_named_rootdir}" [ -s /etc/localtime ] && cp -fp /etc/localtime ${OCF_RESKEY_named_rootdir}/etc/localtime fi if [ -n "$OCF_RESKEY_named_config" ]; then OCF_RESKEY_named_options="-c $OCF_RESKEY_named_config $OCF_RESKEY_named_options" fi rndc_key_generator if ! ${OCF_RESKEY_named} -u ${OCF_RESKEY_named_user} $root_dir_opt ${OCF_RESKEY_named_options} then - ocf_log err "named failed to start." + ocf_exit_reason "named failed to start." return $OCF_ERR_GENERIC fi pid=`named_getpid` if [ -n "$pid" ]; then if [ ! -e ${OCF_RESKEY_named_pidfile} ]; then echo $pid > ${OCF_RESKEY_named_pidfile} fi else - ocf_log err "named failed to start. Probably error in configuration." + ocf_exit_reason "named failed to start. Probably error in configuration." return $OCF_ERR_GENERIC fi while : do named_monitor && break sleep 1 ocf_log debug "named hasn't started yet." done ocf_log info "named has started." return $OCF_SUCCESS } # # Stop # named_stop () { local timeout local timewait named_status || return $OCF_SUCCESS $OCF_RESKEY_rndc stop >/dev/null if [ $? -ne 0 ]; then ocf_log info "rndc stop failed. Killing named." kill `cat ${OCF_RESKEY_named_pidfile}` fi if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timewait=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timewait=20 fi sleep 1; timeout=0 # Sleep here for 1 sec to let rndc finish. while named_status ; do if [ $timeout -ge $timewait ]; then break else sleep 1 timeout=`expr $timeout + 1` ocf_log debug "named appears to hung, waiting ..." fi done #If still up if named_status 2>&1; then - ocf_log err "named is still up! Killing" + ocf_exit_reason "named is still up! Killing" kill -9 `cat ${OCF_RESKEY_named_pidfile}` fi rm -f ${OCF_RESKEY_named_pidfile} return $OCF_SUCCESS } # Main part if [ $# -ne 1 ]; then usage exit $OCF_ERR_GENERIC fi case "$1" in methods) named_methods exit $?;; meta-data) named_meta_data exit $OCF_SUCCESS;; esac named_validate_all rc=$? [ "$1" = "validate-all" ] && exit $rc if [ $rc -ne 0 ] then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $OCF_NOT_RUNNING;; *) exit $rc;; esac fi if [ `id -u` -ne 0 ]; then - ocf_log err "$0 must be run as root" + ocf_exit_reason "$0 must be run as root" exit $OCF_ERR_GENERIC fi case "$1" in status) if named_status then ocf_log info "named is up" exit $OCF_SUCCESS else ocf_log info "named is down" exit $OCF_NOT_RUNNING fi;; monitor) named_monitor exit $?;; start) named_start exit $?;; stop) named_stop exit $?;; reload) named_reload exit $?;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/pgsql b/heartbeat/pgsql index 9421d8383..110eebfb0 100755 --- a/heartbeat/pgsql +++ b/heartbeat/pgsql @@ -1,1873 +1,1873 @@ #!/bin/sh # # Description: Manages a PostgreSQL Server as an OCF High-Availability # resource # # Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA # Florian Haas (florian@linbit.com) -- makeover # Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication # David Corlette (dcorlette@netiq.com) -- add support for non-standard library locations and non-standard port # # Copyright: 2006-2012 Serge Dubrouski # and other Linux-HA contributors # License: GNU General Public License (GPL) # ############################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # # Get PostgreSQL Configuration parameter # get_pgsql_param() { local param_name param_name=$1 perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) { \$dir=\$1; \$dir =~ s/\s*\#.*//; \$dir =~ s/^'(\S*)'/\$1/; print \$dir;}" perl -ne "$perl_code" < $OCF_RESKEY_config } # Defaults OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl OCF_RESKEY_psql_default=/usr/bin/psql OCF_RESKEY_pgdata_default=/var/lib/pgsql/data OCF_RESKEY_pgdba_default=postgres OCF_RESKEY_pghost_default="" OCF_RESKEY_pgport_default=5432 OCF_RESKEY_pglibs_default=/usr/lib OCF_RESKEY_start_opt_default="" OCF_RESKEY_pgdb_default=template1 OCF_RESKEY_logfile_default=/dev/null OCF_RESKEY_stop_escalate_default=30 OCF_RESKEY_monitor_user_default="" OCF_RESKEY_monitor_password_default="" OCF_RESKEY_monitor_sql_default="select now();" OCF_RESKEY_check_wal_receiver_default="false" # Defaults for replication OCF_RESKEY_rep_mode_default=none OCF_RESKEY_node_list_default="" OCF_RESKEY_restore_command_default="" OCF_RESKEY_archive_cleanup_command_default="" OCF_RESKEY_recovery_end_command_default="" OCF_RESKEY_master_ip_default="" OCF_RESKEY_repuser_default="postgres" OCF_RESKEY_primary_conninfo_opt_default="" OCF_RESKEY_restart_on_promote_default="false" OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp" OCF_RESKEY_xlog_check_count_default="3" OCF_RESKEY_crm_attr_timeout_default="5" OCF_RESKEY_stop_escalate_in_slave_default=30 : ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}} : ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}} : ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}} : ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}} : ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}} : ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}} : ${OCF_RESKEY_pglibs=${OCF_RESKEY_pglibs_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf} : ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}} : ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}} : ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}} : ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}} : ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}} : ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}} : ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}} : ${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}} # for replication : ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}} : ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}} : ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}} : ${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}} : ${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}} : ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}} : ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}} : ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}} : ${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}} : ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}} : ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}} : ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}} : ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}} usage() { cat < 1.0 Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource. Manages a PostgreSQL database instance Path to pg_ctl command. pgctl Start options (-o start_opt in pg_ctl). "-i -p 5432" for example. start_opt Additional pg_ctl options (-w, -W etc..). ctl_opt Path to psql command. psql Path to PostgreSQL data directory. pgdata User that owns PostgreSQL. pgdba Hostname/IP address where PostgreSQL is listening pghost Port where PostgreSQL is listening pgport Custom location of the Postgres libraries. If not set, the standard location will be used. pglibs PostgreSQL user that pgsql RA will user for monitor operations. If it's not set pgdba user will be used. monitor_user Password for monitor user. monitor_password SQL script that will be used for monitor operations. monitor_sql Path to the PostgreSQL configuration file for the instance. Configuration file Database that will be used for monitoring. pgdb Path to PostgreSQL server log output file. logfile Unix socket directory for PostgreSQL socketdir Number of shutdown retries (using -m fast) before resorting to -m immediate stop escalation Replication mode may be set to "async" or "sync" or "slave". They require PostgreSQL 9.1 or later. Once set, "async" and "sync" require node_list, master_ip, and restore_command parameters,as well as configuring PostgreSQL for replication (in postgresql.conf and pg_hba.conf). "slave" means that RA only makes recovery.conf before starting to connect to primary which is running somewhere. It dosen't need master/slave setting. It requires master_ip restore_command parameters. rep_mode All node names. Please separate each node name with a space. This is required for replication. node list restore_command for recovery.conf. This is required for replication. restore_command archive_cleanup_command for recovery.conf. This is used for replication and is optional. archive_cleanup_command recovery_end_command for recovery.conf. This is used for replication and is optional. recovery_end_command Master's floating IP address to be connected from hot standby. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. master ip User used to connect to the master server. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. repuser primary_conninfo options of recovery.conf except host, port, user and application_name. This is optional for replication. primary_conninfo_opt If this is true, RA deletes recovery.conf and restarts PostgreSQL on promote to keep Timeline ID. It probably makes fail-over slower. It's recommended to set on-fail of promote up as fence. This is optional for replication. restart_on_promote Path to temporary directory. This is optional for replication. tmpdir Number of checks of xlog on monitor before promote. This is optional for replication. xlog check count The timeout of crm_attribute forever update command. Default value is 5 seconds. This is optional for replication. The timeout of crm_attribute forever update command. Number of shutdown retries (using -m fast) before resorting to -m immediate in slave state. This is optional for replication. stop escalation_in_slave If this is true, RA checks wal_receiver process on monitor and notifies its status using "(resource name)-receiver-status" attribute. It's useful for checking whether PostgreSQL (hot standby) connects to primary. The attribute shows status as "normal" or "ERROR". check_wal_receiver EOF } # # Run the given command in the Resource owner environment... # runasowner() { local quietrun="" local loglevel="-err" local var for var in 1 2 do case "$1" in "-q") quietrun="-q" shift 1;; "warn"|"err") loglevel="-$1" shift 1;; *) ;; esac done ocf_run $quietrun $loglevel su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*" } # # Shell escape # escape_string() { echo "$*" | sed -e "s/'/'\\\\''/g" } # # methods: What methods/operations do we support? # pgsql_methods() { cat </dev/null 2>&1" return $? fi # No PID file false } pgsql_wal_receiver_status() { local PID local receiver_parent_pids PID=`head -n 1 $PIDFILE` receiver_parent_pids=`ps -ef | tr -s " " | grep "[w]al receiver process" | cut -d " " -f 3` if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" -q return 0 fi attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" -q ocf_log warn "wal receiver process is not running" return 1 } # # pgsql_real_monitor # pgsql_real_monitor() { local loglevel local rc local output # Set the log level of the error message loglevel=${1:-err} if ! pgsql_status then ocf_log info "PostgreSQL is down" return $OCF_NOT_RUNNING fi if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then pgsql_wal_receiver_status fi if is_replication; then #Check replication state output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ -Atc \"${CHECK_MS_SQL}\""` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status." return $OCF_ERR_GENERIC fi case "$output" in f) ocf_log debug "PostgreSQL is running as a primary." if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then return $OCF_RUNNING_MASTER fi ;; t) ocf_log debug "PostgreSQL is running as a hot standby." return $OCF_SUCCESS;; - *) ocf_log err "$CHECK_MS_SQL output is $output" + *) ocf_exit_reason "$CHECK_MS_SQL output is $output" return $OCF_ERR_GENERIC;; esac fi OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"` runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \ -c '$OCF_RESKEY_monitor_sql'" rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running." return $OCF_ERR_GENERIC fi if is_replication; then return $OCF_RUNNING_MASTER fi return $OCF_SUCCESS } pgsql_replication_monitor() { local rc rc=$1 if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then return $rc fi # If I am Master if [ $rc -eq $OCF_RUNNING_MASTER ]; then change_data_status "$NODENAME" "LATEST" change_pgsql_status "$NODENAME" "PRI" control_slave_status || return $OCF_ERR_GENERIC if [ "$RE_CONTROL_SLAVE" = "true" ]; then sleep 2 ocf_log info "re-controlling slave status." RE_CONTROL_SLAVE="none" control_slave_status || return $OCF_ERR_GENERIC fi return $rc fi # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor, # so I will get master node name using crm_mon -n print_crm_mon | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):]Master" if [ $? -ne 0 ] ; then # If I am Slave and Master is not exist ocf_log info "Master does not exist." change_pgsql_status "$NODENAME" "HS:alone" have_master_right if [ $? -eq 0 ]; then rm -f ${XLOG_NOTE_FILE}.* fi else output=`exec_with_retry 0 $CRM_ATTR_FOREVER -N "$NODENAME" \ -n "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$output" = "DISCONNECT" ]; then change_pgsql_status "$NODENAME" "HS:alone" fi fi return $rc } #pgsql_monitor: pgsql_real_monitor() wrapper for replication pgsql_monitor() { local rc pgsql_real_monitor rc=$? if ! is_replication; then return $rc else pgsql_replication_monitor $rc return $? fi } # pgsql_post_demote pgsql_post_demote() { DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE" if [ "$DEMOTE_NODE" != "$NODENAME" ]; then if ! echo $OCF_RESKEY_CRM_meta_notify_master_uname | tr '[A-Z]' '[a-z]' | grep $NODENAME; then show_master_baseline change_pgsql_status "$NODENAME" "HS:alone" fi fi return $OCF_SUCCESS } pgsql_pre_promote() { local master_baseline local my_master_baseline local cmp_location local number_of_nodes # If my data is newer than new master's one, I fail my resource. PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` number_of_nodes=`echo $NODE_LIST | wc -w` if [ $number_of_nodes -ge 3 -a \ "$OCF_RESKEY_rep_mode" = "sync" -a \ "$PROMOTE_NODE" != "$NODENAME" ]; then master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` if [ $? -eq 0 ]; then my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` # get older location cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\ sort | head -1` if [ "$cmp_location" != "$my_master_baseline" ]; then - ocf_log err "My data is newer than new master's one. New master's location : $master_baseline" + ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline" exec_with_retry 0 $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY return $OCF_ERR_GENERIC fi fi fi return $OCF_SUCCESS } pgsql_notify() { local type="${OCF_RESKEY_CRM_meta_notify_type}" local op="${OCF_RESKEY_CRM_meta_notify_operation}" local rc if ! is_replication; then return $OCF_SUCCESS fi ocf_log debug "notify: ${type} for ${op}" case $type in pre) case $op in promote) pgsql_pre_promote return $? ;; esac ;; post) case $op in promote) delete_xlog_location PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` if [ "$PROMOTE_NODE" != "$NODENAME" ]; then delete_master_baseline fi return $OCF_SUCCESS ;; demote) pgsql_post_demote return $? ;; start|stop) MASTER_NODE=`echo $OCF_RESKEY_CRM_meta_notify_master_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` if [ "$NODENAME" = "$MASTER_NODE" ]; then control_slave_status fi return $OCF_SUCCESS ;; esac ;; esac return $OCF_SUCCESS } control_slave_status() { local rc local data_status local target local all_data_status local tmp_data_status local number_of_nodes all_data_status=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ -Atc \"${CHECK_REPLICATION_STATE_SQL}\""` rc=$? if [ $rc -eq 0 ]; then if [ -n "$all_data_status" ]; then all_data_status=`echo $all_data_status | sed "s/\n/ /g"` fi else report_psql_error $rc err "Can't get PostgreSQL replication status." return 1 fi number_of_nodes=`echo $NODE_LIST | wc -w` for target in $NODE_LIST; do if [ "$target" = "$NODENAME" ]; then continue fi data_status="DISCONNECT" if [ -n "$all_data_status" ]; then for tmp_data_status in $all_data_status; do if ! echo $tmp_data_status | grep -q "^${target}|"; then continue fi data_status=`echo $tmp_data_status | cut -d "|" -f 2,3` ocf_log debug "node_name and data_status is $tmp_data_status" break done fi case "$data_status" in "STREAMING|SYNC") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_PROMOTE" change_pgsql_status "$target" "HS:sync" ;; "STREAMING|ASYNC") change_data_status "$target" "$data_status" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then change_master_score "$target" "$CAN_NOT_PROMOTE" set_sync_mode "$target" else if [ $number_of_nodes -le 2 ]; then change_master_score "$target" "$CAN_PROMOTE" else # I can't determine which slave's data is newest in async mode. change_master_score "$target" "$CAN_NOT_PROMOTE" fi fi change_pgsql_status "$target" "HS:async" ;; "STREAMING|POTENTIAL") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" change_pgsql_status "$target" "HS:potential" ;; "DISCONNECT") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then set_async_mode "$target" fi ;; *) change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then set_async_mode "$target" fi change_pgsql_status "$target" "HS:connected" ;; esac done return 0 } have_master_right() { local old local new local output local data_status local node local mylocation local count local newestXlog local oldfile local newfile ocf_log debug "Checking if I have a master right." data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \ "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null` if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi else if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "STREAMING|ASYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi fi ocf_log info "My data status=$data_status." show_xlog_location if [ $? -ne 0 ]; then - ocf_log err "Failed to show my xlog location." + ocf_exit_reason "Failed to show my xlog location." exit $OCF_ERR_GENERIC fi old=0 for count in `seq $OCF_RESKEY_xlog_check_count`; do if [ -f ${XLOG_NOTE_FILE}.$count ]; then old=$count continue fi break done new=`expr $old + 1` # get xlog locations of all nodes for node in ${NODE_LIST}; do output=`$CRM_ATTR_REBOOT -N "$node" -n \ "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null` if [ $? -ne 0 ]; then ocf_log warn "Can't get $node xlog location." continue else ocf_log info "$node xlog location : $output" echo "$node $output" >> ${XLOG_NOTE_FILE}.${new} if [ "$node" = "$NODENAME" ]; then mylocation=$output fi fi done oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null` newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null` if [ "$oldfile" != "$newfile" ]; then # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 return 1 fi if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \ head -1 | cut -d " " -f 2` if [ "$newestXlog" = "$mylocation" ]; then ocf_log info "I have a master right." exec_with_retry 5 $CRM_MASTER -v $PROMOTE_ME return 0 fi change_data_status "$NODENAME" "DISCONNECT" ocf_log info "I don't have correct master data." # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 fi return 1 } is_replication() { if [ "$OCF_RESKEY_rep_mode" != "none" -a "$OCF_RESKEY_rep_mode" != "slave" ]; then return 0 fi return 1 } get_my_location() { local rc local output local replay_loc local receive_loc local output1 local output2 local log1 local log2 local newer_location output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ -Atc \"${CHECK_XLOG_LOC_SQL}\""` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc err "Can't get my xlog location." return 1 fi replay_loc=`echo $output | cut -d "|" -f 1` receive_loc=`echo $output | cut -d "|" -f 2` output1=`echo "$replay_loc" | cut -d "/" -f 1` output2=`echo "$replay_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` replay_loc="${log1}${log2}" output1=`echo "$receive_loc" | cut -d "/" -f 1` output2=`echo "$receive_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` receive_loc="${log1}${log2}" newer_location=`printf "$replay_loc\n$receive_loc" | sort -r | head -1` echo "$newer_location" return 0 } show_xlog_location() { local location location=`get_my_location` || return 1 exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location" } delete_xlog_location() { exec_with_retry 5 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -D } show_master_baseline() { local rc local location location=`get_my_location` ocf_log info "My master baseline : $location." exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location" } delete_master_baseline() { exec_with_retry 5 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -D } set_async_mode_all() { [ "$OCF_RESKEY_rep_mode" = "sync" ] || return 0 ocf_log info "Set all nodes into async mode." runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\"" if [ $? -ne 0 ]; then - ocf_log err "Can't set all nodes into async mode." + ocf_exit_reason "Can't set all nodes into async mode." return 1 fi return 0 } set_async_mode() { cat $REP_MODE_CONF | grep -q -e "[,' ]$1[,' ]" if [ $? -eq 0 ]; then ocf_log info "Setup $1 into async mode." runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\"" else ocf_log debug "$1 is already in async mode." return 0 fi exec_with_retry 0 reload_conf } set_sync_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then ocf_log debug "$sync_node_in_conf is already sync mode." else ocf_log info "Setup $1 into sync mode." runasowner -q err "echo \"synchronous_standby_names = '$1'\" > \"$REP_MODE_CONF\"" [ "$RE_CONTROL_SLAVE" = "false" ] && RE_CONTROL_SLAVE="true" exec_with_retry 0 reload_conf fi } reload_conf() { # Invoke pg_ctl runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload" if [ $? -eq 0 ]; then ocf_log info "Reload configuration file." else - ocf_log err "Can't reload configuration file." + ocf_exit_reason "Can't reload configuration file." return 1 fi return 0 } user_recovery_conf() { # put archive_cleanup_command and recovery_end_command only when defined by user if [ -n "$OCF_RESKEY_archive_cleanup_command" ]; then echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'" fi if [ -n "$OCF_RESKEY_recovery_end_command" ]; then echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'" fi } make_recovery_conf() { runasowner "touch $RECOVERY_CONF" if [ $? -ne 0 ]; then - ocf_log err "Can't create recovery.conf." + ocf_exit_reason "Can't create recovery.conf." return 1 fi cat > $RECOVERY_CONF <> $RECOVERY_CONF ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}" return 0 } # change pgsql-status. # arg1:node, arg2: value change_pgsql_status() { local output if ! is_node_online $1; then return 0 fi output=`$CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null` if [ "$output" != "$2" ]; then # If slave's disk is broken, RA cannot read PID file # and misjudges the PostgreSQL as down while it is running. # It causes overwriting of pgsql-status by Master because replication is still connected. if [ "$output" = "STOP" -o "$output" = "UNKNOWN" ]; then if [ "$1" != "$NODENAME" ]; then ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited." return 0 fi fi ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2." exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2" fi return 0 } # change pgsql-data-status. # arg1:node, arg2: value change_data_status() { local output if ! node_exist $1; then return 0 fi while : do output=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null` if [ "$output" != "$2" ]; then ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2." exec_with_retry 0 exec_with_timeout 0 "$CRM_ATTR_FOREVER" -N $1 -n $PGSQL_DATA_STATUS_ATTR -v "$2" else break fi done return 0 } # set master-score # arg1:node, arg2: score, arg3: resoure set_master_score() { local current_score current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null` if [ -n "$current_score" -a "$current_score" != "$2" ]; then ocf_log info "Changing $3 master score on $1 : $current_score->$2." exec_with_retry 0 $CRM_ATTR_REBOOT -N "$target" -n "master-$3" -v "$2" fi return 0 } # change master-score # arg1:node, arg2: score change_master_score() { local instance if ! is_node_online $1; then return 0 fi if echo $OCF_RESOURCE_INSTANCE | grep -q ":"; then # If Pacemaker version is 1.0.x instance=0 while : do if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then break fi if [ "${RESOURCE_NAME}:${instance}" = "$OCF_RESOURCE_INSTANCE" ]; then instance=`expr $instance + 1` continue fi set_master_score $1 $2 "${RESOURCE_NAME}:${instance}" || return 1 instance=`expr $instance + 1` done else # If globally-unique=false and Pacemaker version is 1.1.8 or higher # Master/Slave resource has no instance number set_master_score $1 $2 ${RESOURCE_NAME} || return 1 fi return 0 } report_psql_error() { local rc local loglevel local message rc=$1 loglevel=${2:-err} message="$3" ocf_log $loglevel "$message rc=$rc" if [ $rc -eq 1 ]; then - ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." + ocf_exit_reason "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." elif [ $rc -eq 2 ]; then ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command." elif [ $rc -eq 3 ]; then - ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." + ocf_exit_reason "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." fi } # # timeout management function # arg1 timeout >= 0 (if arg1 is 0, OCF_RESKEY_crm_attr_timeout is used.) # arg2 : command # arg3 : command's args exec_with_timeout() { local func_pid local count=$OCF_RESKEY_crm_attr_timeout local rc if [ "$1" -ne 0 ]; then count=$1 fi shift $* & func_pid=$! sleep .1 while kill -s 0 $func_pid >/dev/null 2>&1; do sleep 1 count=`expr $count - 1` if [ $count -le 0 ]; then - ocf_log err "\"$*\" (pid=$func_pid) timed out." + ocf_exit_reason "\"$*\" (pid=$func_pid) timed out." kill -s 9 $func_pid >/dev/null 2>&1 return 1 fi ocf_log info "Waiting($count). \"$*\" (pid=$func_pid)." done wait $func_pid } # retry command when command doesn't return 0 # arg1 : count >= 0 (if arg1 is 0, it retries command in infinitum(1day)) # arg2..argN : command and args exec_with_retry() { local count="86400" local output local rc if [ "$1" -ne 0 ]; then count=$1 fi shift while [ $count -gt 0 ]; do output=`$*` rc=$? if [ $rc -ne 0 ]; then ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"." count=`expr $count - 1` sleep 1 else printf "${output}" return 0 fi done - ocf_log err "giving up executing \"$*\"" + ocf_exit_reason "giving up executing \"$*\"" return $rc } is_node_online() { print_crm_mon | tr '[A-Z]' '[a-z]' | grep -e "^node $1 " -e "^node $1:" | grep -q -v "offline" } node_exist() { print_crm_mon | tr '[A-Z]' '[a-z]' | grep -q "^node $1" } check_binary2() { if ! have_binary "$1"; then - ocf_log err "Setup problem: couldn't find command: $1" + ocf_exit_reason "Setup problem: couldn't find command: $1" return 1 fi return 0 } check_config() { local rc=0 if [ ! -f "$1" ]; then if ocf_is_probe; then ocf_log info "Configuration file is $1 not readable during probe." rc=1 else - ocf_log err "Configuration file $1 doesn't exist" + ocf_exit_reason "Configuration file $1 doesn't exist" rc=2 fi fi return $rc } # Validate most critical parameters pgsql_validate_all() { local version local check_config_rc local rep_mode_string if ! check_binary2 "$OCF_RESKEY_pgctl" || ! check_binary2 "$OCF_RESKEY_psql"; then return $OCF_ERR_INSTALLED fi check_config "$OCF_RESKEY_config" check_config_rc=$? [ $check_config_rc -eq 2 ] && return $OCF_ERR_INSTALLED [ $check_config_rc -eq 0 ] && : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`} getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1 if [ ! $? -eq 0 ]; then - ocf_log err "User $OCF_RESKEY_pgdba doesn't exist"; + ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist"; return $OCF_ERR_INSTALLED; fi if ocf_is_probe; then ocf_log info "Don't check $OCF_RESKEY_pgdata during probe" else if ! runasowner "test -w $OCF_RESKEY_pgdata"; then - ocf_log err "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba" + ocf_exit_reason "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM; fi fi if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ] then - ocf_log err "monitor password can't be empty" + ocf_exit_reason "monitor password can't be empty" return $OCF_ERR_CONFIGURED fi if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ] then - ocf_log err "monitor_user has to be set if monitor_password is set" + ocf_exit_reason "monitor_user has to be set if monitor_password is set" return $OCF_ERR_CONFIGURED fi if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then version=`cat $OCF_RESKEY_pgdata/PG_VERSION` if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then - ocf_log err "Replication mode needs PostgreSQL 9.1 or higher." + ocf_exit_reason "Replication mode needs PostgreSQL 9.1 or higher." return $OCF_ERR_INSTALLED fi if [ ! -n "$OCF_RESKEY_master_ip" ]; then - ocf_log err "master_ip can't be empty." + ocf_exit_reason "master_ip can't be empty." return $OCF_ERR_CONFIGURED fi fi if is_replication; then if ! ocf_is_ms; then - ocf_log err "Replication(rep_mode=async or sync) requires Master/Slave configuration." + ocf_exit_reason "Replication(rep_mode=async or sync) requires Master/Slave configuration." return $OCF_ERR_CONFIGURED fi if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then - ocf_log err "Invalid rep_mode : $OCF_RESKEY_rep_mode" + ocf_exit_reason "Invalid rep_mode : $OCF_RESKEY_rep_mode" return $OCF_ERR_CONFIGURED fi if [ ! -n "$NODE_LIST" ]; then - ocf_log err "node_list can't be empty." + ocf_exit_reason "node_list can't be empty." return $OCF_ERR_CONFIGURED fi if [ $check_config_rc -eq 0 ]; then rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if ! grep -q "$rep_mode_string" $OCF_RESKEY_config; then ocf_log info "adding include directive into $OCF_RESKEY_config" echo "$rep_mode_string" >> $OCF_RESKEY_config fi else if grep -q "$rep_mode_string" $OCF_RESKEY_config; then ocf_log info "deleting include directive from $OCF_RESKEY_config" rep_mode_string=`echo $rep_mode_string | sed -e 's|/|\\\\/|g'` sed -i "/$rep_mode_string/d" $OCF_RESKEY_config fi fi fi if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then - ocf_log err "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba" + ocf_exit_reason "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM fi fi if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then if ocf_is_ms; then - ocf_log err "Replication(rep_mode=slave) does not support Master/Slave configuration." + ocf_exit_reason "Replication(rep_mode=slave) does not support Master/Slave configuration." return $OCF_ERR_CONFIGURED fi fi return $OCF_SUCCESS } # # Check if we need to create a log file # check_log_file() { if [ ! -f "$1" ] then touch $1 > /dev/null 2>&1 chown $OCF_RESKEY_pgdba:`getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4` $1 fi #Check if $OCF_RESKEY_pgdba can write to the log file if ! runasowner "test -w $1" then return 1 fi return 0 } # # Check socket directory # check_socket_dir() { if [ ! -d "$OCF_RESKEY_socketdir" ]; then if ! mkdir "$OCF_RESKEY_socketdir"; then - ocf_log err "Can't create directory $OCF_RESKEY_socketdir" + ocf_exit_reason "Can't create directory $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chown $OCF_RESKEY_pgdba:`getent passwd \ $OCF_RESKEY_pgdba | cut -d ":" -f 4` "$OCF_RESKEY_socketdir" then - ocf_log err "Can't change ownership for $OCF_RESKEY_socketdir" + ocf_exit_reason "Can't change ownership for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chmod 2775 "$OCF_RESKEY_socketdir"; then - ocf_log err "Can't change permissions for $OCF_RESKEY_socketdir" + ocf_exit_reason "Can't change permissions for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi else if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then - ocf_log err "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir" + ocf_exit_reason "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi rm $OCF_RESKEY_socketdir/test.$$ fi } print_crm_mon() { if [ -z "$CRM_MON_OUTPUT" ]; then CRM_MON_OUTPUT=`exec_with_retry 0 crm_mon -n1` fi printf "${CRM_MON_OUTPUT}\n" } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_GENERIC fi PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status" RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf NODENAME=$(ocf_local_nodename | tr '[A-Z]' '[a-z]') if is_replication; then REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot" CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot" CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever" CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount" CAN_NOT_PROMOTE="-INFINITY" CAN_PROMOTE="100" PROMOTE_ME="1000" CHECK_MS_SQL="select pg_is_in_recovery()" CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()" CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication" PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status" PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status" PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc" PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline" NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'` RE_CONTROL_SLAVE="false" fi case "$1" in methods) pgsql_methods exit $?;; meta-data) meta_data exit $OCF_SUCCESS;; esac pgsql_validate_all rc=$? [ "$1" = "validate-all" ] && exit $rc if [ $rc -ne 0 ] then case "$1" in stop) if is_replication; then change_pgsql_status "$NODENAME" "UNKNOWN" fi exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $OCF_NOT_RUNNING;; *) exit $rc;; esac fi US=`id -u -n` if [ $US != root -a $US != $OCF_RESKEY_pgdba ] then - ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba" + ocf_exit_reason "$0 must be run as root or $OCF_RESKEY_pgdba" exit $OCF_ERR_GENERIC fi # make psql command options if [ -n "$OCF_RESKEY_monitor_user" ]; then PGUSER=$OCF_RESKEY_monitor_user; export PGUSER PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb" else psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb" fi if [ -n "$OCF_RESKEY_pghost" ]; then psql_options="$psql_options -h $OCF_RESKEY_pghost" else if [ -n "$OCF_RESKEY_socketdir" ]; then psql_options="$psql_options -h $OCF_RESKEY_socketdir" fi fi if [ -n "$OCF_RESKEY_pgport" ]; then export PGPORT=$OCF_RESKEY_pgport fi if [ -n "$OCF_RESKEY_pglibs" ]; then if [ -n "$LD_LIBRARY_PATH" ]; then export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$OCF_RESKEY_pglibs else export LD_LIBRARY_PATH=$OCF_RESKEY_pglibs fi fi # What kind of method was invoked? case "$1" in status) if pgsql_status then ocf_log info "PostgreSQL is up" exit $OCF_SUCCESS else ocf_log info "PostgreSQL is down" exit $OCF_NOT_RUNNING fi;; monitor) pgsql_monitor exit $?;; start) pgsql_start exit $?;; promote) pgsql_promote exit $?;; demote) pgsql_demote exit $?;; notify) pgsql_notify exit $?;; stop) pgsql_stop exit $?;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/postfix b/heartbeat/postfix index 8619af60d..72fc3710d 100755 --- a/heartbeat/postfix +++ b/heartbeat/postfix @@ -1,415 +1,415 @@ #!/bin/sh # # Resource script for Postfix # # Description: Manages Postfix as an OCF resource in # an high-availability setup. # # Author: Raoul Bhatia : Original Author # License: GNU General Public License (GPL) # Note: If you want to run multiple Postfix instances, please see # http://amd.co.at/adminwiki/Postfix#Adding_a_Second_Postfix_Instance_on_one_Server # http://www.postfix.org/postconf.5.html # # # usage: $0 {start|stop|reload|monitor|validate-all|meta-data} # # The "start" arg starts a Postfix instance # # The "stop" arg stops it. # # OCF parameters: # OCF_RESKEY_binary # OCF_RESKEY_config_dir # OCF_RESKEY_parameters # ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs : ${OCF_RESKEY_binary="/usr/sbin/postfix"} : ${OCF_RESKEY_config_dir=""} : ${OCF_RESKEY_parameters=""} USAGE="Usage: $0 {start|stop|reload|monitor|validate-all|meta-data}"; ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 0.1 This script manages Postfix as an OCF resource in a high-availability setup. Manages a highly available Postfix mail server instance Full path to the Postfix binary. For example, "/usr/sbin/postfix". Full path to Postfix binary Full path to a Postfix configuration directory. For example, "/etc/postfix". Full path to configuration directory The Postfix daemon may be called with additional parameters. Specify any of them here. END } postfix_running() { local loglevel loglevel=${1:-err} # run Postfix status if available if ocf_is_true $status_support; then $binary $OPTION_CONFIG_DIR status 2>&1 ret=$? if [ $ret -ne 0 ]; then ocf_log $loglevel "Postfix status: " $ret fi return $ret fi # manually check Postfix's pid PIDFILE=${queue_dir}/pid/master.pid if [ -f $PIDFILE ]; then PID=`head -n 1 $PIDFILE` kill -s 0 $PID >/dev/null 2>&1 && [ `ps -p $PID | grep master | wc -l` -eq 1 ] return $? fi # Postfix is not running false } postfix_start() { # if Postfix is running return success if postfix_running info; then ocf_log info "Postfix already running." return $OCF_SUCCESS fi # start Postfix $binary $OPTIONS start >/dev/null 2>&1 ret=$? if [ $ret -ne 0 ]; then - ocf_log err "Postfix returned error: " $ret + ocf_exit_reason "Postfix returned error: " $ret return $OCF_ERR_GENERIC fi # grant some time for startup/forking the sub processes # and loop initial monitoring until success or timeout while true; do sleep 1 # break if postfix is up and running; log failure otherwise postfix_running info && break ocf_log info "Postfix failed initial monitor action: " $ret done ocf_log info "Postfix started." return $OCF_SUCCESS } postfix_stop() { # if Postfix is not running return success if ! postfix_running info; then ocf_log info "Postfix already stopped." return $OCF_SUCCESS fi # stop Postfix $binary $OPTIONS stop >/dev/null 2>&1 ret=$? if [ $ret -ne 0 ]; then - ocf_log err "Postfix returned an error while stopping: " $ret + ocf_exit_reason "Postfix returned an error while stopping: " $ret return $OCF_ERR_GENERIC fi # grant some time for shutdown and recheck 5 times for i in 1 2 3 4 5; do if postfix_running info; then sleep 1 else break fi done # escalate to abort if we did not stop by now # @TODO shall we loop here too? if postfix_running info; then - ocf_log err "Postfix failed to stop. Escalating to 'abort'." + ocf_exit_reason "Postfix failed to stop. Escalating to 'abort'." $binary $OPTIONS abort >/dev/null 2>&1; ret=$? sleep 5 # postfix abort did not succeed if postfix_running; then - ocf_log err "Postfix failed to abort." + ocf_exit_reason "Postfix failed to abort." return $OCF_ERR_GENERIC fi fi ocf_log info "Postfix stopped." return $OCF_SUCCESS } postfix_reload() { if postfix_running; then ocf_log info "Reloading Postfix." $binary $OPTIONS reload fi } postfix_monitor() { local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi if postfix_running $status_loglevel; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } postfix_validate_all() { # check that the Postfix binaries exist and can be executed check_binary "$binary" check_binary "postconf" # if true, run in-depth directory checks dir_check=true # check config_dir and alternate_config_directories parameter if [ "x$config_dir" != "x" ]; then if [ ! -d "$config_dir" ]; then if ocf_is_probe; then ocf_log info "Postfix configuration directory '$config_dir' not readable during probe." # skip in-depth directory checks if config file isn't readable during probe dir_check=false else - ocf_log err "Postfix configuration directory '$config_dir' does not exist or is not readable." + ocf_exit_reason "Postfix configuration directory '$config_dir' does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi alternate_config_directories=`postconf -h alternate_config_directories 2>/dev/null | grep "$config_dir/\?"` if [ "x$alternate_config_directories" = "x" ]; then - ocf_log err "Postfix main configuration must contain correct 'alternate_config_directories' parameter." + ocf_exit_reason "Postfix main configuration must contain correct 'alternate_config_directories' parameter." return $OCF_ERR_INSTALLED fi fi # check spool/queue and data directories (if applicable) # this is required because "postfix check" does not catch all errors if ocf_is_true $dir_check; then if [ ! -d "$queue_dir" ]; then if ocf_is_probe; then ocf_log info "Postfix queue directory '$queue_dir' not readable during probe." else - ocf_log err "Postfix queue directory '$queue_dir' does not exist or is not readable." + ocf_exit_reason "Postfix queue directory '$queue_dir' does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi if ocf_is_true $status_support; then data_dir=`postconf $OPTION_CONFIG_DIR -h data_directory 2>/dev/null` data_dir_count=`echo "$data_dir" | tr ',' ' ' | wc -w` if [ $data_dir_count -gt 1 ]; then - ocf_log err "Postfix data directory '$orig_data_dir' cannot be set to multiple directories." + ocf_exit_reason "Postfix data directory '$orig_data_dir' cannot be set to multiple directories." return $OCF_ERR_INSTALLED fi if [ ! -d "$data_dir" ]; then if ocf_is_probe; then ocf_log info "Postfix data directory '$data_dir' not readable during probe." else - ocf_log err "Postfix data directory '$data_dir' does not exist or is not readable." + ocf_exit_reason "Postfix data directory '$data_dir' does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi fi # check directory permissions if ocf_is_true $status_support; then user=`postconf $OPTION_CONFIG_DIR -h mail_owner 2>/dev/null` for dir in "$data_dir"; do if ! su -s /bin/sh - $user -c "test -w $dir"; then if ocf_is_probe; then ocf_log info "Directory '$dir' is not writable by user '$user' during probe." else - ocf_log err "Directory '$dir' is not writable by user '$user'." + ocf_exit_reason "Directory '$dir' is not writable by user '$user'." return $OCF_ERR_PERM; fi fi done fi fi # run Postfix internal check, if not probing if ! ocf_is_probe; then $binary $OPTIONS check >/dev/null 2>&1 ret=$? if [ $ret -ne 0 ]; then - ocf_log err "Postfix 'check' failed: " $ret + ocf_exit_reason "Postfix 'check' failed: " $ret return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } # # Main # if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi binary=$OCF_RESKEY_binary config_dir=$OCF_RESKEY_config_dir parameters=$OCF_RESKEY_parameters # handle parameters case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) usage exit $OCF_SUCCESS ;; esac # build Postfix options string *outside* to access from each method OPTIONS='' OPTION_CONFIG_DIR='' # check for Postfix's postconf binary check_binary "postconf" # check if the Postfix config_dir exist if [ "x$config_dir" != "x" ]; then # remove all trailing slashes to ease "postconf alternate_config_directories" match config_dir=`echo $config_dir | sed 's/\/*$//'` # reset config_dir if it equals Postfix's default config_directory postconf -h config_directory 2>/dev/null | grep -q "^$config_dir/\?$" if [ $? -eq 0 ]; then config_dir="" fi # set OPTIONS if config_dir is still set # save OPTION_CONFIG_DIR seperatly if [ "x$config_dir" != "x" ]; then OPTION_CONFIG_DIR="-c $config_dir" OPTIONS=$OPTION_CONFIG_DIR fi fi # add all additional parameters to options string if [ "x$parameters" != "x" ]; then OPTIONS="$OPTIONS $parameters" fi # important directories, used in different methods queue_dir=`postconf $OPTION_CONFIG_DIR -h queue_directory 2>/dev/null` # check Postfix version and status support status_support=false postfix_version=`postconf -h mail_version 2>/dev/null` ocf_version_cmp "$postfix_version" "2.5.0" ret=$? # we need Postfix 2.5.0 or greater for status/data_directory support if [ $ret -eq 1 -o $ret -eq 2 ]; then status_support=true fi postfix_validate_all ret=$? LSB_STATUS_STOPPED=3 if [ $ret -ne $OCF_SUCCESS ]; then case $1 in stop) exit $OCF_SUCCESS ;; *) exit $ret;; esac fi case $1 in monitor) postfix_monitor exit $? ;; start) postfix_start exit $? ;; stop) postfix_stop exit $? ;; reload) postfix_reload exit $? ;; validate-all) exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/rsyncd b/heartbeat/rsyncd index e830bcb2e..86c771e32 100755 --- a/heartbeat/rsyncd +++ b/heartbeat/rsyncd @@ -1,270 +1,270 @@ #!/bin/sh # # Resource script for rsync daemon # # Description: Manages rsync daemon as an OCF resource in # an High Availability setup. # # Author: Dhairesh Oza # License: GNU General Public License (GPL) # # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg starts rsyncd. # # The "stop" arg stops it. # # OCF parameters: # OCF_RESKEY_binpath # OCF_RESKEY_conffile # OCF_RESKEY_bwlimit # # Note:This RA requires that the rsyncd config files has a "pid file" # entry so that it is able to act on the correct process ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 This script manages rsync daemon Manages an rsync daemon The rsync binary path. For example, "/usr/bin/rsync" Full path to the rsync binary The rsync daemon configuration file name with full path. For example, "/etc/rsyncd.conf" Configuration file name with full path This option allows you to specify a maximum transfer rate in kilobytes per second. This option is most effective when using rsync with large files (several megabytes and up). Due to the nature of rsync transfers, blocks of data are sent, then if rsync determines the transfer was too fast, it will wait before sending the next data block. The result is an average transfer rate equaling the specified limit. A value of zero specifies no limit. limit I/O bandwidth, KBytes per second END exit $OCF_SUCCESS } get_pid_and_conf_file() { if [ -n "$OCF_RESKEY_conffile" ]; then CONF_FILE=$OCF_RESKEY_conffile else CONF_FILE="/etc/rsyncd.conf" fi grep -v "^#" "$CONF_FILE" | grep "pid file" > /dev/null if [ $? -eq 0 ]; then PIDFILE=`grep -v "^#" "$CONF_FILE" | grep "pid file" | awk -F "=" '{ print $2 }'` fi } rsyncd_status() { if [ -n "$PIDFILE" -a -f $PIDFILE ]; then # rsync is probably running PID=`cat $PIDFILE` if [ -n "$PID" ]; then if ps -p $PID | grep rsync >/dev/null ; then ocf_log info "rsync daemon running" return $OCF_SUCCESS else ocf_log info "rsync daemon is not running but pid file exists" return $OCF_ERR_GENERIC fi else - ocf_log err "PID file empty!" + ocf_exit_reason "PID file empty!" return $OCF_ERR_GENERIC fi fi # rsyncd is not running ocf_log info "rsync daemon is not running" return $OCF_NOT_RUNNING } rsyncd_start() { # if rsyncd is running return success rsyncd_status retVal=$? if [ $retVal -eq $OCF_SUCCESS ]; then exit $OCF_SUCCESS elif [ $retVal -ne $OCF_NOT_RUNNING ]; then - ocf_log err "Error. Unknown status." + ocf_exit_reason "Error. Unknown status." exit $OCF_ERR_GENERIC fi if [ -n "$OCF_RESKEY_binpath" ]; then COMMAND="$OCF_RESKEY_binpath --daemon" else COMMAND="rsync --daemon" fi if [ -n "$OCF_RESKEY_conffile" ]; then COMMAND="$COMMAND --config $OCF_RESKEY_conffile" fi if [ -n "$OCF_RESKEY_bwlimit" ]; then COMMAND="$COMMAND --bwlimit $OCF_RESKEY_bwlimit" fi if grep -v "^#" "$CONF_FILE" | grep "pid file" > /dev/null ; then $COMMAND; if [ $? -ne 0 ]; then - ocf_log err "Error. rsync daemon returned error $?." + ocf_exit_reason "Error. rsync daemon returned error $?." exit $OCF_ERR_GENERIC fi else - ocf_log err "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA." + ocf_exit_reason "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA." return $OCF_ERR_GENERIC fi ocf_log info "Started rsync daemon." exit $OCF_SUCCESS } rsyncd_stop() { if rsyncd_status ; then PID=`cat $PIDFILE` if [ -n "$PID" ] ; then kill $PID if [ $? -ne 0 ]; then kill -s KILL $PID if [ $? -ne 0 ]; then - ocf_log err "Error. Could not stop rsync daemon." + ocf_exit_reason "Error. Could not stop rsync daemon." return $OCF_ERR_GENERIC fi fi rm $PIDFILE 2>/dev/null fi fi ocf_log info "Stopped rsync daemon." exit $OCF_SUCCESS } rsyncd_monitor() { rsyncd_status } rsyncd_validate_all() { if [ -n "$OCF_RESKEY_binpath" -a ! -x "$OCF_RESKEY_binpath" ]; then - ocf_log err "Binary path $OCF_RESKEY_binpath does not exist." + ocf_exit_reason "Binary path $OCF_RESKEY_binpath does not exist." exit $OCF_ERR_ARGS fi if [ -n "$OCF_RESKEY_conffile" -a ! -f "$OCF_RESKEY_conffile" ]; then - ocf_log err "Config file $OCF_RESKEY_conffile does not exist." + ocf_exit_reason "Config file $OCF_RESKEY_conffile does not exist." exit $OCF_ERR_ARGS fi if grep -v "^#" "$CONF_FILE" | grep "pid file" > /dev/null ; then : else - ocf_log err "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA." + ocf_exit_reason "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA." return $OCF_ERR_GENERIC fi #Not checking "$OCF_RESKEY_bwlimit" return $OCF_SUCCESS } # # Main # if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in start) get_pid_and_conf_file rsyncd_start ;; stop) get_pid_and_conf_file rsyncd_stop ;; status) get_pid_and_conf_file rsyncd_status ;; monitor)get_pid_and_conf_file rsyncd_monitor ;; validate-all) get_pid_and_conf_file rsyncd_validate_all ;; meta-data) meta_data ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/slapd b/heartbeat/slapd index ffb40e845..c26b16f32 100755 --- a/heartbeat/slapd +++ b/heartbeat/slapd @@ -1,591 +1,591 @@ #!/bin/bash # # Stand-alone LDAP Daemon (slapd) # # Description: Manages Stand-alone LDAP Daemon (slapd) as an OCF resource in # an high-availability setup. # # Authors: Jeroen Koekkoek # nozawat@gmail.com # John Keith Hohm # # License: GNU General Public License (GPL) # Copyright: (C) 2011 Pagelink B.V. # # The OCF code was inspired by the Postfix resource script written by # Raoul Bhatia . # # The code for managing the slapd instance is based on the the slapd init # script found in Debian GNU/Linux 6.0. # # OCF parameters: # OCF_RESKEY_slapd # OCF_RESKEY_ldapsearch # OCF_RESKEY_config # OCF_RESKEY_pidfile # OCF_RESKEY_user # OCF_RESKEY_group # OCF_RESKEY_services # OCF_RESKEY_watch_suffix # OCF_RESKEY_ignore_suffix # OCF_RESKEY_bind_dn # OCF_RESKEY_password # OCF_RESKEY_parameters # OCF_RESKEY_stop_escalate # ################################################################################ # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs : ${OCF_RESKEY_slapd="/usr/sbin/slapd"} : ${OCF_RESKEY_ldapsearch="ldapsearch"} : ${OCF_RESKEY_config=""} : ${OCF_RESKEY_pidfile=""} : ${OCF_RESKEY_user=""} : ${OCF_RESKEY_group=""} : ${OCF_RESKEY_services="ldap:///"} : ${OCF_RESKEY_watch_suffix=""} : ${OCF_RESKEY_ignore_suffix=""} : ${OCF_RESKEY_bind_dn=""} : ${OCF_RESKEY_password=""} : ${OCF_RESKEY_parameters=""} : ${OCF_RESKEY_stop_escalate=15} USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}" ORIG_IFS=$IFS NEWLINE=' ' ################################################################################ usage() { echo $USAGE >&2 } meta_data() { cat < 0.1 Resource script for Stand-alone LDAP Daemon (slapd). It manages a slapd instance as an OCF resource. Manages a Stand-alone LDAP Daemon (slapd) instance Full path to the slapd binary. For example, "/usr/sbin/slapd". Full path to slapd binary Full path to the ldapsearch binary. For example, "/usr/bin/ldapsearch". Full path to ldapsearch binary Full path to a slapd configuration directory or a slapd configuration file. For example, "/etc/ldap/slapd.d" or "/etc/ldap/slapd.conf". Full path to configuration directory or file File to read the PID from; read from olcPidFile/pidfile in config if not set. File to read PID from User name or id slapd will run with. The group id is also changed to this user's gid, unless the group parameter is used to override. User name or id slapd will run with Group name or id slapd will run with. Group name or id slapd will run with LDAP (and other scheme) URLs slapd will serve. For example, "ldap://127.0.0.1:389 ldaps:/// ldapi:///" LDAP (and other scheme) URLs to serve Suffix (database backend) that will be monitored for availability. Multiple suffixes can be specified by providing a space seperated list. By providing one or more suffixes here, the ignore_suffix parameter is discarded. All suffixes will be monitored if left blank. Suffix that will be monitored for availability. Suffix (database backend) that will not be monitored for availability. Multiple suffixes can be specified by providing a space seperated list. No suffix will be excluded if left blank. Suffix that will not be monitored for availability. Distinguished Name used to bind to the LDAP directory for testing. Leave blank to bind to the LDAP directory anonymously. Distinguished Name used to bind to the LDAP directory for testing. Password used to bind to the LDAP directory for testing. Password used to bind to the LDAP directory for testing. slapd may be called with additional parameters. Specify any of them here. Any additional parameters to slapd. Number of seconds to wait for shutdown (using SIGTERM) before resorting to SIGKILL Seconds before stop escalation to KILL END } terminate() { local pid=$1 local signal=$2 local recheck=${3-0} local rc local waited=0 kill -$signal $pid >/dev/null 2>&1; rc=$? while [ \( $rc -eq 0 \) -a \( $recheck -eq 0 -o $waited -lt $recheck \) ]; do kill -0 $pid >/dev/null 2>&1; rc=$? let "waited += 1" if [ $rc -eq 0 ]; then sleep 1 fi done if [ $rc -ne 0 ]; then return 0 fi return 1 } watch_suffix() { local rc if [ -n "$OCF_RESKEY_watch_suffix" ]; then if echo "'$OCF_RESKEY_watch_suffix'" | grep "'$1'" >/dev/null 2>&1; then rc=0 else rc=1 fi else if echo "'$OCF_RESKEY_ignore_suffix'" | grep "'$1'" >/dev/null 2>&1; then rc=1 else rc=0 fi fi return $rc } slapd_pid() { local pid if [ -f "$pid_file" ]; then pid=`head -n 1 "$pid_file" 2>/dev/null` if [ "X$pid" != "X" ]; then echo "$pid" return $OCF_SUCCESS fi - ocf_log err "slapd pid file '$pid_file' empty." + ocf_exit_reason "slapd pid file '$pid_file' empty." return $OCF_ERR_GENERIC fi ocf_log info "slapd pid file '$pid_file' does not exist." return $OCF_NOT_RUNNING } slapd_status() { local pid=$1 local state=$? if [ $state -eq $OCF_SUCCESS ]; then if ! kill -0 $pid >/dev/null 2>&1; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi return $state } slapd_start() { local options local reason local rc local state slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_SUCCESS ]; then ocf_log info "slapd already running." return $state elif [ $state -eq $OCF_ERR_GENERIC ]; then return $state fi options="-u $user -g $group" if [ -d "$config" ]; then options="$options -F $config" elif [ -f "$config" ]; then options="$options -f $config" else - ocf_log err "slapd configuration '$config' does not exist." + ocf_exit_reason "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi if [ -n "$parameters" ]; then options="$options $parameters" fi if [ -n "$services" ]; then $slapd -h "$services" $options 2>&1; rc=$? else $slapd $options 2>&1; rc=$? fi if [ $rc -ne 0 ]; then - ocf_log err "slapd returned error." + ocf_exit_reason "slapd returned error." return $OCF_ERR_GENERIC fi while true; do slapd_monitor start if [ $? = "$OCF_SUCCESS" ]; then break fi sleep 1 done ocf_log info "slapd started." return $OCF_SUCCESS } slapd_stop() { local pid local rc local state pid=`slapd_pid`; slapd_status $pid; state=$? if [ $state -eq $OCF_NOT_RUNNING ]; then ocf_log info "slapd already stopped." return $OCF_SUCCESS elif [ $state -eq $OCF_ERR_GENERIC ]; then return $state fi terminate $pid TERM $OCF_RESKEY_stop_escalate; rc=$? if [ $rc -ne 0 ]; then - ocf_log err "slapd failed to stop. Escalating to KILL." + ocf_exit_reason "slapd failed to stop. Escalating to KILL." terminate $pid KILL; rc=$? fi if [ -f "$pid_file" ]; then rm -f "$pid_file" >/dev/null 2>&1 fi ocf_log info "slapd stopped." return $OCF_SUCCESS } slapd_monitor() { local options local rc local state local suffix local suffixes local err_option="-info" slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_NOT_RUNNING ]; then if [ -z "$1" ];then if ! ocf_is_probe; then - ocf_log err "slapd process not found." + ocf_exit_reason "slapd process not found." fi fi return $state elif [ $state -ne $OCF_SUCCESS ]; then - ocf_log err "slapd returned error." + ocf_exit_reason "slapd returned error." return $state fi if [ -d "$config" ]; then for suffix in `find "$config"/'cn=config' -type f -name olcDatabase* -exec \ sed -ne 's/^[[:space:]]*olcSuffix:[[:space:]]\+\(.\+\)/\1/p' {} \;` do suffix=${suffix#\"*} suffix=${suffix%\"*} if watch_suffix $suffix; then suffixes="$suffixes $suffix" fi done elif [ -f "$config" ]; then for suffix in `sed -ne 's/^[[:space:]]*suffix[[:space:]]\+\(.\+\)/\1/p' "$config"` do suffix=${suffix#\"*} suffix=${suffix%\"*} if watch_suffix $suffix; then suffixes="$suffixes $suffix" fi done else if ocf_is_probe; then ocf_log info "slapd configuration '$config' does not exist during probe." else - ocf_log err "slapd configuration '$config' does not exist." + ocf_exit_reason "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi fi options="-LLL -s base -x" if [ -n "$bind_dn" ]; then options="$options -D $bind_dn -w $password" fi [ -z "$1" ] && err_option="" for suffix in $suffixes; do ocf_run -q $err_option "$ldapsearch" -H "$services" -b "$suffix" $options >/dev/null 2>&1; rc=$? case "$rc" in "0") ocf_log debug "slapd database with suffix '$suffix' reachable" ;; "49") - ocf_log err "slapd database with suffix '$suffix' unreachable. Invalid credentials." + ocf_exit_reason "slapd database with suffix '$suffix' unreachable. Invalid credentials." return $OCF_ERR_CONFIGURED ;; *) if [ -z "$1" ] || [ -n "$1" -a $rc -ne 1 ]; then - ocf_log err "slapd database with suffix '$suffix' unreachable. exit code ($rc)" + ocf_exit_reason "slapd database with suffix '$suffix' unreachable. exit code ($rc)" fi state=$OCF_ERR_GENERIC ;; esac done return $state } slapd_validate_all() { check_binary "$slapd" check_binary "$ldapsearch" if [ -z "$pid_file" ]; then if [ -d "$config" ]; then pid_file=`sed -ne \ 's/^olcPidFile:[[:space:]]\+\(.\+\)[[:space:]]*/\1/p' \ "$config"/'cn=config.ldif' 2>/dev/null` elif [ -f "$config" ]; then pid_file=`sed -ne \ 's/^pidfile[[:space:]]\+\(.\+\)/\1/p' \ "$config" 2>/dev/null` else if ocf_is_probe; then ocf_log info "slapd configuration '$config' does not exist during probe." else - ocf_log err "slapd configuration '$config' does not exist." + ocf_exit_reason "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi fi fi if [ -z "$user" ]; then user=`id -nu 2>/dev/null` elif ! id "$user" >/dev/null 2>&1; then - ocf_log err "slapd user '$user' does not exist" + ocf_exit_reason "slapd user '$user' does not exist" return $OCF_ERR_INSTALLED fi if [ -z "$group" ]; then group=`id -ng 2>/dev/null` elif ! grep "^$group:" /etc/group >/dev/null 2>&1; then - ocf_log err "slapd group '$group' does not exist" + ocf_exit_reason "slapd group '$group' does not exist" return $OCF_ERR_INSTALLED fi pid_dir=`dirname "$pid_file"` if [ ! -d "$pid_dir" ]; then mkdir -p "$pid_dir" chown -R "$user" "$pid_dir" chgrp -R "$group" "$pid_dir" fi return $OCF_SUCCESS } # # Main # slapd=$OCF_RESKEY_slapd ldapsearch=$OCF_RESKEY_ldapsearch config=$OCF_RESKEY_config user=$OCF_RESKEY_user group=$OCF_RESKEY_group services=$OCF_RESKEY_services bind_dn=$OCF_RESKEY_bind_dn password=$OCF_RESKEY_password parameters=$OCF_RESKEY_parameters pid_file=$OCF_RESKEY_pidfile if [ -z "$config" ]; then config_dirname="/etc/ldap" if [ -e "/etc/openldap" ]; then config_dirname="/etc/openldap" fi config="$config_dirname/slapd.conf" if [ -e "$config_dirname/slapd.d" ]; then config="$config_dirname/slapd.d" fi fi if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) usage exit $OCF_SUCCESS ;; esac slapd_validate_all rc=$? [ $rc -eq $OCF_SUCCESS ] || exit $rc case $1 in status) slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_SUCCESS ]; then ocf_log debug "slapd is running." elif [ $state -eq $OCF_NOT_RUNNING ]; then ocf_log debug "slapd is stopped." fi exit $state ;; start) slapd_start exit $? ;; stop) slapd_stop exit $? ;; monitor) slapd_monitor; state=$? exit $state ;; validate-all) exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/symlink b/heartbeat/symlink index 214092d0e..1e36a9c74 100755 --- a/heartbeat/symlink +++ b/heartbeat/symlink @@ -1,245 +1,245 @@ #!/bin/sh # # # An OCF RA that manages a symlink # # Copyright (c) 2011 Dominik Klein # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.1 This resource agent that manages a symbolic link (symlink). It is primarily intended to manage configuration files which should be enabled or disabled based on where the resource is running, such as cron job definitions and the like. Manages a symbolic link Full path of the symbolic link to be managed. This must obviously be in a filesystem that supports symbolic links. Full path of the symlink Full path to the link target (the file or directory which the symlink points to). Full path to the link target A suffix to append to any files that the resource agent moves out of the way because they clash with "link". If this is unset (the default), then the resource agent will simply refuse to create a symlink if it clashes with an existing file. Suffix to append to backup files END } symlink_monitor() { # This applies the following logic: # # * If $OCF_RESKEY_link does not exist, then the resource is # definitely stopped. # # * If $OCF_RESKEY_link exists and is a symlink that points to # ${OCF_RESKEY_target}, then the resource is definitely started. # # * If $OCF_RESKEY_link exists, but is anything other than a # symlink to ${OCF_RESKEY_target}, then the status depends on whether # ${OCF_RESKEY_backup_suffix} is set: # # - if ${OCF_RESKEY_backup_suffix} is set, then the resource is # simply not running. The existing file will be moved out of # the way, to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}, # when the resource starts. # # - if ${OCF_RESKEY_backup_suffix} is not set, then an existing # file ${OCF_RESKEY_link} is an error condition, and the # resource can't start here. rc=$OCF_ERR_GENERIC # Using ls here instead of "test -e", as "test -e" returns false # if the file does exist, but it a symlink to a file that doesn't if ! ls "$OCF_RESKEY_link" >/dev/null 2>&1; then ocf_log debug "$OCF_RESKEY_link does not exist" rc=$OCF_NOT_RUNNING elif [ ! -L "$OCF_RESKEY_link" ]; then if [ -z "$OCF_RESKEY_backup_suffix" ]; then - ocf_log err "$OCF_RESKEY_link exists but is not a symbolic link!" + ocf_exit_reason "$OCF_RESKEY_link exists but is not a symbolic link!" exit $OCF_ERR_INSTALLED else ocf_log debug "$OCF_RESKEY_link exists but is not a symbolic link, will be moved to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} on start" rc=$OCF_NOT_RUNNING fi elif readlink -f "$OCF_RESKEY_link" | egrep -q "^${OCF_RESKEY_target}$"; then ocf_log debug "$OCF_RESKEY_link exists and is a symbolic link to ${OCF_RESKEY_target}." rc=$OCF_SUCCESS else if [ -z "$OCF_RESKEY_backup_suffix" ]; then - ocf_log err "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}!" + ocf_exit_reason "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}!" exit $OCF_ERR_INSTALLED else ocf_log debug "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}, will be moved to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} on start" rc=$OCF_NOT_RUNNING fi fi return $rc } symlink_start() { if ! symlink_monitor; then if [ -e "$OCF_RESKEY_link" ]; then if [ -z "$OCF_RESKEY_backup_suffix" ]; then # Shouldn't happen, because symlink_monitor should # have errored out. But there is a chance that # something else put that file there after # symlink_monitor ran. - ocf_log err "$OCF_RESKEY_link exists and no backup_suffix is set, won't overwrite." + ocf_exit_reason "$OCF_RESKEY_link exists and no backup_suffix is set, won't overwrite." exit $OCF_ERR_GENERIC else ocf_log debug "Found $OCF_RESKEY_link, moving to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" ocf_run mv -v "$OCF_RESKEY_link" "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" \ || exit $OCF_ERR_GENERIC fi fi ocf_run ln -sv "$OCF_RESKEY_target" "$OCF_RESKEY_link" symlink_monitor return $? else return $OCF_SUCCESS fi } symlink_stop() { if symlink_monitor; then ocf_run rm -vf "$OCF_RESKEY_link" || exit $OCF_ERR_GENERIC if ! symlink_monitor; then if [ -e "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" ]; then ocf_log debug "Found backup ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}, moving to $OCF_RESKEY_link" # if restoring the backup fails then still return with # $OCF_SUCCESS, but log a warning ocf_run -warn mv "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" "$OCF_RESKEY_link" fi return $OCF_SUCCESS else - ocf_log err "Removing $OCF_RESKEY_link failed." + ocf_exit_reason "Removing $OCF_RESKEY_link failed." return $OCF_ERR_GENERIC fi else return $OCF_SUCCESS fi } symlink_validate_all() { if [ "x${OCF_RESKEY_link}" = "x" ]; then - ocf_log err "Mandatory parameter link is unset" + ocf_exit_reason "Mandatory parameter link is unset" exit $OCF_ERR_CONFIGURED fi if [ "x${OCF_RESKEY_target}" = "x" ]; then - ocf_log err "Mandatory parameter target is unset" + ocf_exit_reason "Mandatory parameter target is unset" exit $OCF_ERR_CONFIGURED fi # Having a non-existant target is technically not an error, as # symlinks are allowed to point to non-existant paths. But it # still doesn't hurt to warn people if the target does not exist # (but only during non-probes). if [ ! -e "${OCF_RESKEY_target}" ]; then ocf_log warn "${OCF_RESKEY_target} does not exist!" fi } symlink_usage() { cat </dev/null 2>&1 } ############################################################################ # isalive_tomcat() { # As the server stops, the PID file disappears. To avoid race conditions, # we will have remembered the PID of a running instance on script entry. local pid=$rememberedPID # If there is a PID file, attempt to use that if [ -f $CATALINA_PID ]; then local tmp ocf_log debug "Reading pid from $CATALINA_PID" tmp=`head -n 1 $CATALINA_PID` if [ $? -eq 0 ]; then pid=$tmp fi fi if [ -n "$pid" ] && [ "$pid" -gt 0 ]; then # Retry message for restraint ocf_log debug "Sending noop signal to $pid" kill -s 0 $pid >/dev/null 2>&1 return $? fi # No PID file false } ############################################################################ # Check rotatelogs process and restart if it is stopped monitor_rotatelogs() { pgrep -f "$ROTATELOGS.*$CATALINA_BASE/logs/catalina_%F.log" > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log warn "A rotatelogs command for $CATALINA_BASE/logs/catalina_%F.log is not running. Restarting it." start_rotatelogs if [ $? -eq 0 ]; then ocf_log info "Restart rotatelogs process succeeded." else ocf_log warn "Restart rotatelogs process failed." fi fi } ############################################################################ # Check tomcat process and service availability monitor_tomcat() { isalive_tomcat || return $OCF_NOT_RUNNING isrunning_tomcat || return $OCF_ERR_GENERIC if ocf_is_true ${CATALINA_ROTATE_LOG}; then # Monitor rotatelogs process and restart it if it is stopped. # And never consider rotatelogs process failure to be a monitor failure # as long as Tomcat process works fine. monitor_rotatelogs fi return $OCF_SUCCESS } ############################################################################ # Startup rotatelogs process start_rotatelogs() { # -s is required because tomcat5.5's login shell is /bin/false su - -s /bin/sh $RESOURCE_TOMCAT_USER \ -c "$ROTATELOGS -l \"$CATALINA_BASE/logs/catalina_%F.log\" $CATALINA_ROTATETIME" \ < "$CATALINA_OUT" > /dev/null 2>&1 & } ############################################################################ # Execute catalina.out log rotation rotate_catalina_out() { # Check catalina_%F.log is writable or not. CURRENT_ROTATELOG_SUFFIX=`date +"%F"` su - -s /bin/sh $RESOURCE_TOMCAT_USER \ -c "touch \"$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log\"" > /dev/null 2>&1 if [ $? -ne 0 ]; then - ocf_log err "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable." + ocf_exit_reason "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable." return $OCF_ERR_GENERIC fi # Clean up and set permissions on required files rm -rf "$CATALINA_BASE"/temp/* if [ -p "$CATALINA_OUT" ]; then rm -f "$CATALINA_OUT" elif [ -e "$CATALINA_OUT" ]; then DATE=`date +"%F-%H%M%S"` ocf_log warn "$CATALINA_OUT already exists. It is saved as $CATALINA_OUT-$DATE" mv "$CATALINA_OUT" "$CATALINA_OUT-$DATE" fi mkfifo -m700 "$CATALINA_OUT" chown --dereference "$RESOURCE_TOMCAT_USER" "$CATALINA_OUT" || true start_rotatelogs } ############################################################################ # Tomcat Command tomcatCommand() { cat<<-END_TOMCAT_COMMAND export JAVA_HOME=${JAVA_HOME} export JAVA_OPTS="${JAVA_OPTS}" export CATALINA_HOME=${CATALINA_HOME} export CATALINA_BASE=${CATALINA_BASE} export CATALINA_OUT=${CATALINA_OUT} export CATALINA_PID=${CATALINA_PID} export CATALINA_OPTS="${CATALINA_OPTS}" export CATALINA_TMPDIR="${CATALINA_TMPDIR}" export JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}" export LOGGING_CONFIG="${LOGGING_CONFIG}" export LOGGING_MANAGER="${LOGGING_MANAGER}" export TOMCAT_CFG=${TOMCAT_CFG} $TOMCAT_START_SCRIPT $@ END_TOMCAT_COMMAND } attemptTomcatCommand() { if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then export TOMCAT_CFG=$(mktemp ${HA_RSCTMP}/tomcat-tmp-XXXXX.cfg) fi if [ "$RESOURCE_TOMCAT_USER" = root ]; then "$TOMCAT_START_SCRIPT" $@ >> "$TOMCAT_CONSOLE" 2>&1 else tomcatCommand $@ | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 fi if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then rm -f "$TOMCAT_CFG" fi } ############################################################################ # Start Tomcat start_tomcat() { cd "$CATALINA_HOME/bin" validate_all_tomcat || exit $? monitor_tomcat if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi # Remove $CATALINA_PID if it exists rm -f $CATALINA_PID #ocf_log debug "catalina.out rotation FLG = ${CATALINA_ROTATE_LOG}" if ocf_is_true ${CATALINA_ROTATE_LOG}; then rotate_catalina_out if [ $? -eq 0 ]; then ocf_log debug "Rotate catalina.out succeeded." else - ocf_log err "Rotate catalina.out failed. Avoid starting tomcat without catalina.out rotation." + ocf_exit_reason "Rotate catalina.out failed. Avoid starting tomcat without catalina.out rotation." return $OCF_ERR_GENERIC fi fi echo "`date "+%Y/%m/%d %T"`: start ===========================" >> "$TOMCAT_CONSOLE" ocf_log debug "CATALINA_OPTS value = ${CATALINA_OPTS}" attemptTomcatCommand start ${TOMCAT_START_OPTS} & while true; do monitor_tomcat if [ $? -eq $OCF_SUCCESS ]; then break fi ocf_log debug "start_tomcat[$TOMCAT_NAME]: retry monitor_tomcat" sleep 3 done return $OCF_SUCCESS } ############################################################################ # Stop Tomcat stop_tomcat() { local stop_time local RA_TIMEOUT=20 local TOMCAT_STOP_OPTS="" if [ -n $OCF_RESKEY_CRM_meta_timeout ]; then RA_TIMEOUT=$((OCF_RESKEY_CRM_meta_timeout/1000)) fi STOP_TIMEOUT=$((RA_TIMEOUT-5)) if [ -n "$MAX_STOP_TIME" ]; then if [ $MAX_STOP_TIME -gt $RA_TIMEOUT ]; then ocf_log warn "max_stop_timeout must be shorter than the timeout of stop operation." fi if [ $MAX_STOP_TIME -eq 0 ]; then STOP_TIMEOUT=$RA_TIMEOUT else STOP_TIMEOUT=$MAX_STOP_TIME fi fi cd "$CATALINA_HOME/bin" memorize_pid # This lets monitoring continue to work reliably echo "`date "+%Y/%m/%d %T"`: stop ###########################" >> "$TOMCAT_CONSOLE" if [ "$TOMCAT_START_SCRIPT" = "$CATALINA_HOME/bin/catalina.sh" ]; then TOMCAT_STOP_OPTS="$STOP_TIMEOUT --force" fi stop_time=$(date +%s) attemptTomcatCommand stop $TOMCAT_STOP_OPTS lapse_sec=0 while isalive_tomcat; do sleep 1 lapse_sec=`expr $(date +%s) - $stop_time` if [ $lapse_sec -ge $STOP_TIMEOUT ]; then ocf_log debug "stop_tomcat[$TOMCAT_NAME]: stop failed, killing with SIGKILL ($lapse_sec)" kill -s KILL $rememberedPID > /dev/null 2>&1 fi done if ocf_is_true ${CATALINA_ROTATE_LOG}; then rm -f "$CATALINA_PID" "${CATALINA_OUT}" else rm -f "$CATALINA_PID" fi return $OCF_SUCCESS } metadata_tomcat() { cat < 1.0 Resource script for Tomcat. It manages a Tomcat instance as a cluster resource. Manages a Tomcat servlet environment instance to Tomcat process on start. Used to ensure process is still running and must be unique. ]]> The name of the resource Log file, used during start and stop operations. Log file Time-out for stop operation. DEPRECATED Time-out for the stop operation. DEPRECATED Maximum number of times to retry stop operation before suspending and killing Tomcat. DEPRECATED. Does not retry. Max retry count for stop operation. DEPRECATED The user who starts Tomcat. The user who starts Tomcat URL for state confirmation. URL for state confirmation Number of seconds to wait during a stop before drastic measures (force kill) are used on the tomcat process. This number MUST be less than your cluster stop timeout for the resource. The default value is five seconds before the timeout value of stop operation. When it is over this value, it stops a process in kill commands. This parameter is only effective on Tomcat 6 or later. The max time it should take for proper shutdown. Home directory of Java. Home directory of Java Java JVM options used on start and stop. Java options parsed to JVM, used on start and stop. Home directory of Tomcat. Home directory of Tomcat Instance directory of Tomcat Instance directory of Tomcat, defaults to catalina_home Log file name of Tomcat Log file name of Tomcat, defaults to catalina_base/logs/catalina.out A PID file name for Tomcat. A PID file name for Tomcat Absolute path to the custom tomcat start script to use. Tomcat start script location Tomcat start options. Tomcat start options Catalina options, for the start operation only. Catalina options Temporary directory of Tomcat Temporary directory of Tomcat, defaults to none Rotate catalina.out flag. Rotate catalina.out flag catalina.out rotation interval (seconds). catalina.out rotation interval (seconds) Java_endorsed_dirs of tomcat Java_endorsed_dirs of Tomcat, defaults to none Logging_config of tomcat Logging_config of Tomcat, defaults to none Logging_manager of tomcat Logging_manager of Tomcat, defaults to none. END return $OCF_SUCCESS } validate_all_tomcat() { local port local rc=$OCF_SUCCESS ocf_log info "validate_all_tomcat[$TOMCAT_NAME]" check_binary $WGET if [ -z "${TOMCAT_START_SCRIPT}" ]; then - ocf_log err "No default tomcat start script detected. Please specify start script location using the 'tomcat_start_script' option" + ocf_exit_reason "No default tomcat start script detected. Please specify start script location using the 'tomcat_start_script' option" rc=$OCF_ERR_CONFIGURED fi if [ -n "$MAX_STOP_TIME" ] && [ "$MAX_STOP_TIME" -lt 0 ]; then - ocf_log err "max_stop_time must be set to a value greater than 0." + ocf_exit_reason "max_stop_time must be set to a value greater than 0." rc=$OCF_ERR_CONFIGURED fi if echo "$RESOURCE_STATUSURL" | grep -q ":[0-9][0-9]*" ; then port=${RESOURCE_STATUSURL##*:} port=${port%%/*} ocf_log debug "Tomcat port is $port" ocf_log debug "grep port=\"$port\" $CATALINA_BASE/conf/server.xml" grep "port=\"$port\"" $CATALINA_BASE/conf/server.xml > /dev/null 2>&1 if [ $? -ne 0 ]; then - ocf_log err "Your configured status URL specifies a port ($port), but the server does not have a connector listening to that port in $CATALINA_BASE/conf/server.xml" + ocf_exit_reason "Your configured status URL specifies a port ($port), but the server does not have a connector listening to that port in $CATALINA_BASE/conf/server.xml" rc=$OCF_ERR_INSTALLED fi fi if ocf_is_true ${CATALINA_ROTATE_LOG}; then if [ ! -x "$ROTATELOGS" ]; then - ocf_log err "rotatelogs command does not exist." + ocf_exit_reason "rotatelogs command does not exist." rc=$OCF_ERR_INSTALLED fi fi return $rc } # As we stop tomcat, it removes it's own pid file...we still want to know what it was memorize_pid() { if [ -f $CATALINA_PID ]; then rememberedPID=$(cat $CATALINA_PID) fi } # ### tomcat RA environment variables # COMMAND=$1 TOMCAT_NAME="${OCF_RESKEY_tomcat_name-tomcat}" TOMCAT_CONSOLE="${OCF_RESKEY_script_log-/var/log/$TOMCAT_NAME.log}" RESOURCE_TOMCAT_USER="${OCF_RESKEY_tomcat_user-root}" RESOURCE_STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}" JAVA_HOME="${OCF_RESKEY_java_home}" JAVA_OPTS="${OCF_RESKEY_java_opts}" CATALINA_HOME="${OCF_RESKEY_catalina_home}" CATALINA_BASE="${OCF_RESKEY_catalina_base-${OCF_RESKEY_catalina_home}}" CATALINA_OUT="${OCF_RESKEY_catalina_out-$CATALINA_BASE/logs/catalina.out}" CATALINA_PID=$OCF_RESKEY_catalina_pid if [ -z "$CATALINA_PID" ]; then mkdir -p "${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/" if [ "${RESOURCE_TOMCAT_USER}" != "root" ]; then chown ${RESOURCE_TOMCAT_USER} "${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/" fi CATALINA_PID="${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/catalina.pid" fi MAX_STOP_TIME="${OCF_RESKEY_max_stop_time}" TOMCAT_START_OPTS="${OCF_RESKEY_tomcat_start_opts}" TOMCAT_START_SCRIPT="${OCF_RESKEY_tomcat_start_script}" CATALINA_OPTS="-Dname=$TOMCAT_NAME ${OCF_RESKEY_catalina_opts}" CATALINA_TMPDIR="${OCF_RESKEY_catalina_tmpdir}" CATALINA_ROTATE_LOG="${OCF_RESKEY_catalina_rotate_log-NO}" CATALINA_ROTATETIME="${OCF_RESKEY_catalina_rotatetime-86400}" JAVA_ENDORSED_DIRS="${OCF_RESKEY_java_endorsed_dirs}" LOGGING_CONFIG="${OCF_RESKEY_logging_config}" LOGGING_MANAGER="${OCF_RESKEY_logging_manager}" if [ -z "${TOMCAT_START_SCRIPT}" ]; then if [ -e "$CATALINA_HOME/bin/catalina.sh" ]; then TOMCAT_START_SCRIPT="$CATALINA_HOME/bin/catalina.sh" elif [ -e "/usr/sbin/tomcat" ]; then REDIRECT_DEFAULT_CONFIG=1 TOMCAT_START_SCRIPT="/usr/sbin/tomcat" elif [ -e "/usr/sbin/tomcat6" ]; then REDIRECT_DEFAULT_CONFIG=1 TOMCAT_START_SCRIPT="/usr/sbin/tomcat6" fi fi LSB_STATUS_STOPPED=3 if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case "$COMMAND" in meta-data) metadata_tomcat; exit $OCF_SUCCESS;; help|usage) usage; exit $OCF_SUCCESS;; esac if [ ! -d "$JAVA_HOME" -o ! -d "$CATALINA_HOME" -o ! -d "$CATALINA_BASE" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac - ocf_log err "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist." + ocf_exit_reason "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist." exit $OCF_ERR_INSTALLED fi export JAVA_HOME JAVA_OPTS CATALINA_HOME CATALINA_BASE CATALINA_OUT CATALINA_PID CATALINA_OPTS CATALINA_TMPDIR JAVA_ENDORSED_DIRS LOGGING_CONFIG LOGGING_MANAGER JAVA=${JAVA_HOME}/bin/java if [ ! -x "$JAVA" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac - ocf_log err "java command does not exist." + ocf_exit_reason "java command does not exist." exit $OCF_ERR_INSTALLED fi ROTATELOGS="" if ocf_is_true ${CATALINA_ROTATE_LOG}; then # Look for rotatelogs/rotatelogs2 if [ -x /usr/sbin/rotatelogs ]; then ROTATELOGS=/usr/sbin/rotatelogs elif [ -x /usr/sbin/rotatelogs2 ]; then ROTATELOGS=/usr/sbin/rotatelogs2 fi fi # # ------------------ # the main script # ------------------ # case "$COMMAND" in start) ocf_log debug "[$TOMCAT_NAME] Enter tomcat start" start_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat start $func_status" exit $func_status ;; stop) ocf_log debug "[$TOMCAT_NAME] Enter tomcat stop" stop_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat stop $func_status" exit $func_status ;; status) if monitor_tomcat; then echo tomcat instance $TOMCAT_NAME is running exit $OCF_SUCCESS else echo tomcat instance $TOMCAT_NAME is stopped exit $OCF_NOT_RUNNING fi exit $? ;; monitor) #ocf_log debug "[$TOMCAT_NAME] Enter tomcat monitor" monitor_tomcat func_status=$? #ocf_log debug "[$TOMCAT_NAME] Leave tomcat monitor $func_status" exit $func_status ;; meta-data) metadata_tomcat exit $? ;; validate-all) validate_all_tomcat exit $? ;; usage|help) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac