diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in index 2b0d6b098..4dd646896 100755 --- a/heartbeat/CTDB.in +++ b/heartbeat/CTDB.in @@ -1,813 +1,847 @@ #!@BASH_SHELL@ # # OCF Resource Agent for managing CTDB # # Copyright (c) 2009-2010 Novell Inc., Tim Serong # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # OVERVIEW # # When run by itself, CTDB can handle IP failover and includes scripts # to manage various services (Samba, Winbind, HTTP, etc.). When run as # a resource in a Pacemaker cluster, this additional functionality # should not be used; instead one should define separate resources for # CTDB, Samba, Winbind, IP addresses, etc. # # As of 2010-11-17, there is no separate OCF Samba or Winbind RA, so # it is still possible to configure CTDB so that it manages these # resources itself. In future, once Samba and Winbind RAs are # available, this ability will be deprecated and ultimately removed. # # This RA intentionally provides no ability to configure CTDB such that # it manages IP failover, HTTP, NFS, etc. 
# # # TODO: # - ctdb_stop doesn't really support multiple independent CTDB instances, # unless they're running from distinct ctdbd binaries (it uses pkill # $OCF_RESKEY_ctdbd_binary if "ctdb stop" doesn't work, which it might # not under heavy load - this will kill all ctdbd instances on the # system). OTOH, running multiple CTDB instances per node is, well, # AFAIK, completely crazy. Can't run more than one in a vanilla CTDB # cluster, with the CTDB init script. So it might be nice to address # this for complete semantic correctness of the RA, but shouldn't # actually cause any trouble in real life. # - As much as possible, get rid of auto config generation # - Especially smb.conf # - Verify timeouts are sane # - Monitor differentiate between error and not running? # - Do we need to verify globally unique setting? # - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on # current nodes) # - Look at enabling set_ctdb_variables() if necessary. # - Probably possible for sysconfig file to not be restored if # CTDB dies unexpectedly. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Default parameter values: # Some distro's ctdb package stores the persistent db in /var/lib/ctdb, # others store in /var/ctdb. This attempts to detect the correct default # directory. var_prefix="/var/lib/ctdb" if [ ! -d "$var_prefix" ] && [ -d "/var/ctdb" ]; then var_prefix="/var/ctdb" fi run_prefix="/run" if [ ! 
-d "$run_prefix" ] && [ -d "/var/run" ]; then
	run_prefix="/var/run"	# /run is missing here; use the legacy /var/run location
fi
-: ${OCF_RESKEY_ctdb_manages_samba:=no}
-: ${OCF_RESKEY_ctdb_manages_winbind:=no}
-: ${OCF_RESKEY_ctdb_service_smb:=""}
-: ${OCF_RESKEY_ctdb_service_nmb:=""}
-: ${OCF_RESKEY_ctdb_service_winbind:=""}
-: ${OCF_RESKEY_ctdb_samba_skip_share_check:=yes}
-: ${OCF_RESKEY_ctdb_monitor_free_memory:=100}
-: ${OCF_RESKEY_ctdb_start_as_disabled:=no}
-
-: ${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb}
-: ${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb}
-: ${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd}
-: ${OCF_RESKEY_ctdb_dbdir:=${var_prefix}}
-: ${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb}
-: ${OCF_RESKEY_ctdb_rundir:=${run_prefix}/ctdb}
-: ${OCF_RESKEY_ctdb_socket:=${OCF_RESKEY_ctdb_rundir}/ctdbd.socket}
-: ${OCF_RESKEY_ctdb_debuglevel:=2}
-
-: ${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf}
-: ${OCF_RESKEY_smb_passdb_backend:=tdbsam}
-: ${OCF_RESKEY_smb_idmap_backend:=tdb2}
+# Parameter defaults
+
+OCF_RESKEY_ctdb_recovery_lock_default=""
+OCF_RESKEY_ctdb_manages_samba_default="no"
+OCF_RESKEY_ctdb_manages_winbind_default="no"
+OCF_RESKEY_ctdb_service_smb_default=""
+OCF_RESKEY_ctdb_service_nmb_default=""
+OCF_RESKEY_ctdb_service_winbind_default=""
+OCF_RESKEY_ctdb_samba_skip_share_check_default="yes"
+OCF_RESKEY_ctdb_monitor_free_memory_default="100"
+OCF_RESKEY_ctdb_start_as_disabled_default="no"
+
+: ${OCF_RESKEY_ctdb_recovery_lock=${OCF_RESKEY_ctdb_recovery_lock_default}}
+: ${OCF_RESKEY_ctdb_manages_samba=${OCF_RESKEY_ctdb_manages_samba_default}}
+: ${OCF_RESKEY_ctdb_manages_winbind=${OCF_RESKEY_ctdb_manages_winbind_default}}
+: ${OCF_RESKEY_ctdb_service_smb=${OCF_RESKEY_ctdb_service_smb_default}}
+: ${OCF_RESKEY_ctdb_service_nmb=${OCF_RESKEY_ctdb_service_nmb_default}}
+: ${OCF_RESKEY_ctdb_service_winbind=${OCF_RESKEY_ctdb_service_winbind_default}}
+: ${OCF_RESKEY_ctdb_samba_skip_share_check=${OCF_RESKEY_ctdb_samba_skip_share_check_default}}
+:
${OCF_RESKEY_ctdb_monitor_free_memory=${OCF_RESKEY_ctdb_monitor_free_memory_default}} +: ${OCF_RESKEY_ctdb_start_as_disabled=${OCF_RESKEY_ctdb_start_as_disabled_default}} + +OCF_RESKEY_ctdb_config_dir_default="/etc/ctdb" +OCF_RESKEY_ctdb_binary_default="/usr/bin/ctdb" +OCF_RESKEY_ctdbd_binary_default="/usr/sbin/ctdbd" +OCF_RESKEY_ctdb_dbdir_default="${var_prefix}" +OCF_RESKEY_ctdb_logfile_default="/var/log/ctdb/log.ctdb" +OCF_RESKEY_ctdb_rundir_default="${run_prefix}/ctdb" + +: ${OCF_RESKEY_ctdb_config_dir=${OCF_RESKEY_ctdb_config_dir_default}} +: ${OCF_RESKEY_ctdb_binary=${OCF_RESKEY_ctdb_binary_default}} +: ${OCF_RESKEY_ctdbd_binary=${OCF_RESKEY_ctdbd_binary_default}} +: ${OCF_RESKEY_ctdb_dbdir=${OCF_RESKEY_ctdb_dbdir_default}} +: ${OCF_RESKEY_ctdb_logfile=${OCF_RESKEY_ctdb_logfile_default}} +: ${OCF_RESKEY_ctdb_rundir=${OCF_RESKEY_ctdb_rundir_default}} + +OCF_RESKEY_ctdb_socket_default="${OCF_RESKEY_ctdb_rundir}/ctdbd.socket" +OCF_RESKEY_ctdb_debuglevel_default="2" +OCF_RESKEY_ctdb_max_open_files_default="" + +: ${OCF_RESKEY_ctdb_socket=${OCF_RESKEY_ctdb_socket_default}} +: ${OCF_RESKEY_ctdb_debuglevel=${OCF_RESKEY_ctdb_debuglevel_default}} +: ${OCF_RESKEY_ctdb_max_open_files=${OCF_RESKEY_ctdb_max_open_files_default}} + +OCF_RESKEY_smb_conf_default="/etc/samba/smb.conf" +OCF_RESKEY_smb_private_dir_default="" +OCF_RESKEY_smb_passdb_backend_default="tdbsam" +OCF_RESKEY_smb_idmap_backend_default="tdb2" +OCF_RESKEY_smb_fileid_algorithm_default="" + +: ${OCF_RESKEY_smb_conf=${OCF_RESKEY_smb_conf_default}} +: ${OCF_RESKEY_smb_private_dir=${OCF_RESKEY_smb_private_dir_default}} +: ${OCF_RESKEY_smb_passdb_backend=${OCF_RESKEY_smb_passdb_backend_default}} +: ${OCF_RESKEY_smb_idmap_backend=${OCF_RESKEY_smb_idmap_backend_default}} +: ${OCF_RESKEY_smb_fileid_algorithm=${OCF_RESKEY_smb_fileid_algorithm_default}} ####################################################################### meta_data() { cat < 1.0 This resource agent manages CTDB, allowing one to use Clustered Samba 
in a Linux-HA/Pacemaker cluster. You need a shared filesystem (e.g. OCFS2 or GFS2) on which the CTDB lock will be stored. Create /etc/ctdb/nodes containing a list of private IP addresses of each node in the cluster, then configure this RA as a clone. This agent expects the samba and windbind resources to be managed outside of CTDB's control as a separate set of resources controlled by the cluster manager. The optional support for enabling CTDB management of these daemons will be depreciated. For more information see http://linux-ha.org/wiki/CTDB_(resource_agent) CTDB Resource Agent The location of a shared lock file or helper binary, common across all nodes. See CTDB documentation for details. CTDB shared lock file - + Should CTDB manage starting/stopping the Samba service for you? This will be deprecated in future, in favor of configuring a separate Samba resource. Should CTDB manage Samba? - + Should CTDB manage starting/stopping the Winbind service for you? This will be deprecated in future, in favor of configuring a separate Winbind resource. Should CTDB manage Winbind? - + Name of smb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of smb init script - + Name of nmb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of nmb init script - + Name of winbind init script. Only necessary if CTDB is managing Winbind directly. Will usually be auto-detected. Name of winbind init script - + If there are very many shares it may not be feasible to check that all of them are available during each monitoring interval. In that case this check can be disabled. Skip share check during monitor? - + If the amount of free memory drops below this value the node will become unhealthy and ctdb and all managed services will be shutdown. Once this occurs, the administrator needs to find the reason for the OOM situation, rectify it and restart ctdb with "service ctdb start". 
Minimum amount of free memory (MB) - + When set to yes, the CTDB node will start in DISABLED mode and not host any public ip addresses. Start CTDB disabled? - + The directory containing various CTDB configuration files. The "nodes" and "notify.sh" scripts are expected to be in this directory, as is the "events.d" subdirectory. CTDB config file directory - + Full path to the CTDB binary. CTDB binary path - + Full path to the CTDB cluster daemon binary. CTDB Daemon binary path - + Full path to the domain socket that ctdbd will create, used for local clients to attach and communicate with the ctdb daemon. CTDB socket location - + The directory to put the local CTDB database files in. Persistent database files will be put in ctdb_dbdir/persistent. CTDB database directory - + Full path to log file. To log to syslog instead, use the value "syslog". CTDB log file location - + Full path to ctdb runtime directory, used for storage of socket lock state. CTDB runtime directory location - + What debug level to run at (0-10). Higher means more verbose. CTDB debug level - + Maximum number of open files (for ulimit -n) Max open files - + Path to default samba config file. Only necessary if CTDB is managing Samba. Path to smb.conf - + The directory for smbd to use for storing such files as smbpasswd and secrets.tdb. Old versions of CTBD (prior to 1.0.50) required this to be on shared storage. This parameter should not be set for current versions of CTDB, and only remains in the RA for backwards compatibility. Samba private dir (deprecated) - + Which backend to use for storing user and possibly group information. Only necessary if CTDB is managing Samba. Samba passdb backend - + Which backend to use for SID/uid/gid mapping. Only necessary if CTDB is managing Samba. Samba idmap backend - + Which fileid:algorithm to use with vfs_fileid. The correct value depends on which clustered filesystem is in use, e.g.: for OCFS2, this should be set to "fsid". 
Only necessary if CTDB is managing Samba. Samba VFS fileid algorithm - + END } ####################################################################### # Figure out path to /etc/sysconfig/ctdb (same logic as # loadconfig() from /etc/ctdb/functions if [ -f /etc/sysconfig/ctdb ]; then CTDB_SYSCONFIG=/etc/sysconfig/ctdb elif [ -f /etc/default/ctdb ]; then CTDB_SYSCONFIG=/etc/default/ctdb elif [ -f "$OCF_RESKEY_ctdb_config_dir/ctdb" ]; then CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdb elif [ -f "$OCF_RESKEY_ctdb_config_dir/ctdbd.conf" ]; then CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdbd.conf fi # Backup paths CTDB_SYSCONFIG_BACKUP=${CTDB_SYSCONFIG}.ctdb-ra-orig invoke_ctdb() { # CTDB's defaults are: local timeout local timelimit timeout=3 timelimit=120 # ...but we override with the timeout for the current op: if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then timeout=$((OCF_RESKEY_CRM_meta_timeout/1000)) timelimit=$((OCF_RESKEY_CRM_meta_timeout/1000)) fi $OCF_RESKEY_ctdb_binary --socket="$OCF_RESKEY_ctdb_socket" \ -t $timeout -T $timelimit \ "$@" } # Enable any event scripts that are explicitly required. # Any others will ultimately be invoked or not based on how they ship # with CTDB, but will generally have no effect, beacuase the relevant # CTDB_MANAGES_* options won't be set in /etc/sysconfig/ctdb. 
enable_event_scripts() {
	local event_dir
	event_dir=$OCF_RESKEY_ctdb_config_dir/events.d

	chmod u+x "$event_dir/00.ctdb"	# core database health check

	# Interface handling is only wanted when a public_addresses file exists.
	if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then
		chmod u+x "$event_dir/10.interface"
	else
		chmod a-x "$event_dir/10.interface"
	fi
	# Routing handling is only wanted when a static-routes file exists.
	if [ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ]; then
		chmod u+x "$event_dir/11.routing"
	else
		chmod a-x "$event_dir/11.routing"
	fi
	# The samba event script serves both Samba and Winbind management.
	if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || \
	   ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then
		chmod u+x "$event_dir/50.samba"
	else
		chmod a-x "$event_dir/50.samba"
	fi
}

# This function has no effect (currently no way to set CTDB_SET_*)
# but remains here in case we need it in future.
#
# Passes every CTDB_SET_<name>=<value> shell variable to the daemon with
# "ctdb setvar".  Returns $OCF_SUCCESS when every call succeeded and
# $OCF_ERR_GENERIC otherwise.
set_ctdb_variables() {
	local rv v varname value failed
	rv=$OCF_SUCCESS
	# The loop runs in a pipeline subshell, so an assignment to rv made
	# inside it would be lost.  Failure is therefore reported through the
	# subshell's exit status and turned into rv out here.
	set | grep '^CTDB_SET_' | cut -d_ -f3- | {
		failed=0
		while read -r v; do
			varname=$(echo "$v" | cut -d= -f1)
			# -f2- keeps values that themselves contain "="
			value=$(echo "$v" | cut -d= -f2-)
			invoke_ctdb setvar "$varname" "$value" || failed=1
		done
		[ "$failed" -eq 0 ]
	} || rv=$OCF_ERR_GENERIC
	return $rv
}

# Add necessary settings to /etc/samba/smb.conf.  In a perfect world,
# we'd be able to generate a new, temporary, smb.conf file somewhere,
# something like:
#     include = /etc/samba/smb.conf
#     [global]
#       clustering = yes
#       # ...etc...
# Unfortunately, we can't do this, because there's no way to tell the
# smb init script where the temporary config is, so we just edit
# the default config file.
init_smb_conf() {
	# Don't screw around with the config if CTDB isn't managing Samba!
	ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0

	# replace these things in smb.conf
	local repl
	repl='# CTDB-RA:|passdb backend|clustering|idmap backend|idmap config[[:space:]]*\*[[:space:]]*:[[:space:]]*backend|private dir|ctdbd socket'

	local private_dir
	[ -n "$OCF_RESKEY_smb_private_dir" ] && private_dir="\tprivate dir = $OCF_RESKEY_smb_private_dir\n"

	local vfs_fileid
	local do_vfs
	do_vfs=0
	if [ -n "$OCF_RESKEY_smb_fileid_algorithm" ]; then
		repl="${repl}|fileid:algorithm|fileid:mapping"
		vfs_fileid="\tfileid:algorithm = $OCF_RESKEY_smb_fileid_algorithm\n"
		if sed -n '/^[[:space:]]*\[global\]/,/^[[:space:]]*\[/p' "$OCF_RESKEY_smb_conf" | \
		   grep -Eq '^[[:space:]]*vfs objects'; then
			# vfs objects already specified, will append fileid to existing line
			do_vfs=1
		else
			vfs_fileid="$vfs_fileid\tvfs objects = fileid\n"
		fi
	fi

	# Remember which idmap syntax the existing config uses so the
	# generated line matches it.
	local idmap_config
	if grep -Eqs '^[[:space:]]*idmap backend[[:space:]]*=' "$OCF_RESKEY_smb_conf"; then
		idmap_config=old
	else
		idmap_config=new
	fi

	# Preserve permissions of smb.conf: the copy carries them, and the
	# redirect below only rewrites its contents.
	cp -a "$OCF_RESKEY_smb_conf" "$OCF_RESKEY_smb_conf.$$"
	# Inside [global], drop every line the RA owns and, when requested,
	# append "fileid" to an existing "vfs objects" line.  The vfs pattern
	# allows any amount of leading whitespace, matching the grep above.
	awk '
		/^[[:space:]]*\[/ { global = 0 }
		/^[[:space:]]*\[global\]/ { global = 1 }
		{
			if(global) {
				if ('$do_vfs' && $0 ~ /^[[:space:]]*vfs objects/ && $0 !~ /fileid/) {
					print $0" fileid"
				} else if ($0 !~ /^[[:space:]]*('"$repl"')/) {
					print
				}
			} else {
				print
			}
		}' "$OCF_RESKEY_smb_conf" | sed "/^[[:space:]]*\[global\]/ a\\
\t# CTDB-RA: Begin auto-generated section (do not change below)\n\
\tpassdb backend = $OCF_RESKEY_smb_passdb_backend\n\
\tclustering = yes\n\
\tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir$vfs_fileid\
\t# CTDB-RA: End auto-generated section (do not change above)" > "$OCF_RESKEY_smb_conf.$$"
	if [ "$idmap_config" = "old" ]; then
		sed -i "/^[[:space:]]*clustering = yes/ a\\
\tidmap backend = $OCF_RESKEY_smb_idmap_backend" "$OCF_RESKEY_smb_conf.$$"
	else
		sed -i "/^[[:space:]]*clustering = yes/ a\\
\tidmap config * : backend = $OCF_RESKEY_smb_idmap_backend" "$OCF_RESKEY_smb_conf.$$"
	fi
	# Zero-length dd with conv=fsync: flush the new file before the rename.
	dd conv=notrunc,fsync of="$OCF_RESKEY_smb_conf.$$" if=/dev/null >/dev/null 2>&1
	mv "$OCF_RESKEY_smb_conf.$$" "$OCF_RESKEY_smb_conf"
}

# Get rid of that section we added
cleanup_smb_conf() {
	ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0

	# preserve permissions of smb.conf
	cp -a "$OCF_RESKEY_smb_conf" "$OCF_RESKEY_smb_conf.$$"
	sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' "$OCF_RESKEY_smb_conf" > "$OCF_RESKEY_smb_conf.$$"
	mv "$OCF_RESKEY_smb_conf.$$" "$OCF_RESKEY_smb_conf"
}

# Append "NAME=VALUE" ($1=$2) to the sysconfig file, skipping empty values.
append_ctdb_sysconfig() {
	[ -n "$2" ] && echo "$1=$2" >> "$CTDB_SYSCONFIG"
}

# Generate a new, minimal CTDB config file that's just enough
# to get CTDB running as configured by the RA parameters.
generate_ctdb_sysconfig() {
	# Backup existing sysconfig if we're not already using an auto-generated one
	grep -qa '# CTDB-RA: Auto-generated' "$CTDB_SYSCONFIG" || cp -p "$CTDB_SYSCONFIG" "$CTDB_SYSCONFIG_BACKUP"
	if [ $? -ne 0 ]; then
		ocf_log warn "Unable to backup $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP"
	fi

	ocf_log info "Generating new $CTDB_SYSCONFIG"

	# Note to maintainers and other random hackers:
	# Parameters may need to be set here, for CTDB event
	# scripts to pick up, or may need to be passed to ctdbd
	# when starting, or both.  Be careful.  The CTDB source
	# tree and manpages are your friends.  As a concrete
	# example, setting CTDB_START_AS_DISABLED here is
	# completely useless, as this is actually a command line
	# argument for ctdbd; it's not used anywhere else.
cat >$CTDB_SYSCONFIG </dev/null # public addresses file (should not be present, but need to set for correctness if it is) local pub_addr_option pub_addr_option="" [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \ pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses" # start as disabled local start_as_disabled start_as_disabled="--start-as-disabled" ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled="" # set nofile ulimit for ctdbd process if [ -n "$OCF_RESKEY_ctdb_max_open_files" ]; then ulimit -n "$OCF_RESKEY_ctdb_max_open_files" fi # Start her up "$OCF_RESKEY_ctdbd_binary" \ --reclock="$OCF_RESKEY_ctdb_recovery_lock" \ --nlist="$OCF_RESKEY_ctdb_config_dir/nodes" \ --socket="$OCF_RESKEY_ctdb_socket" \ --dbdir="$OCF_RESKEY_ctdb_dbdir" \ --dbdir-persistent="$OCF_RESKEY_ctdb_dbdir/persistent" \ --event-script-dir="$OCF_RESKEY_ctdb_config_dir/events.d" \ --notification-script="$OCF_RESKEY_ctdb_config_dir/notify.sh" \ --transport=tcp \ $start_as_disabled $log_option $pub_addr_option \ -d "$OCF_RESKEY_ctdb_debuglevel" if [ $? -ne 0 ]; then # cleanup smb.conf cleanup_smb_conf ocf_exit_reason "Failed to execute $OCF_RESKEY_ctdbd_binary." return $OCF_ERR_GENERIC else # Wait a bit for CTDB to stabilize # (until start times out if necessary) while true; do # Initial sleep is intentional (ctdb init script # has sleep after ctdbd start, but before invoking # ctdb to talk to it) sleep 1 status=$(invoke_ctdb status 2>/dev/null) if [ $? -ne 0 ]; then # CTDB will be running, kill it before returning ctdb_stop ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status" return $OCF_ERR_GENERIC fi if ! echo "$status" | grep -qs 'UNHEALTHY (THIS'; then # Status does not say this node is unhealthy, # so we're good to go. Do a bit of final # setup and (hopefully) return success. set_ctdb_variables return $? 
fi done fi # ctdbd will (or can) actually still be running at this point, so kill it ctdb_stop ocf_exit_reason "Timeout waiting for CTDB to stabilize" return $OCF_ERR_GENERIC } ctdb_stop() { # Do nothing if already stopped pkill -0 -f "$OCF_RESKEY_ctdbd_binary" || return $OCF_SUCCESS # Tell it to die nicely invoke_ctdb shutdown >/dev/null 2>&1 rv=$? # No more Mr. Nice Guy count=0 while pkill -0 -f "$OCF_RESKEY_ctdbd_binary" ; do sleep 1 count=$((count + 1)) [ $count -gt 10 ] && { ocf_log info "killing ctdbd " pkill -9 -f "$OCF_RESKEY_ctdbd_binary" pkill -9 -f "${OCF_RESKEY_ctdb_config_dir}/events.d/" } done # Cleanup smb.conf cleanup_smb_conf # It was a clean shutdown, return success [ $rv -eq $OCF_SUCCESS ] && return $OCF_SUCCESS # Unclean shutdown, return success if there's no ctdbds left (we # killed them forcibly, but at least they're good and dead). pkill -0 -f "$OCF_RESKEY_ctdbd_binary" || return $OCF_SUCCESS # Problem: ctdb shutdown didn't work and neither did some vigorous # kill -9ing. Only thing to do is report failure. return $OCF_ERR_GENERIC } ctdb_monitor() { local status # "ctdb status" exits non-zero if CTDB isn't running. # It can also exit non-zero if there's a timeout (ctdbd blocked, # stalled, massive load, or otherwise wedged). If it's actually # not running, STDERR will say "Errno:Connection refused(111)", # whereas if it's wedged, it'll say various other unpleasant things. status=$(invoke_ctdb status 2>&1) if [ $? 
-ne 0 ]; then if echo "$status" | grep -qs 'Connection refused'; then return $OCF_NOT_RUNNING elif echo "$status" | grep -qs 'No such file or directory'; then return $OCF_NOT_RUNNING elif echo $status | grep -qs 'connect() failed'; then return $OCF_NOT_RUNNING else ocf_exit_reason "CTDB status call failed: $status" return $OCF_ERR_GENERIC fi fi if echo "$status" | grep -Eqs '(OK|DISABLED) \(THIS'; then return $OCF_SUCCESS fi ocf_exit_reason "CTDB status is bad: $status" return $OCF_ERR_GENERIC } ctdb_validate() { # Required binaries for binary in pkill; do check_binary $binary done if [ -z "$CTDB_SYSCONFIG" ]; then ocf_exit_reason "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)" return $OCF_ERR_INSTALLED fi if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && [ ! -f "$OCF_RESKEY_smb_conf" ]; then ocf_exit_reason "Samba config file '$OCF_RESKEY_smb_conf' does not exist." return $OCF_ERR_INSTALLED fi if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!" fi if [ ! -f "$OCF_RESKEY_ctdb_config_dir/nodes" ]; then ocf_exit_reason "$OCF_RESKEY_ctdb_config_dir/nodes does not exist." return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_ctdb_recovery_lock" ]; then ocf_exit_reason "ctdb_recovery_lock not specified." return $OCF_ERR_CONFIGURED fi if [ "${OCF_RESKEY_ctdb_recovery_lock:0:1}" == '!' ]; then # '!' prefix means recovery lock is handled via a helper binary binary="${OCF_RESKEY_ctdb_recovery_lock:1}" binary="${binary%% *}" # trim any parameters if [ -z "$binary" ]; then ocf_exit_reason "ctdb_recovery_lock invalid helper" return $OCF_ERR_CONFIGURED fi check_binary "${binary}" else lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock") touch "$lock_dir/$$" 2>/dev/null if [ $? != 0 ]; then ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." 
return $OCF_ERR_ARGS fi rm "$lock_dir/$$" fi return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ctdb_start;; stop) ctdb_stop;; monitor) ctdb_monitor;; validate-all) ctdb_validate;; usage|help) ctdb_usage exit $OCF_SUCCESS ;; *) ctdb_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/ClusterMon b/heartbeat/ClusterMon index 1c48f661d..1d45ff47e 100755 --- a/heartbeat/ClusterMon +++ b/heartbeat/ClusterMon @@ -1,261 +1,271 @@ #!/bin/sh # # # ClusterMon OCF RA. # Starts crm_mon in background which logs cluster status as # html to the specified file. # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Bree # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# # OCF instance parameters: # OCF_RESKEY_user # OCF_RESKEY_pidfile # OCF_RESKEY_update # OCF_RESKEY_extra_options # OCF_RESKEY_htmlfile ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_user_default="root" +OCF_RESKEY_update_default="15000" +OCF_RESKEY_extra_options_default="" +OCF_RESKEY_pidfile_default="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid" +OCF_RESKEY_htmlfile_default="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html" + +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_update=${OCF_RESKEY_update_default}} +: ${OCF_RESKEY_extra_options=${OCF_RESKEY_extra_options_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_htmlfile=${OCF_RESKEY_htmlfile_default}} + ####################################################################### meta_data() { cat < 1.0 This is a ClusterMon Resource Agent. It outputs current cluster status to the html. Runs crm_mon in the background, recording the cluster status to an HTML file The user we want to run crm_mon as The user we want to run crm_mon as - + How frequently should we update the cluster status Update interval - + Additional options to pass to crm_mon. Eg. -n -r Extra options - + PID file location to ensure only one instance is running PID file - + Location to write HTML output to. HTML output - + END } ####################################################################### ClusterMon_usage() { cat </dev/null if [ $? -eq 0 ]; then : Yes, user exists. We can further check his permission on crm_mon if necessary else ocf_log err "The user $OCF_RESKEY_user does not exist!" exit $OCF_ERR_ARGS fi fi # Pidfile better be an absolute path case $OCF_RESKEY_pidfile in /*) ;; *) ocf_log warn "You should have pidfile($OCF_RESKEY_pidfile) of absolute path!" 
;; esac # Check the update interval if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then : else ocf_log err "Invalid update interval $OCF_RESKEY_update. It should be positive integer!" exit $OCF_ERR_ARGS fi if CheckOptions $OCF_RESKEY_extra_options; then : else ocf_log err "Invalid options $OCF_RESKEY_extra_options!" exit $OCF_ERR_ARGS fi # Htmlfile better be an absolute path case $OCF_RESKEY_htmlfile in /*) ;; *) ocf_log warn "You should have htmlfile($OCF_RESKEY_htmlfile) of absolute path!" ;; esac echo "Validate OK" return $OCF_SUCCESS } if [ $# -ne 1 ]; then ClusterMon_usage exit $OCF_ERR_ARGS fi -: ${OCF_RESKEY_update:="15000"} -: ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} -: ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} - OCF_RESKEY_update=`expr $OCF_RESKEY_update / 1000` case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ClusterMon_start ;; stop) ClusterMon_stop ;; monitor) ClusterMon_monitor ;; validate-all) ClusterMon_validate ;; usage|help) ClusterMon_usage exit $OCF_SUCCESS ;; *) ClusterMon_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/Delay b/heartbeat/Delay index 10b14f937..f09ec1895 100755 --- a/heartbeat/Delay +++ b/heartbeat/Delay @@ -1,223 +1,229 @@ #!/bin/sh # # # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # This script is a test resource for introducing delay. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_startdelay # OCF_RESKEY_stopdelay # OCF_RESKEY_mondelay # # # OCF_RESKEY_startdelay defaults to 20 (seconds) # OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay # OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay # # # This is really a test resource script. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+# Parameter defaults (stopdelay and mondelay follow startdelay when unset)
+
+OCF_RESKEY_startdelay_default="20"
+OCF_RESKEY_stopdelay_default="${OCF_RESKEY_startdelay_default}"
+OCF_RESKEY_mondelay_default="${OCF_RESKEY_startdelay_default}"
+
+: ${OCF_RESKEY_startdelay=${OCF_RESKEY_startdelay_default}}
+: ${OCF_RESKEY_stopdelay=${OCF_RESKEY_startdelay}}
+: ${OCF_RESKEY_mondelay=${OCF_RESKEY_startdelay}}
+
#######################################################################

usage() {
	cat <<-!
usage: $0 {start|stop|status|monitor|meta-data|validate-all}
!
}

meta_data() {
	cat < 1.0 This script is a test resource for introducing delay. Waits for a defined timespan How long in seconds to delay on start operation. Start delay - + How long in seconds to delay on stop operation. Defaults to "startdelay" if unspecified. Stop delay - + How long in seconds to delay on monitor operation. Defaults to "startdelay" if unspecified. Monitor delay - + END
}

# Query the pseudo-resource state kept by ha_pseudo_resource.
Delay_stat() {
	ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} monitor
}

# Log and return the running state of the resource.
Delay_Status() {
	if Delay_stat
	then
		ocf_log info "Delay is running OK"
		return $OCF_SUCCESS
	else
		ocf_log info "Delay is stopped"
		return $OCF_NOT_RUNNING
	fi
}

# Validate the parameters, wait for mondelay seconds, then report status.
Delay_Monitor() {
	Delay_Validate_All -q
	sleep $OCF_RESKEY_mondelay
	Delay_Status
}

# Mark the resource started and wait for startdelay seconds.
Delay_Start() {
	if Delay_stat
	then
		ocf_log info "Delay already running."
		return $OCF_SUCCESS
	else
		Delay_Validate_All -q
		ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start
		rc=$?
		# The delay is applied whether or not the state change worked.
		sleep $OCF_RESKEY_startdelay
		if [ $rc -ne 0 ]
		then
			return $OCF_ERR_PERM
		fi
		return $OCF_SUCCESS
	fi
}

# Mark the resource stopped and wait for stopdelay seconds.
Delay_Stop() {
	if Delay_stat
	then
		Delay_Validate_All -q
		ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop
		rc=$?
		# The delay is applied whether or not the state change worked.
		sleep $OCF_RESKEY_stopdelay
		if [ $rc -ne 0 ]
		then
			return $OCF_ERR_PERM
		fi
		return $OCF_SUCCESS
	else
		ocf_log info "Delay already stopped."
		return $OCF_SUCCESS
	fi
}

# Check if all the arguments are valid numbers, a string is considered valid if:
# 1. It does not contain any character but digits and period ".";
# 2. The period "." does not occur more than once
Are_Valid_Numbers() {
	for i in "$@"; do
		# The patterns are quoted so the shell hands them to grep
		# verbatim instead of glob-expanding them against files in
		# the current directory.
		echo "$i" | grep -v '[^0-9.]' | grep -q -v '[.].*[.]'
		if test $? -ne 0; then
			return $OCF_ERR_ARGS
		fi
	done
	return $OCF_SUCCESS
}

# Validate the three delay parameters.  Returns $OCF_SUCCESS when they
# are valid and exits with $OCF_ERR_ARGS when they are not.
Delay_Validate_All() {
	# Be quiet when specified -q option _and_ validation succeeded
	getopts "q" option
	# Quoted: without -q getopts stores "?", which must not be globbed.
	if test "$option" = "q"; then
		quiet=yes
	else
		quiet=no
	fi
	shift $(($OPTIND -1))

	if Are_Valid_Numbers $OCF_RESKEY_startdelay $OCF_RESKEY_stopdelay \
		$OCF_RESKEY_mondelay; then
		if test $quiet = "no"; then
			echo "Validate OK"
		fi
		# _Return_ on validation success
		return $OCF_SUCCESS
	else
		ocf_exit_reason "Some of the instance parameters are invalid"
		# _Exit_ on validation failure
		exit $OCF_ERR_ARGS
	fi
}

if [ $# -ne 1 ]; then
	usage
	exit $OCF_ERR_ARGS
fi
-: ${OCF_RESKEY_startdelay=20}
-: ${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay}
-: ${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay}
-
case $1 in
meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
start)
	Delay_Start
	;;
stop)
	Delay_Stop
	;;
monitor)
	Delay_Monitor
	;;
status)
	Delay_Status
	;;
validate-all)
	Delay_Validate_All
	;;
usage)
	usage
	exit $OCF_SUCCESS
	;;
*)
	usage
	exit $OCF_ERR_ARGS
	;;
esac
exit $?
diff --git a/heartbeat/Dummy b/heartbeat/Dummy
index ce953e14e..fe85def9a 100755
--- a/heartbeat/Dummy
+++ b/heartbeat/Dummy
@@ -1,181 +1,186 @@
#!/bin/sh
#
#
# Dummy OCF RA. Does nothing except track its own state.
# Use it only as a testing tool or example for how to write
# a resource agent.
#
# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Bree
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_state_default="${HA_RSCTMP}/Dummy-${OCF_RESOURCE_INSTANCE}.state" +OCF_RESKEY_fake_default="dummy" + +: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}} +: ${OCF_RESKEY_fake=${OCF_RESKEY_fake_default}} + ####################################################################### meta_data() { cat < 1.0 This is a Dummy Resource Agent. It does absolutely nothing except keep track of whether its running or not. Its purpose in life is for testing and to serve as a template for RA writers. NB: Please pay attention to the timeouts specified in the actions section below. They should be meaningful for the kind of resource the agent manages. They should be the minimum advised timeouts, but they shouldn't/cannot cover _all_ possible resource instances. So, try to be neither overly generous nor too stingy, but moderate. The minimum timeouts should never be below 10 seconds. Example stateless resource agent Location to store the resource state in. 
State file - + Fake attribute that can be changed to cause a reload Fake attribute that can be changed to cause a reload - + END } ####################################################################### dummy_usage() { cat < 1.0 Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. -- Resource script for EVMS shared cluster container. It runs evms_activate on one node in the cluster. Manages EVMS Shared Cluster Containers (SCCs) (deprecated) If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning - + END } EvmsSCC_status() { # At the moment we don't support monitoring EVMS activations. We just return "not running" to cope with the pre-start monitor call. return $OCF_NOT_RUNNING } EvmsSCC_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" case "$n_type" in pre) case "$n_op" in start) ocf_log debug "EvmsSCC: Notify: Starting node(s): $n_start." EvmsSCC_start_notify_common ;; esac ;; esac return $OCF_SUCCESS } EvmsSCC_start() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" ocf_log debug "EvmsSCC: Start: starting node(s): $n_start." EvmsSCC_start_notify_common return $OCF_SUCCESS } EvmsSCC_stop() { return $OCF_SUCCESS } EvmsSCC_start_notify_common() { local n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)} ocf_log debug "EvmsSCC: Start_Notify: I am node $n_myself." 
n_active="$n_active $n_start" case " $n_active " in *" $n_myself "*) ;; *) ocf_log err "EvmsSCC: $n_myself (local) not on active list!" return $OCF_ERR_GENERIC ;; esac #pick the first node from the starting list #when the cluster boots this will be one of the many booting nodes #when a node later joins the cluster, this will be the joining node local n_first=$(echo $n_start | cut -d ' ' -f 1) ocf_log debug "EvmsSCC: Start_Notify: First node in starting list is $n_first." if [ "$n_myself" = "$n_first" ] ; then ocf_log debug "EvmsSCC: Start_Notify: I am running ${EVMSACTIVATE}." while true ; do if ! ${EVMSACTIVATE} -q 2> /dev/null ; then SLEEP_TIME=$(($(ocf_maybe_random) % 40)) ocf_log info "EvmsSCC: Evms call failed - sleeping for $SLEEP_TIME seconds and then trying again." sleep $SLEEP_TIME else break fi done fi return $OCF_SUCCESS } # Check the arguments passed to this script if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi OP=$1 case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # Be obnoxious, log deprecation warning on every invocation (unless # suppressed by resource configuration). ocf_deprecated check_binary $CUT check_binary $EVMSACTIVATE case $OP in start) EvmsSCC_start ;; notify) EvmsSCC_notify ;; stop) EvmsSCC_stop ;; status|monitor) EvmsSCC_status ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/Evmsd b/heartbeat/Evmsd index 5d752cb37..1f2413432 100755 --- a/heartbeat/Evmsd +++ b/heartbeat/Evmsd @@ -1,155 +1,161 @@ #!/bin/sh # # Evmsd OCF RA. # # Copyright (c) 2004 SUSE LINUX AG, Jo De Baer # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_ignore_deprecation_default="false" + +: ${OCF_RESKEY_ignore_deprecation=${OCF_RESKEY_ignore_deprecation_default}} + ####################################################################### meta_data() { cat < 1.0 Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. -- This is a Evmsd Resource Agent. Controls clustered EVMS volume management (deprecated) If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning - + END } ####################################################################### evmsd_usage() { cat < 1.1 Resource script for Filesystem. It manages a Filesystem on a shared storage medium. The standard monitor operation of depth 0 (also known as probe) checks if the filesystem is mounted. 
If you want deeper tests, set OCF_CHECK_LEVEL to one of the following values: 10: read first 16 blocks of the device (raw read) This doesn't exercise the filesystem at all, but the device on which the filesystem lives. This is noop for non-block devices such as NFS, SMBFS, or bind mounts. 20: test if a status file can be written and read The status file must be writable by root. This is not always the case with an NFS mount, as NFS exports usually have the "root_squash" option set. In such a setup, you must either use read-only monitoring (depth=10), export with "no_root_squash" on your NFS server, or grant world write permissions on the directory where the status file is to be placed. Manages filesystem mounts The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. block device - + The mount point for the filesystem. mount point - + The type of filesystem to be mounted. filesystem type - + Any extra options to be given as -o options to mount. For bind mounts, add "bind" here and set fstype to "none". We will do the right thing for options such as "bind,ro". options - + The prefix to be used for a status file for resource monitoring with depth 20. If you don't specify this parameter, all status files will be created in a separate directory. status file prefix - + Specify how to decide whether to run fsck or not. "auto" : decide to run fsck depending on the fstype(default) "force" : always run fsck regardless of the fstype "no" : do not run fsck ever. run_fsck - + Normally, we expect no users of the filesystem and the stop operation to finish quickly. If you cannot control the filesystem users easily and want to prevent the stop action from failing, then set this parameter to "no" and add an appropriate timeout for the stop operation. fast stop - + The use of a clone setup for local filesystems is forbidden by default. 
For special setups like glusterfs, cloning a mount of a local device with a filesystem like ext4 or xfs independently on several nodes is a valid use case. Only set this to "true" if you know what you are doing! allow running as a clone, regardless of filesystem type - + This option allows specifying how to handle processes that are currently accessing the mount directory. "true" : Default value, kill processes accessing mount point "safe" : Kill processes accessing mount point using methods that avoid functions that could potentially block during process detection "false" : Do not kill any processes. The 'safe' option uses shell logic to walk the /procs/ directory for pids using the mount point while the default option uses the fuser cli tool. fuser is known to perform operations that can potentially block if unresponsive nfs mounts are in use on the system. Kill processes before unmount - + END } # # Make sure the kernel does the right thing with the FS buffers # This function should be called after unmounting and before mounting # It may not be necessary in 2.4 and later kernels, but it shouldn't hurt # anything either... # # It's really a bug that you have to do this at all... # flushbufs() { if have_binary $BLOCKDEV ; then if [ "$blockdevice" = "yes" ] ; then $BLOCKDEV --flushbufs $1 return $? fi fi return 0 } # Take advantage of /etc/mtab if present, use portable mount command # otherwise. Normalize format to "dev mountpoint fstype". is_bind_mount() { echo "$options" | grep -w bind >/dev/null 2>&1 } list_mounts() { local inpf="" local mount_list="" local check_list="x" if [ -e "/proc/mounts" ] && ! is_bind_mount; then inpf=/proc/mounts elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then inpf=/etc/mtab fi # Make sure that the mount list has not been changed while reading. 
while [ "$mount_list" != "$check_list" ]; do check_list=$mount_list if [ "$inpf" ]; then mount_list=$(cut -d' ' -f1,2,3 < $inpf) else mount_list=$($MOUNT | cut -d' ' -f1,3,5) fi done echo "$mount_list" } determine_blockdevice() { if [ $blockdevice = "yes" ]; then return fi # Get the current real device name, if possible. # (specified devname could be -L or -U...) case "$FSTYPE" in nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none) : ;; *) DEVICE=`list_mounts | grep " $CANONICALIZED_MOUNTPOINT " | cut -d' ' -f1` if [ -b "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Lists all filesystems potentially mounted under a given path, # excluding the path itself. list_submounts() { list_mounts | grep " $1/" | cut -d' ' -f2 | sort -r } # kernels < 2.6.26 can't handle bind remounts bind_kernel_check() { echo "$options" | grep -w ro >/dev/null 2>&1 || return uname -r | awk -F. ' $1==2 && $2==6 { sub("[^0-9].*","",$3); if ($3<26) exit(1); }' [ $? -ne 0 ] && ocf_log warn "kernel `uname -r` cannot handle read only bind mounts" } bind_mount() { if is_bind_mount && [ "$options" != "-o bind" ] then bind_kernel_check bind_opts=`echo $options | sed 's/bind/remount/'` $MOUNT $bind_opts $MOUNTPOINT else true # make sure to return OK fi } is_option() { echo $OCF_RESKEY_options | grep -w "$1" >/dev/null 2>&1 } is_fsck_needed() { case $OCF_RESKEY_run_fsck in force) true;; no) false;; ""|auto) case $FSTYPE in ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs) false;; *) true;; esac;; *) ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'" OCF_RESKEY_run_fsck="auto" is_fsck_needed;; esac } fstype_supported() { local support="$FSTYPE" local rc if [ "X${HOSTOS}" != "XOpenBSD" ];then # skip checking /proc/filesystems for obsd return $OCF_SUCCESS fi if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then : No FSTYPE specified, rely on 
the system has the right file-system support already return $OCF_SUCCESS fi # support fuse-filesystems (e.g. GlusterFS) case $FSTYPE in fuse.*|glusterfs|rozofs) support="fuse";; esac grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -eq 0 ]; then # found the fs type return $OCF_SUCCESS fi # if here, we should attempt to load the module and then # check the if the filesystem support exists again. $MODPROBE $support >/dev/null if [ $? -ne 0 ]; then ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module" return $OCF_ERR_INSTALLED fi # It is possible for the module to load and not be complete initialized # before we check /proc/filesystems again. Give this a few trys before # giving up entirely. for try in $(seq 5); do grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -eq 0 ] ; then # yes. found the filesystem after doing the modprobe return $OCF_SUCCESS fi ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again" sleep 1 done ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems" return $OCF_ERR_INSTALLED } # # START: Start up the filesystem # Filesystem_start() { # Check if there are any mounts mounted under the mountpoint if list_mounts | grep -q -E " $CANONICALIZED_MOUNTPOINT/\w+" >/dev/null 2>&1; then ocf_log err "There is one or more mounts mounted under $MOUNTPOINT." return $OCF_ERR_CONFIGURED fi # See if the device is already mounted. if Filesystem_status >/dev/null 2>&1 ; then ocf_log info "Filesystem $MOUNTPOINT is already mounted." return $OCF_SUCCESS fi fstype_supported || exit $OCF_ERR_INSTALLED # Check the filesystem & auto repair. # NOTE: Some filesystem types don't need this step... Please modify # accordingly if [ $blockdevice = "yes" ]; then if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? 
to exist" exit $OCF_ERR_INSTALLED fi if is_fsck_needed; then ocf_log info "Starting filesystem check on $DEVICE" if [ -z "$FSTYPE" ]; then $FSCK -p $DEVICE else $FSCK -t $FSTYPE -p $DEVICE fi # NOTE: if any errors at all are detected, it returns non-zero # if the error is >= 4 then there is a big problem if [ $? -ge 4 ]; then ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE" return $OCF_ERR_GENERIC fi fi fi [ -d "$MOUNTPOINT" ] || ocf_run mkdir -p $MOUNTPOINT if [ ! -d "$MOUNTPOINT" ] ; then ocf_exit_reason "Couldn't find directory [$MOUNTPOINT] to use as a mount point" exit $OCF_ERR_INSTALLED fi flushbufs $DEVICE # Mount the filesystem. case "$FSTYPE" in none) $MOUNT $options $DEVICE $MOUNTPOINT && bind_mount ;; "") $MOUNT $options $DEVICE $MOUNTPOINT ;; *) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;; esac if [ $? -ne 0 ]; then ocf_exit_reason "Couldn't mount device [$DEVICE] as $MOUNTPOINT" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # end of Filesystem_start get_pids() { local dir=$1 local procs local mmap_procs if ocf_is_true "$FORCE_UNMOUNT"; then if [ "X${HOSTOS}" = "XOpenBSD" ];then fstat | grep $dir | awk '{print $3}' else $FUSER -m $dir 2>/dev/null fi elif [ "$FORCE_UNMOUNT" = "safe" ]; then procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}') mmap_procs=$(grep " ${dir}" /proc/[0-9]*/maps | awk -F/ '{print $3}') printf "${procs}\n${mmap_procs}" | sort | uniq fi } signal_processes() { local dir=$1 local sig=$2 local pids pid # fuser returns a non-zero return code if none of the # specified files is accessed or in case of a fatal # error. pids=$(get_pids "$dir") if [ -z "$pids" ]; then ocf_log info "No processes on $dir were signalled. 
force_unmount is set to '$FORCE_UNMOUNT'" return fi for pid in $pids; do ocf_log info "sending signal $sig to: `ps -f $pid | tail -1`" kill -s $sig $pid done } try_umount() { local SUB=$1 $UMOUNT $umount_force $SUB list_mounts | grep -q " $SUB " >/dev/null 2>&1 || { ocf_log info "unmounted $SUB successfully" return $OCF_SUCCESS } return $OCF_ERR_GENERIC } fs_stop() { local SUB=$1 timeout=$2 sig cnt for sig in TERM KILL; do cnt=$((timeout/2)) # try half time with TERM while [ $cnt -gt 0 ]; do try_umount $SUB && return $OCF_SUCCESS ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig" signal_processes $SUB $sig cnt=$((cnt-1)) sleep 1 done done return $OCF_ERR_GENERIC } # # STOP: Unmount the filesystem # Filesystem_stop() { # See if the device is currently mounted Filesystem_status >/dev/null 2>&1 if [ $? -eq $OCF_NOT_RUNNING ]; then # Already unmounted, wonderful. rc=$OCF_SUCCESS else # Wipe the status file, but continue with a warning if # removal fails -- the file system might be read only if [ $OCF_CHECK_LEVEL -eq 20 ]; then rm -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log warn "Failed to remove status file ${STATUSFILE}." fi fi # Determine the real blockdevice this is mounted on (if # possible) prior to unmounting. determine_blockdevice # For networked filesystems, there's merit in trying -f: case "$FSTYPE" in nfs4|nfs|cifs|smbfs) umount_force="-f" ;; esac # Umount all sub-filesystems mounted under $MOUNTPOINT/ too. local timeout for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do ocf_log info "Trying to unmount $SUB" if ocf_is_true "$FAST_STOP"; then timeout=6 else timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"} timeout=$((timeout/1000)) fi fs_stop $SUB $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "Couldn't unmount $SUB, giving up!" fi done fi flushbufs $DEVICE return $rc } # end of Filesystem_stop # # STATUS: is the filesystem mounted or not? 
# Filesystem_status() { if list_mounts | grep -q " $CANONICALIZED_MOUNTPOINT " >/dev/null 2>&1; then rc=$OCF_SUCCESS msg="$MOUNTPOINT is mounted (running)" else rc=$OCF_NOT_RUNNING msg="$MOUNTPOINT is unmounted (stopped)" fi # Special case "monitor" to check whether the UUID cached and # on-disk still match? case "$OP" in status) ocf_log info "$msg";; esac return $rc } # end of Filesystem_status # Note: the read/write tests below will stall in case the # underlying block device (or in the case of a NAS mount, the # NAS server) has gone away. In that case, if I/O does not # return to normal in time, the operation hits its timeout # and it is up to the CRM to initiate appropriate recovery # actions (such as fencing the node). # # MONITOR 10: read the device # Filesystem_monitor_10() { if [ "$blockdevice" = "no" ] ; then ocf_log warn "$DEVICE is not a block device, monitor 10 is noop" return $OCF_SUCCESS fi dd_opts="iflag=direct bs=4k count=1" err_output=`dd if=$DEVICE $dd_opts 2>&1 >/dev/null` if [ $? -ne 0 ]; then ocf_exit_reason "Failed to read device $DEVICE" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # MONITOR 20: write and read a status file # Filesystem_monitor_20() { if [ "$blockdevice" = "no" ] ; then # O_DIRECT not supported on cifs/smbfs dd_opts="oflag=sync bs=4k conv=fsync,sync" else # Writing to the device in O_DIRECT mode is imperative # to bypass caches. dd_opts="oflag=direct,sync bs=4k conv=fsync,sync" fi status_dir=`dirname $STATUSFILE` [ -d "$status_dir" ] || mkdir -p "$status_dir" err_output=`echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1` if [ $? -ne 0 ]; then ocf_exit_reason "Failed to write status file ${STATUSFILE}" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi test -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_exit_reason "Cannot stat the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi cat ${STATUSFILE} > /dev/null if [ $? 
-ne 0 ]; then ocf_exit_reason "Cannot read the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } Filesystem_monitor() { Filesystem_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then case "$OCF_CHECK_LEVEL" in 10) Filesystem_monitor_10; rc=$?;; 20) Filesystem_monitor_20; rc=$?;; *) ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL" rc=$OCF_ERR_CONFIGURED ;; esac fi return $rc } # end of Filesystem_monitor # # VALIDATE_ALL: Are the instance parameters valid? # FIXME!! The only part that's useful is the return code. # This code always returns $OCF_SUCCESS (!) # Filesystem_validate_all() { if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then ocf_log warn "Mountpoint $MOUNTPOINT does not exist" fi # Check if the $FSTYPE is workable # NOTE: Without inserting the $FSTYPE module, this step may be imprecise # TODO: This is Linux specific crap. if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then cut -f2 /proc/filesystems |grep -q ^$FSTYPE$ if [ $? -ne 0 ]; then modpath=/lib/modules/`uname -r` moddep=$modpath/modules.dep # Do we have $FSTYPE in modules.dep? cut -d' ' -f1 $moddep |grep -q "^$modpath.*$FSTYPE\.k\?o:$" if [ $? -ne 0 ]; then ocf_log info "It seems we do not have $FSTYPE support" fi fi fi # If we are supposed to do monitoring with status files, then # we need a utility to write in O_DIRECT mode. if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary dd # Note: really old coreutils version do not support # the "oflag" option for dd. We don't check for that # here. In case dd does not support oflag, monitor is # bound to fail, with dd spewing an error message to # the logs. On such systems, we must do without status # file monitoring. fi #TODO: How to check the $options ? 
return $OCF_SUCCESS } # # set the blockdevice variable to "no" or "yes" # set_blockdevice_var() { blockdevice=no # these are definitely not block devices case $FSTYPE in nfs4|nfs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs) return;; esac if `is_option "loop"`; then return fi case $DEVICE in -*) # Oh... An option to mount instead... Typically -U or -L ;; /dev/null) # Special case for BSC blockdevice=yes ;; *) if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" fi if [ ! -d "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Check the arguments passed to this script if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # Check the OCF_RESKEY_ environment variables... FORCE_UNMOUNT="yes" if [ -n "${OCF_RESKEY_force_unmount}" ]; then FORCE_UNMOUNT=$OCF_RESKEY_force_unmount fi DEVICE=$OCF_RESKEY_device FSTYPE=$OCF_RESKEY_fstype if [ ! -z "$OCF_RESKEY_options" ]; then options="-o $OCF_RESKEY_options" fi FAST_STOP=${OCF_RESKEY_fast_stop:="yes"} OP=$1 # These operations do not require instance parameters case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac if [ x = x"$DEVICE" ]; then ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed" exit $OCF_ERR_CONFIGURED fi set_blockdevice_var # Normalize instance parameters: # It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". # But the output of `mount` and /proc/mounts do not. if [ -z "$OCF_RESKEY_directory" ]; then if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then ocf_exit_reason "Please specify the directory" exit $OCF_ERR_CONFIGURED fi else MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//') : ${MOUNTPOINT:=/} CANONICALIZED_MOUNTPOINT=$(readlink -f "$MOUNTPOINT") if [ $? 
-ne 0 ]; then ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed" exit $OCF_ERR_GENERIC fi # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/" # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll # kill the whole system. Is that a good idea? fi # Check to make sure the utilites are found if [ "X${HOSTOS}" != "XOpenBSD" ];then check_binary $MODPROBE check_binary $FUSER fi check_binary $FSCK check_binary $MOUNT check_binary $UMOUNT if [ "$OP" != "monitor" ]; then ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" fi case $OP in status) Filesystem_status exit $? ;; monitor) Filesystem_monitor exit $? ;; validate-all) Filesystem_validate_all exit $? ;; stop) Filesystem_stop exit $? ;; esac CLUSTERSAFE=0 is_option "ro" && CLUSTERSAFE=2 case $FSTYPE in nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs) CLUSTERSAFE=1 # this is kind of safe too ;; # add here CLUSTERSAFE=0 for all filesystems which are not # cluster aware and which, even if when mounted read-only, # could still modify parts of it such as journal/metadata ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) if ocf_is_true "$OCF_RESKEY_force_clones"; then CLUSTERSAFE=2 else CLUSTERSAFE=0 # these are not allowed fi ;; esac if ocf_is_clone; then case $CLUSTERSAFE in 0) ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" ocf_log err "DO NOT RUN IT AS A CLONE!" ocf_log err "Politely refusing to proceed to avoid data corruption." exit $OCF_ERR_CONFIGURED ;; 2) ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!" if ocf_is_true "$OCF_RESKEY_force_clones"; then ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so." else ocf_log warn "But we'll let it run because it is mounted read-only." ocf_log warn "Please make sure that it's meta data is read-only too!" fi ;; esac fi case $OP in start) Filesystem_start ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
diff --git a/heartbeat/ICP b/heartbeat/ICP index b43caab6f..90cfa3f74 100755 --- a/heartbeat/ICP +++ b/heartbeat/ICP @@ -1,296 +1,304 @@ #!/bin/sh # # # ICP # # Description: Manages an ICP Vortex clustered host drive as an HA resource # # # Author: Lars Marowsky-Bree # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 SuSE Linux AG # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 LinuxSCSI::0:0 ICP::c0h1::/dev/sdb1 LVM::myvolname # # Notice that you will need to get the utility "icpclucon" from the ICP # support to use this. # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_driveid # OCF_RESKEY_device ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_driveid_default="" +OCF_RESKEY_device_default="" + +: ${OCF_RESKEY_driveid=${OCF_RESKEY_driveid_default}} +: ${OCF_RESKEY_device=${OCF_RESKEY_device_default}} + ####################################################################### # ICPCLUCON=/usr/sbin/icpclucon # usage() { methods=`ICP_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages an ICP Vortex clustered host drive. The 'start' operation reserves the given host drive. The 'stop' operation releses the given host drive. The 'status' operation reports whether the host drive is reserved. The 'monitor' operation reports whether the host drive is reserved. The 'validate-all' operation reports whether OCF instance parameters are valid. The 'methods' operation reports on the methods $0 supports ! } meta_data() { cat < 1.0 Resource script for ICP. It Manages an ICP Vortex clustered host drive as an HA resource. Manages an ICP Vortex clustered host drive The ICP cluster drive ID. ICP cluster drive ID - + The device name. 
device - + END }

#
# methods: What methods/operations do we support?
#
# Writes the list of supported operation names to stdout.
ICP_methods() {
cat <<-!
start
stop
status
monitor
methods
validate-all
meta-data
usage
!
}

# Query icpclucon for the state of host drive $1 and map it to an OCF code:
#   OCF_ERR_GENERIC  - icpclucon itself exited non-zero
#   OCF_SUCCESS      - output says the drive is reserved by this host
#   OCF_NOT_RUNNING  - output says no host holds it, or is unrecognised
ICP_status() {
    local icp_out

    icp_out=$($ICPCLUCON -v -status $1)
    if [ $? -ne 0 ]; then
        ocf_log "err" "Hostdrive not reserved by us."
        return $OCF_ERR_GENERIC
    fi

    # expr anchors its pattern at the start of the string, hence the
    # leading ".*" to search anywhere in the output.
    if expr match "$icp_out" \
        '.*Drive is reserved by this host.*' >/dev/null 2>&1 ; then
        ocf_log "info" "Volume $1 is reserved by us."
        return $OCF_SUCCESS
    elif expr match "$icp_out" \
        '.*Drive is not reserved by any host.*' >/dev/null 2>&1 ; then
        ocf_log "err" "Volume $1 not reserved by any host."
        return $OCF_NOT_RUNNING
    else
        ocf_log "err" "Unknown output from icpclucon. Assuming we do not have a reservation:"
        ocf_log "err" "$icp_out"
        return $OCF_NOT_RUNNING
    fi
}

# Print "<drive>: running" / "<drive>: not running" for host drive $1.
# Any non-success result from ICP_status is reported as OCF_NOT_RUNNING.
ICP_report_status() {
    if ICP_status $1 ; then
        echo "$1: running"
        return $OCF_SUCCESS
    else
        echo "$1: not running"
        return $OCF_NOT_RUNNING
    fi
}

#
# Monitor the host drive - does it really seem to be working?
#
#
ICP_monitor() {
    if
        ICP_status $1
    then
        # $? is the (zero) status of the ICP_status call that selected
        # this branch.
        return $?
    else
        ocf_log "err" "ICP host drive $1 is offline"
        return $OCF_NOT_RUNNING
    fi
}

# Flush the buffers of block device $1 via $BLOCKDEV; the function's status
# is that of the blockdev command.
Clear_bufs() {
    $BLOCKDEV --flushbufs $1
}

#
# Enable ICP host drive
#
# Tries a normal reservation of host drive $1 first and falls back to a
# forced one; returns OCF_ERR_GENERIC if the forced attempt fails or the
# follow-up status check does not show the drive as reserved by this host.
ICP_start() {
    ocf_log "info" "Activating host drive $1"
    ocf_run $ICPCLUCON -v -reserve $1
    if [ $? -ne 0 ]; then
        ocf_log "info" "Forcing reservation of $1"
        ocf_run $ICPCLUCON -v -force $1 || return $OCF_ERR_GENERIC
    fi

    if
        ICP_status $1
    then
        : OK
        # A reservation isn't as prompt as it should be
        sleep 3
        return $OCF_SUCCESS
    else
        ocf_log "err" "ICP: $1 was not reserved correctly"
        return $OCF_ERR_GENERIC
    fi
}

#
# Release the ICP host drive
#
# Returns OCF_ERR_GENERIC if the release command fails or if the drive still
# shows as reserved by this host afterwards.
ICP_stop() {
    ocf_log "info" "Releasing ICP host drive $1"
    ocf_run $ICPCLUCON -v -release $1 || return $OCF_ERR_GENERIC

    ocf_log "info" "Verifying reservation"
    if ICP_status $1 ; then
        ocf_log "err" "ICP: $1 was not released correctly"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}

# Check the required binaries and the $driveid / $device globals.
# On an invalid drive id or a non-block device this function terminates the
# script (exit, not return) with OCF_ERR_ARGS.
ICP_validate_all()
{
    check_binary $BLOCKDEV
    check_binary $ICPCLUCON
    $ICPCLUCON -v -status $driveid >/dev/null 2>&1
    if [ $? -ne 0 ]; then
        ocf_log err "Invalid driveid $driveid"
        exit $OCF_ERR_ARGS
    fi

    if [ ! -b $device ]; then
        ocf_log err "Device $device is not a block device"
        exit $OCF_ERR_ARGS
    fi

    # Do not know how to check the association of $device with $driveid.

    return $OCF_SUCCESS
}

#
#	'main' starts here...
#

# Exactly one argument (the operation name) is required. The parentheses
# run the test in a subshell; only its exit status is used.
if
    ( [ $# -ne 1 ] )
then
    usage
    exit $OCF_ERR_ARGS
fi

# These operations do not require OCF instance parameters to be set
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS;;

    methods)
        ICP_methods
        exit $OCF_SUCCESS;;

    usage)
        usage
        exit $OCF_SUCCESS;;

    *)
        ;;
esac

# Every remaining operation needs both instance parameters.
if
    [ -z "$OCF_RESKEY_driveid" ]
then
    ocf_log err "Please specify OCF_RESKEY_driveid"
    exit $OCF_ERR_ARGS
fi

if [ -z "$OCF_RESKEY_device" ]; then
    ocf_log err "Please specify OCF_RESKEY_device"
    exit $OCF_ERR_ARGS
fi

# Globals read by ICP_validate_all and by the operation dispatch.
driveid=$OCF_RESKEY_driveid
device=$OCF_RESKEY_device

# What kind of method was invoked?
# Dispatch the requested operation for host drive $driveid.
#
# start and stop always flush the block device buffers afterwards, but the
# exit status is that of the reservation/release step whenever that step
# failed. Exiting with the flush status alone would report a failed start
# or stop as success as soon as the flush itself worked.
case "$1" in
    start)
        ICP_validate_all
        ICP_start $driveid
        rc=$?
        Clear_bufs $device
        flush_rc=$?
        # Only a successful start lets the flush result decide the status.
        [ $rc -eq 0 ] && rc=$flush_rc
        exit $rc;;

    stop)
        ICP_stop $driveid
        rc=$?
        Clear_bufs $device
        flush_rc=$?
        # Only a successful stop lets the flush result decide the status.
        [ $rc -eq 0 ] && rc=$flush_rc
        exit $rc;;

    status)
        ICP_report_status $driveid
        exit $?;;

    monitor)
        ICP_monitor $driveid
        exit $?;;

    validate-all)
        ICP_validate_all
        exit $?;;

    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/IPaddr b/heartbeat/IPaddr
index 8c9fb20f3..fb0deab3b 100755
--- a/heartbeat/IPaddr
+++ b/heartbeat/IPaddr
@@ -1,892 +1,912 @@
#!/bin/sh
#
# License: GNU General Public License (GPL)
# Support: users@clusterlabs.org
#
# This script manages IP alias IP addresses
#
# It can add an IP alias, or remove one.
#
# usage: $0 {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg adds an IP alias.
#
# Surprisingly, the "stop" arg removes one. :-)
#
# OCF parameters are as below
# OCF_RESKEY_ip
# OCF_RESKEY_broadcast
# OCF_RESKEY_nic
# OCF_RESKEY_cidr_netmask
# OCF_RESKEY_lvs_support ( e.g. true, on, 1 )
# OCF_RESKEY_ARP_INTERVAL_MS
# OCF_RESKEY_ARP_REPEAT
# OCF_RESKEY_ARP_BACKGROUND (e.g. yes )
# OCF_RESKEY_ARP_NETMASK
# OCF_RESKEY_local_start_script
# OCF_RESKEY_local_stop_script
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
.
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_ip_default="" +OCF_RESKEY_nic_default="eth0" +OCF_RESKEY_cidr_netmask_default="" +OCF_RESKEY_broadcast_default="" +OCF_RESKEY_iflabel_default="" +OCF_RESKEY_lvs_support_default="false" +OCF_RESKEY_local_stop_script_default="" +OCF_RESKEY_local_start_script_default="" +OCF_RESKEY_ARP_INTERVAL_MS_default="500" +OCF_RESKEY_ARP_REPEAT_default="10" +OCF_RESKEY_ARP_BACKGROUND_default="yes" +OCF_RESKEY_ARP_NETMASK_default="ffffffffffff" + +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} +: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} +: ${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}} +: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}} +: ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} +: ${OCF_RESKEY_local_stop_script=${OCF_RESKEY_local_stop_script_default}} +: ${OCF_RESKEY_local_start_script=${OCF_RESKEY_local_start_script_default}} +: ${OCF_RESKEY_ARP_INTERVAL_MS=${OCF_RESKEY_ARP_INTERVAL_MS_default}} +: ${OCF_RESKEY_ARP_REPEAT=${OCF_RESKEY_ARP_REPEAT_default}} +: ${OCF_RESKEY_ARP_BACKGROUND=${OCF_RESKEY_ARP_BACKGROUND_default}} +: ${OCF_RESKEY_ARP_NETMASK=${OCF_RESKEY_ARP_NETMASK_default}} + SENDARP=$HA_BIN/send_arp FINDIF=$HA_BIN/findif VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ####################################################################### SYSTYPE="`uname -s`" case "$SYSTYPE" in SunOS) # `uname -r` = 5.9 -> SYSVERSION = 9 SYSVERSION="`uname -r | cut -d. -f 2`" ;; Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac meta_data() { cat < 1.0 This script manages IP alias IP addresses It can add an IP alias, or remove one. 
Manages virtual IPv4 addresses (portable version) The IPv4 address to be configured in dotted quad notation, for example "192.168.1.1". IPv4 address - + The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. If you can not assign any static IP address on the interface, modify this kernel parameter: sysctl -w net.ipv4.conf.all.promote_secondaries=1 (or per device) Network interface - + The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). If unspecified, the script will also try to determine this from the routing table. Netmask - + Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address - + You can specify an additional label for your IP address here. Interface label - + Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Enable support for LVS DR - + Script called when the IP is released Script called when the IP is released - + Script called when the IP is added Script called when the IP is added - + milliseconds between ARPs milliseconds between gratuitous ARPs - + How many gratuitous ARPs to send out when bringing up a new address repeat count - + run in background (no longer any reason to do this) run in background - + netmask for ARP - in nonstandard hexadecimal format. netmask for ARP - + END exit $OCF_SUCCESS } # The 'ping' command takes highly OS-dependent arguments, so this # function creates a suitable argument list for the host OS's 'ping'. 
# We use a subset of its functionality:
#   1. single packet
#   2. reasonable timeout (say 1 second)
#
# arguments:
#   $1: IP address to ping
# result string:
#   arguments for ping command
#
# If more flexibility is needed, they could be specified in the environment
# to this function, to adjust the resulting 'ping' arguments.
# David Lee May 2007
pingargs() {
	_baseip=$1
	_timeout=1	# seconds; only the SunOS argument list below uses it
	_pktcount=1
	_systype="`uname -s`"
	case $_systype in
		Linux)
			# Default is perpetual ping: need "-c $_pktcount".
			# -c count -t timetolive -q(uiet) -n(umeric) -W timeout
			_pingargs="-c $_pktcount -q -n $_baseip"
			;;
		SunOS)
			# Default is immediate (or timeout) return.
			_pingargs="$_baseip $_timeout"
			;;
		*)
			_pingargs="-c $_pktcount $_baseip"
			;;
	esac
	echo "$_pingargs"
}

# On Linux systems the (hidden) loopback interface may
# conflict with the requested IP address. If so, this
# unoriginal code will remove the offending loopback address
# and save it in VLDIR so it can be added back in later
# when the IPaddr is released.
#
# arguments:
#   $1: IP address
#   $2: interface currently holding that address
#
lvs_remove_conflicting_loopback() {
	ipaddr="$1"
	ifname="$2"

	ocf_log info "Removing conflicting loopback $ifname."
	# Remember which interface held the address so that
	# lvs_restore_loopback() can put it back later.
	if
		echo $ifname > "$VLDIR/$ipaddr"
	then
		: Saved loopback information in $VLDIR/$ipaddr
	else
		ocf_log err "Could not save conflicting loopback $ifname." \
		"it will not be restored."
	fi

	# Optional user hook; it is handed this function's own arguments.
	if [ ! -z "${OCF_RESKEY_local_stop_script}" ]; then
		if [ -x "${OCF_RESKEY_local_stop_script}" ]; then
			${OCF_RESKEY_local_stop_script} $*
		fi
	fi

	delete_interface "$ifname" "$ipaddr"

	# Forcibly remove the route (if it exists) to the loopback.
	delete_route "$ipaddr"
}

#
# On Linux systems the (hidden) loopback interface may
# need to be restored if it has been taken down previously
# by lvs_remove_conflicting_loopback()
#
# arguments:
#   $1: IP address
# Does nothing when no (non-empty) state file exists for the address.
# Exits with $OCF_ERR_GENERIC when $FINDIF fails.
#
lvs_restore_loopback() {
	ipaddr="$1"

	if [ ! -s "$VLDIR/$ipaddr" ]; then
		return
	fi

	ifname=`cat "$VLDIR/$ipaddr"`
	ocf_log info "Restoring loopback IP Address $ipaddr on $ifname."

	CMD="OCF_RESKEY_cidr_netmask=32 OCF_RESKEY_ip=$1 OCF_RESKEY_nic=$ifname $FINDIF"

	# Check the exit status of $FINDIF itself.  Testing a list of two
	# assignments in an "if" only ever sees the status of the last one,
	# which hides a findif failure.  A dedicated variable is used so a
	# caller's own $rc is not overwritten here.
	NICINFO=`eval $CMD`
	findif_rc=$?
	if [ $findif_rc -ne 0 ]; then
		echo "ERROR: $CMD failed (rc=$findif_rc)"
		exit $OCF_ERR_GENERIC
	fi

	# Normalise to single-space separated fields; field 3 is then the
	# netmask value and field 5 the broadcast value.
	NICINFO=`echo $NICINFO | tr '\t' ' ' | tr -s " "`
	netmask_text=`echo "$NICINFO" | cut -f3 -d " "`
	broadcast=`echo "$NICINFO" | cut -f5 -d " "`

	add_interface "$ipaddr" "$ifname" "$ifname" $netmask_text $broadcast
	rm -f "$VLDIR/$ipaddr"
}

#
#	Find out which alias serves the given IP address
#	The argument is an IP address, and its output
#	is an aliased interface name (e.g., "eth0:0").
#
find_interface_solaris() {
	ipaddr="$1"

	# The awk filter puts an empty line in front of every line containing
	# ": " (except the first), so the loop below can read the listing one
	# blank-line separated stanza at a time.
	$IFCONFIG $IFCONFIG_A_OPT |
	$AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' |
	while read ifname linkstuff
	do
		: ifname = $ifname
		read inet addr junk
		: inet = $inet addr = $addr

		# Skip whatever is left of this stanza.
		while
			read line &&
			[ "X$line" != "X" ]
		do
			: Nothing
		done

		case $ifname in
			*:*)	;;
			*)	continue;;
		esac

		# This doesn't look right for a box with multiple NICs.
		# It looks like it always selects the first interface on
		# a machine. Yet, we appear to use the results for this case too...
		ifname=`echo "$ifname" | sed s'%:$%%'`

		case $addr in
			addr:$ipaddr)	echo $ifname; return $OCF_SUCCESS;;
			$ipaddr)	echo $ifname; return $OCF_SUCCESS;;
		esac
	done
	return $OCF_ERR_GENERIC
}

#
#	BSD flavour of the lookup above.
#	$1 is the IP address; prints the name of the interface whose
#	"inet" line carries exactly that address (nothing if none does).
#
find_interface_bsd() {
	# Honour the argument like the sibling implementations do; fall back
	# to an already-set $ipaddr for callers that relied on the old
	# behaviour of reading the global.
	ipaddr="${1:-$ipaddr}"

	$IFCONFIG $IFCONFIG_A_OPT | awk -v ip_addr="$ipaddr" '
	/UP,/ && $0 ~ /^[a-z]+[0-9]:/ {
		if_name=$1; sub(":$","",if_name);
	}
	$1 == "inet" && $2 == ip_addr {
		print if_name
		exit(0)
	}'
}

#
#	Find out which alias serves the given IP address
#	The argument is an IP address, and its output
#	is an aliased interface name (e.g., "eth0:0").
#
#	Fallback implementation for systems that are neither SunOS nor *BSD.
#	Reads the `$IFCONFIG $IFCONFIG_A_OPT` listing one blank-line separated
#	stanza at a time: first line gives the interface name, second line the
#	address.
#
find_interface_generic() {
	ipaddr="$1"

	$IFCONFIG $IFCONFIG_A_OPT |
	while read ifname linkstuff
	do
		: Read gave us ifname = $ifname

		read inet addr junk
		: Read gave us inet = $inet addr = $addr

		# Consume the rest of this stanza, up to and including the
		# empty separator line (or end of input).
		while read line
		do
			[ "X$line" = "X" ] && break
			: Nothing
		done

		# Names without a colon are not candidates.
		case $ifname in
			*:*)	;;
			*)	continue;;
		esac
		ifname=`echo $ifname | sed 's/:$//'`

		: "comparing $ipaddr to $addr (from ifconfig)"
		# The address may be printed bare or with an "addr:" prefix.
		case $addr in
			addr:$ipaddr|$ipaddr)
				echo $ifname
				return $OCF_SUCCESS
				;;
		esac
	done
	return $OCF_ERR_GENERIC
}

#
#	Find out which alias serves the given IP address
#	The argument is an IP address, and its output
#	is an aliased interface name (e.g., "eth0:0").
#
#	Dispatches on $SYSTYPE to the platform-specific lookup and always
#	returns $OCF_SUCCESS; an empty output means nothing was found.
#
find_interface() {
	ipaddr="$1"

	case "$SYSTYPE" in
		SunOS)	_finder=find_interface_solaris;;
		*BSD)	_finder=find_interface_bsd;;
		*)	_finder=find_interface_generic;;
	esac
	NIC=`$_finder $ipaddr`

	echo $NIC
	return $OCF_SUCCESS;
}

#
#	Find an unused interface/alias name for us to use for new IP alias
#	The argument is an IP address, and the output
#	is an aliased interface name (e.g., "eth0:0", "dc0", "le0:0").
# find_free_interface() { NIC="$1" if [ "X$NIC" = "X" ]; then ocf_log err "No free interface found for $OCF_RESKEY_ip" return $OCF_ERR_GENERIC; fi NICBASE="$VLDIR/IPaddr-$NIC" touch "$NICBASE" case "$SYSTYPE" in *BSD) echo $NIC; return $OCF_SUCCESS;; SunOS) j=1 IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \ grep "^$NIC:[0-9]" | sed 's%: .*%%'`;; *) j=0 IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \ grep "^$NIC:[0-9]" | sed 's% .*%%'` TRYADRCNT=`ls "${NICBASE}:"* 2>/dev/null | wc -w | tr -d ' '` if [ -f "${NICBASE}:${TRYADRCNT}" ]; then : OK else j="${TRYADRCNT}" fi ;; esac IFLIST=" `echo $IFLIST` " while [ $j -lt 512 ] do case $IFLIST in *" "$NIC:$j" "*) ;; *) NICLINK="$NICBASE:$j" if ln "$NICBASE" "$NICLINK" 2>/dev/null then echo "$NIC:$j" return $OCF_SUCCESS fi ;; esac j=`expr $j + 1` done return $OCF_ERR_GENERIC } delete_route () { ipaddr="$1" case "$SYSTYPE" in SunOS) return 0;; *BSD) CMD="$ROUTE -n delete -host $ipaddr";; *) CMD="$ROUTE -n del -host $ipaddr";; esac $CMD return $? } delete_interface () { ifname="$1" ipaddr="$2" case "$SYSTYPE" in SunOS) if [ "$SYSVERSION" -ge 8 ] ; then CMD="$IFCONFIG $ifname unplumb" else CMD="$IFCONFIG $ifname 0 down" fi;; Darwin*) CMD="$IFCONFIG $ifname $ipaddr delete";; *BSD) CMD="$IFCONFIG $ifname inet $ipaddr delete";; *) CMD="$IFCONFIG $ifname down";; esac ocf_log info "$CMD" $CMD return $? } add_interface () { ipaddr="$1" iface_base="$2" iface="$3" netmask="$4" broadcast="$5" if [ $# != 5 ]; then ocf_log err "Insufficient arguments to add_interface: $*" exit $OCF_ERR_ARGS fi case "$SYSTYPE" in SunOS) if [ "$SYSVERSION" -ge 8 ] ; then $IFCONFIG $iface plumb rc=$? if [ $rc -ne 0 ] ; then echo "ERROR: '$IFCONFIG $iface plumb' failed." return $rc fi fi # At Solaris 10, this single-command version sometimes broke. # Almost certainly an S10 bug. 
# CMD="$IFCONFIG $iface inet $ipaddr $text up" # So hack the following workaround: CMD="$IFCONFIG $iface inet $ipaddr" CMD="$CMD && $IFCONFIG $iface netmask $netmask" CMD="$CMD && $IFCONFIG $iface up" ;; *BSD) # netmask is always set to 255.255.255.255 for an alias CMD="$IFCONFIG $iface inet $ipaddr netmask 255.255.255.255 alias";; *) CMD="$IFCONFIG $iface $ipaddr netmask $netmask broadcast $broadcast";; esac # Use "eval $CMD" (not "$CMD"): it might be a chain of two or more commands. ocf_log info "eval $CMD" eval $CMD rc=$? if [ $rc != 0 ]; then echo "ERROR: eval $CMD failed (rc=$rc)" fi return $rc } # # Remove the IP alias for the requested IP address... # ip_stop() { SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" NIC=`find_interface $OCF_RESKEY_ip` if [ -f "$SENDARPPIDFILE" ]; then cat "$SENDARPPIDFILE" | xargs kill rm -f "$SENDARPPIDFILE" fi if [ -z "$NIC" ]; then : Requested interface not in use return $OCF_SUCCESS fi if [ ${OCF_RESKEY_lvs_support} = 1 ]; then case $NIC in lo*) : Requested interface is on loopback return $OCF_SUCCESS;; esac fi delete_route "$OCF_RESKEY_ip" delete_interface "$NIC" "$OCF_RESKEY_ip" rc=$? if [ ${OCF_RESKEY_lvs_support} = 1 ]; then lvs_restore_loopback "$OCF_RESKEY_ip" fi # remove lock file... rm -f "$VLDIR/IPaddr-$NIC" if [ $rc != 0 ]; then ocf_log warn "IP Address $OCF_RESKEY_ip NOT released: rc=$rc" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # Add an IP alias for the requested IP address... # # It could be that we already have taken it, in which case it should # do nothing. # ip_start() { # # Do we already service this IP address? # ip_status_internal if [ $? 
= $OCF_SUCCESS ]; then # Nothing to do, the IP is already active return $OCF_SUCCESS; fi NIC_unique=`find_free_interface $OCF_RESKEY_nic` if [ -n "$NIC_unique" ]; then : OK got interface [$NIC_unique] for $OCF_RESKEY_ip else return $OCF_ERR_GENERIC fi # This logic is mostly to support LVS (If I understand it correctly) if [ ${OCF_RESKEY_lvs_support} = 1 ]; then NIC_current=`find_interface $OCF_RESKEY_ip` case $NIC_unique in lo*) if [ x"$NIC_unique" = x"$NIC_current" ]; then # Its already "running" and not moving, nothing to do. ocf_log err "Could not find a non-loopback device to move $OCF_RESKEY_ip to" return $OCF_ERR_GENERIC fi;; *) lvs_remove_conflicting_loopback "$OCF_RESKEY_ip" "$NIC_current";; esac fi if [ ! -z "${OCF_RESKEY_local_start_script}" ]; then if [ -x "${OCF_RESKEY_local_start_script}" ]; then ${OCF_RESKEY_local_start_script} $* fi fi add_interface "$OCF_RESKEY_ip" "$OCF_RESKEY_nic" "$NIC_unique" \ "$OCF_RESKEY_cidr_netmask" "$OCF_RESKEY_broadcast" rc=$? if [ $rc != 0 ]; then ocf_log err "Could not add $OCF_RESKEY_ip to $OCF_RESKEY_nic: rc=$rc" return $rc fi # The address is active, now notify others about it using sendarp if [ "$SYSTYPE" = "DarwinBSD" -a "$NIC_unique" = "lo0" ]; then # Darwin can't send ARPs on loopback devices SENDARP="x$SENDARP" # Prevent the binary from being found fi if [ -x $SENDARP ]; then TARGET_INTERFACE=`echo $NIC_unique | sed 's%:.*%%'` SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" ARGS="-i $OCF_RESKEY_ARP_INTERVAL_MS -r $OCF_RESKEY_ARP_REPEAT" ARGS="$ARGS -p $SENDARPPIDFILE $TARGET_INTERFACE $OCF_RESKEY_ip" ARGS="$ARGS auto $OCF_RESKEY_ip $OCF_RESKEY_ARP_NETMASK" ocf_log debug "Sending Gratuitous Arp for $OCF_RESKEY_ip on $NIC_unique [$TARGET_INTERFACE]" case $OCF_RESKEY_ARP_BACKGROUND in yes) ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?" & ) >&2 ;; *) $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?";; esac fi ip_status_internal return $? 
} ip_status_internal() { NIC=`find_interface "$OCF_RESKEY_ip"` if [ "x$NIC" = x ]; then return $OCF_NOT_RUNNING elif [ "${OCF_RESKEY_lvs_support}" = "1" ]; then case $NIC in lo*) return $OCF_NOT_RUNNING;; *) return $OCF_SUCCESS;; esac else if [ x$OCF_RESKEY_nic != x ]; then simple_OCF_NIC=`echo $OCF_RESKEY_nic | awk -F: '{print $1}'` simple_NIC=`echo $NIC | awk -F: '{print $1}'` if [ $simple_OCF_NIC != $simple_NIC ]; then ocf_log err "$OCF_RESKEY_ip is running an interface ($simple_NIC) instead of the configured one ($simple_OCF_NIC)" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS fi } ip_status() { ip_status_internal rc=$? if [ $rc = $OCF_SUCCESS ]; then echo "running" elif [ $rc = $OCF_NOT_RUNNING ]; then echo "stopped" else echo "unknown" fi return $rc; } # # Determine if this IP address is really being served, or not. # Note that we must distinguish if *we're* serving it locally... # ip_monitor() { ip_status_internal rc=$? if [ $OCF_CHECK_LEVEL = 0 -o $rc != 0 ]; then return $rc fi ocf_log info "Checking IP stack" PINGARGS="`pingargs $OCF_RESKEY_ip`" for j in 1 2 3 4 5 6 7 8 9 10; do MSG=`$PING $PINGARGS 2>&1` if [ $? = 0 ]; then return $OCF_SUCCESS fi done ocf_log err "$MSG" return $OCF_ERR_GENERIC } is_positive_integer() { ocf_is_decimal $1 && [ $1 -ge 1 ] if [ $? 
= 0 ]; then return 1 fi return 0 } ip_validate_all() { - : ${OCF_RESKEY_ARP_BACKGROUND=yes} - : ${OCF_RESKEY_ARP_NETMASK=ffffffffffff} - : ${OCF_RESKEY_ARP_INTERVAL_MS=500} - : ${OCF_RESKEY_ARP_REPEAT=10} - - check_binary $AWK check_binary $IFCONFIG check_binary $ROUTE check_binary $PING if is_positive_integer $OCF_RESKEY_ARP_INTERVAL_MS then ocf_log err "Invalid parameter value: ARP_INTERVAL_MS [$OCF_RESKEY_ARP_INTERVAL_MS]" return $OCF_ERR_ARGS fi if is_positive_integer $OCF_RESKEY_ARP_REPEAT then ocf_log err "Invalid parameter value: ARP_REPEAT [$OCF_RESKEY_ARP_REPEAT]" return $OCF_ERR_ARGS fi - : ${OCF_RESKEY_lvs_support=0} if [ "$SYSTYPE" = "Linux" -o "$SYSTYPE" = "SunOS" ]; then : else if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then ocf_log err "$SYSTYPE does not support LVS" return $OCF_ERR_GENERIC fi fi case $OCF_RESKEY_ip in "") ocf_log err "Required parameter OCF_RESKEY_ip is missing" return $OCF_ERR_CONFIGURED;; [0-9]*.[0-9]*.[0-9]*.*[0-9]) : OK;; *) ocf_log err "Parameter OCF_RESKEY_ip [$OCF_RESKEY_ip] not an IP address" return $OCF_ERR_CONFIGURED;; esac # Unconditionally do this? case $OCF_RESKEY_nic in *:*) OCF_RESKEY_nic=`echo $OCF_RESKEY_nic | sed 's/:.*//'` ;; esac NICINFO=`$FINDIF` rc=$? if [ $rc != 0 ]; then ocf_log err "$FINDIF failed [rc=$rc]." 
return $OCF_ERR_GENERIC fi tmp=`echo "$NICINFO" | cut -f1` if [ "x$OCF_RESKEY_nic" = "x" ] then ocf_log info "Using calculated nic for ${OCF_RESKEY_ip}: $tmp" OCF_RESKEY_nic=$tmp elif [ x$tmp != x${OCF_RESKEY_nic} ] then ocf_log err "Invalid parameter value: nic [$OCF_RESKEY_nic] Calculated nic: [$tmp]" return $OCF_ERR_ARGS fi tmp=`echo "$NICINFO" | cut -f2 | cut -d ' ' -f2` if [ "x$OCF_RESKEY_cidr_netmask" != "x$tmp" ] then ocf_log info "Using calculated netmask for ${OCF_RESKEY_ip}: $tmp" fi # Always use the calculated version becuase it might have been specified # using CIDR notation which not every system accepts OCF_RESKEY_netmask=$tmp OCF_RESKEY_cidr_netmask=$tmp; export OCF_RESKEY_cidr_netmask tmp=`echo "$NICINFO" | cut -f3 | cut -d ' ' -f2` if [ "x$OCF_RESKEY_broadcast" = "x" ] then ocf_log debug "Using calculated broadcast for ${OCF_RESKEY_ip}: $tmp" OCF_RESKEY_broadcast=$tmp elif [ x$tmp != x${OCF_RESKEY_broadcast} ]; then ocf_log err "Invalid parameter value: broadcast [$OCF_RESKEY_broadcast] Calculated broadcast: [$tmp]" return $OCF_ERR_ARGS fi return $OCF_SUCCESS } usage() { echo $USAGE >&2 return $1 } if [ $# -ne 1 ]; then usage $OCF_ERR_ARGS fi -: ${OCF_RESKEY_lvs_support=0} # Normalize the value of lvs_support if [ "${OCF_RESKEY_lvs_support}" = "true" \ -o "${OCF_RESKEY_lvs_support}" = "on" \ -o "${OCF_RESKEY_lvs_support}" = "yes" \ -o "${OCF_RESKEY_lvs_support}" = "1" ]; then OCF_RESKEY_lvs_support=1 else OCF_RESKEY_lvs_support=0 fi # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. So, don't remove this aliasing code! if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi case $1 in meta-data) meta_data;; start) ip_validate_all && ip_start;; stop) ip_stop;; status) ip_status;; monitor) ip_monitor;; validate-all) ip_validate_all;; usage) usage $OCF_SUCCESS;; *) usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? 
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 index 462ee8042..041ace3a2 100755 --- a/heartbeat/IPaddr2 +++ b/heartbeat/IPaddr2 @@ -1,1252 +1,1272 @@ #!/bin/sh # # $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $ # # OCF Resource Agent compliant IPaddr2 script. # # Based on work by Tuomo Soini, ported to the OCF RA API by Lars # Marowsky-Brée. Implements Cluster Alias IP functionality too. # # Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff # # # Copyright (c) 2003 Tuomo Soini # Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # TODO: # - There ought to be an ocf_run_cmd function which does all logging, # timeout handling etc for us # - Make this the standard IP address agent on Linux; the other # platforms simply should ignore the additional parameters OR can use # the legacy heartbeat resource script... # - Check LVS <-> clusterip incompatibilities. 
# # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_iflabel # OCF_RESKEY_mac # OCF_RESKEY_clusterip_hash # OCF_RESKEY_arp_interval # OCF_RESKEY_arp_count # OCF_RESKEY_arp_bg # OCF_RESKEY_preferred_lft # # OCF_RESKEY_CRM_meta_clone # OCF_RESKEY_CRM_meta_clone_max ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/findif.sh # Defaults +OCF_RESKEY_ip_default="" +OCF_RESKEY_cidr_netmask_default="" +OCF_RESKEY_broadcast_default="" +OCF_RESKEY_iflabel_default="" +OCF_RESKEY_cidr_netmask_default="" OCF_RESKEY_lvs_support_default=false OCF_RESKEY_lvs_ipv6_addrlabel_default=false OCF_RESKEY_lvs_ipv6_addrlabel_value_default=99 OCF_RESKEY_clusterip_hash_default="sourceip-sourceport" +OCF_RESKEY_mac_default="" OCF_RESKEY_unique_clone_address_default=false OCF_RESKEY_arp_interval_default=200 OCF_RESKEY_arp_count_default=5 OCF_RESKEY_arp_count_refresh_default=0 OCF_RESKEY_arp_bg_default=true +OCF_RESKEY_arp_sender_default="" +OCF_RESKEY_send_arp_opts_default="" +OCF_RESKEY_flush_routes_default="false" OCF_RESKEY_run_arping_default=false OCF_RESKEY_preferred_lft_default="forever" +OCF_RESKEY_network_namespace_default="" +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} +: ${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}} +: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}} : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} : ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}} : ${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}} : ${OCF_RESKEY_clusterip_hash=${OCF_RESKEY_clusterip_hash_default}} +: ${OCF_RESKEY_mac=${OCF_RESKEY_mac_default}} : ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}} : 
${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}} : ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}} : ${OCF_RESKEY_arp_count_refresh=${OCF_RESKEY_arp_count_refresh_default}} : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} +: ${OCF_RESKEY_arp_sender=${OCF_RESKEY_arp_sender_default}} +: ${OCF_RESKEY_send_arp_opts=${OCF_RESKEY_send_arp_opts_default}} +: ${OCF_RESKEY_flush_routes=${OCF_RESKEY_flush_routes_default}} : ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}} : ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}} +: ${OCF_RESKEY_network_namespace=${OCF_RESKEY_network_namespace_default}} + ####################################################################### SENDARP=$HA_BIN/send_arp SENDUA=$HA_BIN/send_ua FINDIF=findif VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip} ####################################################################### meta_data() { cat < 1.0 This Linux-specific resource manages IP alias IP addresses. It can add an IP alias, or remove one. In addition, it can implement Cluster Alias IP functionality if invoked as a clone resource. If used as a clone, you should explicitly set clone-node-max >= 2, and/or clone-max < number of nodes. In case of node failure, clone instances need to be re-allocated on surviving nodes. This would not be possible if there is already an instance on those nodes, and clone-node-max=1 (which is the default). Manages virtual IPv4 and IPv6 addresses (Linux specific version) The IPv4 (dotted quad notation) or IPv6 address (colon hexadecimal notation) example IPv4 "192.168.1.1". example IPv6 "2001:db8:DC28:0:0:FC57:D4C8:1FFF". IPv4 or IPv6 address - + The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. 
If you want a label, see the iflabel parameter. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. If you can not assign any static IP address on the interface, modify this kernel parameter: sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device) Network interface The netmask for the interface in CIDR format (e.g., 24 and not 255.255.255.0) If unspecified, the script will also try to determine this from the routing table. CIDR netmask - + Broadcast address associated with the IP. It is possible to use the special symbols '+' and '-' instead of the broadcast address. In this case, the broadcast address is derived by setting/resetting the host bits of the interface prefix. Broadcast address - + You can specify an additional label for your IP address here. This label is appended to your interface name. The kernel allows alphanumeric labels up to a maximum length of 15 characters including the interface name and colon (e.g. eth0:foobar1234) A label can be specified in nic parameter but it is deprecated. If a label is specified in nic name, this parameter has no effect. Interface label - + Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Notes for IPv6: It is not necessary to enable this option on IPv6. Instead, enable 'lvs_ipv6_addrlabel' option for LVS-DR usage on IPv6. Enable support for LVS DR Enable adding IPv6 address label so IPv6 traffic originating from the address's interface does not use this address as the source. This is necessary for LVS-DR health checks to realservers to work. 
Without it, the most recently added IPv6 address (probably the address added by IPaddr2) will be used as the source address for IPv6 traffic from that interface and since that address exists on loopback on the realservers, the realserver response to pings/connections will never leave its loopback. See RFC3484 for the detail of the source address selection. See also 'lvs_ipv6_addrlabel_value' parameter. Enable adding IPv6 address label. Specify IPv6 address label value used when 'lvs_ipv6_addrlabel' is enabled. The value should be an unused label in the policy table which is shown by 'ip addrlabel list' command. You would rarely need to change this parameter. IPv6 address label value. Set the interface MAC address explicitly. Currently only used in case of the Cluster IP Alias. Leave empty to chose automatically. Cluster IP MAC address - + Specify the hashing algorithm used for the Cluster IP functionality. Cluster IP hashing function If true, add the clone ID to the supplied value of IP to create a unique address to manage Create a unique address for cloned instances Specify the interval between unsolicited ARP packets in milliseconds. This parameter is deprecated and used for the backward compatibility only. It is effective only for the send_arp binary which is built with libnet, and send_ua for IPv6. It has no effect for other arp_sender. ARP packet interval in ms (deprecated) Number of unsolicited ARP packets to send at resource initialization. ARP packet count sent during initialization Number of unsolicited ARP packets to send during resource monitoring. Doing so helps mitigate issues of stuck ARP caches resulting from split-brain situations. ARP packet count sent during monitoring Whether or not to send the ARP packets in the background. ARP from background The program to send ARP packets with on start. 
Available options are: - send_arp: default - ipoibarping: default for infiniband interfaces if ipoibarping is available - iputils_arping: use arping in iputils package - libnet_arping: use another variant of arping based on libnet ARP sender - + Extra options to pass to the arp_sender program. Available options are vary depending on which arp_sender is used. A typical use case is specifying '-A' for iputils_arping to use ARP REPLY instead of ARP REQUEST as Gratuitous ARPs. Options for ARP sender - + Flush the routing table on stop. This is for applications which use the cluster IP address and which run on the same physical host that the IP address lives on. The Linux kernel may force that application to take a shortcut to the local loopback interface, instead of the interface the address is really bound to. Under those circumstances, an application may, somewhat unexpectedly, continue to use connections for some time even after the IP address is deconfigured. Set this parameter in order to immediately disable said shortcut when the IP address goes away. Flush kernel routing table on stop - + Whether or not to run arping for IPv4 collision detection check. Run arping for IPv4 collision detection check For IPv6, set the preferred lifetime of the IP address. This can be used to ensure that the created IP address will not be used as a source address for routing. Expects a value as specified in section 5.5.4 of RFC 4862. IPv6 preferred lifetime Specifies the network namespace to operate within. The namespace must already exist, and the interface to be used must be within the namespace. Network namespace to use - + END exit $OCF_SUCCESS } ip_init() { local rc if [ X`uname -s` != "XLinux" ]; then ocf_exit_reason "IPaddr2 only supported Linux." 
exit $OCF_ERR_INSTALLED fi if [ X"$OCF_RESKEY_ip" = "X" ] && [ "$__OCF_ACTION" != "stop" ]; then ocf_exit_reason "IP address (the ip parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi if case $__OCF_ACTION in start|stop) ocf_is_root;; *) true;; esac then : YAY! else ocf_exit_reason "You must be root for $__OCF_ACTION operation." exit $OCF_ERR_PERM fi BASEIP="$OCF_RESKEY_ip" BRDCAST="$OCF_RESKEY_broadcast" NIC="$OCF_RESKEY_nic" # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. Don't remove this aliasing code! if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi NETMASK="$OCF_RESKEY_cidr_netmask" IFLABEL="$OCF_RESKEY_iflabel" IF_MAC="$OCF_RESKEY_mac" IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1} IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1` if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then ocf_exit_reason "LVS and load sharing do not go together well" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then : else ocf_exit_reason "Invalid meta-attribute clone_max [$IP_INC_GLOBAL], should be positive integer" exit $OCF_ERR_CONFIGURED fi echo $OCF_RESKEY_ip | grep -qs ":" if [ $? 
-ne 0 ];then FAMILY=inet if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then ocf_exit_reason "IPv4 does not support lvs_ipv6_addrlabel" exit $OCF_ERR_CONFIGURED fi else FAMILY=inet6 if ocf_is_true $OCF_RESKEY_lvs_support ;then ocf_exit_reason "The IPv6 does not support lvs_support" exit $OCF_ERR_CONFIGURED fi if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then if ocf_is_decimal "$OCF_RESKEY_lvs_ipv6_addrlabel_value" && [ $OCF_RESKEY_lvs_ipv6_addrlabel_value -ge 0 ]; then : else ocf_exit_reason "Invalid lvs_ipv6_addrlabel_value [$OCF_RESKEY_lvs_ipv6_addrlabel_value], should be positive integer" exit $OCF_ERR_CONFIGURED fi fi fi # support nic:iflabel format in nic parameter case $NIC in *:*) IFLABEL=`echo $NIC | sed 's/[^:]*://'` NIC=`echo $NIC | sed 's/:.*//'` # only the base name should be passed to findif OCF_RESKEY_nic=$NIC ;; esac # $FINDIF takes its parameters from the environment # NICINFO=`$FINDIF` rc=$? if [ $rc -eq 0 ] then NICINFO=`echo "$NICINFO" | sed -e 's/netmask\ //;s/broadcast\ //'` NIC=`echo "$NICINFO" | cut -d" " -f1` NETMASK=`echo "$NICINFO" | cut -d" " -f2` BRDCAST=`echo "$NICINFO" | cut -d" " -f3` else # findif couldn't find the interface if ocf_is_probe; then ocf_log info "[$FINDIF] failed" exit $OCF_NOT_RUNNING elif [ "$__OCF_ACTION" = stop ]; then ocf_log warn "[$FINDIF] failed" exit $OCF_SUCCESS else ocf_exit_reason "[$FINDIF] failed" exit $rc fi fi SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" if [ -n "$IFLABEL" ]; then IFLABEL=${NIC}:${IFLABEL} if [ ${#IFLABEL} -gt 15 ]; then ocf_exit_reason "Interface label [$IFLABEL] exceeds maximum character limit of 15" exit $OCF_ERR_CONFIGURED fi fi if [ "$IP_INC_GLOBAL" -gt 1 ] && ! ocf_is_true "$OCF_RESKEY_unique_clone_address"; then IP_CIP="yes" IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}" if [ -z "$IF_MAC" ]; then # Choose a MAC # 1. Concatenate some input together # 2. This doesn't need to be a cryptographically # secure hash. # 3. Drop everything after the first 6 octets (12 chars) # 4. 
Delimit the octets with ':' # 5. Make sure the first octet is odd, # so the result is a multicast MAC IF_MAC=`echo $OCF_RESKEY_ip $NETMASK $BRDCAST | \ md5sum | \ sed -e 's#\(............\).*#\1#' \ -e 's#..#&:#g; s#:$##' \ -e 's#^\(.\)[02468aAcCeE]#\11#'` fi IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$OCF_RESKEY_ip" fi } # # Find out which interfaces serve the given IP address and netmask. # The arguments are an IP address and a netmask. # Its output are interface names devided by spaces (e.g., "eth0 eth1"). # find_interface() { local ipaddr="$1" local netmask="$2" # # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces # local iface="`$IP2UTIL -o -f $FAMILY addr show \ | grep "\ $ipaddr/$netmask" \ | cut -d ' ' -f2 \ | grep -v '^ipsec[0-9][0-9]*$'`" echo "$iface" return 0 } # # Delete an interface # delete_interface () { ipaddr="$1" iface="$2" netmask="$3" CMD="$IP2UTIL -f $FAMILY addr delete $ipaddr/$netmask dev $iface" ocf_run $CMD || return $OCF_ERR_GENERIC if ocf_is_true $OCF_RESKEY_flush_routes; then ocf_run $IP2UTIL route flush cache fi if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then delete_ipv6_addrlabel $ipaddr fi return $OCF_SUCCESS } # # Add an interface # add_interface () { local cmd msg ipaddr netmask broadcast iface label ipaddr="$1" netmask="$2" broadcast="$3" iface="$4" label="$5" if [ "$FAMILY" = "inet" ] && ocf_is_true $OCF_RESKEY_run_arping && check_binary arping; then arping -q -c 2 -w 3 -D -I $iface $ipaddr if [ $? 
= 1 ]; then ocf_log err "IPv4 address collision $ipaddr [DAD]" return $OCF_ERR_GENERIC fi fi if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then add_ipv6_addrlabel $ipaddr fi cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask dev $iface" msg="Adding $FAMILY address $ipaddr/$netmask to device $iface" if [ "$broadcast" != "none" ]; then cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask brd $broadcast dev $iface" msg="Adding $FAMILY address $ipaddr/$netmask with broadcast address $broadcast to device $iface" fi if [ ! -z "$label" ]; then cmd="$cmd label $label" msg="${msg} (with label $label)" fi if [ "$FAMILY" = "inet6" ] ;then cmd="$cmd preferred_lft $OCF_RESKEY_preferred_lft" msg="${msg} (with preferred_lft $OCF_RESKEY_preferred_lft)" fi ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC msg="Bringing device $iface up" cmd="$IP2UTIL link set $iface up" ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC return $OCF_SUCCESS } # # Delete a route # delete_route () { prefix="$1" iface="$2" CMD="$IP2UTIL route delete $prefix dev $iface" ocf_log info "$CMD" $CMD return $? } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # # TODO: This is very ugly and should be controlled by an additional # instance parameter. Or even: multi-state, with the IP only being # "active" on the master!? # remove_conflicting_loopback() { ipaddr="$1" netmask="$2" broadcast="$3" ifname="$4" ocf_log info "Removing conflicting loopback $ifname." if echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." 
fi delete_interface "$ipaddr" "$ifname" "$netmask" # Forcibly remove the route (if it exists) to the loopback. delete_route "$ipaddr" "$ifname" } # # On Linux systems the (hidden) loopback interface may # need to be restored if it has been taken down previously # by remove_conflicting_loopback() # restore_loopback() { ipaddr="$1" if [ -s "$VLDIR/$ipaddr" ]; then ifinfo=`cat "$VLDIR/$ipaddr"` ocf_log info "Restoring loopback IP Address " \ "$ifinfo." add_interface $ifinfo rm -f "$VLDIR/$ipaddr" fi } add_ipv6_addrlabel() { local cmd ipaddr value ipaddr="$1" value="$OCF_RESKEY_lvs_ipv6_addrlabel_value" cmd="$IP2UTIL addrlabel add prefix $ipaddr label $value" ocf_log info "Adding IPv6 address label prefix $ipaddr label $value" ocf_run $cmd || ocf_log warn "$cmd failed." } delete_ipv6_addrlabel() { local cmd ipaddr value ipaddr="$1" value="$OCF_RESKEY_lvs_ipv6_addrlabel_value" cmd="$IP2UTIL addrlabel del prefix $ipaddr label $value" ocf_run $cmd # an error can be ignored } is_infiniband() { $IP2UTIL link show $NIC | grep link/infiniband >/dev/null } log_arp_sender() { local cmdline local output local rc cmdline="$@" output=$($cmdline 2>&1) rc=$? if [ $rc -ne 0 ] && \ [ "$ARP_SENDER" != "libnet_arping" ] ; then # libnet_arping always return an error as no answers ocf_log err "Could not send gratuitous arps: rc=$rc" fi ocf_log $LOGLEVEL "$output" } # wrapper function to manage PID file to run arping in background run_with_pidfile() { local cmdline local pid local rc cmdline="$@" $cmdline & pid=$! echo "$pid" > $SENDARPPIDFILE wait $pid rc=$? 
rm -f $SENDARPPIDFILE return $rc } build_arp_sender_cmd() { case "$ARP_SENDER" in send_arp) if [ "x$IP_CIP" = "xyes" ] ; then if [ x = "x$IF_MAC" ] ; then MY_MAC=auto else # send_arp.linux should return without doing anything in this case MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'` fi else MY_MAC=auto fi ARGS="$OCF_RESKEY_send_arp_opts -i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" ARP_SENDER_CMD="$SENDARP $ARGS" ;; iputils_arping) ARGS="$OCF_RESKEY_send_arp_opts -U -c $ARP_COUNT -I $NIC $OCF_RESKEY_ip" ARP_SENDER_CMD="run_with_pidfile arping $ARGS" ;; libnet_arping) ARGS="$OCF_RESKEY_send_arp_opts -U -c $ARP_COUNT -i $NIC -S $OCF_RESKEY_ip $OCF_RESKEY_ip" ARP_SENDER_CMD="run_with_pidfile arping $ARGS" ;; ipoibarping) ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip" ARP_SENDER_CMD="ipoibarping $ARGS" ;; *) # should not occur ocf_exit_reason "unrecognized arp_sender value: $ARP_SENDER" exit $OCF_ERR_GENERIC ;; esac } # # Send Unsolicited ARPs to update neighbor's ARP cache # run_arp_sender() { if [ "x$1" = "xrefresh" ] ; then ARP_COUNT=$OCF_RESKEY_arp_count_refresh LOGLEVEL=debug else ARP_COUNT=$OCF_RESKEY_arp_count LOGLEVEL=info fi if [ $ARP_COUNT -eq 0 ] ; then return fi # do not need to send Gratuitous ARPs in the Cluster IP configuration # except send_arp.libnet binary to retain the old behavior if [ "x$IP_CIP" = "xyes" ] && \ [ "x$ARP_SENDER" != "xsend_arp" ] ; then ocf_log info "Gratuitous ARPs are not sent in the Cluster IP configuration" return fi # prepare arguments for each arp sender program # $ARP_SENDER_CMD should be set build_arp_sender_cmd ocf_log $LOGLEVEL "$ARP_SENDER_CMD" if ocf_is_true $OCF_RESKEY_arp_bg; then log_arp_sender $ARP_SENDER_CMD & else log_arp_sender $ARP_SENDER_CMD fi } # # Run send_ua to note send ICMPv6 Unsolicited Neighbor Advertisements. 
# run_send_ua() { local i # Duplicate Address Detection [DAD] # Kernel will flag the IP as 'tentative' until it ensured that # there is no duplicates. # If there is, it will flag it as 'dadfailed' for i in $(seq 1 10); do ipstatus=$($IP2UTIL -o -f $FAMILY addr show dev $NIC to $OCF_RESKEY_ip/$NETMASK) case "$ipstatus" in *dadfailed*) ocf_log err "IPv6 address collision $OCF_RESKEY_ip [DAD]" $IP2UTIL -f $FAMILY addr del dev $NIC $OCF_RESKEY_ip/$NETMASK if [ $? -ne 0 ]; then ocf_log err "Could not delete IPv6 address" fi return $OCF_ERR_GENERIC ;; *tentative*) if [ $i -eq 10 ]; then ocf_log warn "IPv6 address : DAD is still in tentative" fi ;; *) break ;; esac sleep 1 done # Now the address should be usable ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC" ocf_log info "$SENDUA $ARGS" $SENDUA $ARGS || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements." } # Do we already serve this IP address on the given $NIC? # # returns: # ok = served (for CIP: + hash bucket) # partial = served and no hash bucket (CIP only) # partial2 = served and no CIP iptables rule # no = nothing # ip_served() { if [ -z "$NIC" ]; then # no nic found or specified echo "no" return 0 fi cur_nic="`find_interface $OCF_RESKEY_ip $NETMASK`" if [ -z "$cur_nic" ]; then echo "no" return 0 fi if [ -z "$IP_CIP" ]; then for i in $cur_nic; do # only mark as served when on the same interfaces as $NIC [ "$i" = "$NIC" ] || continue echo "ok" return 0 done # There used to be logic here to pretend "not served", # if ${OCF_RESKEY_lvs_support} was enabled, and the IP was # found active on "lo*" only. With lvs_support on, you should # have NIC != lo, so thats already filtered # by the continue above. echo "no" return 0 fi # Special handling for the CIP: if [ ! 
-e $IP_CIP_FILE ]; then echo "partial2" return 0 fi if egrep -q "(^|,)${IP_INC_NO}(,|$)" $IP_CIP_FILE ; then echo "ok" return 0 else echo "partial" return 0 fi exit $OCF_ERR_GENERIC } ####################################################################### ip_usage() { cat <$IP_CIP_FILE fi if [ "$ip_status" = "no" ]; then if ocf_is_true ${OCF_RESKEY_lvs_support}; then for i in `find_interface $OCF_RESKEY_ip 32`; do case $i in lo*) remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo ;; esac done fi add_interface $OCF_RESKEY_ip $NETMASK ${BRDCAST:-none} $NIC $IFLABEL rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "Failed to add $OCF_RESKEY_ip" exit $rc fi fi case $NIC in lo*) : no need to run send_arp on loopback ;; *) if [ $FAMILY = "inet" ];then run_arp_sender else if [ -x $SENDUA ]; then run_send_ua if [ $? -ne 0 ]; then ocf_exit_reason "run_send_ua failed." exit $OCF_ERR_GENERIC fi fi fi ;; esac exit $OCF_SUCCESS } ip_stop() { local ip_del_if="yes" if [ -n "$IP_CIP" ]; then # Cluster IPs need special processing when the last bucket # is removed from the node... take a lock to make sure only one # process executes that code ocf_take_lock $CIP_lockfile ocf_release_lock_on_exit $CIP_lockfile fi if [ -f "$SENDARPPIDFILE" ] ; then kill `cat "$SENDARPPIDFILE"` if [ $? 
-ne 0 ]; then ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip" else ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip" fi rm -f "$SENDARPPIDFILE" fi local ip_status=`ip_served` ocf_log info "IP status = $ip_status, IP_CIP=$IP_CIP" if [ $ip_status = "no" ]; then : Requested interface not in use exit $OCF_SUCCESS fi if [ -n "$IP_CIP" ] && [ $ip_status != "partial2" ]; then if [ $ip_status = "partial" ]; then exit $OCF_SUCCESS fi echo "-$IP_INC_NO" >$IP_CIP_FILE if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then ocf_log info $OCF_RESKEY_ip, $IP_CIP_HASH i=1 while [ $i -le $IP_INC_GLOBAL ]; do ocf_log info $i $IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ --new \ --clustermac $IF_MAC \ --total-nodes $IP_INC_GLOBAL \ --local-node $i \ --hashmode $IP_CIP_HASH i=`expr $i + 1` done else ip_del_if="no" fi fi if [ "$ip_del_if" = "yes" ]; then delete_interface $OCF_RESKEY_ip $NIC $NETMASK if [ $? -ne 0 ]; then ocf_exit_reason "Unable to remove IP [${OCF_RESKEY_ip} from interface [ $NIC ]" exit $OCF_ERR_GENERIC fi if ocf_is_true ${OCF_RESKEY_lvs_support}; then restore_loopback "$OCF_RESKEY_ip" fi fi exit $OCF_SUCCESS } ip_monitor() { # TODO: Implement more elaborate monitoring like checking for # interface health maybe via a daemon like FailSafe etc... local ip_status=`ip_served` case $ip_status in ok) run_arp_sender refresh return $OCF_SUCCESS ;; partial|no|partial2) exit $OCF_NOT_RUNNING ;; *) # Errors on this interface? 
return $OCF_ERR_GENERIC ;; esac } # make sure that we have something to send ARPs with set_send_arp_program() { ARP_SENDER=send_arp if [ -n "$OCF_RESKEY_arp_sender" ]; then case "$OCF_RESKEY_arp_sender" in send_arp) check_binary $SENDARP ;; iputils_arping) check_binary arping ;; libnet_arping) check_binary arping ;; ipoibarping) check_binary ipoibarping ;; *) ocf_exit_reason "unrecognized arp_sender value: $OCF_RESKEY_arp_sender" exit $OCF_ERR_CONFIGURED ;; esac ARP_SENDER="$OCF_RESKEY_arp_sender" else if is_infiniband; then ARP_SENDER=ipoibarping if ! have_binary ipoibarping; then [ "$__OCF_ACTION" = start ] && ocf_log warn "using send_arp for infiniband because ipoibarping is not available (set arp_sender to \"send_arp\" to suppress this message)" check_binary $SENDARP ARP_SENDER=send_arp fi fi fi } ip_validate() { check_binary $IP2UTIL IP_CIP= if [ -n "$OCF_RESKEY_network_namespace" ]; then OCF_RESKEY_network_namespace= exec $IP2UTIL netns exec "$OCF_RESKEY_network_namespace" "$0" "$__OCF_ACTION" fi ip_init set_send_arp_program if [ -n "$IP_CIP" ]; then check_binary $IPTABLES check_binary $MODPROBE fi # $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init, # do not bother here. if ocf_is_true "$OCF_RESKEY_unique_clone_address" && ! 
ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then ocf_exit_reason "unique_clone_address makes sense only with meta globally_unique set" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then : else ocf_exit_reason "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then : else ocf_exit_reason "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_preferred_lft" ]; then ocf_exit_reason "Empty value is invalid for OCF_RESKEY_preferred_lft" exit $OCF_ERR_CONFIGURED fi if [ -n "$IP_CIP" ]; then local valid=1 case $IP_CIP_HASH in sourceip|sourceip-sourceport|sourceip-sourceport-destport) ;; *) ocf_exit_reason "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]" exit $OCF_ERR_CONFIGURED ;; esac if ocf_is_true ${OCF_RESKEY_lvs_support}; then ocf_exit_reason "LVS and load sharing not advised to try" exit $OCF_ERR_CONFIGURED fi case $IF_MAC in [0-9a-zA-Z][13579bBdDfF][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]) ;; *) valid=0 ;; esac if [ $valid -eq 0 ]; then ocf_exit_reason "Invalid IF_MAC [$IF_MAC]" exit $OCF_ERR_CONFIGURED fi fi } if ocf_is_true "$OCF_RESKEY_unique_clone_address"; then prefix=`echo $OCF_RESKEY_ip | awk -F. '{print $1"."$2"."$3}'` suffix=`echo $OCF_RESKEY_ip | awk -F. 
'{print $4}'` suffix=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + $suffix` OCF_RESKEY_ip="$prefix.$suffix" fi case $__OCF_ACTION in meta-data) meta_data ;; usage|help) ip_usage exit $OCF_SUCCESS ;; esac ip_validate case $__OCF_ACTION in start) ip_start ;; stop) ip_stop ;; status) ip_status=`ip_served` if [ $ip_status = "ok" ]; then echo "running" exit $OCF_SUCCESS else echo "stopped" exit $OCF_NOT_RUNNING fi ;; monitor) ip_monitor ;; validate-all) ;; *) ip_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac # vi:sw=4:ts=8: diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr index 5a447196e..b764c122d 100755 --- a/heartbeat/IPsrcaddr +++ b/heartbeat/IPsrcaddr @@ -1,520 +1,525 @@ #!/bin/sh # # Description: IPsrcaddr - Preferred source address modification # # Author: John Sutton # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: SCL Internet # # Based on the IPaddr script. # # This script manages the preferred source address associated with # packets which originate on the localhost and are routed through the # default route. By default, i.e. without the use of this script or # similar, these packets will carry the IP of the primary i.e. the # non-aliased interface. This can be a nuisance if you need to ensure # that such packets carry the same IP irrespective of which host in # a redundant cluster they actually originate from. # # It can add a preferred source address, or remove one. # # usage: IPsrcaddr {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds a preferred source address. # # Surprisingly, the "stop" arg removes it. :-) # # NOTES: # # 1) There must be one and not more than 1 default route! Mainly because # I can't see why you should have more than one. And if there is more # than one, we would have to box clever to find out which one is to be # modified, or we would have to pass its identity as an argument. # # 2) The script depends on Alexey Kuznetsov's ip utility from the # iproute aka iproute2 package. 
# # 3) No checking is done to see if the passed in IP address can # reasonably be associated with the interface on which the default # route exists. So unless you want to deliberately spoof your source IP, # check it! Normally, I would expect that your haresources looks # something like: # # nodename ip1 ip2 ... ipN IPsrcaddr::ipX # # where ipX is one of the ip1 to ipN. # # OCF parameters are as below: # OCF_RESKEY_ipaddress ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults +OCF_RESKEY_ipaddress_default="" +OCF_RESKEY_cidr_netmask_default="" OCF_RESKEY_proto_default="" +: ${OCF_RESKEY_ipaddress=${OCF_RESKEY_ipaddress_default}} +: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} : ${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}} + ####################################################################### [ -z "$OCF_RESKEY_proto" ] && PROTO="" || PROTO="proto $OCF_RESKEY_proto" USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0" CMDCHANGE="$IP2UTIL route change to " SYSTYPE="`uname -s`" usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 Resource script for IPsrcaddr. It manages the preferred source address modification. Manages the preferred source address for outgoing IP packets The IP address. IP address - + The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). Netmask - + Proto to match when finding network. E.g. "kernel". Proto - + END } errorexit() { ocf_exit_reason "$*" exit $OCF_ERR_GENERIC } # # We can distinguish 3 cases: no preferred source address, a # preferred source address exists which matches that specified, and one # exists but doesn't match that specified. srca_read() returns 1,0,2 # respectively. 
# # The output of route show is something along the lines of: # # default via X.X.X.X dev eth1 src Y.Y.Y.Y # # where the src clause "src Y.Y.Y.Y" may or may not be present WS="[`echo -en ' \t'`]" OCTET="[0-9]\{1,3\}" IPADDR="\($OCTET\.\)\{3\}$OCTET" SRCCLAUSE="src$WS$WS*\($IPADDR\)" MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)" FINDIF=$HA_BIN/findif # findif needs that to be set export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress srca_read() { # Capture the default route - doublequotes prevent word splitting... DEFROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed" # ... so we can make sure there is only 1 default route [ 1 -eq `echo "$DEFROUTE" | wc -l` ] || \ errorexit "more than 1 default route exists" # But there might still be no default route [ -z "$DEFROUTE" ] && errorexit "no default route exists" # Sed out the source ip address if it exists SRCIP=`echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p"` # and what remains after stripping out the source ip address clause ROUTE_WO_SRC=`echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/"` [ -z "$SRCIP" ] && return 1 [ $SRCIP = $1 ] && return 0 return 2 } # # Add (or change if it already exists) the preferred source address # The exit code should conform to LSB exit codes. # srca_start() { srca_read $1 rc=$? if [ $rc = 0 ]; then rc=$OCF_SUCCESS ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)" else $IP2UTIL route replace $NETWORK dev $INTERFACE src $1 || \ errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed" $CMDCHANGE $ROUTE_WO_SRC src $1 || \ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed" rc=$? fi return $rc } # # Remove (if it exists) the preferred source address. # If one exists but it's not the same as the one specified, that's # an error. 
Maybe that's the wrong behaviour because if this fails # then when IPaddr releases the associated interface (if there is one) # your default route will also get dropped ;-( # The exit code should conform to LSB exit codes. # srca_stop() { srca_read $1 rc=$? if [ $rc = 1 ]; then # We do not have a preferred source address for now ocf_log info "No preferred source address defined, nothing to stop" exit $OCF_SUCCESS fi [ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address" $IP2UTIL route replace $NETWORK dev $INTERFACE || \ errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed" $CMDCHANGE $ROUTE_WO_SRC || \ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed" return $? } srca_status() { srca_read $1 case $? in 0) echo "OK" return $OCF_SUCCESS;; 1) echo "No preferred source address defined" return $OCF_NOT_RUNNING;; 2) echo "Preferred source address has incorrect value" return $OCF_ERR_GENERIC;; esac } # A not reliable IP address checking function, which only picks up those _obvious_ violations... # # It accepts IPv4 address in dotted quad notation, for example "192.168.1.1" # # 100% confidence whenever it reports "negative", # but may get false "positive" answer. # CheckIP() { ip="$1" case $ip in *[!0-9.]*) #got invalid char false;; .*|*.) #begin or end by ".", which is invalid false;; *..*) #consecutive ".", which is invalid false;; *.*.*.*.*) #four decimal dots, which is too many false;; *.*.*.*) #exactly three decimal dots, candidate, evaluate each field local IFS=. set -- $ip if ( [ $1 -le 254 ] && [ $2 -le 254 ] && [ $3 -le 254 ] && [ $4 -le 254 ] ) then if [ $1 -eq 127 ]; then ocf_exit_reason "IP address [$ip] is a loopback address, thus can not be preferred source address" exit $OCF_ERR_CONFIGURED fi else true fi ;; *) #less than three decimal dots false;; esac return $? 
# This return is unnecessary, this comment too :) } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface_solaris() { $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | while read ifname linkstuff do : ifname = $ifname read inet addr junk : inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done # This doesn't look right for a box with multiple NICs. # It looks like it always selects the first interface on # a machine. Yet, we appear to use the results for this case too... ifname=`echo "$ifname" | sed s'%:*$%%'` case $addr in addr:$BASEIP) echo $ifname; return $OCF_SUCCESS;; $BASEIP) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface_generic() { local iface=`$IP2UTIL -o -f inet addr show | grep "\ $BASEIP" \ | cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$'` if [ -z "$iface" ]; then return $OCF_ERR_GENERIC else echo $iface return $OCF_SUCCESS fi } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface() { case "$SYSTYPE" in SunOS) IF=`find_interface_solaris $BASEIP` ;; *) IF=`find_interface_generic $BASEIP` ;; esac echo $IF return $OCF_SUCCESS; } ip_status() { BASEIP="$1" case "$SYSTYPE" in Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac case "$SYSTYPE" in *BSD) $IFCONFIG $IFCONFIG_A_OPT | grep "inet.*[: ]$BASEIP " >/dev/null 2>&1 if [ $? 
= 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi;; Linux|SunOS) IF=`find_interface "$BASEIP"` if [ -z "$IF" ]; then return $OCF_NOT_RUNNING fi case $IF in lo*) ocf_exit_reason "IP address [$BASEIP] is served by loopback, thus can not be preferred source address" exit $OCF_ERR_CONFIGURED ;; *)return $OCF_SUCCESS;; esac ;; *) if [ -z "$IF" ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi;; esac } srca_validate_all() { if [ -z "$OCF_RESKEY_ipaddress" ]; then # usage ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!" return $OCF_ERR_CONFIGURED fi if ! [ "x$SYSTYPE" = "xLinux" ]; then # checks after this point are only relevant for linux. return $OCF_SUCCESS fi check_binary $AWK case "$SYSTYPE" in *BSD|SunOS) check_binary $IFCONFIG ;; esac # The IP address should be in good shape if CheckIP "$ipaddress"; then : else ocf_exit_reason "Invalid IP address [$ipaddress]" return $OCF_ERR_CONFIGURED fi if ocf_is_probe; then return $OCF_SUCCESS fi # We should serve this IP address of course if ip_status "$ipaddress"; then : else ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations do not require the OCF instance parameters to be set case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac ipaddress="$OCF_RESKEY_ipaddress" srca_validate_all rc=$? if [ $rc -ne $OCF_SUCCESS ]; then case $1 in # if we can't validate the configuration during a stop, that # means the resources isn't configured correctly. There's no way # to actually stop the resource in this situation because there's # no way it could have even started. Return success here # to indicate that the resource is not running, otherwise the # stop action will fail causing the node to be fenced just because # of a mis configuration. 
stop) exit $OCF_SUCCESS;; *) exit $rc;; esac fi findif_out=`$FINDIF -C` rc=$? [ $rc -ne 0 ] && { ocf_exit_reason "[$FINDIF -C] failed" exit $rc } INTERFACE=`echo $findif_out | awk '{print $1}'` NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'` case $1 in start) srca_start $ipaddress ;; stop) srca_stop $ipaddress ;; status) srca_status $ipaddress ;; monitor) srca_status $ipaddress ;; validate-all) srca_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # # Version 0.3 2002/11/04 17:00:00 John Sutton # Name changed from IPsrcroute to IPsrcaddr and now reports errors # using ha_log rather than on stderr. # # Version 0.2 2002/11/02 17:00:00 John Sutton # Changed status output to "OK" to satisfy ResourceManager's # we_own_resource() function. # # Version 0.1 2002/11/01 17:00:00 John Sutton # First effort but does the job? # diff --git a/heartbeat/LVM b/heartbeat/LVM index 40874fb4c..287856e54 100755 --- a/heartbeat/LVM +++ b/heartbeat/LVM @@ -1,458 +1,470 @@ #!/bin/sh # # # LVM # # Description: Manages an LVM volume as an HA resource # # # Author: Alan Robertson # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # This code significantly inspired by the LVM resource # in FailSafe by Lars Marowsky-Bree # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_volgrpname # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_volgrpname_default="" +OCF_RESKEY_exclusive_default="false" +OCF_RESKEY_tag_default="pacemaker" +OCF_RESKEY_partial_activation_default="false" + +: ${OCF_RESKEY_volgrpname=${OCF_RESKEY_volgrpname_default}} +: ${OCF_RESKEY_exclusive=${OCF_RESKEY_exclusive_default}} +: ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}} +: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}} + ####################################################################### usage() { methods=`LVM_methods` methods=`echo $methods | tr ' ' '|'` cat < 1.0 Resource script for LVM. It manages an Linux Volume Manager volume (LVM) as an HA resource. Controls the availability of an LVM Volume Group The name of volume group. Volume group name - + If set, the volume group will be activated exclusively. This option works one of two ways. If the volume group has the cluster attribute set, then the volume group will be activated exclusively using clvmd across the cluster. If the cluster attribute is not set, the volume group will be activated exclusively using a tag and the volume_list filter. When the tag option is in use, the volume_list in lvm.con must be initialized. This can be as simple as setting 'volume_list = []' depending on your setup. Exclusive activation - + If "exclusive" is set on a non clustered volume group, this overrides the tag to be used. Exclusive activation tag - + If set, the volume group will be activated partially even with some physical volumes missing. It helps to set to true when using mirrored logical volumes. Activate VG partially when missing PVs - + EOF } # # methods: What methods/operations do we support? # LVM_methods() { cat < /dev/null 2>&1; then ocf_log info "Volume group $vg not found" return $OCF_SUCCESS fi ocf_log info "Deactivating volume group $vg" lvm_pre_deactivate || exit for i in $(seq 10) do ocf_run vgchange $vgchange_deactivate_options $vg res=$? 
if LVM_status $vg; then ocf_exit_reason "LVM: $vg did not stop correctly" res=1 fi if [ $res -eq 0 ]; then break fi res=$OCF_ERR_GENERIC ocf_log warn "$vg still Active" ocf_log info "Retry deactivating volume group $vg" sleep 1 which udevadm > /dev/null 2>&1 && udevadm settle --timeout=5 done lvm_post_deactivate $res } # # Check whether the OCF instance parameters are valid # LVM_validate_all() { check_binary $AWK ## # lvmetad is a daemon that caches lvm metadata to improve the # performance of LVM commands. This daemon should never be used when # volume groups exist that are being managed by the cluster. The lvmetad # daemon introduces a response lag, where certain LVM commands look like # they have completed (like vg activation) when in fact the command # is still in progress by the lvmetad. This can cause reliability issues # when managing volume groups in the cluster. For Example, if you have a # volume group that is a dependency for another application, it is possible # the cluster will think the volume group is activated and attempt to start # the application before volume group is really accesible... lvmetad is bad. ## lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1 if [ $? -eq 0 ]; then # for now warn users that lvmetad is enabled and that they should disable it. In the # future we may want to consider refusing to start, or killing the lvmetad daemon. ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process" fi ## # Off-the-shelf tests... ## VGOUT=`vgck ${VOLUME} 2>&1` if [ $? -ne 0 ]; then # Inconsistency might be due to missing physical volumes, which doesn't # automatically mean we should fail. If partial_activation=true then # we should let start try to handle it, or if no PVs are listed as # "unknown device" then another node may have marked a device missing # where we have access to all of them and can start without issue. 
if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then case $(vgs -o attr --noheadings $OCF_RESKEY_volgrpname | tr -d ' ') in ???p??*) if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then # We are missing devices and cannot activate partially ocf_exit_reason "Volume group [$VOLUME] has devices missing. Consider partial_activation=true to attempt to activate partially" exit $OCF_ERR_GENERIC else # We are missing devices but are allowed to activate partially. # Assume that caused the vgck failure and carry on ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action." fi ;; esac # else the vg is partial but all devices are accounted for, so another # node must have marked the device missing. Proceed. else # vgck failure was for something other than missing devices ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi fi ## # Does the Volume Group exist? ## if [ "$LVM_MAJOR" = "1" ]; then VGOUT=`vgdisplay ${VOLUME} 2>&1` else VGOUT=`vgdisplay -v ${VOLUME} 2>&1` fi if [ $? -ne 0 ]; then ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi if lvs --noheadings -o segtype | grep -q "cache"; then if ! lvs --noheadings -o cache_mode "$OCF_RESKEY_volgrpname" | grep -q "writethrough"; then ocf_log warn "LVM CACHE IS NOT IN WRITETHROUGH MODE. THIS IS NOT A SUPPORTED CONFIGURATION." fi fi if ocf_is_clone && ocf_is_true "$OCF_RESKEY_exclusive"; then ocf_exit_reason "cloned lvm resources can not be activated exclusively" exit $OCF_ERR_CONFIGURED fi lvm_validate_all } # # 'main' starts here... 
# if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS;; methods) LVM_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if [ -z "$OCF_RESKEY_volgrpname" ] then ocf_exit_reason "You must identify the volume group name!" exit $OCF_ERR_CONFIGURED fi # Get the LVM version number, for this to work we assume(thanks to panjiam): # # LVM1 outputs like this # # # vgchange --version # vgchange: Logical Volume Manager 1.0.3 # Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10) # # LVM2 and higher versions output in this format # # # vgchange --version # LVM version: 2.00.15 (2004-04-19) # Library version: 1.00.09-ioctl (2004-03-31) # Driver version: 4.1.0 LVM_VERSION=`vgchange --version 2>&1 | \ $AWK '/Logical Volume Manager/ {print $5"\n"; exit; } /LVM version:/ {printf $3"\n"; exit;}'` rc=$? if ( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] ) then ocf_exit_reason "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?" exit $OCF_ERR_INSTALLED fi LVM_MAJOR="${LVM_VERSION%%.*}" VOLUME=$OCF_RESKEY_volgrpname OP_METHOD=$1 set_lvm_mode lvm_init if ocf_is_true "$OCF_RESKEY_partial_activation" ; then vgchange_activate_options="${vgchange_activate_options} --partial" fi # What kind of method was invoked? case "$1" in start) LVM_validate_all LVM_start $VOLUME exit $?;; stop) LVM_stop $VOLUME exit $?;; status) LVM_status $VOLUME $1 exit $?;; monitor) LVM_status $VOLUME exit $?;; validate-all) LVM_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate index 7d1fe4cf1..9c7c721bf 100755 --- a/heartbeat/LVM-activate +++ b/heartbeat/LVM-activate @@ -1,861 +1,875 @@ #!/bin/sh # # # Copyright (c) 2017 SUSE LINUX, Eric Ren # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # LVM-activate OCF Resource Agent: # # Logical volume manager (LVM) provides new features for cluster enviroment: # lvmlockd and system ID, which aims to replace clvmd and tagged-exclusive # activation. Accordingly, we have created a new resource agent named "lvmlockd" # to manage lvmlockd daemon. In addition, this new resource agent "LVM-activate" # is created to take care of LVM activation/deactivation work. This agent supports # the new features: lvmlockd and system ID, and also supports the old features: # clvmd and lvm tag. # # Thanks David Teigland! He is the author of these LVM features, giving valuable # idea/feedback about this resource agent. ############################################################################ # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_vgname_default="" +OCF_RESKEY_lvname_default="" +OCF_RESKEY_vg_access_mode_default="" +OCF_RESKEY_activation_mode_default="exclusive" +OCF_RESKEY_tag_default="pacemaker" + +: ${OCF_RESKEY_vgname=${OCF_RESKEY_vgname_default}} +: ${OCF_RESKEY_lvname=${OCF_RESKEY_lvname_default}} +: ${OCF_RESKEY_vg_access_mode=${OCF_RESKEY_vg_access_mode_default}} +: ${OCF_RESKEY_activation_mode=${OCF_RESKEY_activation_mode_default}} +: ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}} + # If LV is given, only activate this named LV; otherwise, activate all # LVs in the named VG. VG=${OCF_RESKEY_vgname} LV=${OCF_RESKEY_lvname} # How LVM controls access to the VG: # # 0: place-holder for any incorrect cases; To be safe, we enforce the VG # must use any of the following protection methods in cluster environment. # 1: vg is shared - lvmlockd (new) # 2: vg is clustered - clvmd (old) # 3: vg has system_id (new) # 4: vg has tagging (old) VG_access_mode=${OCF_RESKEY_vg_access_mode} VG_access_mode_num=0 # Activate LV(s) with "shared" lock for cluster fs # or "exclusive" lock for local fs -LV_activation_mode=${OCF_RESKEY_activation_mode:-exclusive} +LV_activation_mode=${OCF_RESKEY_activation_mode} # For system ID feature SYSTEM_ID="" # For tagging activation mode -OUR_TAG=${OCF_RESKEY_tag:-pacemaker} +OUR_TAG=${OCF_RESKEY_tag} ####################################################################### meta_data() { cat < 1.0 This agent manages LVM activation/deactivation work for a given volume group. It supports the following modes, controlled by the vg_access_mode parameter: * lvmlockd * system_id * clvmd * tagging Notes: 1. There are two possible configuration combinations: lvmlockd+LVM-activate and clvm+LVM-activate. However, it is not possible to use both at the same time! 2. Put all "lvmlockd"/"clvmd" volume groups into auto management by the agent if using the cluster to manage at least one of them. 
If you manage some manually, the stop action of the lvmlockd agent may fail and the node may get fenced, because some DLM lockspaces might be in use and cannot be closed automatically. This agent activates/deactivates logical volumes. The volume group name. The volume group name - + If set, only the specified LV will be activated. Only activate the given LV - + This option decides which solution will be used to protect the volume group in cluster environment. Optional solutions are: lvmlockd, clvmd, system_id and tagging. The VG access mode - + The activation mode decides the visibility of logical volumes in the cluster. There are two different modes: "shared" for cluster filesystem and "exclusive" for local filesystem. With "shared", an LV can be activated concurrently from multiple nodes. With "exclusive", an LV can be activated by one node at a time. This option only has effect on "lvmlockd"/"clvmd" vg_access_mode. For "system_id" and "tagging", they always mean exclusive activation. Logical volume activation mode - + The tag used for tagging activation mode. The tag used for tagging activation mode - + END } ####################################################################### usage() { cat </dev/null | tr -d \') export ${kvs} vg_locktype=${LVM2_VG_LOCK_TYPE} vg_clustered=${LVM2_VG_CLUSTERED} vg_systemid=${LVM2_VG_SYSTEMID} vg_tags=${LVM2_VG_TAGS} # We know this VG is using lvmlockd if the lock type is dlm. if [ "$vg_locktype" = "dlm" ]; then access_mode=1 elif [ "$vg_clustered" = "clustered" ]; then access_mode=2 elif [ -n "$vg_systemid" ]; then SYSTEM_ID=$(lvm systemid 2>/dev/null | cut -d':' -f2 | tr -d '[:blank:]') access_mode=3 elif [ -n "$vg_tags" ]; then # TODO: # We don't have reliable way to test if tagging activation is used. access_mode=4 else access_mode=0 fi return $access_mode } # TODO: All tagging activation code is almost copied from LVM RA!!! 
# But, the old LVM RA just uses the ordinary tags, not the "hosttag" feature
# which may be a better method for active-inactive cluster scenario.
#
# We have two choices:
# 1. Continue to use the LVM way, which may work well on old system.
# 2. Change to use the real hosttag feature, but it looks very same
#    to systemID.
# Anyway, we can easily change this if anyone requests with good reasons.

# Does this VG carry our ownership tag?
#
# Returns:
#   0 - the VG's tag list is exactly $OUR_TAG
#   1 - the VG has no tags at all (nobody owns it yet)
#   2 - the VG has some other tag(s) set
check_tags()
{
	local owner

	# Declared separately from the assignment so that "local" neither
	# word-splits the value nor hides the pipeline's exit status.
	owner=$(vgs -o tags --noheadings ${VG} | tr -d '[:blank:]')

	if [ -z "$owner" ]; then
		# No-one owns this VG yet
		return 1
	fi

	if [ "$OUR_TAG" = "$owner" ]; then
		# yep, this is ours
		return 0
	fi

	# some other tag is set on this vg
	return 2
}

# Remove every tag from ${VG}.
#
# Exits the agent with OCF_ERR_GENERIC if any tag is still present
# afterwards; otherwise returns OCF_SUCCESS.
strip_tags()
{
	local tag

	# vgs prints the tags comma separated; turn them into a word list.
	# The tags are read from ${VG}: this agent is configured through
	# "vgname", so OCF_RESKEY_volgrpname (a parameter of the old LVM RA)
	# is not set here.
	for tag in $(vgs --noheadings -o tags ${VG} | sed 's/,/ /g'); do
		ocf_log info "Stripping tag, $tag"

		# LVM version 2.02.98 allows changing tags if PARTIAL
		vgchange --deltag "$tag" ${VG}
	done

	if [ -n "$(vgs -o tags --noheadings ${VG} | tr -d '[:blank:]')" ]; then
		ocf_exit_reason "Failed to remove ownership tags from ${VG}"
		exit $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# Make $OUR_TAG the ownership tag of ${VG}, stripping foreign tags first.
#
# Returns OCF_SUCCESS when the VG ends up tagged with $OUR_TAG,
# OCF_ERR_GENERIC otherwise.
set_tags()
{
	# Dispatch on the *result* of check_tags (not on the literal word).
	check_tags
	case $? in
	0)
		# we already own it.
		return $OCF_SUCCESS
		;;
	2)
		# other tags are set, strip them before setting
		if ! strip_tags; then
			return $OCF_ERR_GENERIC
		fi
		;;
	*)
		:
		;;
	esac

	if ! vgchange --addtag $OUR_TAG ${VG} ; then
		ocf_log err "Failed to add ownership tag to ${VG}"
		return $OCF_ERR_GENERIC
	fi

	ocf_log info "New tag \"${OUR_TAG}\" added to ${VG}"
	return $OCF_SUCCESS
}

# Verify one lvm configuration item; exits with OCF_ERR_CONFIGURED on
# mismatch.
#
# Parameters:
# 1st: config item name
# 2nd: expected config item value
config_verify()
{
	local name=$1
	local expect=$2
	local real=""

	# lvmconfig prints "key=value"; keep only the value part.
	real=$(lvmconfig "$name" | cut -d'=' -f2)
	if [ "$real" != "$expect" ]; then
		ocf_exit_reason "config item $name: expect=$expect but real=$real"
		exit $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}

# Sanity checks for vg_access_mode=lvmlockd: configuration must enable
# lvmlockd, the lvmlockd daemon must be running and clvmd must not be.
# Exits the agent on any violation.
lvmlockd_check()
{
	config_verify "global/use_lvmlockd" "1"

	# locking_type was removed from config in v2.03
	# NOTE(review): presumably ocf_version_cmp returns 0 when the installed
	# version is older than "2.03" - confirm against ocf-shellfuncs.
	ocf_version_cmp "$(lvmconfig --version | awk '/LVM ver/ {sub(/\(.*/, "", $3); print $3}')" "2.03"
	if [ "$?" -eq 0 ]; then
		config_verify "global/locking_type" "1"
	fi

	# We recommend to activate one LV at a time so that this specific volume
	# binds to a proper filesystem to protect the data
	# TODO:
	# Will this warn message be too noisy?
	if [ -z "$LV" ]; then
		ocf_log warn "You are recommended to activate one LV at a time or use exclusive activation mode."
	fi

	# Good: lvmlockd is running, and clvmd is not running
	if ! pgrep lvmlockd >/dev/null 2>&1 ; then
		if ocf_is_probe; then
			ocf_log info "initial probe: lvmlockd is not running yet."
			exit $OCF_NOT_RUNNING
		fi

		ocf_exit_reason "lvmlockd daemon is not running!"
		exit $OCF_ERR_CONFIGURED
	fi

	if pgrep clvmd >/dev/null 2>&1 ; then
		ocf_exit_reason "clvmd daemon is running unexpectedly."
		exit $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}

# Sanity checks for vg_access_mode=clvmd: configuration must select clvmd
# locking, clvmd must be running, lvmetad and lvmlockd must not be.
# Exits the agent on any violation.
clvmd_check()
{
	config_verify "global/use_lvmetad" "0"
	config_verify "global/use_lvmlockd" "0"
	config_verify "global/locking_type" "3"

	# TODO:
	# David asked a question: does lvchange -aey works well enough with clvmd?
	#
	# Corey said: I think it does work well enough. We do a fair amount of exclusive
	# activation clvm testing, and my experience is you'll get the LV activated on
	# the node you ran the command on. But, I think the specific scenario and issue
	# that surprised us all was when the LV was *already* exclusively active on say
	# nodeA, and nodeB then attempts to also exclusively activate it as well. Instead
	# of failing, the activation succeeds even though nodeB activation didn't occur.
	# This is documented in the following bug:
	# https://bugzilla.redhat.com/show_bug.cgi?id=1191724#c8
	# Technically, you're not guaranteed to have it activated on the node you run
	# the cmd on, but again, that's not been my experience.
	#
	# Eric: Put the interesting discussion here so that we can be more careful on this.

	# Good: clvmd is running, and lvmlockd is not running
	if ! pgrep clvmd >/dev/null 2>&1 ; then
		ocf_exit_reason "clvmd daemon is not running!"
		exit $OCF_ERR_CONFIGURED
	fi

	if pgrep lvmetad >/dev/null 2>&1 ; then
		ocf_exit_reason "Please stop lvmetad daemon when clvmd is running."
		exit $OCF_ERR_CONFIGURED
	fi

	if pgrep lvmlockd >/dev/null 2>&1 ; then
		ocf_exit_reason "lvmlockd daemon is running unexpectedly."
		exit $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}

# Sanity checks for vg_access_mode=system_id: lvm.conf must define a
# system_id_source and the local system ID ($SYSTEM_ID) must be known.
# Exits the agent on any violation.
systemid_check()
{
	local source

	# system_id_source is set in lvm.conf
	source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2)
	if [ "$source" = "" ] || [ "$source" = "none" ]; then
		ocf_exit_reason "system_id_source in lvm.conf is not set correctly!"
		exit $OCF_ERR_CONFIGURED
	fi

	if [ -z "${SYSTEM_ID}" ]; then
		ocf_exit_reason "local/system_id is not set!"
		exit $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}

# Verify tags setup
# Exits the agent when activation/volume_list is missing from the lvm
# configuration, or when it already names our tag or the VG itself.
tagging_check()
{
	# The volume_list must be initialized to something in order to
	# guarantee our tag will be filtered on startup
	if ! lvm dumpconfig activation/volume_list; then
		ocf_log err "LVM: Improper setup detected"
		ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
		exit $OCF_ERR_CONFIGURED
	fi

	# Our tag must _NOT_ be in the volume_list. This agent
	# overrides the volume_list during activation using the
	# special tag reserved for cluster activation
	if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\""; then
		ocf_log err "LVM: Improper setup detected"
		ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}"
		exit $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}

# Validate the instance parameters that need no LVM access and translate
# vg_access_mode into its numeric form (VG_access_mode_num).
# Exits with OCF_ERR_ARGS when the VG name is missing or activation_mode
# is neither "shared" nor "exclusive".
read_parameters()
{
	if [ -z "$VG" ]
	then
		ocf_exit_reason "You must identify the volume group name!"
		exit $OCF_ERR_ARGS
	fi

	if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ]
	then
		ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode"
		exit $OCF_ERR_ARGS
	fi

	# Convert VG_access_mode from string to index
	case ${VG_access_mode} in
	lvmlockd)
		VG_access_mode_num=1
		;;
	clvmd)
		VG_access_mode_num=2
		;;
	system_id)
		VG_access_mode_num=3
		;;
	tagging)
		VG_access_mode_num=4
		;;
	*)
		# dont exit with error-code here or nodes will get fenced on
		# e.g. "pcs resource create"
		ocf_exit_reason "You specified an invalid value for vg_access_mode: $VG_access_mode"
		;;
	esac
}

# Full validation: parameters, required binaries, VG visibility, access
# mode consistency, LV existence and the access-mode specific checks.
# Exits the agent on failure; returns OCF_SUCCESS otherwise.
lvm_validate()
{
	local lv_count
	local mode

	read_parameters

	check_binary pgrep
	# Every LVM command is just symlink to lvm binary
	check_binary lvm
	check_binary dmsetup

	if ! vgs --foreign ${VG} >/dev/null 2>&1 ; then
		# stop action exits successfully if the VG cannot be accessed...
		if [ "$__OCF_ACTION" = "stop" ]; then
			ocf_log warn "VG [${VG}] cannot be accessed, stop action exits successfully."
			exit $OCF_SUCCESS
		fi

		if ocf_is_probe; then
			ocf_log info "initial probe: VG [${VG}] is not found on any block device yet."
			exit $OCF_NOT_RUNNING
		fi

		ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
		exit $OCF_ERR_CONFIGURED
	fi

	# Get the access mode from VG metadata and check if it matches the input
	# value. Skip to check "tagging" mode because there's no reliable way to
	# automatically check if "tagging" mode is being used.
	get_VG_access_mode_num
	mode=$?
	if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then
		ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!"
		exit $OCF_ERR_ARGS
	fi

	# Nothing to do if the VG has no logical volume
	# ($lv_count is left unquoted on purpose: vgs pads its output with
	# blanks and word splitting strips them before the numeric test.)
	lv_count=$(vgs --foreign -o lv_count --noheadings ${VG} 2>/dev/null)
	if [ $lv_count -lt 1 ]; then
		ocf_exit_reason "Volume group [$VG] doesn't contain any logical volume!"
		exit $OCF_ERR_CONFIGURED
	fi

	# Check if the given $LV is in the $VG
	if [ -n "$LV" ]; then
		OUT=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
		if [ $? -ne 0 ]; then
			ocf_log err "lvs: ${OUT}"
			ocf_exit_reason "LV ($LV) is not in the given VG ($VG)."
			exit $OCF_ERR_ARGS
		fi
	fi

	# VG_access_mode_num specific checking goes here
	case ${VG_access_mode_num} in
	1)
		lvmlockd_check
		;;
	2)
		clvmd_check
		;;
	3)
		systemid_check
		;;
	4)
		tagging_check
		;;
	*)
		ocf_exit_reason "Incorrect VG access mode detected!"
		exit $OCF_ERR_CONFIGURED
		;;
	esac

	if [ $? -ne $OCF_SUCCESS ]; then
		ocf_exit_reason "Improper configuration issue is detected!"
		exit $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}

# To activate LV(s) with different "activation mode" parameters
#
# Parameters:
# 1st: option string handed to lvchange. It is expanded unquoted so that
#      a value such as "-ay --config ..." becomes several arguments.
#
# Returns OCF_SUCCESS, or OCF_ERR_GENERIC when lvchange fails.
do_activate()
{
	local activate_opt=$1
	local target

	# Only activate the specific LV if it's given
	if [ -n "$LV" ]; then
		target=${VG}/${LV}
	else
		target=${VG}
	fi

	ocf_run lvchange $activate_opt ${target} || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

# Activate through lvmlockd: start the VG lockspace, then activate with a
# shared (-asy) or exclusive (-aey) lock depending on activation_mode.
lvmlockd_activate()
{
	# activation opt
	local activate_opt
	local rc

	if [ "$LV_activation_mode" = "shared" ]; then
		activate_opt="-asy"
	else
		activate_opt="-aey"
	fi

	# lvmlockd requires shared VGs to be started before they're used
	ocf_run vgchange --lockstart ${VG}
	rc=$?
	if [ $rc -ne $OCF_SUCCESS ]; then
		ocf_log err "Failed to start shared VG(s), exit code: $rc"
		return $OCF_ERR_GENERIC
	fi

	do_activate "$activate_opt" || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

# clvmd must be running to activate clustered VG
clvmd_activate()
{
	local activate_opt

	if [ "$LV_activation_mode" = "shared" ]; then
		activate_opt="-asy"
	else
		activate_opt="-aey"
	fi

	do_activate "$activate_opt" || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

# Activate in system_id mode: take over the VG by writing our system ID
# onto it, then activate.
systemid_activate()
{
	local cur_systemid

	pvscan --cache
	cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]')

	# Put our system ID on the VG
	# (the current owner is passed as an extra system ID for this command)
	vgchange -y --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
		--systemid ${SYSTEM_ID} ${VG}

	do_activate "-ay" || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

# Activate in tagging mode: tag the VG as ours, then activate with a
# volume_list override that names our tag.
tagging_activate()
{
	if ! set_tags ; then
		ocf_log err "Failed to set tags on ${VG}."
		return $OCF_ERR_GENERIC
	fi

	do_activate "-ay --config activation{volume_list=[\"@${OUR_TAG}\"]}" || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

# Deactivate in lvmlockd mode and close the VG lockspace once no LV of
# the VG is active any more.
lvmlockd_deactivate()
{
	local rc

	do_activate "-an" || return $OCF_ERR_GENERIC

	# Other LVs of this VG are still active: keep the lockspace open.
	OUT=$(lvs --noheadings -S lv_active=active ${VG} 2>/dev/null)
	[ -n "$OUT" ] && return $OCF_SUCCESS

	# Close the lockspace of this VG if there is no active LV
	ocf_run vgchange --lockstop ${VG}
	rc=$?
	if [ $rc -ne $OCF_SUCCESS ]; then
		ocf_log err "Failed to close the shared VG lockspace, exit code: $rc"
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# Deactivate in clvmd mode.
clvmd_deactivate()
{
	do_activate "-an" || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

# Deactivate in system_id mode.
systemid_deactivate()
{
	do_activate "-an" || return $OCF_ERR_GENERIC

	return $OCF_SUCCESS
}

# Deactivate in tagging mode, then drop the ownership tags from the VG.
tagging_deactivate()
{
	do_activate "-an --config activation{volume_list=[\"@${OUR_TAG}\"]}" || return $OCF_ERR_GENERIC

	if ! strip_tags ; then
		ocf_log err "Failed to remove tags on ${VG}."
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# TODO:
# How can we accurately check if LVs in the given VG are all active?
#
# David:
# If we wanted to check that all LVs in the VG are active, then we would
# probably need to use the lvs/lv_live_table command here since dmsetup
# won't know about inactive LVs that should be active.
#
# Eric:
# But, lvs/lv_live_table command doesn't work well now. I tried the following
# method:
#
# lv_count=$(vgs --foreign -o lv_count --noheadings ${VG} 2>/dev/null | tr -d '[:blank:]')
# dm_count=$(dmsetup --noheadings info -c -S "vg_name=${VG}" 2>/dev/null | grep -c "${VG}-")
# test $lv_count -eq $dm_count
#
# It works, but we cannot afford to use LVM command in lvm_status. LVM command is expensive
# because it may potentially scan all disks on the system, update the metadata even using
# lvs/vgs when the metadata is somehow inconsistent.
#
# So, we have to make compromise that the VG is assumably active if any LV of the VG is active.
#
# Paul:
# VGS + LVS with "-" in their name get mangled with double dashes in dmsetup.
# Switching to wc and just counting lines while depending on the vgname + lvname filter
# in dmsetup gets around the issue with dmsetup reporting correctly but grep failing.
#
# Logic for both test cases and dmsetup calls changed so they match too.
#
# This is AllBad but there isn't a better way that I'm aware of yet.
#
# Returns OCF_SUCCESS when dmsetup reports at least one device for the
# VG (or for the single LV when one is configured), OCF_NOT_RUNNING
# otherwise.
lvm_status()
{
	local dm_count

	if [ -n "${LV}" ]; then
		# dmsetup ls? It cannot accept device name. It's
		# too heavy to list all DM devices.
		dm_count=$(dmsetup info --noheadings --noflush -c -S "vg_name=${VG} && lv_name=${LV}" | grep -c -v '^No devices found')
	else
		dm_count=$(dmsetup info --noheadings --noflush -c -S "vg_name=${VG}" | grep -c -v '^No devices found')
	fi

	if [ $dm_count -eq 0 ]; then
		return $OCF_NOT_RUNNING
	fi

	return $OCF_SUCCESS
}

# Activate LVM volume(s)
#
# Returns OCF_SUCCESS when the volume is (already or now) active and an
# error code otherwise.
lvm_start()
{
	local rc
	local vol

	# Name used in log messages; must be set before the first message.
	if [ -z "${LV}" ]; then
		vol=${VG}
	else
		vol=${VG}/${LV}
	fi

	if lvm_status ; then
		ocf_log info "${vol}: is already active."
		return $OCF_SUCCESS
	fi

	ocf_log info "Activating ${vol}"

	case ${VG_access_mode_num} in
	1)
		lvmlockd_activate
		;;
	2)
		clvmd_activate
		;;
	3)
		systemid_activate
		;;
	4)
		tagging_activate
		;;
	*)
		ocf_exit_reason "VG [${VG}] is not properly configured in cluster. It's unsafe!"
		exit $OCF_ERR_CONFIGURED
		;;
	esac

	rc=$?
	if lvm_status ; then
		ocf_log info "${vol}: activated successfully."
		return $OCF_SUCCESS
	fi

	# The activation helper may have reported success although nothing is
	# active; never hand a success code back in that situation.
	if [ $rc -eq $OCF_SUCCESS ]; then
		rc=$OCF_ERR_GENERIC
	fi

	ocf_exit_reason "${vol}: failed to activate."
	return $rc
}

# Deactivate LVM volume(s)
#
# Returns OCF_SUCCESS when the volume is (already or now) inactive,
# OCF_ERR_GENERIC when it is still active afterwards.
lvm_stop()
{
	local vol

	if [ -z "${LV}" ]; then
		vol=${VG}
	else
		vol=${VG}/${LV}
	fi

	if ! lvm_status ; then
		ocf_log info "${vol}: has already been deactivated."
		return $OCF_SUCCESS
	fi

	ocf_log info "Deactivating ${vol}"

	case ${VG_access_mode_num} in
	1)
		lvmlockd_deactivate
		;;
	2)
		clvmd_deactivate
		;;
	3)
		systemid_deactivate
		;;
	4)
		tagging_deactivate
		;;
	*)
		# An unknown access mode is only logged; stop still exits with
		# success.
		ocf_log err "VG [${VG}] is not properly configured in cluster. It's unsafe!"
		exit $OCF_SUCCESS
		;;
	esac

	if ! lvm_status ; then
		ocf_log info "${vol}: deactivated successfully."
		return $OCF_SUCCESS
	else
		ocf_exit_reason "${vol}: failed to deactivate."
		return $OCF_ERR_GENERIC
	fi
}

#
# MAIN
#

case $__OCF_ACTION in
start)
	lvm_validate
	lvm_start
	;;
stop)
	read_parameters
	lvm_stop
	;;
monitor)
	lvm_validate
	lvm_status
	;;
validate-all)
	lvm_validate
	;;
meta-data)
	meta_data
	;;
usage|help)
	usage
	;;
*)
	usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/LinuxSCSI b/heartbeat/LinuxSCSI index 6cf69cb2f..015251eac 100755 --- a/heartbeat/LinuxSCSI +++ b/heartbeat/LinuxSCSI @@ -1,314 +1,322 @@ #!/bin/sh # # # LinuxSCSI # # Description: Enables/Disables SCSI devices to protect them from being # used by mistake # # # Author: Alan Robertson # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 IBM # # CAVEATS: See the usage message for some important warnings # # usage: ./LinuxSCSI (start|stop|status|monitor|meta-data|validate-all|methods) # # OCF parameters are as below: # OCF_RESKEY_scsi # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 LinuxSCSI:0:0:11 # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_scsi_default="" +OCF_RESKEY_ignore_deprecation_default="false" + +: ${OCF_RESKEY_scsi=${OCF_RESKEY_scsi_default}} +: ${OCF_RESKEY_ignore_deprecation=${OCF_RESKEY_ignore_deprecation_default}} + ####################################################################### zeropat="[ 0]0" PROCSCSI=/proc/scsi/scsi usage() { cat < 1.0 Deprecation warning: This agent makes use of Linux SCSI hot-plug functionality which has been superseded by SCSI reservations. It is deprecated and may be removed from a future release. See the scsi2reservation and sfex agents for alternatives. -- This is a resource agent for LinuxSCSI. It manages the availability of a SCSI device from the point of view of the linux kernel. It make Linux believe the device has gone away, and it can make it come back again. Enables and disables SCSI devices through the kernel SCSI hot-plug subsystem (deprecated) The SCSI instance to be managed. SCSI instance - + If set to true, suppresses the deprecation warning for this agent. 
Suppress deprecation warning - + EOF } scsi_methods() { cat <>$PROCSCSI echo "scsi add-single-device $host $channel $target $lun" >>$PROCSCSI if scsi_status "$1" then return $OCF_SUCCESS else ocf_log err "SCSI device $1 not active!" return $OCF_ERR_GENERIC fi } # # stop: Disable the given SCSI device in the kernel # scsi_stop() { parseinst "$1" # [ $target = error ] && exit 1 echo "scsi remove-single-device $host $channel $target $lun" >>$PROCSCSI if scsi_status "$1" then ocf_log err "SCSI device $1 still active!" return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi } # # status: is the given device now available? # scsi_status() { parseinst "$1" # [ $target = error ] && exit 1 [ $channel -eq 0 ] && channel=$zeropat [ $target -eq 0 ] && target=$zeropat [ $lun -eq 0 ] && lun=$zeropat greppat="Host: *scsi$host *Channel: *$channel *Id: *$target *Lun: *$lun" grep -i "$greppat" $PROCSCSI >/dev/null if [ $? -eq 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } # # validate_all: Check the OCF instance parameters # scsi_validate_all() { parseinst $instance return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then ocf_log err "Parameter number error." usage exit $OCF_ERR_GENERIC fi #if # [ -z "$OCF_RESKEY_scsi" ] && [ "X$1" = "Xmethods" ] #then # scsi_methods # exit #? #fi case $1 in methods) scsi_methods exit $OCF_SUCCESS ;; meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac # Be obnoxious, log deprecation warning on every invocation (unless # suppressed by resource configuration). ocf_deprecated if [ -z "$OCF_RESKEY_scsi" ] then ocf_log err "You have to set a valid scsi id at least!" 
# usage exit $OCF_ERR_GENERIC fi instance=$OCF_RESKEY_scsi case $1 in start) scsi_start $instance ;; stop) scsi_stop $instance ;; status|monitor) if scsi_status $instance then ocf_log info "SCSI device $instance is running" return $OCF_SUCCESS else ocf_log info "SCSI device $instance is stopped" exit $OCF_NOT_RUNNING fi ;; validate-all) scsi_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/MailTo b/heartbeat/MailTo index e3a13a36d..2477be583 100755 --- a/heartbeat/MailTo +++ b/heartbeat/MailTo @@ -1,191 +1,199 @@ #!/bin/sh # # Resource script for MailTo # # Author: Alan Robertson # # Description: sends email to a sysadmin whenever a takeover occurs. # # Note: This command requires an argument, unlike normal init scripts. # # This can be given in the haresources file as: # # You can also give a mail subject line or even multiple addresses # MailTo::alanr@unix.sh::BigImportantWebServer # MailTo::alanr@unix.sh,spoppi@gmx.de::BigImportantWebServer # # This will then be put into the message subject and body. # # OCF parameters are as below: # OCF_RESKEY_email # OCF_RESKEY_subject # # License: GNU General Public License (GPL) # # Copyright: (C) 2005 International Business Machines ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_email_default="" +OCF_RESKEY_subject_default="Resource Group" + +: ${OCF_RESKEY_email=${OCF_RESKEY_email_default}} +: ${OCF_RESKEY_subject=${OCF_RESKEY_subject_default}} + ####################################################################### ARGS="$0 $*" us=`uname -n` usage() { echo "Usage: $0 {start|stop|status|monitor|meta-data|validate-all}" } meta_data() { cat < 1.0 This is a resource agent for MailTo. It sends email to a sysadmin whenever a takeover occurs. 
Notifies recipients by email in the event of resource takeover The email address of sysadmin. Email address - + The subject of the email. Subject - + END } MailProgram() { $MAILCMD -s "$1" "$email" < 1.00.2 Manages starting, stopping and monitoring of RAID devices which are preconfigured in /etc/conf.d/HB-ManageRAID. Manages RAID devices Name (case sensitive) of RAID to manage. (preconfigured in /etc/conf.d/HB-ManageRAID) RAID name - + END } # # start_raid() # start_raid() { declare -i retcode status_raid retcode=$? if [[ $retcode == $OCF_SUCCESS ]]; then return $OCF_SUCCESS elif [[ $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi for ldev in "${RAID_LOCALDISKS[@]}"; do if [[ ! -b $ldev ]]; then ocf_log err "$ldev is not a (local) block device." return $OCF_ERR_ARGS fi done $MDADM -A $RAID_DEVPATH -a yes -u ${!RAID_UUID} "${RAID_LOCALDISKS[@]}" &> /dev/null if [[ $? != 0 ]]; then ocf_log err "starting ${!RAID_DEV} with ${RAID_LOCALDISKS[*]} failed." return $OCF_ERR_GENERIC fi $MOUNT -o ${!RAID_MOUNTOPTIONS} $RAID_DEVPATH ${!RAID_MOUNTPOINT} &> /dev/null if [[ $? != 0 ]]; then $MDADM -S $RAID_DEVPATH &> /dev/null if [[ $? != 0 ]]; then ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed as well as stopping the RAID itself." else ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed. RAID stopped again." fi return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # stop_raid() # stop_raid() { status_raid if [[ $? == $OCF_NOT_RUNNING ]]; then return $OCF_SUCCESS fi $UMOUNT ${!RAID_MOUNTPOINT} &> /dev/null if [[ $? != 0 ]]; then ocf_log err "unmounting ${!RAID_MOUNTPOINT} failed. not stopping ${!RAID_DEV}!" return $OCF_ERR_GENERIC fi $MDADM -S $RAID_DEVPATH &> /dev/null if [[ $? != 0 ]]; then ocf_log err "stopping RAID ${!RAID_DEV} failed." 
return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # status_raid() # status_raid() { declare -i retcode_raidcheck declare -i retcode_uuidcheck $CAT $RAID_MDSTAT | $GREP -e "${!RAID_DEV}[\ ]*:[\ ]*active" &> /dev/null if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING fi if [ ! -e $RAID_DEVPATH ]; then return $OCF_ERR_GENERIC fi $MDADM --detail -t $RAID_DEVPATH &> /dev/null retcode_raidcheck=$? $MDADM --detail -t $RAID_DEVPATH | $GREP -qEe "^[\ ]*UUID[\ ]*:[\ ]*${!RAID_UUID}" &> /dev/null retcode_uuidcheck=$? if [ $retcode_raidcheck -gt 3 ]; then ocf_log err "mdadm returned error code $retcode_raidcheck while checking ${!RAID_DEV}." return $OCF_ERR_GENERIC elif [ $retcode_raidcheck -eq 3 ]; then ocf_log err "${!RAID_DEV} has failed." return $OCF_ERR_GENERIC elif [ $retcode_raidcheck -lt 3 ] && [ $retcode_uuidcheck != 0 ]; then ocf_log err "active RAID ${!RAID_DEV} and configured UUID (!$RAID_UUID) do not match." return $OCF_ERR_GENERIC fi $MOUNT | $GREP -e "$RAID_DEVPATH on ${!RAID_MOUNTPOINT}" &> /dev/null if [[ $? != 0 ]]; then ocf_log err "${!RAID_DEV} seems to be no longer mounted at ${!RAID_MOUNTPOINT}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # validate_all_raid() # validate_all_raid() { # # since all parameters are checked every time ManageRAID is # invoked, there not much more to check... # # status_raid should cover the rest. # declare -i retcode status_ve retcode=$? if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac ## required configuration # [ -f /etc/conf.d/HB-ManageRAID ] || { ocf_log err "/etc/conf.d/HB-ManageRAID missing" exit $OCF_ERR_INSTALLED } . 
/etc/conf.d/HB-ManageRAID # ## # # check relevant environment variables for sanity and security # declare -i retcode_test declare -i retcode_grep $TEST -z "$OCF_RESKEY_raidname" retcode_test=$? echo "$OCF_RESKEY_raidname" | $GREP -qEe "^[[:alnum:]\_]+$" retcode_grep=$? if [[ $retcode_test != 1 || $retcode_grep != 0 ]]; then ocf_log err "OCF_RESKEY_raidname not set or invalid." exit $OCF_ERR_ARGS fi RAID_UUID=${OCF_RESKEY_raidname}_UUID echo ${!RAID_UUID} | $GREP -qEe "^[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_UUID is invalid." exit $OCF_ERR_ARGS fi RAID_DEV=${OCF_RESKEY_raidname}_DEV echo ${!RAID_DEV} | $GREP -qEe "^md[0-9]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_DEV is invalid." exit $OCF_ERR_ARGS fi RAID_DEVPATH=/dev/${!RAID_DEV/md/md\/} RAID_MOUNTPOINT=${OCF_RESKEY_raidname}_MOUNTPOINT echo ${!RAID_MOUNTPOINT} | $GREP -qEe "^[[:alnum:]\/\_\"\ ]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_MOUNTPOINT is invalid." exit $OCF_ERR_ARGS fi RAID_MOUNTOPTIONS=${OCF_RESKEY_raidname}_MOUNTOPTIONS echo ${!RAID_MOUNTOPTIONS} | $GREP -qEe "^[[:alpha:]\,]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_MOUNTOPTIONS is invalid." exit $OCF_ERR_ARGS fi RAID_LOCALDISKS=${OCF_RESKEY_raidname}_LOCALDISKS[@] RAID_LOCALDISKS=( "${!RAID_LOCALDISKS}" ) if [ ${#RAID_LOCALDISKS[@]} -lt 1 ]; then ocf_log err "you have to specify at least one local disk." exit $OCF_ERR_ARGS fi # # check that all relevant utilities are available # check_binary $MDADM check_binary $MOUNT check_binary $UMOUNT check_binary $GREP check_binary $CAT check_binary $TEST check_binary echo # # check that all relevant devices are available # check_file $RAID_MDSTAT # # finally... 
let's see what we are ordered to do :-) # case "$1" in start) start_raid ;; stop) stop_raid ;; status|monitor) status_raid ;; validate-all) validate_all_raid ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/ManageVE.in b/heartbeat/ManageVE.in index aef69f4a2..b8d251d39 100644 --- a/heartbeat/ManageVE.in +++ b/heartbeat/ManageVE.in @@ -1,313 +1,320 @@ #!@BASH_SHELL@ # # ManageVE OCF RA. Manages OpenVZ Virtual Environments (VEs) # # (c) 2006-2010 Matthias Dahl, Florian Haas, # and Linux-HA contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # This OCF compliant resource agent manages OpenVZ VEs and thus requires # a proper OpenVZ installation including a recent vzctl util. # # rev. 1.00.4 # # Changelog # # 21/Oct/10 1.00.4 implement migrate_from/migrate_to # 12/Sep/06 1.00.3 more cleanup # 12/Sep/06 1.00.2 fixed some logic in start_ve # general cleanup all over the place # 11/Sep/06 1.00.1 fixed some typos # 07/Sep/06 1.00.0 it's alive... muahaha... ALIVE... 
:-) # ### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_veid_default="" + +: ${OCF_RESKEY_veid=${OCF_RESKEY_veid_default}} + ### # required utilities VZCTL=/usr/sbin/vzctl # # usage() # usage() { cat <<-EOF usage: $0 {start|stop|status|monitor|migrate_from|migrate_to|validate-all|usage|meta-data} EOF } # # meta_data() # meta_data() { cat < 1.00.4 This OCF compliant resource agent manages OpenVZ VEs and thus requires a proper OpenVZ installation including a recent vzctl util. Manages an OpenVZ Virtual Environment (VE) OpenVZ ID of virtual environment (see output of vzlist -a for all assigned IDs) OpenVZ ID of VE - + END } # # start_ve() # # Starts a VE, or simply logs a message if the VE is already running. # start_ve() { if status_ve; then ocf_log info "VE $VEID already running." return $OCF_SUCCESS fi ocf_run $VZCTL start $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # stop_ve() # # ATTENTION: The following code relies on vzctl's exit codes, especially: # # 0 : success # # In case any of those exit codes change, this function will need fixing. # stop_ve() { status_ve if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log info "VE $VEID already stopped." return $OCF_SUCCESS fi ocf_run $VZCTL stop $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # migrate_to_ve() # # In the process of a resource migration, checkpoints the VE. For this # to work, vzctl must obviously create the dump file in a place which # the migration target has access to (an NFS mount, a DRBD device, # etc.). # migrate_to_ve() { if ! status_ve; then ocf_log err "VE $VEID is not running, aborting" exit $OCF_ERR_GENERIC fi ocf_run $VZCTL chkpnt $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # migrate_to_ve() # # In the process of a resource migration, restores the VE. 
# For this to
# work, vzctl must obviously have access to the dump file which was
# created on the migration source (on an NFS mount, a DRBD device,
# etc.).
#
migrate_from_ve()
{
  ocf_run $VZCTL restore $VEID || exit $OCF_ERR_GENERIC

  return $OCF_SUCCESS
}

#
# status_ve()
#
# ATTENTION: The following code relies on vzctl's status output. The third
# column tells whether the VE exists, the fifth column is interpreted as the
# VE status (either running or down).
#
# In case the output format should change, this function will need fixing.
#
# Returns OCF_SUCCESS (running) or OCF_NOT_RUNNING (down / does not exist).
# EXITS with OCF_ERR_GENERIC if vzctl itself fails or its output cannot be
# interpreted.
#
status_ve()
{
  declare -i retcode
  local vzoutput

  # Run vzctl exactly once and take the exit status from vzctl itself;
  # taking $? after a "vzctl | awk" pipeline would only report awk's status.
  vzoutput=`$VZCTL status $VEID 2>/dev/null`
  retcode=$?

  if [[ $retcode != 0 ]]; then
    # log error only if expected to find running
    if [ "$__OCF_ACTION" = "monitor" ] && ! ocf_is_probe; then
      ocf_log err "vzctl status $VEID returned: $retcode"
    fi
    exit $OCF_ERR_GENERIC
  fi

  veexists=`echo "$vzoutput" | $AWK '{print $3}'`
  vestatus=`echo "$vzoutput" | $AWK '{print $5}'`

  if [[ $veexists != "exist" ]]; then
    ocf_log err "vzctl status $VEID returned: $VEID does not exist."
    return $OCF_NOT_RUNNING
  fi

  case "$vestatus" in
    running)
        return $OCF_SUCCESS
        ;;
    down)
        return $OCF_NOT_RUNNING
        ;;
    *)
        ocf_log err "vzctl status $VEID, wrong output format. (5th column: $vestatus)"
        exit $OCF_ERR_GENERIC
        ;;
  esac
}

#
# validate_all_ve()
#
# ATTENTION: The following code relies on vzctl's status output. The fifth
# column is interpreted as the VE status (either running or down).
#
# In case the output format should change, this function will need fixing.
#
validate_all_ve()
{
  declare -i retcode

  # VEID should be a valid VE
  # (the command substitution runs status_ve in a subshell, so an "exit"
  # inside status_ve ends only that subshell and its code lands in $?)
  `status_ve`
  retcode=$?

  if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then
    return $retcode
  fi

  return $OCF_SUCCESS
}

if [[ $# != 1 ]]; then
  usage
  exit $OCF_ERR_ARGS
fi

# these two actions do not need a VE id
case "$1" in
  meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
  usage)
	usage
	exit $OCF_SUCCESS
	;;
  *)
	;;
esac

#
# check relevant environment variables for sanity and security
#

# empty string?
`test -z "$OCF_RESKEY_veid"`

declare -i veidtest1=$?

# really a number? 
`echo "$OCF_RESKEY_veid" | egrep -q '^[[:digit:]]+$'` if [[ $veidtest1 != 1 || $? != 0 ]]; then ocf_log err "OCF_RESKEY_veid not set or not a number." exit $OCF_ERR_ARGS fi declare -i VEID=$OCF_RESKEY_veid # # check that all relevant utilities are available # check_binary $VZCTL check_binary $AWK # # finally... let's see what we are ordered to do :-) # case "$1" in start) start_ve ;; stop) stop_ve ;; status|monitor) status_ve ;; migrate_to) migrate_to_ve ;; migrate_from) migrate_from_ve ;; validate-all) validate_all_ve ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/NodeUtilization b/heartbeat/NodeUtilization index 9adadb62e..544707bfa 100755 --- a/heartbeat/NodeUtilization +++ b/heartbeat/NodeUtilization @@ -1,226 +1,237 @@ #!/bin/sh # # # NodeUtilization OCF Resource Agent # # Copyright (c) 2011 SUSE LINUX, John Shi # Copyright (c) 2016 SUSE LINUX, Kristoffer Gronlund # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_pidfile_default="$HA_VARRUN/NodeUtilization-${OCF_RESOURCE_INSTANCE}" +OCF_RESKEY_dynamic_default="true" +OCF_RESKEY_utilization_cpu_default="true" +OCF_RESKEY_utilization_cpu_reservation_default="1" +OCF_RESKEY_utilization_host_memory_default="true" +OCF_RESKEY_utilization_host_memory_reservation_default="512" +OCF_RESKEY_utilization_hv_memory_default="true" +OCF_RESKEY_utilization_hv_memory_reservation_default="512" + +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_dynamic=${OCF_RESKEY_dynamic_default}} +: ${OCF_RESKEY_utilization_cpu=${OCF_RESKEY_utilization_cpu_default}} +: ${OCF_RESKEY_utilization_cpu_reservation=${OCF_RESKEY_utilization_cpu_reservation_default}} +: ${OCF_RESKEY_utilization_host_memory=${OCF_RESKEY_utilization_host_memory_default}} +: ${OCF_RESKEY_utilization_host_memory_reservation=${OCF_RESKEY_utilization_host_memory_reservation_default}} +: ${OCF_RESKEY_utilization_hv_memory=${OCF_RESKEY_utilization_hv_memory_default}} +: ${OCF_RESKEY_utilization_hv_memory_reservation=${OCF_RESKEY_utilization_hv_memory_reservation_default}} + ####################################################################### NodeUtilization_meta_data() { cat < 1.0 The Node Utilization agent detects system parameters like available CPU, host memory and hypervisor memory availability, and adds them into the CIB for each node using crm_attribute. Run the agent as a clone resource to have it populate these parameters on each node. Note: Setting hv_memory only works with Xen at the moment, using the xl or xm command line tools. Node Utilization If set, parameters will be updated if there are differences between the HA parameters and the system values when running the monitor action. 
If not set, the parameters will be set once when the resource instance starts. Dynamically update parameters in monitor - + Enable setting node CPU utilization limit. Set node CPU utilization limit. - + Subtract this value when setting the CPU utilization parameter. CPU reservation. - + Enable setting available host memory. Set available host memory. - + Subtract this value when setting host memory utilization, in MB. Host memory reservation, in MB. - + Enable setting available hypervisor memory. Set available hypervisor memory. - + Subtract this value when setting hypervisor memory utilization, in MB. Hypervisor memory reservation, in MB. - + END } Host_Total_Memory() { local xentool xentool=$(which xl 2> /dev/null || which xm 2> /dev/null) if [ -x $xentool ]; then $xentool info | awk '/total_memory/{printf("%d\n",$3);exit(0)}' else ocf_log warn "Can only set hv_memory for Xen hypervisor" echo "0" fi } set_utilization() { host_name="$(ocf_local_nodename)" if ocf_is_true "$OCF_RESKEY_utilization_cpu"; then sys_cpu=$(( $(grep -c processor /proc/cpuinfo) - $OCF_RESKEY_utilization_cpu_reservation )) uti_cpu=$(crm_attribute --quiet -t nodes --node "$host_name" -z -n cpu 2>/dev/null) if [ "$sys_cpu" != "$uti_cpu" ]; then if ! crm_attribute -t nodes --node "$host_name" -z -n cpu -v $sys_cpu; then ocf_log err "Failed to set the cpu utilization attribute for $host_name using crm_attribute." return 1 fi fi fi if ocf_is_true "$OCF_RESKEY_utilization_host_memory"; then sys_mem=$(( $(awk '/MemTotal/{printf("%d\n",$2/1024);exit(0)}' /proc/meminfo) - $OCF_RESKEY_utilization_host_memory_reservation )) uti_mem=$(crm_attribute --quiet -t nodes --node "$host_name" -z -n host_memory 2>/dev/null) if [ "$sys_mem" != "$uti_mem" ]; then if ! crm_attribute -t nodes --node "$host_name" -z -n host_memory -v $sys_mem; then ocf_log err "Failed to set the host_memory utilization attribute for $host_name using crm_attribute." 
return 1 fi fi fi if ocf_is_true "$OCF_RESKEY_utilization_hv_memory"; then hv_mem=$(( $(Host_Total_Memory) - OCF_RESKEY_utilization_hv_memory_reservation )) uti_mem=$(crm_attribute --quiet -t nodes --node "$host_name" -z -n hv_memory 2>/dev/null) [ $hv_mem -lt 0 ] && hv_mem=0 if [ "$hv_mem" != "$uti_mem" ]; then if ! crm_attribute -t nodes --node "$host_name" -z -n hv_memory -v $hv_mem; then ocf_log err "Failed to set the hv_memory utilization attribute for $host_name using crm_attribute." return 1 fi fi fi } NodeUtilization_usage() { cat < : Pure-FTPd script # Author: Raoul Bhatia : Minor Cleanup. Added Debian GNU/Linux Support # License: GNU General Public License (GPL) # # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg starts Pure-FTPd. # # The "stop" arg stops it. # # OCF parameters: # OCF_RESKEY_script # OCF_RESKEY_conffile # OCF_RESKEY_daemon_type # OCF_RESKEY_pidfile # ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -: ${OCF_RESKEY_script="/sbin/pure-config.pl"} +# Parameter defaults + +OCF_RESKEY_script_default="/sbin/pure-config.pl" +OCF_RESKEY_conffile_default="/etc/pure-ftpd/pure-ftpd.conf" +OCF_RESKEY_daemon_type_default="" +OCF_RESKEY_pidfile_default="${HA_RSCTMP}/pure-ftpd-${OCF_RESOURCE_INSTANCE}.pid" + +: ${OCF_RESKEY_script=${OCF_RESKEY_script_default}} +: ${OCF_RESKEY_conffile=${OCF_RESKEY_conffile_default}} +: ${OCF_RESKEY_daemon_type=${OCF_RESKEY_daemon_type_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} + script_basename=`basename $OCF_RESKEY_script` -: ${OCF_RESKEY_conffile="/etc/pure-ftpd/pure-ftpd.conf"} -: ${OCF_RESKEY_daemon_type=""} -: ${OCF_RESKEY_pidfile="${HA_RSCTMP}/pure-ftpd-${OCF_RESOURCE_INSTANCE}.pid"} + USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 This script manages Pure-FTPd in an Active-Passive setup Manages a Pure-FTPd FTP server instance The full path to the Pure-FTPd startup script. For example, "/sbin/pure-config.pl" Script name with full path - + The Pure-FTPd configuration file name with full path. For example, "/etc/pure-ftpd/pure-ftpd.conf" Configuration file name with full path - + The Pure-FTPd daemon to be called by pure-ftpd-wrapper. Valid options are "" for pure-ftpd, "mysql" for pure-ftpd-mysql, "postgresql" for pure-ftpd-postgresql and "ldap" for pure-ftpd-ldap Configuration file name with full path - + PID file PID file - + END exit $OCF_SUCCESS } isRunning() { kill -s 0 "$1" > /dev/null } PureFTPd_status() { if [ -f $OCF_RESKEY_pidfile ] then # Pure-FTPd is probably running PID=`head -n 1 $OCF_RESKEY_pidfile` if [ ! -z $PID ] ; then isRunning "$PID" && [ `ps -p $PID | grep pure-ftpd | wc -l` -eq 1 ] return $? 
fi
fi

# Pure-FTPd is not running
false
}

#
# PureFTPd_start: start Pure-FTPd unless it is already running.
#
# Never returns: exits with OCF_SUCCESS when the daemon is (already) running
# or the start script succeeded, OCF_ERR_PERM when not root, and
# OCF_ERR_GENERIC when the script/config file is missing or the start
# script reports a failure.
#
PureFTPd_start()
{
    local pid_dir
    local rc

    #
    # make a few checks and start Pure-FTPd
    #
    if ocf_is_root ; then : ; else
        ocf_log err "You must be root."
        exit $OCF_ERR_PERM
    fi

    # if Pure-FTPd is running return success
    if PureFTPd_status ; then
        exit $OCF_SUCCESS
    fi

    # check that the Pure-FTPd script exists and can be executed
    if [ ! -x "$OCF_RESKEY_script" ]; then
        ocf_log err "Pure-FTPd script '$OCF_RESKEY_script' does not exist or cannot be executed"
        exit $OCF_ERR_GENERIC
    fi

    # make sure that the pid directory exists
    pid_dir=`dirname $OCF_RESKEY_pidfile`
    if [ ! -d $pid_dir ] ; then
        ocf_log info "Creating PID directory '$pid_dir'."
        mkdir -p $pid_dir
    fi

    # test for pure-ftpd-wrapper (e.g. Debian GNU/Linux Systems)
    if [ "$script_basename" = "pure-ftpd-wrapper" ]; then
        # pure-ftpd-wrapper expects STANDALONE_OR_INETD to be set to standalone
        STANDALONE_OR_INETD=standalone $OCF_RESKEY_script $OCF_RESKEY_daemon_type
    else
        # check that the Pure-FTPd config file exist
        if [ ! -f "$OCF_RESKEY_conffile" ]; then
            ocf_log err "Pure_FTPd config file '$OCF_RESKEY_conffile' does not exist"
            exit $OCF_ERR_GENERIC
        fi
        $OCF_RESKEY_script $OCF_RESKEY_conffile -g $OCF_RESKEY_pidfile
    fi
    # Save the start script's status right away: any later command,
    # including the test below, overwrites $?.
    rc=$?

    if [ $rc -ne 0 ]; then
        ocf_log err "Pure-FTPd returned error $rc"
        exit $OCF_ERR_GENERIC
    fi

    exit $OCF_SUCCESS
}

#
# PureFTPd_stop: send TERM to the PID from the pid file if Pure-FTPd is
# running. Never returns: always exits with OCF_SUCCESS.
#
PureFTPd_stop()
{
    if PureFTPd_status ; then
        PID=`head -n 1 $OCF_RESKEY_pidfile`
        if [ -n "$PID" ] ; then
            kill $PID
        fi
    fi

    exit $OCF_SUCCESS
}

#
# PureFTPd_monitor: map PureFTPd_status onto OCF return codes.
#
PureFTPd_monitor()
{
    if PureFTPd_status ; then
        return $OCF_SUCCESS
    fi

    return $OCF_NOT_RUNNING
}

#
# PureFTPd_validate_all: nothing is validated; always succeeds.
#
PureFTPd_validate_all()
{
    return $OCF_SUCCESS
}

#
# Main
#

if [ $# -ne 1 ]
then
    usage
    exit $OCF_ERR_ARGS
fi

case $1 in
    start)          PureFTPd_start
                    ;;

    stop)           PureFTPd_stop
                    ;;

    status)         if PureFTPd_status
                    then
                        ocf_log info "Pure-FTPd is running"
                        exit $OCF_SUCCESS
                    else
                        ocf_log info "Pure-FTPd is stopped"
                        exit $OCF_NOT_RUNNING
                    fi
                    ;;

    monitor)        PureFTPd_monitor
                    exit $?
                    ;;

    validate-all)   PureFTPd_validate_all
                    exit $?
;; meta-data) meta_data ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/Raid1 b/heartbeat/Raid1 index 0f960b2b4..d719df957 100755 --- a/heartbeat/Raid1 +++ b/heartbeat/Raid1 @@ -1,570 +1,586 @@ #!/bin/sh # # # License: GNU General Public License (GPL) # Support: users@clusterlabs.org # # Raid1 # Description: Manages a Linux software RAID device on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # RAID patches: http://people.redhat.com/mingo/raid-patches/ # Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3 # Sympathetic Ear: mailto:linux-raid@vger.kernel.org # # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} # # # EXAMPLE config file /etc/raidtab.md0 # This file must exist on both machines! # # raiddev /dev/md0 # raid-level 1 # nr-raid-disks 2 # chunk-size 64k # persistent-superblock 1 # #nr-spare-disks 0 # device /dev/sda1 # raid-disk 0 # device /dev/sdb1 # raid-disk 1 # # EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf) # # DEVICE /dev/sdb1 /dev/sdc1 # ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799 ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_raidconf_default="" +OCF_RESKEY_raiddev_default="" +OCF_RESKEY_homehost_default="" +OCF_RESKEY_force_stop_default="true" +OCF_RESKEY_udev_default="true" +OCF_RESKEY_force_clones_default="false" + +: ${OCF_RESKEY_raidconf=${OCF_RESKEY_raidconf_default}} +: ${OCF_RESKEY_raiddev=${OCF_RESKEY_raiddev_default}} +: ${OCF_RESKEY_homehost=${OCF_RESKEY_homehost_default}} +: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} +: ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}} +: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}} + ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } meta_data() { cat < 1.0 This resource agent manages Linux software RAID (MD) devices on a shared storage medium. It uses mdadm(8) to start, stop, and monitor the MD devices. Raidtools are supported, but deprecated. See https://raid.wiki.kernel.org/index.php/Linux_Raid for more information. Manages Linux software RAID (MD) devices on shared storage The RAID configuration file, e.g. /etc/mdadm.conf. RAID config file - + One or more block devices to use, space separated. Alternatively, set to "auto" to manage all devices specified in raidconf. block device - + The value for the homehost directive; this is an mdadm feature to protect RAIDs against being activated by accident. It is recommended to create RAIDs managed by the cluster with "homehost" set to a special value, so they are not accidentally auto-assembled by nodes not supposed to own them. Homehost for mdadm - + If processes or kernel threads are using the array, it cannot be stopped. We will try to stop processes, first by sending TERM and then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL. The lsof(8) program is required to get the list of array users. Of course, the kernel threads cannot be stopped this way. 
If the processes are critical for data integrity, then set this parameter to false. Note that in that case the stop operation will fail and the node will be fenced. force stop processes using the array - + Wait until udevd creates a device in the start operation. On a normally loaded host this should happen quickly, but you may be unlucky. If you are not using udev set this to "no". udev - + Activating the same md RAID array on multiple nodes at the same time will result in data corruption and thus is forbidden by default. A safe example could be an array that is only named identically across all nodes, but is in fact distinct. Only set this to "true" if you know what you are doing! force ability to run as a clone - + END } udev_settle() { if ocf_is_true $WAIT_FOR_UDEV; then udevadm settle $* fi } list_conf_arrays() { test -f $RAIDCONF || { ocf_exit_reason "$RAIDCONF gone missing!" exit $OCF_ERR_GENERIC } grep ^ARRAY $RAIDCONF | awk '{print $2}' } forall() { local func=$1 local checkall=$2 local mddev rc=0 for mddev in $RAIDDEVS; do $func $mddev rc=$(($rc | $?)) [ "$checkall" = all ] && continue [ $rc -ne 0 ] && return $rc done return $rc } are_arrays_stopped() { local rc mddev for mddev in $RAIDDEVS; do raid1_monitor_one $mddev rc=$? [ $rc -ne $OCF_NOT_RUNNING ] && break done test $rc -eq $OCF_NOT_RUNNING } md_assemble() { local mddev=$1 $MDADM --assemble $mddev --config=$RAIDCONF $MDADM_HOMEHOST udev_settle --exit-if-exists=$mddev } # # START: Start up the RAID device # raid1_start() { local rc raid1_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then # md already online, nothing to do. return $OCF_SUCCESS fi if [ $rc -ne $OCF_NOT_RUNNING ]; then # If the array is in a broken state, this agent doesn't # know how to repair that. 
ocf_exit_reason "$RAIDDEVS in a broken state; cannot start (rc=$rc)" return $OCF_ERR_GENERIC fi if [ $HAVE_RAIDTOOLS = "true" ]; then # Run raidstart to start up the RAID array $RAIDSTART --configfile $RAIDCONF $MDDEV else forall md_assemble all fi raid1_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS else ocf_exit_reason "Couldn't start RAID for $RAIDDEVS" return $OCF_ERR_GENERIC fi } # # STOP: stop the RAID device # mark_readonly() { local mddev=$1 local rc ocf_log info "Attempting to mark array $mddev readonly" $MDADM --readonly $mddev --config=$RAIDCONF rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to set $mddev readonly (rc=$rc)" fi return $rc } mknod_raid1_stop() { # first create a block device file, then try to stop the # array local rc n tmp_block_file n=`echo $1 | sed 's/[^0-9]*//'` if ! ocf_is_decimal "$n"; then ocf_log warn "could not get the minor device number from $1" return 1 fi tmp_block_file="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-`basename $1`" rm -f $tmp_block_file ocf_log info "block device file $1 missing, creating one in order to stop the array" mknod $tmp_block_file b 9 $n $MDADM --stop $tmp_block_file --config=$RAIDCONF rc=$? rm -f $tmp_block_file return $rc } raid1_stop_one() { ocf_log info "Stopping array $1" if [ -b "$1" ]; then $MDADM --stop $1 --config=$RAIDCONF && return else # newer mdadm releases can stop arrays when given the # basename; try that first $MDADM --stop `basename $1` --config=$RAIDCONF && return # otherwise create a block device file mknod_raid1_stop $1 fi } get_users_pids() { local mddev=$1 local outp l ocf_log debug "running lsof to list $mddev users..." 
outp=`lsof $mddev | tail -n +2` echo "$outp" | awk '{print $2}' | sort -u echo "$outp" | while read l; do ocf_log warn "$l" done } stop_raid_users() { local pids pids=`forall get_users_pids all | sort -u` if [ -z "$pids" ]; then ocf_log warn "lsof reported no users holding arrays" return 2 else ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids fi } stop_arrays() { if [ $HAVE_RAIDTOOLS = "true" ]; then $RAIDSTOP --configfile $RAIDCONF $MDDEV else forall raid1_stop_one all fi } showusers() { local disk for disk; do if have_binary lsof; then ocf_log info "running lsof to list $disk users..." ocf_run -warn lsof $disk fi if [ -d /sys/block/$disk/holders ]; then ocf_log info "ls -l /sys/block/$disk/holders" ocf_run -warn ls -l /sys/block/$disk/holders fi done } raid1_stop() { local rc # See if the MD device is already cleanly stopped: if are_arrays_stopped; then return $OCF_SUCCESS fi # Turn off raid if ! stop_arrays; then if ocf_is_true $FORCESTOP; then if have_binary lsof; then stop_raid_users case $? in 2) false;; *) stop_arrays;; esac else ocf_log warn "install lsof(8) to list users holding the disk" false fi else false fi fi rc=$? if [ $rc -ne 0 ]; then ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)" showusers $RAIDDEVS if [ $HAVE_RAIDTOOLS != "true" ]; then forall mark_readonly all fi return $OCF_ERR_GENERIC fi if are_arrays_stopped; then return $OCF_SUCCESS fi ocf_exit_reason "RAID $RAIDDEVS still active after stop command!" 
return $OCF_ERR_GENERIC } # # monitor: a less noisy status # raid1_monitor_one() { local mddev=$1 local md= local rc local TRY_READD=0 local pbsize # check if the md device exists first # but not if we are in the stop operation # device existence is important only for the running arrays if [ "$__OCF_ACTION" != "stop" ]; then if [ -h "$mddev" ]; then md=$(ls $mddev -l | awk -F'/' '{print $NF}') elif [ -b "$mddev" ]; then md=$(echo $mddev | sed 's,/dev/,,') else ocf_log info "$mddev is not a block device" return $OCF_NOT_RUNNING fi fi if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then ocf_log info "$md not found in /proc/mdstat" return $OCF_NOT_RUNNING fi if [ $HAVE_RAIDTOOLS != "true" ]; then $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$? case $rc in 0) ;; 1) ocf_log warn "$mddev has at least one failed device." TRY_READD=1 ;; 2) ocf_exit_reason "$mddev has failed." return $OCF_ERR_GENERIC ;; 4) if [ "$__OCF_ACTION" = "stop" ] ; then # There may be a transient invalid device after # we stop MD due to uevent processing, the # original device is stopped though. return $OCF_NOT_RUNNING else ocf_exit_reason "mdadm failed on $mddev." return $OCF_ERR_GENERIC fi ;; *) ocf_exit_reason "mdadm returned an unknown result ($rc)." return $OCF_ERR_GENERIC ;; esac fi if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \ -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then ocf_log info "Attempting recovery sequence to re-add devices on $mddev:" $MDADM $mddev --fail detached $MDADM $mddev --remove failed $MDADM $mddev --re-add missing # TODO: At this stage, there's nothing to actually do # here. Either this worked or it did not. fi pbsize=`(blockdev --getpbsz $mddev || stat -c "%o" $mddev) 2>/dev/null` if [ -z "$pbsize" ]; then ocf_log warn "both blockdev and stat could not get the block size (will use 4k)" pbsize=4096 # try with 4k fi if ! 
dd if=$mddev count=1 bs=$pbsize of=/dev/null \ iflag=direct >/dev/null 2>&1 ; then ocf_exit_reason "$mddev: I/O error on read" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } raid1_monitor() { forall raid1_monitor_one } # # STATUS: is the raid device online or offline? # raid1_status() { # See if the MD device is online local rc raid1_monitor rc=$? if [ $rc -ne $OCF_SUCCESS ]; then echo "stopped" else echo "running" fi return $rc } raid1_validate_all() { return $OCF_SUCCESS } PROC_CLEANUP_TIME=3 if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac RAIDCONF="$OCF_RESKEY_raidconf" MDDEV="$OCF_RESKEY_raiddev" -FORCESTOP="${OCF_RESKEY_force_stop:-1}" -WAIT_FOR_UDEV="${OCF_RESKEY_udev:-1}" +FORCESTOP="${OCF_RESKEY_force_stop}" +WAIT_FOR_UDEV="${OCF_RESKEY_udev}" if [ -z "$RAIDCONF" ] ; then ocf_exit_reason "Please set OCF_RESKEY_raidconf!" exit $OCF_ERR_CONFIGURED fi if [ ! -r "$RAIDCONF" ] ; then ocf_exit_reason "Configuration file [$RAIDCONF] does not exist, or can not be opened!" exit $OCF_ERR_INSTALLED fi if [ -z "$MDDEV" ] ; then ocf_exit_reason "Please set OCF_RESKEY_raiddev to the Raid device you want to control!" exit $OCF_ERR_CONFIGURED fi if ocf_is_clone && ! ocf_is_true "$OCF_RESKEY_force_clones"; then ocf_exit_reason "md RAID arrays are NOT safe to run as a clone!" ocf_log err "Please read the comment on the force_clones parameter." exit $OCF_ERR_CONFIGURED fi if ocf_is_true $WAIT_FOR_UDEV && ! have_binary udevadm; then if [ "$__OCF_ACTION" = "start" ]; then ocf_log warn "either install udevadm or set udev to false" ocf_log info "setting udev to false!" fi WAIT_FOR_UDEV=0 fi if ! ocf_is_true $WAIT_FOR_UDEV; then export MDADM_NO_UDEV=1 fi if ocf_is_true $FORCESTOP && ! have_binary lsof; then ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..." 
fi HAVE_RAIDTOOLS=false if have_binary $MDADM >/dev/null 2>&1 ; then if [ -n "$OCF_RESKEY_homehost" ]; then MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}" else MDADM_HOMEHOST="" fi else check_binary $RAIDSTART HAVE_RAIDTOOLS=true fi if [ $HAVE_RAIDTOOLS = true ]; then if [ "$MDDEV" = "auto" ]; then ocf_exit_reason "autoconf supported only with mdadm!" exit $OCF_ERR_INSTALLED elif [ `echo $MDDEV|wc -w` -gt 1 ]; then ocf_exit_reason "multiple devices supported only with mdadm!" exit $OCF_ERR_INSTALLED fi fi if [ "$MDDEV" = "auto" ]; then RAIDDEVS=`list_conf_arrays` else RAIDDEVS="$MDDEV" fi # At this stage, # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, # otherwise we have raidtools (raidstart and raidstop) # Look for how we are called case "$1" in start) raid1_start ;; stop) raid1_stop ;; status) raid1_status ;; monitor) raid1_monitor ;; validate-all) raid1_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/Route b/heartbeat/Route index 2da58bce1..b4011e37d 100755 --- a/heartbeat/Route +++ b/heartbeat/Route @@ -1,336 +1,344 @@ #!/bin/sh # # Route OCF RA. Enables and disables network routes. # # (c) 2008-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. 
Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Default values +OCF_RESKEY_device_default="" +OCF_RESKEY_gateway_default="" +OCF_RESKEY_source_default="" +OCF_RESKEY_table_default="" OCF_RESKEY_family_default="detect" +: ${OCF_RESKEY_device=${OCF_RESKEY_device_default}} +: ${OCF_RESKEY_gateway=${OCF_RESKEY_gateway_default}} +: ${OCF_RESKEY_source=${OCF_RESKEY_source_default}} +: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}} : ${OCF_RESKEY_family=${OCF_RESKEY_family_default}} ####################################################################### meta_data() { cat < 1.0 Enables and disables network routes. Supports host and net routes, routes via a gateway address, and routes using specific source addresses. This resource agent is useful if a node's routing table needs to be manipulated based on node role assignment. Consider the following example use case: - One cluster node serves as an IPsec tunnel endpoint. - All other nodes use the IPsec tunnel to reach hosts in a specific remote network. Then, here is how you would implement this scheme making use of the Route resource agent: - Configure an ipsec LSB resource. - Configure a cloned Route OCF resource. - Create an order constraint to ensure that ipsec is started before Route. - Create a colocation constraint between the ipsec and Route resources, to make sure no instance of your cloned Route resource is started on the tunnel endpoint itself. Manages network routes The destination network (or host) to be configured for the route. 
Specify the netmask suffix in CIDR notation (e.g. "/24"). If no suffix is given, a host route will be created. Specify "0.0.0.0/0" or "default" if you want this resource to set the system default route. Destination network The outgoing network device to use for this route. Outgoing network device - + The gateway IP address to use for this route. Gateway IP address - + The source IP address to be configured for the route. Source IP address - + The routing table to be configured for the route. Routing table - + The address family to be used for the route ip4 IP version 4 ip6 IP version 6 detect Detect from 'destination' address. Address Family END } ####################################################################### create_route_spec() { # Creates a route specification for use by "ip route (add|del|show)" route_spec="to ${OCF_RESKEY_destination}" if [ -n "${OCF_RESKEY_device}" ]; then route_spec="${route_spec} dev ${OCF_RESKEY_device}" fi if [ -n "${OCF_RESKEY_gateway}" ]; then route_spec="${route_spec} via ${OCF_RESKEY_gateway}" fi if [ -n "${OCF_RESKEY_source}" ]; then route_spec="${route_spec} src ${OCF_RESKEY_source}" fi if [ -n "${OCF_RESKEY_table}" ]; then route_spec="${route_spec} table ${OCF_RESKEY_table}" fi echo "$route_spec" } route_usage() { cat </dev/null 2>&1; then ocf_exit_reason "Network device ${OCF_RESKEY_device} appears not to be available on this system." # OCF_ERR_ARGS prevents the resource from running anywhere at all, # maybe another node has the interface? # OCF_ERR_INSTALLED just prevents starting on this particular node. return $OCF_ERR_INSTALLED fi fi # The following tests must return $OCF_ERR_INSTALLED, but only if # the resource is actually running (i.e., not during probes) if ! ocf_is_probe; then # If a source address has been configured, is it available on # this system? if [ -n "${OCF_RESKEY_source}" ]; then if ! 
ip address show | grep -w ${OCF_RESKEY_source} >/dev/null 2>&1; then ocf_exit_reason "Source address ${OCF_RESKEY_source} appears not to be available on this system." # same reason as with _device: return $OCF_ERR_INSTALLED fi fi # If a gateway address has been configured, is it reachable? if [ -n "${OCF_RESKEY_gateway}" ]; then if ! ip route get ${OCF_RESKEY_gateway} >/dev/null 2>&1; then ocf_exit_reason "Gateway address ${OCF_RESKEY_gateway} is unreachable." # same reason as with _device: return $OCF_ERR_INSTALLED fi fi fi return $OCF_SUCCESS } # These two actions must always succeed case $__OCF_ACTION in meta-data) meta_data # OCF variables are not set when querying meta-data exit 0 ;; usage|help) route_usage exit $OCF_SUCCESS ;; esac # Don't do anything if the necessary utilities aren't present for binary in ip grep; do check_binary $binary done route_validate || exit $? case $OCF_RESKEY_family in ip4) addr_family="-4" ;; ip6) addr_family="-6" ;; detect) case $OCF_RESKEY_destination in *:*) addr_family="-6" ;; *.*) addr_family="-4" ;; *) ocf_exit_reason "Address family detection requires a numeric destination address." ;; esac ;; *) ocf_exit_reason "Address family '${OCF_RESKEY_family}' not recognized." ;; esac case $__OCF_ACTION in start) route_start;; stop) route_stop;; status|monitor) route_status;; reload) ocf_log info "Reloading..." route_start ;; validate-all) ;; *) route_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" exit $rc diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase index e4e84ecda..3486303f1 100755 --- a/heartbeat/SAPDatabase +++ b/heartbeat/SAPDatabase @@ -1,363 +1,401 @@ #!/bin/sh # # SAPDatabase # # Description: Manages any type of SAP supported database instance # as a High-Availability OCF compliant resource. 
# # Author: Alexander Krauth, October 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006, 2007, 2010, 2012 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_SID # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DBTYPE (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB) # OCF_RESKEY_DBINSTANCE (optional, Database instance name, if not equal to SID) # OCF_RESKEY_DBOSUSER (optional, the Linux user that owns the database processes on operating system level) # OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false) # OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor all database services) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # Deprecated parameters: # OCF_RESKEY_NETSERVICENAME # OCF_RESKEY_DBJ2EE_ONLY # OCF_RESKEY_JAVA_HOME # OCF_RESKEY_DIR_BOOTSTRAP # OCF_RESKEY_DIR_SECSTORE # OCF_RESKEY_DB_JARS # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_SID_default="" +OCF_RESKEY_DIR_EXECUTABLE_default="/usr/sap/hostctrl/exe" +OCF_RESKEY_DBTYPE_default="" +OCF_RESKEY_DBINSTANCE_default="" +OCF_RESKEY_DBOSUSER_default="" +OCF_RESKEY_NETSERVICENAME_default="" +OCF_RESKEY_DBJ2EE_ONLY_default="" +OCF_RESKEY_JAVA_HOME_default="" +OCF_RESKEY_STRICT_MONITORING_default="false" +OCF_RESKEY_AUTOMATIC_RECOVER_default="false" +OCF_RESKEY_MONITOR_SERVICES_default="" +OCF_RESKEY_DIR_BOOTSTRAP_default="" +OCF_RESKEY_DIR_SECSTORE_default="" +OCF_RESKEY_DB_JARS_default="" +OCF_RESKEY_PRE_START_USEREXIT_default="" +OCF_RESKEY_POST_START_USEREXIT_default="" +OCF_RESKEY_PRE_STOP_USEREXIT_default="" +OCF_RESKEY_POST_STOP_USEREXIT_default="" + +: ${OCF_RESKEY_SID=${OCF_RESKEY_SID_default}} +: ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} +: ${OCF_RESKEY_DBTYPE=${OCF_RESKEY_DBTYPE_default}} +: ${OCF_RESKEY_DBINSTANCE=${OCF_RESKEY_DBINSTANCE_default}} +: ${OCF_RESKEY_DBOSUSER=${OCF_RESKEY_DBOSUSER_default}} +: ${OCF_RESKEY_NETSERVICENAME=${OCF_RESKEY_NETSERVICENAME_default}} +: ${OCF_RESKEY_DBJ2EE_ONLY=${OCF_RESKEY_DBJ2EE_ONLY_default}} +: ${OCF_RESKEY_JAVA_HOME=${OCF_RESKEY_JAVA_HOME_default}} +: ${OCF_RESKEY_STRICT_MONITORING=${OCF_RESKEY_STRICT_MONITORING_default}} +: ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}} +: ${OCF_RESKEY_MONITOR_SERVICES=${OCF_RESKEY_MONITOR_SERVICES_default}} +: ${OCF_RESKEY_DIR_BOOTSTRAP=${OCF_RESKEY_DIR_BOOTSTRAP_default}} +: ${OCF_RESKEY_DIR_SECSTORE=${OCF_RESKEY_DIR_SECSTORE_default}} +: ${OCF_RESKEY_DB_JARS=${OCF_RESKEY_DB_JARS_default}} +: ${OCF_RESKEY_PRE_START_USEREXIT=${OCF_RESKEY_PRE_START_USEREXIT_default}} +: ${OCF_RESKEY_POST_START_USEREXIT=${OCF_RESKEY_POST_START_USEREXIT_default}} +: ${OCF_RESKEY_PRE_STOP_USEREXIT=${OCF_RESKEY_PRE_STOP_USEREXIT_default}} +: ${OCF_RESKEY_POST_STOP_USEREXIT=${OCF_RESKEY_POST_STOP_USEREXIT_default}} + 
####################################################################### SH=/bin/sh usage() { methods=`sapdatabase_methods` methods=`echo $methods | tr ' ' '|'` cat <<-EOF usage: $0 ($methods) $0 manages a SAP database of any type as an HA resource. Currently Oracle, MaxDB, DB/2 UDB, Sybase ASE and SAP HANA Database are supported. ABAP databases as well as JAVA only databases are supported. The 'start' operation starts the instance. The 'stop' operation stops the instance. The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!) The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports EOF } meta_data() { cat < 2.14 Resource script for SAP databases. It manages a SAP database of any type as an HA resource. The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB. The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure. The resource agent supports the following databases: - Oracle 10.2, 11.2 and 12 - DB/2 UDB for Windows and Unix 9.x - SAP-DB / MaxDB 7.x - Sybase ASE 15.7 - SAP HANA Database since 1.00 - with SAP note 1625203 (http://sdn.sap.com) In fact this resource agent does not run any database commands directly. It uses the SAP standard process SAPHostAgent to control the database. The SAPHostAgent must be installed on each cluster node locally. It will not work, if you try to run the SAPHostAgent also as a HA resource. Please follow SAP note 1031096 for the installation of SAPHostAgent. 
The required minimum version of SAPHostAgent is: Release: 7.20 Patch Number: 90 or compile time after: Dec 17 2011 To exemplify the usage, for a HANA database with SID "TST" and instance number "10", the resource configuration using crmsh syntax looks like: primitive rsc_SAPDatabase_TST_HDB10 ocf:heartbeat:SAPDatabase \\ params DBTYPE="HDB" SID="TST" \\ op start interval="0" timeout="3600" \\ op monitor interval="120" timeout="700" \\ op stop interval="0" timeout="600" Make sure to tune the operations timeout values accordingly with your chosen Database and available infrastructure. Note that the same configuration can be achieved using any other CLI tool for cluster configuration available, like pcs or cibadmin. Manages a SAP database instance as an HA resource. The unique database system identifier. e.g. P01 Database system ID - + The full qualified path where to find saphostexec and saphostctrl. -Usually you can leave this empty. Then the default: /usr/sap/hostctrl/exe is used. +Usually you can leave this empty. Then the default: ${OCF_RESKEY_DIR_EXECUTABLE_default} is used. path of saphostexec and saphostctrl - + The name of the database vendor you use. Set either: ADA, DB6, ORA, SYB, HDB database vendor - + Must be used for special database implementations, when database instance name is not equal to the SID (e.g. Oracle DataGuard) Database instance name, if not equal to SID - + The parameter can be set, if the database processes on operating system level are not executed with the default user of the used database type. Defaults: ADA=taken from /etc/opt/sdb, DB6=db2SID, ORA=oraSID and oracle, SYB=sybSID, HDB=SIDadm the Linux user that owns the database processes on operating system level - + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore - + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. 
deprecated - do not use anymore - + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore - + This controls how the resource agent monitors the database. If set to true, it will use 'saphostctrl -function GetDatabaseStatus' to test the database state. If set to false, only operating system processes are monitored. Activates application level monitoring - + If you set this to true, 'saphostctrl -function StartDatabase' will always be called with the '-force' option. Enable or disable automatic startup recovery - + Defines which services are monitored by the SAPDatabase resource agent. Service names must correspond with the output of the 'saphostctrl -function GetDatabaseStatus' command. The default MONITOR_SERVICES value is derived from the database type DBTYPE. For reference: - DBTYPE "ORA" sets MONITOR_SERVICES="Instance|Database|Listener"; - DBTYPE "HDB" sets MONITOR_SERVICES="hdbindexserver|hdbnameserver"; - DBTYPE "ADA" sets MONITOR_SERVICES="Database"; - DBTYPE "DB6" sets MONITOR_SERVICES="{SID}|{db2sid}"; - DBTYPE "SYB" sets MONITOR_SERVICES="Server". This parameter should be set ONLY if is needed to monitor different services than the ones listed above. Database services to monitor - + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore - + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore - + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore - + The full qualified path where to find a script or program which should be executed before this resource gets started. path to a pre-start script - + The full qualified path where to find a script or program which should be executed after this resource got started. 
path to a post-start script - + The full qualified path where to find a script or program which should be executed before this resource gets stopped. path to a pre-start script - + The full qualified path where to find a script or program which should be executed after this resource got stopped. path to a post-start script - + END } # # methods: What methods/operations do we support? # sapdatabase_methods() { cat <<-EOF start stop status monitor recover validate-all methods meta-data usage EOF } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. # Someone should write a resource agent for each additional process you need, if it # is required to monitor that process within the cluster manager. To enable # you to extent this resource agent without developing a new one, this user exit # was introduced. # sapuserexit() { NAME="$1" VALUE="$2" if [ -n "$VALUE" ] then if have_binary "$VALUE" then ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" "$VALUE" >/dev/null 2>&1 ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" else ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" fi fi return $OCF_SUCCESS } # # saphostctrl_installed # saphostctrl_installed() { - OCF_RESKEY_DIR_EXECUTABLE_default="/usr/sap/hostctrl/exe" - : ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} SAPHOSTCTRL="${OCF_RESKEY_DIR_EXECUTABLE}/saphostctrl" SAPHOSTEXEC="${OCF_RESKEY_DIR_EXECUTABLE}/saphostexec" SAPHOSTSRV="${OCF_RESKEY_DIR_EXECUTABLE}/sapstartsrv" SAPHOSTOSCOL="${OCF_RESKEY_DIR_EXECUTABLE}/saposcol" have_binary $SAPHOSTCTRL && have_binary $SAPHOSTEXEC } # # 'main' starts here... 
# if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; methods) sapdatabase_methods exit $?;; *);; esac if ! ocf_is_root then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi # mandatory parameter check if [ -z "$OCF_RESKEY_SID" ]; then ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!" exit $OCF_ERR_ARGS fi SID=`echo "$OCF_RESKEY_SID"` if [ -z "$OCF_RESKEY_DBTYPE" ]; then ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ADA,DB6,ORA,SYB,HDB)!" exit $OCF_ERR_ARGS fi DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]'` # source functions and initialize global variables if saphostctrl_installed; then . ${OCF_FUNCTIONS_DIR}/sapdb.sh else if [ -n "${OCF_RESKEY_DBOSUSER}" ]; then ocf_exit_reason "Usage of parameter OCF_RESKEY_DBOSUSER is not possible without having SAP Host-Agent installed" exit $OCF_ERR_ARGS fi . ${OCF_FUNCTIONS_DIR}/sapdb-nosha.sh fi sapdatabase_init # we always want to fall to the faster status method in case of a probe by the cluster ACTION=$1 if ocf_is_probe then ACTION=status fi # What kind of method was invoked? case "$ACTION" in start|stop|status|recover) sapdatabase_$ACTION exit $?;; monitor) sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING exit $?;; validate-all) sapdatabase_validate exit $?;; *) sapdatabase_methods exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance index bd20c1fc8..bff1a2606 100755 --- a/heartbeat/SAPInstance +++ b/heartbeat/SAPInstance @@ -1,980 +1,1014 @@ #!/bin/sh # # SAPInstance # # Description: Manages a single SAP Instance as a High-Availability # resource. One SAP Instance is defined by one # SAP Instance-Profile. start/stop handles all services # of the START-Profile, status and monitor care only # about essential services. 
# # Author: Alexander Krauth, June 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006-2008 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_InstanceName # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_WAITTIME (optional, to solve timing problems during J2EE-Addin start) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false) # OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor critical services only) # OCF_RESKEY_SHUTDOWN_METHOD (optional, defaults to NORMAL, KILL: terminate the SAP instance with OS commands - faster, at your own risk) # OCF_RESKEY_ERS_InstanceName (optional, InstanceName of the ERS instance in a Master/Slave configuration) # OCF_RESKEY_ERS_START_PROFILE (optional, START_PROFILE of the ERS instance in a Master/Slave configuration) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # OCF_RESKEY_IS_ERS (needed for ENQ/REPL NW 740) # # TODO: - Option to shutdown sapstartsrv for non-active instances -> that means: do probes only with OS tools (sapinstance_status) # - Option for better standalone enqueue server monitoring, using ensmon (test enque-deque) # - Option for cleanup abandoned enqueue replication tables # 
####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_InstanceName_default="" +OCF_RESKEY_DIR_EXECUTABLE_default="" +OCF_RESKEY_DIR_PROFILE_default="" +OCF_RESKEY_START_PROFILE_default="" +OCF_RESKEY_START_WAITTIME_default="3600" +OCF_RESKEY_AUTOMATIC_RECOVER_default="false" +OCF_RESKEY_MONITOR_SERVICES_default="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart|enq_server|enq_replicator" +OCF_RESKEY_SHUTDOWN_METHOD_default="normal" +OCF_RESKEY_ERS_InstanceName_default="" +OCF_RESKEY_ERS_START_PROFILE_default="" +OCF_RESKEY_PRE_START_USEREXIT_default="" +OCF_RESKEY_POST_START_USEREXIT_default="" +OCF_RESKEY_PRE_STOP_USEREXIT_default="" +OCF_RESKEY_POST_STOP_USEREXIT_default="" +OCF_RESKEY_IS_ERS_default="false" + +: ${OCF_RESKEY_InstanceName=${OCF_RESKEY_InstanceName_default}} +: ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} +: ${OCF_RESKEY_DIR_PROFILE=${OCF_RESKEY_DIR_PROFILE_default}} +: ${OCF_RESKEY_START_PROFILE=${OCF_RESKEY_START_PROFILE_default}} +: ${OCF_RESKEY_START_WAITTIME=${OCF_RESKEY_START_WAITTIME_default}} +: ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}} +: ${OCF_RESKEY_MONITOR_SERVICES=${OCF_RESKEY_MONITOR_SERVICES_default}} +: ${OCF_RESKEY_SHUTDOWN_METHOD=${OCF_RESKEY_SHUTDOWN_METHOD_default}} +: ${OCF_RESKEY_ERS_InstanceName=${OCF_RESKEY_ERS_InstanceName_default}} +: ${OCF_RESKEY_ERS_START_PROFILE=${OCF_RESKEY_ERS_START_PROFILE_default}} +: ${OCF_RESKEY_PRE_START_USEREXIT=${OCF_RESKEY_PRE_START_USEREXIT_default}} +: ${OCF_RESKEY_POST_START_USEREXIT=${OCF_RESKEY_POST_START_USEREXIT_default}} +: ${OCF_RESKEY_PRE_STOP_USEREXIT=${OCF_RESKEY_PRE_STOP_USEREXIT_default}} +: ${OCF_RESKEY_POST_STOP_USEREXIT=${OCF_RESKEY_POST_STOP_USEREXIT_default}} +: ${OCF_RESKEY_IS_ERS=${OCF_RESKEY_IS_ERS_default}} + 
####################################################################### SH=/bin/sh sapinstance_usage() { methods=`sapinstance_methods` methods=`echo $methods | tr ' ' '|'` cat <<-EOF usage: $0 ($methods) $0 manages a SAP Instance as an HA resource. The 'start' operation starts the instance or the ERS instance in a Master/Slave configuration The 'stop' operation stops the instance The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'promote' operation starts the primary instance in a Master/Slave configuration The 'demote' operation stops the primary instance and starts the ERS instance The 'reload' operation allows changed parameters (non-unique only) without restarting the service The 'notify' operation always returns SUCCESS The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports EOF } sapinstance_meta_data() { cat < 2.14 Usually a SAP system consists of one database and at least one or more SAP instances (sometimes called application servers). One SAP Instance is defined by having exactly one instance profile. The instance profiles can usually be found in the directory /sapmnt/SID/profile. Each instance must be configured as it's own resource in the cluster configuration. The resource agent supports the following SAP versions: - SAP WebAS ABAP Release 6.20 - 7.40 - SAP WebAS Java Release 6.40 - 7.40 - SAP WebAS ABAP + Java Add-In Release 6.20 - 7.40 (Java is not monitored by the cluster in that case) When using a SAP Kernel 6.40 please check and implement the actions from the section "Manual postprocessing" from SAP note 995116 (http://sdn.sap.com). Other versions may also work with this agent, but have not been verified. All operations of the SAPInstance resource agent are done by using the startup framework called SAP Management Console or sapstartsrv that was introduced with SAP kernel release 6.40. 
Find more information about the SAP Management Console in SAP note 1014480. Using this framework defines a clear interface for the Heartbeat cluster, how it sees the SAP system. The options for monitoring the SAP system are also much better than other methods like just watching the ps command for running processes or doing some pings to the application. sapstartsrv uses SOAP messages to request the status of running SAP processes. Therefore it can actually ask a process itself what it's status is, independent from other problems that might exist at the same time. sapstartsrv knows 4 status colours: - GREEN = everything is fine - YELLOW = something is wrong, but the service is still working - RED = the service does not work - GRAY = the service has not been started The SAPInstance resource agent will interpret GREEN and YELLOW as OK. That means that minor problems will not be reported to the Heartbeat cluster. This prevents the cluster from doing an unwanted failover. The statuses RED and GRAY are reported as NOT_RUNNING to the cluster. Depending on the status the cluster expects from the resource, it will do a restart, failover or just nothing. Manages a SAP instance as an HA resource. The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci. Usually this is the name of the SAP instance profile. Instance name: SID_INSTANCE_VIR-HOSTNAME - + The full qualified path where to find sapstartsrv and sapcontrol. Specify this parameter, if you have changed the SAP kernel directory location after the default SAP installation. Path of sapstartsrv and sapcontrol - + The full qualified path where to find the SAP START profile. Specify this parameter, if you have changed the SAP profile directory location after the default SAP installation. Path of start profile - + The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. 
As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. Start profile name - + After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start ishandled as SUCCESS. This is useful to resolve timing problems with e.g. the J2EE-Addin instance.Usually the resource agent waits until all services are started and the SAP Management Console reports a GREEN status. A double stack installation (ABAP + Java AddIn) consists of an ABAP dispatcher and a JAVA instance. Normally the start of the JAVA instance takes much longer than the start of the ABAP instance. For a JAVA Instance you may need to configure a much higher timeout for the start operation of the resource in Heartbeat. The disadvantage here is, that the discovery of a failed start by the cluster takes longer. Somebody might say: For me it is important, that the ABAP instance is up and running. A failure of the JAVA instance shall not cause a failover of the SAP instance. Actually the SAP MC reports a YELLOW status, if the JAVA instance of a double stack system fails. From the resource agent point of view YELLOW means:everything is OK. Setting START_WAITTIME to a lower value determines the resource agent to check the status of the instance during a start operation after that time. As it would wait normally for a GREEN status, now it reports SUCCESS to the cluster in case of a YELLOW status already after the specified time. That is only useful for double stack systems. Check the successful start after that time (do not wait for J2EE-Addin) - + The SAPInstance resource agent tries to recover a failed start attempt automatically one time. This is done by killing running instance processes, removing the kill.sap file and executing cleanipc. Sometimes a crashed SAP instance leaves some processes and/or shared memory segments behind. 
Setting this option to true will try to remove those leftovers during a start operation. That is to reduce manual work for the administrator. Enable or disable automatic startup recovery - + Within a SAP instance there can be several services. Usually you will find the defined services in the START profile of the related instance (Attention: with SAP Release 7.10 the START profile content was moved to the instance profile). Not all of those services are worth to monitor by the cluster. For example you properly do not like to failover your SAP instance, if the central syslog collector daemon fails. Those services are monitored within the SAPInstance resource agent: - disp+work - msg_server - enserver (ENSA1) - enq_server (ENSA2) - enrepserver (ENSA1) - enq_replicator (ENSA2) - jcontrol - jstart Some other services could be monitored as well. They have to be given with the parameter MONITOR_SERVICES, e.g.: - sapwebdisp - TREXDaemon.x That names match the strings used in the output of the command 'sapcontrol -nr [Instance-Nr] -function GetProcessList'. The default should fit most cases where you want to manage a SAP Instance from the cluster. You may change this with this parameter, if you like to monitor more/less or other services that sapstartsrv supports. You may specify multiple services separated by a | (pipe) sign in this parameter: disp+work|msg_server|enserver Services to monitor - + Usually a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the graceful stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !! Shutdown graceful or kill a SAP instance by terminating the processes. 
(normal|KILL) - + Only used in a Master/Slave resource configuration: The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile. The enqueue replication instance must be installed, before you want to configure a master-slave cluster resource. The master-slave configuration in the cluster must use this properties: clone_max = 2 clone_node_max = 1 master_node_max = 1 master_max = 1 Enqueue replication instance name: SID_INSTANCE_VIR-HOSTNAME - + Only used in a Master/Slave resource configuration: The parameter ERS_InstanceName must also be set in this configuration. The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. Enqueue replication start profile name - + The full qualified path where to find a script or program which should be executed before this resource gets started. Path to a pre-start script - + The full qualified path where to find a script or program which should be executed after this resource got started. Path to a post-start script - + The full qualified path where to find a script or program which should be executed before this resource gets stopped. Path to a pre-start script - + The full qualified path where to find a script or program which should be executed after this resource got stopped. Path to a post-start script - + Only used for ASCS/ERS SAP Netweaver installations without implementing a master/slave resource to allow the ASCS to 'find' the ERS running on another cluster node after a resource failure. This parameter should be set to true 'only' for the ERS instance for implementations following the SAP NetWeaver 7.40 HA certification (NW-HA-CLU-740). This includes also systems for NetWeaver less than 7.40, if you like to implement the NW-HA-CLU-740 scenario. 
Mark SAPInstance as ERS instance - + END } # # methods: What methods/operations do we support? # sapinstance_methods() { cat <<-EOF start stop status monitor promote demote reload notify validate-all methods meta-data usage EOF } # # is_clone : find out if we are configured to run in a Master/Slave configuration # is_clone() { if [ -n "$OCF_RESKEY_CRM_meta_clone_max" ] \ && [ "$OCF_RESKEY_CRM_meta_clone_max" -gt 0 ] then if [ "$OCF_RESKEY_CRM_meta_clone_max" -ne 2 ] || \ [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] || \ [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] || \ [ "$OCF_RESKEY_CRM_meta_master_max" -ne 1 ] then ocf_log err "Clone options misconfigured. (expect: clone_max=2,clone_node_max=1,master_node_max=1,master_max=1)" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_ERS_InstanceName" ] then ocf_log err "In a Master/Slave configuration the ERS_InstanceName parameter is mandatory." exit $OCF_ERR_ARGS fi else return 0 fi return 1 } # # abnormal_end : essential things are missing, but in the natur of a SAP installation - which can be very different # from customer to customer - we cannot handle this always as an error # This would be the case, if the software is installed on shared disks and not visible # to all cluster nodes at all times. # abnormal_end() { local err_msg=$1 ocf_is_probe && { sapinstance_status exit $? 
} ocf_log err $err_msg if [ "$ACTION" = "stop" ] then cleanup_instance exit $OCF_SUCCESS fi exit $OCF_ERR_CONFIGURED } # # sapinstance_init : Define global variables with default values, if optional parameters are not set # # sapinstance_init() { local myInstanceName="$1" SID=`echo "$myInstanceName" | cut -d_ -f1` InstanceName=`echo "$myInstanceName" | cut -d_ -f2` InstanceNr=`echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/'` SAPVIRHOST=`echo "$myInstanceName" | cut -d_ -f3` # optional OCF parameters, we try to guess which directories are correct if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] then if have_binary /usr/sap/$SID/$InstanceName/exe/sapstartsrv && have_binary /usr/sap/$SID/$InstanceName/exe/sapcontrol then DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe" SAPSTARTSRV="/usr/sap/$SID/$InstanceName/exe/sapstartsrv" SAPCONTROL="/usr/sap/$SID/$InstanceName/exe/sapcontrol" elif have_binary /usr/sap/$SID/SYS/exe/run/sapstartsrv && have_binary /usr/sap/$SID/SYS/exe/run/sapcontrol then DIR_EXECUTABLE="/usr/sap/$SID/SYS/exe/run" SAPSTARTSRV="/usr/sap/$SID/SYS/exe/run/sapstartsrv" SAPCONTROL="/usr/sap/$SID/SYS/exe/run/sapcontrol" fi else if have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" && have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" then DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE" SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" fi fi sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm" [ -z "$DIR_EXECUTABLE" ] && abnormal_end "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!" 
if [ -z "$OCF_RESKEY_DIR_PROFILE" ] then DIR_PROFILE="/usr/sap/$SID/SYS/profile" else DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" fi if [ "$myInstanceName" != "$OCF_RESKEY_InstanceName" ] then currentSTART_PROFILE=$OCF_RESKEY_ERS_START_PROFILE else currentSTART_PROFILE=$OCF_RESKEY_START_PROFILE fi if [ -z "$OCF_RESKEY_IS_ERS" ]; then is_ers="no" else is_ers="$OCF_RESKEY_IS_ERS" fi if [ -z "$currentSTART_PROFILE" ] then if [ ! -r "$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" -a -r "$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" ]; then SAPSTARTPROFILE="$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" else SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" fi else SAPSTARTPROFILE="$currentSTART_PROFILE" fi if [ -z "$OCF_RESKEY_START_WAITTIME" ] then - export OCF_RESKEY_START_WAITTIME=3600 + export OCF_RESKEY_START_WAITTIME="${OCF_RESKEY_START_WAITTIME_default}" fi if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ] then - export OCF_RESKEY_MONITOR_SERVICES="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart|enq_server|enq_replicator" + export OCF_RESKEY_MONITOR_SERVICES="${OCF_RESKEY_MONITOR_SERVICES_default}" fi # as root user we need the library path to the SAP kernel to be able to call sapcontrol if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH export LD_LIBRARY_PATH fi return $OCF_SUCCESS } # # check_sapstartsrv : Before using sapcontrol we make sure that the sapstartsrv is running for the correct instance. # We cannot use sapinit and the /usr/sap/sapservices file in case of an enquerep instance, # because then we have two instances with the same instance number. # check_sapstartsrv() { local restart=0 local runninginst="" local chkrc=$OCF_SUCCESS local output="" if [ ! 
-S /tmp/.sapstream5${InstanceNr}13 ]; then ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName (no UDS), it will be started now" restart=1 else output=`$SAPCONTROL -nr $InstanceNr -function ParameterValue INSTANCE_NAME -format script` if [ $? -eq 0 ] then runninginst=`echo "$output" | grep '^0 : ' | cut -d' ' -f3` if [ "$runninginst" != "$InstanceName" ] then ocf_log warn "sapstartsrv is running for instance $runninginst, that service will be killed" restart=1 else output=`$SAPCONTROL -nr $InstanceNr -function AccessCheck Start` if [ $? -ne 0 ]; then ocf_log warn "FAILED : sapcontrol -nr $InstanceNr -function AccessCheck Start (`ls -ld1 /tmp/.sapstream5${InstanceNr}13`)" ocf_log warn "sapstartsrv will be restarted to try to solve this situation, otherwise please check sapstsartsrv setup (SAP Note 927637)" restart=1 fi fi else ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName, it will be started now" restart=1 fi fi if [ -z "$runninginst" ]; then runninginst=$InstanceName; fi if [ $restart -eq 1 ] then if [ -d /usr/sap/$SID/SYS/profile/ ] then DIR_PROFILE="/usr/sap/$SID/SYS/profile" else abnormal_end "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!" fi [ ! -r $SAPSTARTPROFILE ] && abnormal_end "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!" pkill -9 -f "sapstartsrv.*$runninginst" # removing the unix domain socket files as they might have wrong permissions # or ownership - they will be recreated by sapstartsrv during next start rm -f /tmp/.sapstream5${InstanceNr}13 rm -f /tmp/.sapstream5${InstanceNr}14 $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm # now make sure the daemon has been started and is able to respond local srvrc=1 while [ $srvrc -eq 1 -a `pgrep -f "sapstartsrv.*$runninginst" | wc -l` -gt 0 ] do sleep 1 $SAPCONTROL -nr $InstanceNr -function GetProcessList > /dev/null 2>&1 srvrc=$? 
done if [ $srvrc -ne 1 ] then ocf_log info "sapstartsrv for instance $SID-$InstanceName was restarted !" chkrc=$OCF_SUCCESS else ocf_log error "sapstartsrv for instance $SID-$InstanceName could not be started!" chkrc=$OCF_ERR_GENERIC ocf_is_probe && chkrc=$OCF_NOT_RUNNING fi fi return $chkrc } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. # Someone should write a resource agent for each additional process you need, if it # is required to monitor that process within the cluster manager. To enable # you to extent this resource agent without developing a new one, this user exit # was introduced. # sapuserexit() { local NAME="$1" local VALUE="$2" if [ -n "$VALUE" ] then if have_binary "$VALUE" then ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" "$VALUE" >/dev/null 2>&1 ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" 
else ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" fi fi return 0 } # # cleanup_instance : remove resources (processes and shared memory) from a crashed instance) # cleanup_instance() { pkill -9 -f -U $sidadm $InstanceName ocf_log info "Terminated instance using 'pkill -9 -f -U $sidadm $InstanceName'" # it is necessary to call cleanipc as user sidadm if the system has 'vmcj/enable = ON' set - otherwise SHM-segments in /dev/shm/SAP_ES2* cannot be removed su - $sidadm -c "cleanipc $InstanceNr remove" ocf_log info "Tried to remove shared memory resources using 'cleanipc $InstanceNr remove' as user $sidadm" ocf_run rm -fv /usr/sap/$SID/$InstanceName/work/kill.sap ocf_run rm -fv /usr/sap/$SID/$InstanceName/work/shutdown.sap ocf_run rm -fv /usr/sap/$SID/$InstanceName/data/rslgcpid ocf_run rm -fv /usr/sap/$SID/$InstanceName/data/rslgspid return 0 } # # sapinstance_start : Start the SAP instance # sapinstance_start() { sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT" local rc=$OCF_NOT_RUNNING local output="" local loopcount=0 while [ $loopcount -lt 2 ] do loopcount=$(($loopcount + 1)) check_sapstartsrv rc=$? if [ $rc -eq $OCF_SUCCESS ]; then output=`$SAPCONTROL -nr $InstanceNr -function Start` rc=$? ocf_log info "Starting SAP Instance $SID-$InstanceName: $output" fi if [ $rc -ne 0 ] then ocf_log err "SAP Instance $SID-$InstanceName start failed." return $OCF_ERR_GENERIC fi local startrc=1 while [ $startrc -gt 0 ] do local waittime_start=`date +%s` output=`$SAPCONTROL -nr $InstanceNr -function WaitforStarted $OCF_RESKEY_START_WAITTIME 10` startrc=$? local waittime_stop=`date +%s` if [ $startrc -ne 0 ] then if [ $(($waittime_stop - $waittime_start)) -ge $OCF_RESKEY_START_WAITTIME ] then sapinstance_monitor NOLOG if [ $? -eq $OCF_SUCCESS ] then output="START_WAITTIME ($OCF_RESKEY_START_WAITTIME) has elapsed, but instance monitor returned SUCCESS. Instance considered running." 
startrc=0; loopcount=2 fi else if [ $loopcount -eq 1 ] && ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER then ocf_log warn "SAP Instance $SID-$InstanceName start failed: $output" ocf_log warn "Try to recover $SID-$InstanceName" cleanup_instance else loopcount=2 fi startrc=-1 fi else loopcount=2 fi done done if [ $startrc -eq 0 ] then ocf_log info "SAP Instance $SID-$InstanceName started: $output" rc=$OCF_SUCCESS sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT" if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 1 -l reboot; fi else ocf_log err "SAP Instance $SID-$InstanceName start failed: $output" rc=$OCF_NOT_RUNNING if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 0 -l reboot; fi fi return $rc } # # sapinstance_recover: Try startup of failed instance by cleaning up resources # sapinstance_recover() { cleanup_instance sapinstance_start return $? } # # sapinstance_stop: Stop the SAP instance # sapinstance_stop() { local output="" local rc sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT" if [ "$OCF_RESKEY_SHUTDOWN_METHOD" = "KILL" ] then ocf_log info "Stopping SAP Instance $SID-$InstanceName with shutdown method KILL!" cleanup_instance return $OCF_SUCCESS fi check_sapstartsrv rc=$? if [ $rc -eq $OCF_SUCCESS ]; then output=`$SAPCONTROL -nr $InstanceNr -function Stop` rc=$? ocf_log info "Stopping SAP Instance $SID-$InstanceName: $output" fi if [ $rc -eq 0 ] then output=`$SAPCONTROL -nr $InstanceNr -function WaitforStopped 3600 1` if [ $? 
-eq 0 ] then ocf_log info "SAP Instance $SID-$InstanceName stopped: $output" rc=$OCF_SUCCESS else ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output" rc=$OCF_ERR_GENERIC fi else ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output" rc=$OCF_ERR_GENERIC fi sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT" if ocf_is_true $is_ers; then crm_attribute -n runs_ers_${SID} -v 0 -l reboot; fi return $rc } # # sapinstance_monitor: Can the given SAP instance do anything useful? # sapinstance_monitor() { local MONLOG=$1 local rc check_sapstartsrv rc=$? if [ $rc -eq $OCF_SUCCESS ] then local count=0 local SERVNO local output output=`$SAPCONTROL -nr $InstanceNr -function GetProcessList -format script` # we have to parse the output, because the returncode doesn't tell anything about the instance status for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u` do local COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3` local SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3` local STATE=0 local SEARCH case $COLOR in GREEN|YELLOW) STATE=$OCF_SUCCESS;; *) STATE=$OCF_NOT_RUNNING;; esac SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'` if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ] then if [ $STATE -eq $OCF_NOT_RUNNING ] then [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" rc=$STATE fi count=1 fi done if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ] then if ocf_is_probe then rc=$OCF_NOT_RUNNING else [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!" rc=$OCF_ERR_GENERIC fi fi fi return $rc } # # sapinstance_status: Lightweight check of SAP instance only with OS tools # sapinstance_status() { local pid local pids [ ! 
-f "/usr/sap/$SID/$InstanceName/work/kill.sap" ] && return $OCF_NOT_RUNNING pids=`grep '^kill -[0-9]' /usr/sap/$SID/$InstanceName/work/kill.sap | awk '{print $3}'` for pid in $pids do [ `pgrep -f -U $sidadm $InstanceName | grep -c $pid` -gt 0 ] && return $OCF_SUCCESS done return $OCF_NOT_RUNNING } # # sapinstance_validate: Check the semantics of the input parameters # sapinstance_validate() { local rc=$OCF_SUCCESS if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$SID' is not a valid system ID!" rc=$OCF_ERR_ARGS fi if [ `echo "$InstanceName" | grep -c '^[A-Z].*[0-9][0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$InstanceName' is not a valid instance name!" rc=$OCF_ERR_ARGS fi if [ `echo "$InstanceNr" | grep -c '^[0-9][0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$InstanceNr' is not a valid instance number!" rc=$OCF_ERR_ARGS fi if [ `echo "$SAPVIRHOST" | grep -c '^[A-Za-z][A-Za-z0-9_-]*$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$SAPVIRHOST' is not a valid hostname!" rc=$OCF_ERR_ARGS fi return $rc } # # sapinstance_start_clone # sapinstance_start_clone() { sapinstance_init $OCF_RESKEY_ERS_InstanceName ${HA_SBIN_DIR}/crm_master -v 50 -l reboot sapinstance_start return $? } # # sapinstance_stop_clone # sapinstance_stop_clone() { sapinstance_init $OCF_RESKEY_ERS_InstanceName ${HA_SBIN_DIR}/crm_master -v 0 -l reboot sapinstance_stop return $? 
} # # sapinstance_monitor_clone # sapinstance_monitor_clone() { # first check with the status function (OS tools) if there could be something like a SAP instance running # as we do not know here, if we are in master or slave state we do not want to start our monitoring # agents (sapstartsrv) on the wrong host local rc sapinstance_init $OCF_RESKEY_InstanceName if sapinstance_status; then if sapinstance_monitor; then ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot return $OCF_RUNNING_MASTER fi # by nature of the SAP enqueue server we have to make sure # that we do a failover to the slave (enqueue replication server) # in case the enqueue process has failed. We signal this to the # cluster by setting our master preference to a lower value than the slave. ${HA_SBIN_DIR}/crm_master -v 10 -l reboot return $OCF_FAILED_MASTER fi sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_status && sapinstance_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot fi return $rc } # # sapinstance_promote_clone: In a Master/Slave configuration get Master by starting the SCS instance and stopping the ERS instance # The order is important here to behave correct from the application levels view # sapinstance_promote_clone() { local rc sapinstance_init $OCF_RESKEY_InstanceName ocf_log info "Promoting $SID-$InstanceName to running Master." sapinstance_start rc=$? if [ $rc -eq $OCF_SUCCESS ]; then sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_stop rc=$? fi return $rc } # # sapinstance_demote_clone: In a Master/Slave configuration get Slave by stopping the SCS instance and starting the ERS instance # sapinstance_demote_clone() { local rc sapinstance_init $OCF_RESKEY_InstanceName ocf_log info "Demoting $SID-$InstanceName to a slave." sapinstance_stop rc=$? if [ $rc -eq $OCF_SUCCESS ]; then sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_start rc=$? 
fi return $rc } # # sapinstance_notify: Handle master scoring - to make sure a slave gets the next master # sapinstance_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" if [ "${n_type}_${n_op}" = "post_promote" ]; then # After promotion of one master in the cluster, we make sure that all clones reset their master # value back to 100. This is because a failed monitor on a master might have degree one clone # instance to score 10. ${HA_SBIN_DIR}/crm_master -v 100 -l reboot elif [ "${n_type}_${n_op}" = "pre_demote" ]; then # if we are a slave and a demote event is announced, make sure we are highest on the list to become master # that is, when a slave resource was started after the promote event of an already running master (e.g. node of slave was down) # We also have to make sure to overrule the globally set resource_stickiness or any fail-count factors => INFINITY local n_uname="$OCF_RESKEY_CRM_meta_notify_demote_uname" if [ ${n_uname} != ${NODENAME} ]; then ${HA_SBIN_DIR}/crm_master -v INFINITY -l reboot fi fi } # # 'main' starts here... # ## GLOBALS SID="" sidadm="" InstanceName="" InstanceNr="" SAPVIRHOST="" DIR_EXECUTABLE="" SAPSTARTSRV="" SAPCONTROL="" DIR_PROFILE="" SAPSTARTPROFILE="" CLONE=0 NODENAME=$(ocf_local_nodename) if ( [ $# -ne 1 ] ) then sapinstance_usage exit $OCF_ERR_ARGS fi ACTION=$1 if [ "$ACTION" = "status" ]; then ACTION=monitor fi # These operations don't require OCF instance parameters to be set case "$ACTION" in usage|methods) sapinstance_$ACTION exit $OCF_SUCCESS;; meta-data) sapinstance_meta_data exit $OCF_SUCCESS;; notify) sapinstance_notify exit $OCF_SUCCESS;; *);; esac if ! ocf_is_root then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi # parameter check if [ -z "$OCF_RESKEY_InstanceName" ] then ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!" exit $OCF_ERR_ARGS fi is_clone; CLONE=$? 
if [ ${CLONE} -eq 1 ] then CLACT=_clone else if [ "$ACTION" = "promote" -o "$ACTION" = "demote" ] then ocf_log err "$ACTION called in a non master/slave environment" exit $OCF_ERR_ARGS fi sapinstance_init $OCF_RESKEY_InstanceName fi # What kind of method was invoked? case "$ACTION" in start|stop|monitor|promote|demote) sapinstance_$ACTION$CLACT exit $?;; validate-all) sapinstance_validate exit $?;; reload ) ocf_log info "reloading SAPInstance parameters" exit $OCF_SUCCESS;; *) sapinstance_methods exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/SendArp b/heartbeat/SendArp index a7a14df14..9e4cbb164 100755 --- a/heartbeat/SendArp +++ b/heartbeat/SendArp @@ -1,267 +1,277 @@ #!/bin/sh # # # Copyright (c) 2006, Huang Zhen # Converting original heartbeat RA to OCF RA. # # Copyright (C) 2004 Horms # # Based on IPaddr2: Copyright (C) 2003 Tuomo Soini # # License: GNU General Public License (GPL) # Support: users@clusterlabs.org # # This script send out gratuitous Arp for an IP address # # It can be used _instead_ of the IPaddr2 or IPaddr resource # to send gratuitous arp for an IP address on a given interface, # without adding the address to that interface. I.e. if for # some reason you want to send gratuitous arp for addresses # managed by IPaddr2 or IPaddr on an additional interface. # # OCF parameters are as below: # OCF_RESKEY_ip # OCF_RESKEY_nic # # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. 
Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_ip_default="" +OCF_RESKEY_nic_default="" +OCF_RESKEY_background_default="true" + +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} +: ${OCF_RESKEY_background=${OCF_RESKEY_background_default}} + SENDARP=$HA_BIN/send_arp SENDARPPIDDIR=${HA_RSCTMP} BASEIP="$OCF_RESKEY_ip" INTERFACE="$OCF_RESKEY_nic" RESIDUAL="" SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP" -BACKGROUND=${OCF_RESKEY_background:-"yes"} +BACKGROUND=${OCF_RESKEY_background} # Set default values : ${ARP_INTERVAL_MS=200} # milliseconds between ARPs : ${ARP_REPEAT=5} # repeat count : ${ARP_BACKGROUND=$BACKGROUND} # no to run in foreground : ${ARP_NETMASK=ffffffffffff} # netmask for ARP ####################################################################### sendarp_meta_data() { cat < 1.0 This RA can be used _instead_ of the IPaddr2 or IPaddr RA to send gratuitous ARP for an IP address on a given interface, without adding the address to that interface. For example, if for some reason you wanted to send gratuitous ARP for addresses managed by IPaddr2 or IPaddr on an additional interface. Broadcasts unsolicited ARP announcements The IP address for sending ARP packet. IP address - + The NIC for sending ARP packet. NIC - + Send ARPs in background. Set to false if you want to test if sending ARPs succeeded. 
Send ARPs in background - + END } ####################################################################### sendarp_usage() { cat < 1.0 Resource script for ServeRAID. It enables/disables shared ServeRAID merge groups. Enables and disables shared ServeRAID merge groups The adapter number of the ServeRAID adapter. serveraid - + The logical drive under consideration. mergegroup - + END } ServeRAID_methods() { cat <<-! start stop status validate-all methods usage meta-data ! } ServeRAIDSCSI="/proc/scsi/ips" IPS=ipssend proc_scsi=/proc/scsi/scsi parseinst() { sr_adapter=error sr_mergegroup=error hostid=error sr_logicaldrivenumber=error if [ $# -ne 2 ] then ocf_log err "Invalid ServeRAID instance: $*" exit $OCF_ERR_ARGS fi PerlScript='next unless /^Host/; $_ .= <>.<>; print "$1 " if /SERVERAID/ and /Proces/ and /scsi(\d+)/' # Get the list of host ids of the ServeRAID host adapters hostlist=`$PERL -ne "${PerlScript}" <$proc_scsi` # Figure the host id of the desired ServeRAID adapter hostid=`echo $hostlist | cut -d' ' -f$1` if [ ! -f "$ServeRAIDSCSI/$hostid" ] then ocf_log err "No such ServeRAID adapter: $1" exit $OCF_ERR_ARGS fi case $2 in [1-8]);; *) ocf_log err "Invalid Shared Merge Group Number: $2" exit $OCF_ERR_ARGS;; esac sr_adapter=$1 sr_mergegroup=$2 CheckRaidLevel return $? 
} SRLogicalDriveConfig() { $IPS getconfig $sr_adapter ld } MergeGroupToSCSI_ID() { PerlScript="while (<>) { /logical drive number *([0-9]+)/i && (\$ld=\$1); /part of merge group *: *$sr_mergegroup *\$/i && print \$ld - 1, \"\n\"; }" ID=`SRLogicalDriveConfig | $PERL -e "$PerlScript"` case $ID in [0-9]*) echo "$ID"; return 0;; *) return 1;; esac } MergeGroupRaidLevel() { PerlScript="while (<>) { /RAID level *: *([0-9]+[A-Za-z]*)/i && (\$ld=\$1); /part of merge group *: *$sr_mergegroup *\$/i && print \$ld, \"\n\"; }" Level=`SRLogicalDriveConfig | $PERL -e "$PerlScript"` case $Level in ?*) echo "$Level"; return 0;; *) return 1;; esac } CheckRaidLevel() { RAIDlevel=`MergeGroupRaidLevel` case $RAIDlevel in *5*) ocf_log err "ServeRAID device $sr_adapter $sr_mergegroup is RAID level $RAIDlevel" ocf_log err "This level of ServeRAID RAID is not supported for failover by the firmware." exit $OCF_ERR_GENERIC;; esac return $OCF_SUCCESS } ReleaseSCSI() { targetid=`MergeGroupToSCSI_ID` echo "${SCSI}remove-single-device $hostid 0 $targetid 0" > $proc_scsi } AddSCSI() { targetid=`MergeGroupToSCSI_ID` echo "${SCSI}add-single-device $hostid 0 $targetid 0" > $proc_scsi } # # start: Enable the given ServeRAID device # ServeRAID_start() { if ServeRAID_status $serveraid $mergegroup then ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running." return $OCF_SUCCESS else if # # Normally we do a MERGE PARTNER, but if we still own the drive for # some reason, then we'll need to do a MERGE OWN instead... # out=`$IPS MERGE $sr_adapter $sr_mergegroup PARTNER 2>&1` if [ $? -eq $srsuccess ] then ocf_log info "$out" else ocf_run $IPS MERGE $sr_adapter $sr_mergegroup OWN fi then : OK All is well! targetid=`MergeGroupToSCSI_ID` sr_logicaldrivenumber=`expr $targetid + 1` #run $IPS SYNCH $sr_adapter $sr_logicaldrivenumber & # This version of the SYNCH command requires the 6.10 or later # ServeRAID support CD. # To avoid issues when called by lrmd, redirect stdout->stderr. 
# Use () to create a subshell to make the redirection be synchronized. ( ocf_run $IPS SYNCH $sr_adapter $sr_mergegroup & ) >&2 AddSCSI else return $OCF_ERR_GENERIC fi fi if ServeRAID_status "$@" then return $OCF_SUCCESS else ocf_log err "ServeRAID device $1 not active!" exit $OCF_ERR_GENERIC fi } # # stop: Disable the given ServeRAID device # ServeRAID_stop() { parseinst "$@" ReleaseSCSI if ocf_run $IPS UNMERGE $sr_adapter $sr_mergegroup then : UNMERGE $sr_adapter $sr_mergegroup worked fi if ServeRAID_status "$@" then ocf_log err "ServeRAID device $* is still active!" return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi } # # status: is the given device now available? # ServeRAID_status() { parseinst "$@" # # The output we're looking for # Part of merge group : 2 # SRLogicalDriveConfig \ | grep -i "part of merge group[ ]*: *$sr_mergegroup *\$" >/dev/null } # # validate_all: are the OCF instance parameters valid? # ServeRAID_validate_all() { check_binary $PERL # parseinst() will do all the work... parseinst "$@" return $? } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; # # methods: What methods do we support? # methods) ServeRAID_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if ( [ -z "$OCF_RESKEY_serveraid" ] || [ -z "$OCF_RESKEY_mergegroup" ] ) then ocf_log err "You have to set the OCF_RESKEY_serveraid and OCF_RESKEY_mergegroup\n enviroment virables before running $0 !" # usage exit $OCF_ERR_GENERIC fi : Right Number of arguments.. serveraid=$OCF_RESKEY_serveraid mergegroup=$OCF_RESKEY_mergegroup # Look for the start, stop, status, or methods calls... case "$1" in stop) ServeRAID_stop $serveraid $mergegroup exit $?;; start) ServeRAID_start $serveraid $mergegroup exit $?;; status|monitor) if ServeRAID_status $serveraid $mergegroup then ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running." 
exit $OCF_SUCCESS else ocf_log debug "ServeRAID merge group $serveraid $mergegroup is stopped." exit $OCF_NOT_RUNNING fi exit $?;; validate-all) ServeRAID_validate_all $serveraid $mergegroup exit $?;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/SphinxSearchDaemon b/heartbeat/SphinxSearchDaemon index ad7bc987c..cad03794d 100755 --- a/heartbeat/SphinxSearchDaemon +++ b/heartbeat/SphinxSearchDaemon @@ -1,223 +1,230 @@ #!/bin/sh # # # Searchd OCF RA. # Manages the Sphinx search daemon # # Copyright (c) 2007 Christian Rishoj (christian@rishoj.net) # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_config_default="/etc/sphinx/sphinx.conf" +OCF_RESKEY_searchd_default="/usr/local/bin/searchd" +OCF_RESKEY_search_default="/usr/local/bin/search" +OCF_RESKEY_testQuery_default="Heartbeat_Monitor_Query_Match_string" + +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_searchd=${OCF_RESKEY_searchd_default}} +: ${OCF_RESKEY_search=${OCF_RESKEY_search_default}} +: ${OCF_RESKEY_testQuery=${OCF_RESKEY_testQuery_default}} + ####################################################################### meta_data() { cat < 1.0 This is a searchd Resource Agent. It manages the Sphinx Search Daemon. Manages the Sphinx search daemon. searchd configuration file Configuration file - + searchd binary searchd binary - + Search binary for functional testing in the monitor action. search binary - + Test query for functional testing in the monitor action. The query does not need to match any documents in the index. The purpose is merely to test whether the search daemon is is able to query its indices and respond properly. test query - + END } ####################################################################### searchd_usage() { cat < /dev/null && [ `ps -p "$1" | grep searchd | wc -l` -eq 1 ] } searchd_status() { pidfile=`grep -v "^#" "$OCF_RESKEY_config" | grep -w pid_file | awk -F "[ \t]*=[ \t]*" '{ print $2 }'` if [ -f "$pidfile" ] ; then PID=`head -n 1 $pidfile` if [ ! -z "$PID" ] ; then isRunning "$PID" if [ $? = 0 ] ; then return 0 fi fi fi false } searchd_check() { $OCF_RESKEY_search --config $OCF_RESKEY_config --noinfo "$OCF_RESKEY_testQuery" > /dev/null } searchd_monitor() { if ! searchd_validate ; then return $OCF_NOT_RUNNING fi if searchd_status ; then if searchd_check ; then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi else return $OCF_NOT_RUNNING fi } searchd_validate() { if [ ! 
-x "$OCF_RESKEY_search" ]; then ocf_log err "search binary '$OCF_RESKEY_search' does not exist or cannot be executed" return $OCF_ERR_ARGS fi if [ ! -x "$OCF_RESKEY_searchd" ]; then ocf_log err "searchd binary '$OCF_RESKEY_searchd' does not exist or cannot be executed" return $OCF_ERR_ARGS fi if [ ! -f "$OCF_RESKEY_config" ]; then ocf_log err "config file '$OCF_RESKEY_config' does not exist" return $OCF_ERR_ARGS fi return $OCF_SUCCESS } -: ${OCF_RESKEY_config=/etc/sphinx/sphinx.conf} -: ${OCF_RESKEY_search=/usr/local/bin/search} -: ${OCF_RESKEY_searchd=/usr/local/bin/searchd} -: ${OCF_RESKEY_testQuery=Heartbeat_Monitor_Query_Match_string} - case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) searchd_start;; stop) searchd_stop;; monitor) searchd_monitor;; validate-all) searchd_validate;; usage|help) searchd_usage exit $OCF_SUCCESS ;; *) searchd_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/Squid.in b/heartbeat/Squid.in index e62e7ee66..fe24d9e0c 100644 --- a/heartbeat/Squid.in +++ b/heartbeat/Squid.in @@ -1,444 +1,462 @@ #!@BASH_SHELL@ # # Description: Manages a Squid Server provided by NTT OSSC as an # OCF High-Availability resource under Heartbeat/LinuxHA control # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # Copyright (c) 2008 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # ####################################################################### # OCF parameters: # OCF_RESKEY_squid_exe : Executable file # OCF_RESKEY_squid_conf : Configuration file # OCF_RESKEY_squid_pidfile: Process id file # OCF_RESKEY_squid_port : Port number # OCF_RESKEY_debug_mode : Debug mode # OCF_RESKEY_debug_log : Debug log file # OCF_RESKEY_squid_stop_timeout: # Number of seconds to await to confirm a # normal stop method # # OCF_RESKEY_squid_exe, OCF_RESKEY_squid_conf, OCF_RESKEY_squid_pidfile # and OCF_RESKEY_squid_port must be specified. Each of the rests # has its default value or refers OCF_RESKEY_squid_conf to make # its value when no explicit value is given. ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_squid_exe_default="" +OCF_RESKEY_squid_conf_default="" +OCF_RESKEY_squid_pidfile_default="" +OCF_RESKEY_squid_port_default="" +OCF_RESKEY_squid_stop_timeout_default="10" +OCF_RESKEY_debug_mode_default="" +OCF_RESKEY_debug_log_default="" + +: ${OCF_RESKEY_squid_exe=${OCF_RESKEY_squid_exe_default}} +: ${OCF_RESKEY_squid_conf=${OCF_RESKEY_squid_conf_default}} +: ${OCF_RESKEY_squid_pidfile=${OCF_RESKEY_squid_pidfile_default}} +: ${OCF_RESKEY_squid_port=${OCF_RESKEY_squid_port_default}} +: ${OCF_RESKEY_squid_stop_timeout=${OCF_RESKEY_squid_stop_timeout_default}} +: ${OCF_RESKEY_debug_mode=${OCF_RESKEY_debug_mode_default}} +: ${OCF_RESKEY_debug_log=${OCF_RESKEY_debug_log_default}} + usage() { cat <<-! 
usage: $0 action action: start : start a new squid instance stop : stop the running squid instance status : return the status of squid, run or down monitor : return TRUE if the squid appears to be working. meta-data : show meta data message validate-all: validate the instance parameters ! return $OCF_ERR_ARGS } metadata_squid() { cat < 1.0 The resource agent of Squid. This manages a Squid instance as an HA resource. Manages a Squid proxy server instance This is a required parameter. This parameter specifies squid's executable file. Executable file - + This is a required parameter. This parameter specifies a configuration file for a squid instance managed by this RA. Configuration file - + Deprecated - do not use anymore deprecated - do not use anymore - + This is a required parameter. This parameter specifies a port number for a squid instance managed by this RA. If multiple ports are used, you must specify only one of them. Port number - + On stop, a squid shutdown is invoked first. If the resource doesn't stop within this timeout, we resort to stopping processes by sending signals and finally KILLing them. how long to wait for squid shutdown to stop the instance before resorting to kill - + This is an optional parameter. This RA runs in debug mode when this parameter includes 'x' or 'v'. If 'x' is included, both of STDOUT and STDERR redirect to the logfile specified by "debug_log", and then the builtin shell option 'x' is turned on. It is similar about 'v'. Debug mode - + This is an optional parameter. This parameter specifies a destination file for debug logs and works only if this RA run in debug mode. Refer to "debug_mode" about debug mode. If no value is given but is required, it's constructed according to the following rules: "/var/log/" as a directory part, the basename of the configuration file given by "syslog_ng_conf" as a basename part, ".log" as a suffix. 
A destination of the debug log - + END return $OCF_SUCCESS } get_pids() { SQUID_PIDS=( ) # Seek by pattern SQUID_PIDS[0]=$(pgrep -f "$PROCESS_PATTERN") # Seek by child process if [[ -n "${SQUID_PIDS[0]}" ]]; then SQUID_PIDS[1]=$(pgrep -P ${SQUID_PIDS[0]}) fi if [[ -n "${SQUID_PIDS[1]}" ]]; then typeset exe exe=$(ls -l "/proc/${SQUID_PIDS[1]}/exe") if [[ $? = 0 ]]; then exe=${exe##*-> } if ! [[ "$exe" = $SQUID_EXE ]]; then SQUID_PIDS[1]="" fi else SQUID_PIDS[1]="" fi fi # Seek by port if have_binary netstat; then SQUID_PIDS[2]=$( netstat -apn | awk '/tcp.*:'$SQUID_PORT' .*LISTEN/ && $7~/^[1-9]/ { sub("\\/.*", "", $7); print $7; exit}') else SQUID_PIDS[2]=$( ss -apn | awk '/tcp.*LISTEN.*:'$SQUID_PORT'/ { sub(".*pid=", "", $7); sub(",fd=.*", "", $7); print $7 }') fi } are_all_pids_found() { if [[ -n "${SQUID_PIDS[0]}" ]] && [[ -n "${SQUID_PIDS[1]}" ]] && [[ -n "${SQUID_PIDS[2]}" ]] then return 0 else return 1 fi } are_pids_sane() { if [[ "${SQUID_PIDS[1]}" = "${SQUID_PIDS[2]}" ]]; then return $OCF_SUCCESS else ocf_exit_reason "$SQUID_NAME:Pid unmatch" return $OCF_ERR_GENERIC fi } is_squid_dead() { if [[ -z "${SQUID_PIDS[0]}" ]] && [[ -z "${SQUID_PIDS[2]}" ]] then return 0 else return 1 fi } monitor_squid() { typeset trialcount=0 while true; do get_pids if are_all_pids_found; then are_pids_sane return $OCF_SUCCESS fi if is_squid_dead; then return $OCF_NOT_RUNNING fi ocf_log info "$SQUID_NAME:Inconsistent processes:" \ "${SQUID_PIDS[0]},${SQUID_PIDS[1]},${SQUID_PIDS[2]}" (( trialcount = trialcount + 1 )) if (( trialcount > SQUID_CONFIRM_TRIALCOUNT )); then ocf_exit_reason "$SQUID_NAME:Inconsistency of processes remains unsolved" return $OCF_ERR_GENERIC fi sleep 1 done } start_squid() { typeset status monitor_squid status=$? if [[ $status != $OCF_NOT_RUNNING ]]; then return $status fi set -- "$SQUID_OPTS" ocf_run $SQUID_EXE -f "$SQUID_CONF" "$@" status=$? 
if [[ $status != $OCF_SUCCESS ]]; then return $OCF_ERR_GENERIC fi while true; do get_pids if are_all_pids_found && are_pids_sane; then return $OCF_SUCCESS fi ocf_log info "$SQUID_NAME:Waiting for squid to be invoked" sleep 1 done return $OCF_ERR_GENERIC } stop_squid() { typeset lapse_sec if ocf_run $SQUID_EXE -f $SQUID_CONF -k shutdown; then lapse_sec=0 while true; do get_pids if is_squid_dead; then return $OCF_SUCCESS fi (( lapse_sec = lapse_sec + 1 )) if (( lapse_sec > SQUID_STOP_TIMEOUT )); then break fi sleep 1 ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ "stop NORM $lapse_sec/$SQUID_STOP_TIMEOUT" done fi while true; do get_pids ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ "try to stop by SIGKILL:${SQUID_PIDS[0]} ${SQUID_PIDS[2]}" kill -KILL ${SQUID_PIDS[0]} ${SQUID_PIDS[2]} sleep 1 if is_squid_dead; then return $OCF_SUCCESS fi done return $OCF_ERR_GENERIC } status_squid() { return $OCF_SUCCESS } validate_all_squid() { ocf_log info "validate_all_squid[$SQUID_NAME]" return $OCF_SUCCESS } : === Debug ${0##*/} $1 === if [[ "$1" = "meta-data" ]]; then metadata_squid exit $? fi SQUID_CONF="${OCF_RESKEY_squid_conf}" if [[ -z "$SQUID_CONF" ]]; then ocf_exit_reason "SQUID_CONF is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_NAME="${SQUID_CONF##*/}" SQUID_NAME="${SQUID_NAME%.*}" DEBUG_LOG="${OCF_RESKEY_debug_log-/var/log/squid_${SQUID_NAME}_debug}.log" DEBUG_MODE="" case $OCF_RESKEY_debug_mode in *x*) DEBUG_MODE="${DEBUG_MODE}x";; esac case $OCF_RESKEY_debug_mode in *v*) DEBUG_MODE="${DEBUG_MODE}v";; esac if [ -n "$DEBUG_MODE" ]; then PS4='\d \t \h '"${1-unknown} " export PS4 exec 1>>$DEBUG_LOG 2>&1 set -$DEBUG_MODE fi SQUID_EXE="${OCF_RESKEY_squid_exe}" if [[ -z "$SQUID_EXE" ]]; then ocf_exit_reason "SQUID_EXE is not defined" exit $OCF_ERR_CONFIGURED fi if [[ ! 
-x "$SQUID_EXE" ]]; then ocf_exit_reason "$SQUID_EXE is not found" exit $OCF_ERR_CONFIGURED fi SQUID_PORT="${OCF_RESKEY_squid_port}" if [[ -z "$SQUID_PORT" ]]; then ocf_exit_reason "SQUID_PORT is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_OPTS="${OCF_RESKEY_squid_opts}" SQUID_PIDS=( ) SQUID_CONFIRM_TRIALCOUNT="${OCF_RESKEY_squid_confirm_trialcount-3}" SQUID_STOP_TIMEOUT="${OCF_RESKEY_squid_stop_timeout-10}" SQUID_SUSPEND_TRIALCOUNT="${OCF_RESKEY_squid_suspend_trialcount-10}" PROCESS_PATTERN="$SQUID_EXE -f $SQUID_CONF" COMMAND=$1 case "$COMMAND" in start) ocf_log debug "[$SQUID_NAME] Enter squid start" start_squid func_status=$? ocf_log debug "[$SQUID_NAME] Leave squid start $func_status" exit $func_status ;; stop) ocf_log debug "[$SQUID_NAME] Enter squid stop" stop_squid func_status=$? ocf_log debug "[$SQUID_NAME] Leave squid stop $func_status" exit $func_status ;; status) status_squid exit $? ;; monitor) #ocf_log debug "[$SQUID_NAME] Enter squid monitor" monitor_squid func_status=$? #ocf_log debug "[$SQUID_NAME] Leave squid monitor $func_status" exit $func_status ;; validate-all) validate_all_squid exit $? ;; *) usage ;; esac # vim: set sw=4 ts=4 : diff --git a/heartbeat/Stateful b/heartbeat/Stateful index 894945f32..cc461405a 100755 --- a/heartbeat/Stateful +++ b/heartbeat/Stateful @@ -1,189 +1,194 @@ #!/bin/sh # # # Example of a stateful OCF Resource Agent. # # Copyright (c) 2006 Andrew Beekhof # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. 
Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_state_default="${HA_RSCTMP}/Stateful-${OCF_RESOURCE_INSTANCE}.state" + +: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}} + CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot" ####################################################################### meta_data() { cat < 1.0 This is an example resource agent that implements two states Example stateful resource agent Location to store the resource state in State file - + END exit $OCF_SUCCESS } ####################################################################### stateful_usage() { cat < ${OCF_RESKEY_state} } stateful_check_state() { target=$1 if [ -f ${OCF_RESKEY_state} ]; then state=`cat ${OCF_RESKEY_state}` if [ "x$target" = "x$state" ]; then return $OCF_SUCCESS fi else if [ "x$target" = "x" ]; then return $OCF_SUCCESS fi fi return $OCF_ERR_GENERIC } stateful_start() { stateful_check_state master if [ $? = 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_MASTER fi stateful_update slave $CRM_MASTER -v 5 return $OCF_SUCCESS } stateful_demote() { stateful_check_state if [ $? = 0 ]; then # CRM Error - Should never happen return $OCF_NOT_RUNNING fi stateful_update slave $CRM_MASTER -v 5 return $OCF_SUCCESS } stateful_promote() { stateful_check_state if [ $? 
= 0 ]; then return $OCF_NOT_RUNNING fi stateful_update master $CRM_MASTER -v 10 return $OCF_SUCCESS } stateful_stop() { $CRM_MASTER -D stateful_check_state master if [ $? = 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_MASTER fi if [ -f ${OCF_RESKEY_state} ]; then rm ${OCF_RESKEY_state} fi return $OCF_SUCCESS } stateful_monitor() { stateful_check_state "master" if [ $? = 0 ]; then return $OCF_RUNNING_MASTER fi stateful_check_state "slave" if [ $? = 0 ]; then return $OCF_SUCCESS fi if [ -f ${OCF_RESKEY_state} ]; then echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents" cat ${OCF_RESKEY_state} return $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } stateful_validate() { exit $OCF_SUCCESS } -: ${OCF_RESKEY_state=${HA_RSCTMP}/Stateful-${OCF_RESOURCE_INSTANCE}.state} - case $__OCF_ACTION in meta-data) meta_data;; start) stateful_start;; promote) stateful_promote;; demote) stateful_demote;; stop) stateful_stop;; monitor) stateful_monitor;; validate-all) stateful_validate;; usage|help) stateful_usage $OCF_SUCCESS;; *) stateful_usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? diff --git a/heartbeat/SysInfo.in b/heartbeat/SysInfo.in index 61f5d5757..df4bf6dc0 100644 --- a/heartbeat/SysInfo.in +++ b/heartbeat/SysInfo.in @@ -1,364 +1,372 @@ #!@BASH_SHELL@ # # # SysInfo OCF Resource Agent # It records (in the CIB) various attributes of a node # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Bree # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_pidfile_default="$HA_RSCTMP/SysInfo-${OCF_RESOURCE_INSTANCE}" +OCF_RESKEY_delay_default="0s" +OCF_RESKEY_clone_default="0" + +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_delay=${OCF_RESKEY_delay_default}} +: ${OCF_RESKEY_clone=${OCF_RESKEY_clone_default}} + ####################################################################### meta_data() { cat < 1.0 This is a SysInfo Resource Agent. 
It records (in the CIB) various attributes of a node Sample Linux output: arch: i686 os: Linux-2.4.26-gentoo-r14 free_swap: 1999 cpu_info: Intel(R) Celeron(R) CPU 2.40GHz cpu_speed: 4771.02 cpu_cores: 1 cpu_load: 0.00 ram_total: 513 ram_free: 117 root_free: 2.4 Sample Darwin output: arch: i386 os: Darwin-8.6.2 cpu_info: Intel Core Duo cpu_speed: 2.16 cpu_cores: 2 cpu_load: 0.18 ram_total: 2016 ram_free: 787 root_free: 13 Units: free_swap: Mb ram_*: Mb root_free: Gb cpu_speed (Linux): bogomips cpu_speed (Darwin): Ghz Records various node attributes in the CIB PID file PID file - + Interval to allow values to stabilize Dampening Delay - + END } ####################################################################### UpdateStat() { name=$1; shift value="$*" echo -e "$name:\t$value" ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value" } SysInfoStats() { UpdateStat arch "`uname -m`" UpdateStat os "`uname -s`-`uname -r`" case `uname -s` in "Darwin") mem=`top -l 1 | grep Mem: | awk '{print $10}'` mem_used=`top -l 1 | grep Mem: | awk '{print $8}'` mem=`SysInfo_mem_units $mem` mem_used=`SysInfo_mem_units $mem_used` mem_total=`expr $mem_used + $mem` cpu_type=`system_profiler SPHardwareDataType | grep "CPU Type:"` cpu_type=${cpu_type/*: /} cpu_speed=`system_profiler SPHardwareDataType | grep "CPU Speed:" | awk '{print $3}'` cpu_cores=`system_profiler SPHardwareDataType | grep "Number Of"` cpu_cores=${cpu_cores/*: /} ;; "Linux") if [ -f /proc/cpuinfo ]; then cpu_type=`grep "model name" /proc/cpuinfo | head -n 1` cpu_type=${cpu_type/*: /} cpu_speed=`grep "bogomips" /proc/cpuinfo | head -n 1` cpu_speed=${cpu_speed/*: /} cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l` fi if [ -f /proc/meminfo ]; then # meminfo results are in kB mem=`grep "SwapFree" /proc/meminfo | awk '{print $2"k"}'` if [ ! 
-z $mem ]; then UpdateStat free_swap `SysInfo_mem_units $mem` fi mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'` mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'` else mem=`top -n 1 | grep Mem: | awk '{print $7}'` fi ;; *) esac if [ x != x"$cpu_type" ]; then UpdateStat cpu_info "$cpu_type" fi if [ x != x"$cpu_speed" ]; then UpdateStat cpu_speed "$cpu_speed" fi if [ x != x"$cpu_cores" ]; then UpdateStat cpu_cores "$cpu_cores" fi loads=`uptime` load15=`echo ${loads} | awk '{print $10}'` UpdateStat cpu_load $load15 if [ ! -z "$mem" ]; then # Massage the memory values UpdateStat ram_total `SysInfo_mem_units $mem_total` UpdateStat ram_free `SysInfo_mem_units $mem` fi # Portability notes: # o df: -h flag not available on Solaris 8. (OK on 9, 10, ...) #FIXME# # o tail: explicit "-n" not available in Solaris; instead simplify # 'tail -n ' to the equivalent 'tail -'. disk=`df -h / | tail -1 | awk '{print $4}'` if [ x != x"$disk" ]; then UpdateStat root_free `SysInfo_hdd_units $disk` fi } SysInfo_mem_units() { mem=$1 if [ -z $1 ]; then return fi memlen=`expr ${#mem} - 1` memlen_alt=`expr ${#mem} - 2` if [ ${mem:$memlen:1} = "G" ]; then mem="${mem:0:$memlen}" if [ $mem != ${mem/./} ]; then mem_before=${mem/.*/} mem_after=${mem/*./} mem=$[mem_before*1024] if [ ${#mem_after} = 0 ]; then : elif [ ${#mem_after} = 1 ]; then mem=$[mem+100*$mem_after] elif [ ${#mem_after} = 2 ]; then mem=$[mem+10*$mem_after] elif [ ${#mem_after} = 3 ]; then mem=$[mem+$mem_after] else mem_after=${mem_after:0:3} mem=$[mem+$mem_after] fi fi elif [ ${mem:$memlen:1} = "M" ]; then mem=${mem/.*/} mem="${mem:0:$memlen}" elif [ ${mem:$memlen:1} = "k" ]; then mem="${mem:0:$memlen}" mem=${mem/.*/} mem=`expr $mem / 1024` elif [ ${mem:$memlen_alt:2} = "kB" ]; then mem="${mem:0:$memlen_alt}" mem=${mem/.*/} mem=`expr $mem / 1024` elif [ ${mem:$memlen_alt:2} = "Mb" ]; then mem="${mem:0:$memlen_alt}" mem=${mem/.*/} elif [ ${mem:$memlen_alt:2} = "MB" ]; then mem="${mem:0:$memlen_alt}" 
mem=${mem/.*/} fi # Round to the next multiple of 50 memlen=`expr ${#mem} - 2` mem_round="${mem:$memlen:2}" if [ x$mem_round = x ]; then : elif [ $mem_round = "00" ]; then : else mem_round=`echo $mem_round | sed 's/^0//'` if [ $mem_round -lt "50" ]; then mem=$[mem+50] mem=$[mem-$mem_round] else mem=$[mem+100] mem=$[mem-$mem_round] fi fi echo $mem } SysInfo_hdd_units() { disk=$1 disklen=`expr ${#disk} - 1` disklen_alt=`expr ${#disk} - 2` if [ ${disk:$disklen:1} = "G" ]; then disk="${disk:0:$disklen}" elif [ ${disk:$disklen:1} = "M" ]; then disk="${disk:0:$disklen}" disk=${disk/.*/} disk=`expr $disk / 1024` elif [ ${disk:$disklen:1} = "k" ]; then disk="${disk:0:$disklen}" disk=${disk/.*/} disk=`expr $disk / 1048576` elif [ ${disk:$disklen_alt:2} = "kB" ]; then disk="${disk:0:$disklen_alt}" disk=${disk/.*/} disk=`expr $disk / 1048576` elif [ ${disk:$disklen_alt:2} = "Mb" ]; then disk="${disk:0:$disklen_alt}" disk=${disk/.*/} disk=`expr $disk / 1024` elif [ ${disk:$disklen_alt:2} = "MB" ]; then disk="${disk:0:$disklen_alt}" disk=${disk/.*/} disk=`expr $disk / 1024` fi echo $disk } SysInfo_usage() { cat < $OCF_RESKEY_pidfile SysInfoStats exit $OCF_SUCCESS } SysInfo_stop() { rm $OCF_RESKEY_pidfile exit $OCF_SUCCESS } SysInfo_monitor() { if [ -f $OCF_RESKEY_pidfile ]; then clone=`cat $OCF_RESKEY_pidfile` fi if [ x$clone = x ]; then rm $OCF_RESKEY_pidfile exit $OCF_NOT_RUNNING elif [ $clone = $OCF_RESKEY_clone ]; then SysInfoStats exit $OCF_SUCCESS elif [ x$OCF_RESKEY_CRM_meta_globally_unique = xtrue ] || [ x$OCF_RESKEY_CRM_meta_globally_unique = xTrue ] || [ x$OCF_RESKEY_CRM_meta_globally_unique = xyes ] || [ x$OCF_RESKEY_CRM_meta_globally_unique = xYes ]; then SysInfoStats exit $OCF_SUCCESS fi exit $OCF_NOT_RUNNING } SysInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then SysInfo_usage exit $OCF_ERR_ARGS fi -: ${OCF_RESKEY_pidfile:="$HA_RSCTMP/SysInfo-${OCF_RESOURCE_INSTANCE}"} -: ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then 
OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) SysInfo_start ;; stop) SysInfo_stop ;; monitor) SysInfo_monitor ;; validate-all) SysInfo_validate ;; usage|help) SysInfo_usage exit $OCF_SUCCESS ;; *) SysInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/VIPArip b/heartbeat/VIPArip index cd3ca4d7f..5fc7c94c9 100755 --- a/heartbeat/VIPArip +++ b/heartbeat/VIPArip @@ -1,302 +1,314 @@ #!/bin/sh # # License: GNU General Public License (GPL) # Support: users@clusterlabs.org # Author: Huang Zhen # Copyright (c) 2006 International Business Machines # # Virtual IP Address by RIP2 protocol. # This script manages IP alias in different subnet with quagga/ripd. # It can add an IP alias, or remove one. # # The quagga package should be installed to run this RA # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds an IP alias. # Surprisingly, the "stop" arg removes one. :-) # # OCF parameters are as below # OCF_RESKEY_ip The IP address in different subnet # OCF_RESKEY_nic The nic for broadcast the route information # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs RIPDCONF=$HA_RSCTMP/VIPArip-ripd.conf ZEBRA=/usr/sbin/zebra RIPD=/usr/sbin/ripd USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; +# Parameter defaults + +OCF_RESKEY_ip_default="" +OCF_RESKEY_nic_default="eth0" +OCF_RESKEY_zebra_binary_default="${ZEBRA}" +OCF_RESKEY_ripd_binary_default="${RIPD}" + +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} +: ${OCF_RESKEY_zebra_binary=${OCF_RESKEY_zebra_binary_default}} +: ${OCF_RESKEY_ripd_binary=${OCF_RESKEY_ripd_binary_default}} + ####################################################################### meta_data() { cat < 1.0 Virtual IP Address by RIP2 protocol. 
This script manages IP alias in different subnet with quagga/ripd. It can add an IP alias, or remove one. Manages a virtual IP address through RIP2 The IPv4 address in different subnet, for example "192.168.1.1". The IP address in different subnet - + The nic for broadcast the route information. The ripd uses this nic to broadcast the route information to others The nic for broadcast the route information - + Absolute path to the zebra binary. zebra binary - + Absolute path to the ripd binary. ripd binary - + END exit $OCF_SUCCESS } usage() { echo $USAGE >&2 } new_config_file() { echo new_config_file $1 $2 $3 cat >$RIPDCONF < $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF } add_ip() { echo add_ip $1 sed "s/ip_tag/ip_tag\naccess-list private permit $1\/32/g" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF } del_ip() { echo del_ip $1 sed "/$1/d" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF if $GREP "access-list private permit" $RIPDCONF>/dev/null then echo some other IP is running reload_config else stop_quagga echo remove $RIPDCONF rm $RIPDCONF fi } add_nic() { echo add_nic $1 if $GREP "network $1" $RIPDCONF >/dev/null then echo the nic is already in the config file else sed "s/nic_tag/nic_tag\n no passive-interface $1\n network $1\n distribute-list private out $1\n distribute-list private in $1/g" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF fi } reload_config() { echo reload_config echo $RIPDCONF: cat $RIPDCONF echo killall -SIGHUP ripd killall -SIGHUP ripd } start_quagga() { echo start_quagga echo $RIPDCONF: cat $RIPDCONF echo $ZEBRA -d $ZEBRA -d echo $RIPD -d -f $RIPDCONF $RIPD -d -f $RIPDCONF } stop_quagga() { echo stop_quagga echo $RIPDCONF: cat $RIPDCONF echo killall -SIGTERM ripd killall -SIGTERM ripd echo killall -SIGTERM zebra killall -SIGTERM zebra } start_rip_ip() { echo start_rip_ip check_params if [ x"$OCF_RESKEY_nic" = x ] then - echo OCF_RESKEY_nic is null, set to eth0 - OCF_RESKEY_nic="eth0" + echo OCF_RESKEY_nic is null, set to 
${OCF_RESKEY_nic_default} + OCF_RESKEY_nic="${OCF_RESKEY_nic_default}" fi status_rip_ip case $? in $OCF_SUCCESS) ocf_log info "already running" exit $OCF_SUCCESS ;; $OCF_NOT_RUNNING) ;; *) ocf_log info "state undefined, stopping first" stop_rip_ip ;; esac $IP2UTIL addr add $OCF_RESKEY_ip/32 dev lo if [ -f "$RIPDCONF" ] then # there is a config file, add new data(IP,nic,metric) # to the existing config file. add_ip $OCF_RESKEY_ip add_nic $OCF_RESKEY_nic set_metric 1 reload_config echo sleep 3 sleep 3 set_metric 3 reload_config else new_config_file $OCF_RESKEY_ip $OCF_RESKEY_nic 1 start_quagga echo sleep 3 sleep 3 set_metric 3 reload_config fi return $OCF_SUCCESS } stop_rip_ip() { echo stop_rip_ip check_params status_rip_ip if [ $? = $OCF_NOT_RUNNING ] then exit $OCF_SUCCESS fi $IP2UTIL addr del $OCF_RESKEY_ip dev lo echo sleep 2 sleep 2 del_ip $OCF_RESKEY_ip return $OCF_SUCCESS } status_rip_ip() { check_params if $IP2UTIL addr | $GREP $OCF_RESKEY_ip >/dev/null then if $GREP $OCF_RESKEY_ip $RIPDCONF >/dev/null then if pidof ripd >/dev/null then return $OCF_SUCCESS fi fi return $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi [ x != x"$OCF_RESKEY_zebra_binary" ] && ZEBRA=$OCF_RESKEY_zebra_binary [ x != x"$OCF_RESKEY_ripd_binary" ] && RIPD=$OCF_RESKEY_ripd_binary case $1 in start) start_rip_ip;; stop) stop_rip_ip;; status) status_rip_ip;; monitor) status_rip_ip;; validate-all) check_binary $IP2UTIL exit $OCF_SUCCESS;; meta-data) meta_data;; usage) usage; exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain index 1f7a250d7..eb41e3e22 100755 --- a/heartbeat/VirtualDomain +++ b/heartbeat/VirtualDomain @@ -1,1024 +1,1036 @@ #!/bin/sh # # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # Resource Agent for domains managed by the libvirt API. # Requires a running libvirt daemon (libvirtd). 
# # (c) 2008-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all} # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults +OCF_RESKEY_config_default="" +OCF_RESKEY_migration_transport_default="" OCF_RESKEY_migration_downtime_default=0 OCF_RESKEY_migration_speed_default=0 +OCF_RESKEY_migration_network_suffix_default="" OCF_RESKEY_force_stop_default=0 +OCF_RESKEY_monitor_scripts_default="" OCF_RESKEY_autoset_utilization_cpu_default="true" OCF_RESKEY_autoset_utilization_hv_memory_default="true" OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 )) OCF_RESKEY_CRM_meta_timeout_default=90000 OCF_RESKEY_save_config_on_stop_default=false OCF_RESKEY_sync_config_on_stop_default=false +OCF_RESKEY_snapshot_default="" OCF_RESKEY_backingfile_default="" OCF_RESKEY_stateless_default="false" OCF_RESKEY_copyindirs_default="" +OCF_RESKEY_shutdown_mode_default="" +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_migration_transport=${OCF_RESKEY_migration_transport_default}} : ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}} : ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}} +: ${OCF_RESKEY_migration_network_suffix=${OCF_RESKEY_migration_network_suffix_default}} : ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} +: ${OCF_RESKEY_monitor_scripts=${OCF_RESKEY_monitor_scripts_default}} : ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} : ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} : ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}} : ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}} : 
${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}} : ${OCF_RESKEY_sync_config_on_stop=${OCF_RESKEY_sync_config_on_stop_default}} +: ${OCF_RESKEY_snapshot=${OCF_RESKEY_snapshot_default}} : ${OCF_RESKEY_backingfile=${OCF_RESKEY_backingfile_default}} : ${OCF_RESKEY_stateless=${OCF_RESKEY_stateless_default}} : ${OCF_RESKEY_copyindirs=${OCF_RESKEY_copyindirs_default}} +: ${OCF_RESKEY_shutdown_mode=${OCF_RESKEY_shutdown_mode_default}} if ocf_is_true ${OCF_RESKEY_sync_config_on_stop}; then OCF_RESKEY_save_config_on_stop="true" fi ####################################################################### ## I'd very much suggest to make this RA use bash, ## and then use magic $SECONDS. ## But for now: NOW=$(date +%s) usage() { echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}" } VirtualDomain_meta_data() { cat < 1.1 Resource agent for a virtual domain (a.k.a. domU, virtual machine, virtual environment etc., depending on context) managed by libvirtd. Manages virtual domains through the libvirt virtualization framework Absolute path to the libvirt configuration file, for this virtual domain. Virtual domain configuration file - + Hypervisor URI to connect to. See the libvirt documentation for details on supported URI formats. The default is system dependent. Determine the system's default uri by running 'virsh --quiet uri'. Hypervisor URI Always forcefully shut down ("destroy") the domain on stop. The default behavior is to resort to a forceful shutdown only after a graceful shutdown attempt has failed. You should only set this to true if your virtual domain (or your virtualization backend) does not support graceful shutdown. Always force shutdown on stop Transport used to connect to the remote hypervisor while migrating. Please refer to the libvirt documentation for details on transports available. If this parameter is omitted, the resource will use libvirt's default transport to connect to the remote hypervisor. 
Remote hypervisor transport - + The username will be used in the remote libvirt remoteuri/migrateuri. No user will be given (which means root) in the username if omitted If remoteuri is set, migration_user will be ignored. Remote username for the remoteuri Define max downtime during live migration in milliseconds Live migration downtime Define live migration speed per resource in MiB/s Live migration speed Use a dedicated migration network. The migration URI is composed by adding this parameters value to the end of the node name. If the node name happens to be an FQDN (as opposed to an unqualified host name), insert the suffix immediately prior to the first period (.) in the FQDN. At the moment Qemu/KVM and Xen migration via a dedicated network is supported. Note: Be sure this composed host name is locally resolvable and the associated IP is reachable through the favored network. This suffix will be added to the remoteuri and migrateuri parameters. See also the migrate_options parameter below. Migration network host name suffix - + You can also specify here if the calculated migrate URI is unsuitable for your environment. If migrateuri is set then migration_network_suffix, migrateport and --migrateuri in migrate_options are effectively ignored. Use "%n" as the placeholder for the target node name. Please refer to the libvirt documentation for details on guest migration. Custom migrateuri for migration state transfer Extra virsh options for the guest live migration. You can also specify here --migrateuri if the calculated migrate URI is unsuitable for your environment. If --migrateuri is set then migration_network_suffix and migrateport are effectively ignored. Use "%n" as the placeholder for the target node name. Please refer to the libvirt documentation for details on guest migration. live migrate options To additionally monitor services within the virtual domain, add this parameter with a list of scripts to monitor. 
Note: when monitor scripts are used, the start and migrate_from operations will complete only when all monitor scripts have completed successfully. Be sure to set the timeout of these operations to accommodate this delay. space-separated list of monitor scripts - + If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it into the CPU utilization of the resource when the monitor is executed. Enable auto-setting the CPU utilization of the resource - + If set true, the agent will detect the number of *Max memory* from virsh, and put it into the hv_memory utilization of the resource when the monitor is executed. Enable auto-setting the hv_memory utilization of the resource - + This port will be used in the qemu migrateuri. If unset, the port will be a random highport. Port for migrateuri Use this URI as virsh connection URI to commuicate with a remote hypervisor. If remoteuri is set then migration_user and migration_network_suffix are effectively ignored. Use "%n" as the placeholder for the target node name. Please refer to the libvirt documentation for details on guest migration. Custom remoteuri to communicate with a remote hypervisor Changes to a running VM's config are normally lost on stop. This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter. Save running VM's config back to its config file Setting this automatically enables save_config_on_stop. When enabled this parameter instructs the RA to call csync2 -x to synchronize the file to all nodes. csync2 must be properly set up for this to work. Save running VM's config back to its config file Path to the snapshot directory where the virtual machine image will be stored. When this parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot directory when stopped. 
# Make sure utilization attribute $1 of this resource holds value $2.
#
# The current value is read through crm_resource and the attribute is only
# written when it differs. When the read fails without output and a second
# query reports "not connected", the update is skipped with a debug message.
set_util_attr() {
	local attr=$1 val=$2
	local current result

	if ! current=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) && [ -z "$current" ]; then
		# Query again, this time keeping stderr, to learn why the lookup failed.
		if crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -e "not connected" > /dev/null 2>&1; then
			ocf_log debug "Unable to set utilization attribute, cib is not available"
			return
		fi
	fi

	# Nothing to write when the stored value already matches.
	[ "$current" = "$val" ] && return 0

	if ! result=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1); then
		ocf_log warn "crm_resource failed to set utilization attribute $attr: $result"
	fi
}
# Ask libvirt for the state of $DOMAIN_NAME and translate it into an OCF
# return code.
#
# Globals read: DOMAIN_NAME, VIRSH_OPTIONS, __OCF_ACTION.
# Returns:
#   $OCF_SUCCESS      domain is running, paused, idle, blocked or in shutdown
#   $OCF_NOT_RUNNING  domain is shut off or not known to libvirt
#   $OCF_ERR_GENERIC  virsh printed something unexpected, or no state could
#                     be obtained after three tries during a stop operation
#   During monitor, the result of pid_status is returned instead whenever it
#   is anything other than $OCF_ERR_GENERIC.
VirtualDomain_status() {
	local try=0
	# NOTE(review): rc and status are assigned without "local"; callers seen
	# here only consume the return code -- confirm nothing reads them as globals.
	rc=$OCF_ERR_GENERIC
	status="no state"
	while [ "$status" = "no state" ]; do
		try=$(($try + 1 ))
		status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z')
		case "$status" in
			*"error:"*"domain not found"*|*"error:"*"failed to get domain"*|"shut off")
				# shut off: domain is defined, but not started, will not happen if
				# domain is created but not defined
				# "Domain not found" or "failed to get domain": domain is not defined
				# and thus not started. The trailing "*" lets the pattern match when
				# virsh appends detail after "domain not found", as force_stop
				# already allows for.
				ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)"
				rc=$OCF_NOT_RUNNING
				;;
			running|paused|idle|blocked|"in shutdown")
				# running: domain is currently actively consuming cycles
				# paused: domain is paused (suspended)
				# idle: domain is running but idle
				# blocked: synonym for idle used by legacy Xen versions
				# in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed.
				ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
				rc=$OCF_SUCCESS
				;;
			""|*"failed to "*"connect to the hypervisor"*|"no state")
				# Empty string may be returned when virsh does not
				# receive a reply from libvirtd.
				# "no state" may occur when the domain is currently
				# being migrated (on the migration target only), or
				# whenever virsh can't reliably obtain the domain
				# state.
				status="no state"
				if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then
					# During the stop operation, we want to bail out
					# quickly, so as to be able to force-stop (destroy)
					# the domain if necessary.
					ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out."
					return $OCF_ERR_GENERIC;
				elif [ "$__OCF_ACTION" = "monitor" ]; then
					pid_status
					rc=$?
					if [ $rc -ne $OCF_ERR_GENERIC ]; then
						# we've successfully determined the domains status outside of libvirt
						return $rc
					fi
				else
					# During all other actions, we just wait and try
					# again, relying on the CRM/LRM to time us out if
					# this takes too long.
					ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying."
				fi
				sleep 1
				;;
			*)
				# any other output is unexpected; status is no longer
				# "no state", so the loop ends and rc stays at its
				# last value.
				ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!"
				sleep 1
				;;
		esac
	done
	return $rc
}
# Start the virtual domain.
#
# Order of operation:
#   1. return success right away if the domain is already running;
#   2. install the systemd drop-ins when systemd is running;
#   3. if a snapshot directory is configured and holds a state file for this
#      domain, restore from it and return;
#   4. otherwise undefine any stale definition and either "create" the domain
#      from the config file (no backing file) or build the qcow2 overlay,
#      define and start it (backing file set);
#   5. wait until VirtualDomain_monitor succeeds.
#
# Every virsh call carries $VIRSH_OPTIONS so that restore/define act on the
# same hypervisor connection as create/start.
#
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when any step fails.
VirtualDomain_start() {
	local snapshotimage

	if VirtualDomain_status; then
		ocf_log info "Virtual domain $DOMAIN_NAME already running."
		return $OCF_SUCCESS
	fi

	# systemd drop-in to stop domain before libvirtd terminates services
	# during shutdown/reboot
	if systemd_is_running ; then
		systemd_drop_in "99-VirtualDomain-libvirt" "After" "libvirtd.service"
		systemd_drop_in "99-VirtualDomain-machines" "Wants" "virt-guest-shutdown.target"
		systemctl start virt-guest-shutdown.target
	fi

	snapshotimage="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state"
	if [ -n "$OCF_RESKEY_snapshot" ] && [ -f "$snapshotimage" ]; then
		if virsh $VIRSH_OPTIONS restore "$snapshotimage"; then
			# The state file is single-use: drop it once restored.
			rm -f "$snapshotimage"
			return $OCF_SUCCESS
		fi
		ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory."
		return $OCF_ERR_GENERIC
	fi

	# Make sure domain is undefined before creating.
	# The 'create' command guarantees that the domain will be
	# undefined on shutdown, but requires the domain to be undefined.
	# if a user defines the domain
	# outside of this agent, we have to ensure that the domain
	# is restored to an 'undefined' state before creating.
	verify_undefined

	if [ -z "${OCF_RESKEY_backingfile}" ]; then
		if ! virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config}; then
			ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
			return $OCF_ERR_GENERIC
		fi
	else
		if ocf_is_true "${OCF_RESKEY_stateless}" || [ ! -s "${OCF_RESKEY_config%%.*}.qcow2" ]; then
			# Create the Stateless image
			# NOTE(review): dirconfig is not read inside this function; kept
			# because it is a global other code may rely on -- confirm.
			dirconfig=`dirname ${OCF_RESKEY_config}`
			if ! qemu-img create -f qcow2 -b ${OCF_RESKEY_backingfile} ${OCF_RESKEY_config%%.*}.qcow2; then
				ocf_exit_reason "Failed qemu-img create ${DOMAIN_NAME} with backing file ${OCF_RESKEY_backingfile}."
				return $OCF_ERR_GENERIC
			fi

			if ! virsh $VIRSH_OPTIONS define ${OCF_RESKEY_config}; then
				ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}."
				return $OCF_ERR_GENERIC
			fi

			if [ -n "${OCF_RESKEY_copyindirs}" ]; then
				# Inject copyindirs directories and files
				if ! virt-copy-in -d ${DOMAIN_NAME} ${OCF_RESKEY_copyindirs} /; then
					ocf_exit_reason "Failed on virt-copy-in command ${DOMAIN_NAME}."
					return $OCF_ERR_GENERIC
				fi
			fi
		else
			if ! virsh $VIRSH_OPTIONS define ${OCF_RESKEY_config}; then
				ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}."
				return $OCF_ERR_GENERIC
			fi
		fi

		if ! virsh $VIRSH_OPTIONS start ${DOMAIN_NAME}; then
			ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
			return $OCF_ERR_GENERIC
		fi
	fi

	# Block until the domain (and any monitor scripts) report healthy; the
	# operation timeout bounds this loop.
	while ! VirtualDomain_monitor; do
		sleep 1
	done

	return $OCF_SUCCESS
}
# Stop the virtual domain.
#
# A running domain is shut down gracefully (or saved to the snapshot
# directory when one is configured) and then polled until it is gone or
# until five seconds before the operation timeout. If that does not work,
# or if force_stop is enabled, the domain is destroyed via force_stop.
#
# The shutdown mode is taken from OCF_RESKEY_shutdown_mode (the documented
# "shutdown_mode" parameter); OCF_RESKEY_CRM_shutdown_mode is still honoured
# as a fallback for setups that relied on the old variable name.
#
# Globals read: DOMAIN_NAME, VIRSH_OPTIONS, NOW, OCF_RESKEY_*.
# Returns $OCF_SUCCESS when the domain is stopped, otherwise the result of
# force_stop.
VirtualDomain_stop() {
	local status
	local shutdown_timeout
	local needshutdown=1
	local shutdown_mode="${OCF_RESKEY_shutdown_mode:-${OCF_RESKEY_CRM_shutdown_mode}}"
	local shutdown_opts=""

	VirtualDomain_status
	status=$?

	case $status in
		$OCF_SUCCESS)
			if ocf_is_true "$OCF_RESKEY_force_stop"; then
				# if force stop, don't bother attempting graceful shutdown.
				force_stop
				return $?
			fi

			ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}."

			if [ -n "$OCF_RESKEY_snapshot" ]; then
				# Saving the RAM state also stops the domain, so no
				# separate shutdown is needed when it succeeds.
				if virsh $VIRSH_OPTIONS save $DOMAIN_NAME "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state"; then
					needshutdown=0
				else
					ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop"
				fi
			fi

			# save config if needed
			if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
				save_config
			fi

			# issue the shutdown if save state didn't shutdown for us
			if [ $needshutdown -eq 1 ]; then
				# Issue a graceful shutdown request
				if [ -n "$shutdown_mode" ]; then
					shutdown_opts="--mode $shutdown_mode"
				fi
				virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} $shutdown_opts
			fi

			# NOW is expected to be set by the caller's initialisation;
			# fall back to the current time if it is not.
			: "${NOW:=$(date +%s)}"

			# The "shutdown_timeout" we use here is the operation
			# timeout specified in the CIB, minus 5 seconds
			shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 ))
			# Loop on status until we reach $shutdown_timeout
			while [ $NOW -lt $shutdown_timeout ]; do
				VirtualDomain_status
				status=$?
				case $status in
					$OCF_NOT_RUNNING)
						# This was a graceful shutdown.
						return $OCF_SUCCESS
						;;
					$OCF_SUCCESS)
						# Domain is still running, keep
						# waiting (until shutdown_timeout
						# expires)
						sleep 1
						;;
					*)
						# Something went wrong. Bail out and
						# resort to forced stop (destroy).
						break
						;;
				esac
				NOW=$(date +%s)
			done
			;;
		$OCF_NOT_RUNNING)
			ocf_log info "Domain $DOMAIN_NAME already stopped."
			return $OCF_SUCCESS
			;;
	esac

	# OK. Now if the above graceful shutdown hasn't worked, kill
	# off the domain with destroy. If that too does not work,
	# have the LRM time us out.
	force_stop
}
That is, turn # something like "qemu://foo:9999/system" into # "qemu+tcp://bar:9999/system" if [ -n "${OCF_RESKEY_remoteuri}" ]; then remoteuri=`echo "${OCF_RESKEY_remoteuri}" | sed "s/%n/$target_node/g"` else if [ -n "${OCF_RESKEY_migration_transport}" ]; then transport_suffix="+${OCF_RESKEY_migration_transport}" fi # append user defined suffix if virsh target should differ from cluster node name if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then # Hostname might be a FQDN target_node=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") fi # a remote user has been defined to connect to target_node if echo ${OCF_RESKEY_migration_user} | grep -q "^[a-z][-a-z0-9]*$" ; then target_node="${OCF_RESKEY_migration_user}@${target_node}" fi # Scared of that sed expression? So am I. :-) remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") fi # User defined migrateuri or do we make one? migrate_opts="$OCF_RESKEY_migrate_options" # migration_uri is directly set if [ -n "${OCF_RESKEY_migrateuri}" ]; then migrateuri=`echo "${OCF_RESKEY_migrateuri}" | sed "s/%n/$target_node/g"` # extract migrationuri from options elif echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then migrateuri=`echo "$migrate_opts" | sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g"` # auto generate else migrateuri=`mk_migrateuri` fi # remove --migrateuri from migration_opts migrate_opts=`echo "$migrate_opts" | sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\2/"` # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi # Live migration speed limit if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})." 
# Finish an incoming live migration on the target node: install the systemd
# drop-ins, wait for the domain to pass monitoring, log the success and,
# when save_config_on_stop is enabled, write the domain config back.
# Always returns $OCF_SUCCESS once monitoring has succeeded.
VirtualDomain_migrate_from() {
	# systemd drop-in to stop domain before libvirtd terminates services
	# during shutdown/reboot
	if systemd_is_running; then
		systemd_drop_in "99-VirtualDomain-libvirt" "After" "libvirtd.service"
		systemd_drop_in "99-VirtualDomain-machines" "Wants" "virt-guest-shutdown.target"
		systemctl start virt-guest-shutdown.target
	fi

	# Poll once per second until the migrated domain shows up as healthy.
	until VirtualDomain_monitor; do
		sleep 1
	done

	ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."

	# save config if needed
	ocf_is_true "$OCF_RESKEY_save_config_on_stop" && save_config

	return $OCF_SUCCESS
}
# Validate the resource configuration.
#
# Returns:
#   $OCF_ERR_CONFIGURED  force_stop combined with snapshot, a non-decimal
#                        migration_speed / migration_downtime, or stateless
#                        without a backing file
#   $OCF_ERR_INSTALLED   config file missing/unreadable (including an empty
#                        path) or no domain name could be determined
#   $OCF_SUCCESS         otherwise
#
# All tested values are quoted so that empty or multi-word values are
# evaluated as intended rather than changing the shape of the test.
VirtualDomain_validate_all() {
	if ocf_is_true "$OCF_RESKEY_force_stop" && [ -n "$OCF_RESKEY_snapshot" ]; then
		ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together."
		return $OCF_ERR_CONFIGURED
	fi

	# check if we can read the config file (otherwise we're unable to
	# deduce $DOMAIN_NAME from it, see below)
	if [ ! -r "$OCF_RESKEY_config" ]; then
		if ocf_is_probe; then
			ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe."
		elif [ "$__OCF_ACTION" = "stop" ]; then
			ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped."
		else
			ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or not readable."
		fi
		return $OCF_ERR_INSTALLED
	fi

	if [ -z "$DOMAIN_NAME" ]; then
		ocf_exit_reason "Unable to determine domain name."
		return $OCF_ERR_INSTALLED
	fi

	# Check if csync2 is available when config tells us we might need it.
	if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then
		check_binary csync2
	fi

	# Check if migration_speed is a decimal value
	if ! ocf_is_decimal "${OCF_RESKEY_migration_speed}"; then
		ocf_exit_reason "migration_speed has to be a decimal value"
		return $OCF_ERR_CONFIGURED
	fi

	# Check if migration_downtime is a decimal value
	if ! ocf_is_decimal "${OCF_RESKEY_migration_downtime}"; then
		ocf_exit_reason "migration_downtime has to be a decimal value"
		return $OCF_ERR_CONFIGURED
	fi

	if ocf_is_true "${OCF_RESKEY_stateless}" && [ -z "${OCF_RESKEY_backingfile}" ]; then
		ocf_exit_reason "Stateless functionality can't be achieved without a backing file."
		return $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}
# A not reliable IP address checking function, which only picks up those
# _obvious_ violations...
#
# It accepts IPv4 addresses in dotted quad notation, for example
# "192.168.1.1", with every field at most 254.
#
# Argument: $1 - candidate address
# Returns:  0 when $1 looks like a dotted quad, 1 otherwise.
#
# The field comparison is returned directly: wrapping it in an "if" without
# an "else" would report success whenever the comparison fails.
CheckIP() {
	local ip="$1"

	case $ip in
	*[!0-9.]*)
		# got invalid char
		return 1
		;;
	.*|*.)
		# begin or end by ".", which is invalid
		return 1
		;;
	*..*)
		# consecutive ".", which is invalid
		return 1
		;;
	*.*.*.*.*)
		# four decimal dots, which is too many
		return 1
		;;
	*.*.*.*)
		# exactly three decimal dots, candidate, evaluate each field
		local IFS=.
		set -- $ip
		[ "$1" -le 254 ] 2>/dev/null &&
			[ "$2" -le 254 ] 2>/dev/null &&
			[ "$3" -le 254 ] 2>/dev/null &&
			[ "$4" -le 254 ] 2>/dev/null
		return $?
		;;
	*)
		# less than three decimal dots
		return 1
		;;
	esac
}
# Manages Xen virtual machine instances by # mapping cluster resource start and stop, # to Xen create and shutdown, respectively. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_xmfile # Absolute path to the Xen control file, # for this virtual machine. # OCF_RESKEY_allow_mem_management # Change memory usage on start/stop/migration # of virtual machine # OCF_RESKEY_reserved_Dom0_memory # minimum memory reserved for domain 0 # OCF_RESKEY_monitor_scripts # scripts to monitor services within the # virtual domain ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_xmfile_default="/etc/xen/vm/MyDomU" +OCF_RESKEY_name_default="" +OCF_RESKEY_shutdown_timeout_default="" +OCF_RESKEY_shutdown_acpi_default="0" +OCF_RESKEY_allow_mem_management_default="0" +OCF_RESKEY_node_ip_attribute_default="" +OCF_RESKEY_reserved_Dom0_memory_default="512" +OCF_RESKEY_autoset_utilization_cpu_default="false" +OCF_RESKEY_autoset_utilization_hv_memory_default="false" +OCF_RESKEY_monitor_scripts_default="" + +: ${OCF_RESKEY_xmfile=${OCF_RESKEY_xmfile_default}} +: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +: ${OCF_RESKEY_shutdown_timeout=${OCF_RESKEY_shutdown_timeout_default}} +: ${OCF_RESKEY_shutdown_acpi=${OCF_RESKEY_shutdown_acpi_default}} +: ${OCF_RESKEY_allow_mem_management=${OCF_RESKEY_allow_mem_management_default}} +: ${OCF_RESKEY_node_ip_attribute=${OCF_RESKEY_node_ip_attribute_default}} +: ${OCF_RESKEY_reserved_Dom0_memory=${OCF_RESKEY_reserved_Dom0_memory_default}} +: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} +: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} +: ${OCF_RESKEY_monitor_scripts=${OCF_RESKEY_monitor_scripts_default}} + 
####################################################################### usage() { cat <<-END usage: $0 {start|stop|status|monitor|meta-data|validate-all} END } -: ${OCF_RESKEY_xmfile=/etc/xen/vm/MyDomU} -: ${OCF_RESKEY_shutdown_acpi=0} -: ${OCF_RESKEY_allow_mem_management=0} -: ${OCF_RESKEY_reserved_Dom0_memory=512} -: ${OCF_RESKEY_autoset_utilization_cpu="false"} -: ${OCF_RESKEY_autoset_utilization_hv_memory="false"} - # prefer xl xentool=$(which xl 2> /dev/null || which xm) meta_data() { cat < 1.0 Resource Agent for the Xen Hypervisor. Manages Xen virtual machine instances by mapping cluster resource start and stop, to Xen create and shutdown, respectively. A note on names We will try to extract the name from the config file (the xmfile attribute). If you use a simple assignment statement, then you should be fine. Otherwise, if there's some python acrobacy involved such as dynamically assigning names depending on other variables, and we will try to detect this, then please set the name attribute. You should also do that if there is any chance of a pathological situation where a config file might be missing, for example if it resides on a shared storage. If all fails, we finally fall back to the instance id to preserve backward compatibility. Para-virtualized guests can also be migrated by enabling the meta_attribute allow-migrate. Manages Xen unprivileged domains (DomUs) Absolute path to the Xen control file, for this virtual machine. Xen control file - + Name of the virtual machine. Xen DomU name - + The Xen agent will first try an orderly shutdown using xl shutdown. Should this not succeed within this timeout, the agent will escalate to xl destroy, forcibly killing the node. If this is not set, it will default to two-third of the stop action timeout. Setting this value to 0 forces an immediate destroy. Shutdown escalation timeout - + Handle shutdown by simulating an ACPI power button event. 
Enable this to allow graceful shutdown for HVM domains without installed PV drivers. Simulate power button event on shutdown - + This parameter enables dynamic adjustment of memory for start and stop actions used for Dom0 and the DomUs. The default is to not adjust memory dynamically. Use dynamic memory management - + In case of a live migration, the system will default to using the IP address associated with the hostname via DNS or /etc/hosts. This parameter allows you to specify a node attribute that will be queried instead for the target node, overriding the IP address. This allows you to use a dedicated network for live migration traffic to a specific node. Warning: make very sure the IP address does point to the right node. Or else the live migration will end up somewhere else, greatly confusing the cluster and causing havoc. Node attribute containing target IP address - + In case memory management is used, this parameter defines the minimum amount of memory to be reserved for the dom0. The default minimum memory is 512MB. Minimum Dom0 memory - + If set true, the agent will detect the number of domain's vCPUs from Xen, and put it into the CPU utilization of the resource when the monitor is executed. Before enabling make sure node utilization is also set (using NodeUtilization agent or manually) or the resource might not be able to start anywhere. Enable auto-setting the CPU utilization of the resource - + If set true, the agent will detect the number of memory from Xen, and put it into the hv_memory utilization of the resource when the monitor is executed. Before enabling make sure node utilization is also set (using NodeUtilization agent or manually) or the resource might not be able to start anywhere. Enable auto-setting the hv_memory utilization of the resource - + To additionally monitor services within the unprivileged domain, add this parameter with a list of scripts to monitor. 
list of space separated monitor scripts - + END } Xen_Status() { if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool list $1 >/dev/null 2>&1 if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi if have_binary xen-list; then xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi STATUS=`$xentool list --long $1 2>/dev/null | grep status 2>/dev/null` if [ "X${STATUS}" != "X" ]; then # we have Xen 3.0.4 or higher STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi else # we have Xen 3.0.3 or lower STATUS=`$xentool list --long $1 2>/dev/null | grep state 2>/dev/null` echo "${STATUS}" | grep -qs "[-r][-b][-p]---" if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi } # If the guest is rebooting, it may completely disappear from the # list of defined guests, thus xl/xen-list would return with not # running; apparently, this period lasts only for a second or # two # If a status returns not running, then test status # again for 5 times (perhaps it'll show up) Xen_Status_with_Retry() { local rc cnt=5 Xen_Status $1 rc=$? while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do case "$__OCF_ACTION" in stop) ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..." ;; monitor) ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..." ;; *) : not reachable ;; esac sleep 1 Xen_Status $1 rc=$? cnt=$((cnt-1)) done return $rc } set_util_attr() { local attr=$1 val=$2 local cval outp cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) if [ $? 
-ne 0 ] && [ -z "$cval" ]; then if crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -q "not connected"; then ocf_log debug "Unable to get utilization attribute $attr: cib is not available" return fi fi if [ "$cval" != "$val" ]; then outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) || \ ocf_log warn "Unable to set utilization attribute $attr: $outp" fi } Xen_Update_Utilization() { local dom_status dom_cpu dom_mem dom_status=$($xentool list ${DOMAIN_NAME} | awk 'NR==2 {print $4, $3}') if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then dom_cpu=${dom_status% *} test -n "$dom_cpu" && set_util_attr cpu $dom_cpu fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then dom_mem=${dom_status#* } test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" fi } Xen_Adjust_Memory() { if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then CNTNEW=$1 RUNNING=`Xen_List_running` RUNCNT=`Xen_Count_running` MAXMEM=`Xen_Total_Memory` if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then RUNCNT=1 fi #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} for DOM in ${RUNNING}; do $xentool mem-set ${DOM} ${NEWMEM} done ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" fi } Xen_List_all() { $xentool list | grep -v -e "Name" -e "Domain-0" | awk '{print $1}' } Xen_List_running() { ALL_DOMS=`Xen_List_all` for DOM in ${ALL_DOMS}; do if Xen_Status $DOM; then echo "${DOM} " fi done } Xen_Count_running() { Xen_List_running | wc -w } Xen_Monitor() { if ocf_is_probe; then Xen_Status ${DOMAIN_NAME} else Xen_Status_with_Retry ${DOMAIN_NAME} fi if [ $? 
-eq ${OCF_NOT_RUNNING} ]; then ocf_is_probe || ocf_log err "Xen domain $DOMAIN_NAME stopped" return ${OCF_NOT_RUNNING} fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu" || \ ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory" then Xen_Update_Utilization fi if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then return ${OCF_SUCCESS} fi for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do $SCRIPT if [ $? -ne 0 ]; then return ${OCF_ERR_GENERIC} fi done return ${OCF_SUCCESS} } Xen_Total_Memory() { $xentool info | grep "^total_memory" | awk '{print $3}' } Xen_Start() { if Xen_Status ${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi if [ ! -f "${OCF_RESKEY_xmfile}" ]; then ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." return $OCF_ERR_INSTALLED fi if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then Xen_Adjust_Memory 1 ocf_log info "New memory for virtual domains: ${NEWMEM}" sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi # the latest xl management tool is squeamish about some # characters in a name (the vm name is xen-f): # /etc/xen/vm/xen-f:15: config parsing error near `xen': # syntax error, unexpected IDENT, expecting STRING or NUMBER # or '[' # /etc/xen/vm/xen-f:15: config parsing error near `-f': lexical error # # the older xm management tool cannot digest quotes (see # https://developerbugs.linuxfoundation.org/show_bug.cgi?id=2671) # # hence the following if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\" else $xentool create ${OCF_RESKEY_xmfile} name="$DOMAIN_NAME" fi rc=$? 
if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC else if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi fi while sleep 1; do Xen_Monitor && return $OCF_SUCCESS done } xen_domain_stop() { local dom=$1 local timeout if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then timeout=$OCF_RESKEY_shutdown_timeout elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=60 fi if [ "$timeout" -gt 0 ]; then ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then $xentool trigger $dom power else $xentool shutdown $dom fi while Xen_Status $dom && [ "$timeout" -gt 0 ]; do ocf_log debug "$dom still not stopped. Waiting..." timeout=$((timeout-1)) sleep 1 done fi if [ "$timeout" -eq 0 ]; then while Xen_Status $dom; do ocf_log warn "Xen domain $dom will be destroyed!" $xenkill $dom sleep 1 done # Note: This does not give up. stop isn't allowed to to fail. # If $xentool destroy fails, stop will eventually timeout. # This is the correct behaviour. fi ocf_log info "Xen domain $dom stopped." } Xen_Stop() { local vm if Xen_Status_with_Retry ${DOMAIN_NAME}; then vm=${DOMAIN_NAME} elif Xen_Status migrating-${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME is migrating" vm="migrating-${DOMAIN_NAME}" else ocf_log info "Xen domain $DOMAIN_NAME already stopped." fi if [ "$vm" ]; then xen_domain_stop $vm else # It is supposed to be gone, but there have been situations where # $xentool list / xen-list showed it as stopped but it was still # instantiated. 
Nuke it once more to make sure: $xenkill ${DOMAIN_NAME} fi Xen_Adjust_Memory 0 return $OCF_SUCCESS } Xen_Migrate_To() { target_node="$OCF_RESKEY_CRM_meta_migrate_target" target_attr="$OCF_RESKEY_node_ip_attribute" target_addr="$target_node" if Xen_Status ${DOMAIN_NAME}; then ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node" if [ -n "$target_attr" ]; then nodevalue=`crm_attribute --type nodes --node $target_node -n $target_attr -G -q` if [ -n "${nodevalue}" -a "${nodevalue}" != "(null)" ]; then target_addr="$nodevalue" ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" fi fi if expr "x$xentool" : "x.*xm" >/dev/null; then $xentool migrate --live $DOMAIN_NAME $target_addr else $xentool migrate $DOMAIN_NAME $target_addr fi rc=$? if [ $rc -ne 0 ]; then ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc" return $OCF_ERR_GENERIC else Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded." return $OCF_SUCCESS fi else ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } Xen_Migrate_From() { if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for status to stabilize # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=10 # should be plenty fi while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" timeout=$((timeout-1)) sleep 1 done if Xen_Status ${DOMAIN_NAME}; then Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: Active locally, migration successful" return $OCF_SUCCESS else ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!" 
return $OCF_ERR_GENERIC fi } Xen_Validate_All() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # the name business: # # 1. use the name attribute, or # 2. find the name in the config file (if it exists) and use that # unless it contains funny characters such as '%' or space, or # 3. use the OCF_RESOURCE_INSTANCE if [ x"${OCF_RESKEY_name}" != x ]; then DOMAIN_NAME="${OCF_RESKEY_name}" else if [ -f "${OCF_RESKEY_xmfile}" ]; then DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' ${OCF_RESKEY_xmfile} | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"` if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then DOMAIN_NAME="" fi fi DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}} fi for binary in sed awk; do check_binary $binary done if have_binary xen-destroy ; then xenkill="xen-destroy" else xenkill="$xentool destroy" fi if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || { ocf_log err "shutdown_timeout must be a number" exit $OCF_ERR_CONFIGURED } fi case $1 in start) Xen_Start ;; stop) Xen_Stop ;; migrate_to) Xen_Migrate_To ;; migrate_from) Xen_Migrate_From ;; monitor) Xen_Monitor ;; status) Xen_Status ${DOMAIN_NAME} ;; validate-all) Xen_Validate_All ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/Xinetd b/heartbeat/Xinetd index 4ab44c93f..e4cef5a1f 100755 --- a/heartbeat/Xinetd +++ b/heartbeat/Xinetd @@ -1,250 +1,256 @@ #!/bin/sh # # Startup/shutdown script for services managed by xinetd. # # Copyright (C) 2003 Charlie Brooks # Copyright (C) 2011 Ulrich Windl # # WARNING: Tested ONLY on SLES11 SP1 at this time. 
# # Author: Charlie Brooks # Description: given parameters of a service name and start|stop|status, # will enable, disable or report on a specified xinetd service # Config: all services must have a descriptor file in /etc/xinetd.d # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # OCF parameters are as below: # OCF_RESKEY_service ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_service_default="" + +: ${OCF_RESKEY_service=${OCF_RESKEY_service_default}} + service=$OCF_RESKEY_service SVCDEF=/etc/xinetd.d/$service ####################################################################### meta_data() { cat < 1.0 Resource script for Xinetd. It starts/stops services managed by xinetd by enabling or disabling them in the configuration file. The xinetd daemon itself must be running: we are not going to start or stop it ourselves. All services should have a line saying either "disable=yes" or "disable=no". The script just changes those settings before reloading xinetd. Important: in case the services managed by the cluster are the only ones enabled, you should specify the -stayalive option for xinetd or it will exit on Heartbeat stop. Alternatively, you may enable some internal service such as echo. Manages a service of Xinetd The name of the service managed by xinetd. service name - + END } get_xinetd_pid() { ps -e -o pid,comm | $AWK '$2 == "xinetd" { print $1 }' } # force xinetd to reload the service descriptions hup_inetd () { # don't rely on the pid file, but lookup xinetd in the list of # processes local pid pid=`get_xinetd_pid` if [ "$pid" ]; then if kill -s HUP $pid; then ocf_log info "asked xinetd to reload by sending SIGHUP to process $pid!" else ocf_exit_reason "could not send SIGHUP to process $pid!" exit $OCF_ERR_GENERIC fi else ocf_exit_reason "xinetd process not found!" 
exit $OCF_ERR_GENERIC fi } # check "disable = X", printing X check_service() { ocf_log "info" "checking \"disable\" in $1" local result=$(sed -nre 's/^[ ]*disable[ ]*=[ ]*([^ ]+)[# ]*/\1/p' $1) echo "$result" } # change "disable = X" to desired value change_service() { ocf_log "info" "setting \"disable = $1\" in $2" if ! sed -i -re 's/^([ ]*disable[ ]*=[ ]*)([^ ]+)([# ]*)/\1'"$1"'\3/' $2 then ocf_log "err" "could not edit $2" return 1 fi return 0 } xup_status () { local disabled="$(check_service $SVCDEF)" if [ "${disabled:=no}" = no ]; then echo running return $OCF_SUCCESS elif [ "$disabled" = yes ]; then echo stopped return $OCF_NOT_RUNNING else echo unknown return $OCF_ERR_CONFIGURED fi } xup_start () { if [ "running" = "`xup_status`" ]; then ocf_log info "service $service already started" exit $OCF_SUCCESS fi ocf_log "info" "enabling in $SVCDEF" if change_service "no" $SVCDEF; then hup_inetd fi } xup_stop () { if [ "stopped" = "`xup_status`" ]; then ocf_log info "service $service already stopped" exit $OCF_SUCCESS fi ocf_log "info" "disabling in $SVCDEF" if change_service "yes" $SVCDEF; then hup_inetd fi } xup_usage () { echo "Usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data}" return 0 } xup_validate_all () { if [ ! 
-f "$SVCDEF" ]; then ocf_exit_reason "service $service missing $SVCDEF" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then xup_usage exit $OCF_ERR_ARGS fi # These operations do not require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) xup_usage exit $OCF_SUCCESS ;; esac if [ -z "$OCF_RESKEY_service" ]; then ocf_exit_reason "please define \"service\" parameter" if [ "$1" = "start" ]; then exit $OCF_ERR_CONFIGURED else exit $OCF_NOT_RUNNING fi fi # Is xinetd running at all if [ -z "`get_xinetd_pid`" ]; then case "$1" in stop) exit $OCF_SUCCESS;; start) ocf_exit_reason "xinetd not running, we manage just xinetd services, not the daemon itself" exit $OCF_ERR_INSTALLED ;; status|monitor) if ocf_is_probe; then exit $OCF_NOT_RUNNING else ocf_exit_reason "xinetd stopped" exit $OCF_ERR_GENERIC fi ;; esac fi # Make sure the OCF_RESKEY_service is a valid xinetd service name if [ ! -f $SVCDEF ]; then ocf_exit_reason "service definition $SVCDEF not found!" if [ "$1" = "start" ]; then exit $OCF_ERR_INSTALLED else exit $OCF_NOT_RUNNING fi fi # See how we were called. case "$1" in start) xup_start ;; stop) xup_stop ;; restart) $0 stop $0 start ;; status) xup_status ;; monitor) xup_status > /dev/null ;; validate-all) xup_validate_all ;; *) xup_usage exit $OCF_ERR_UNIMPLEMENTED esac exit $? diff --git a/heartbeat/ZFS b/heartbeat/ZFS index 42f4b278a..f81355aaa 100755 --- a/heartbeat/ZFS +++ b/heartbeat/ZFS @@ -1,199 +1,203 @@ #!/bin/sh # # License: GNU General Public License (GPL) # Support: zfs@lists.illumos.org # Written by: Saso Kiselkov # # This script manages ZFS pools # It can import a ZFS pool or export it # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg imports a ZFS pool. # The "stop" arg exports it. 
# # OCF parameters are as follows # OCF_RESKEY_pool - the pool to import/export # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults +OCF_RESKEY_pool_default="" +OCF_RESKEY_importargs_default="" OCF_RESKEY_importforce_default=true +: ${OCF_RESKEY_pool=${OCF_RESKEY_pool_default}} +: ${OCF_RESKEY_importargs=${OCF_RESKEY_importargs_default}} : ${OCF_RESKEY_importforce=${OCF_RESKEY_importforce_default}} USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ####################################################################### meta_data() { cat < 1.0 This script manages ZFS pools It can import a ZFS pool or export it Manages ZFS pools The name of the ZFS pool to manage, e.g. "tank". ZFS pool name - + Arguments to zpool import, e.g. "-d /dev/disk/by-id". Import arguments - + zpool import is given the -f option. Import is forced END exit $OCF_SUCCESS } zpool_is_imported () { zpool list -H "$OCF_RESKEY_pool" > /dev/null } # Forcibly imports a ZFS pool, mounting all of its auto-mounted filesystems # (as configured in the 'mountpoint' and 'canmount' properties) # If the pool is already imported, no operation is taken. zpool_import () { if ! zpool_is_imported; then ocf_log debug "${OCF_RESKEY_pool}:starting import" # The meanings of the options to import are as follows: # -f : import even if the pool is marked as imported to another # system - the system may have failed and not exported it # cleanly. # -o cachefile=none : the import should be temporary, so do not # cache it persistently (across machine reboots). We want # the CRM to explicitly control imports of this pool. 
if ocf_is_true "${OCF_RESKEY_importforce}"; then FORCE=-f else FORCE="" fi if zpool import $FORCE $OCF_RESKEY_importargs -o cachefile=none "$OCF_RESKEY_pool" ; then ocf_log debug "${OCF_RESKEY_pool}:import successful" return $OCF_SUCCESS else ocf_log debug "${OCF_RESKEY_pool}:import failed" return $OCF_ERR_GENERIC fi fi } # Forcibly exports a ZFS pool, unmounting all of its filesystems in the process # If the pool is not imported, no operation is taken. zpool_export () { if zpool_is_imported; then ocf_log debug "${OCF_RESKEY_pool}:starting export" # -f : force the export, even if we have mounted filesystems # Please note that this may fail with a "busy" error if there are # other kernel subsystems accessing the pool (e.g. SCSI targets). # Always make sure the pool export is last in your failover logic. if zpool export -f "$OCF_RESKEY_pool" ; then ocf_log debug "${OCF_RESKEY_pool}:export successful" return $OCF_SUCCESS else ocf_log debug "${OCF_RESKEY_pool}:export failed" return $OCF_ERR_GENERIC fi fi } # Monitors the health of a ZFS pool resource. Please note that this only # checks whether the pool is imported and functional, not whether it has # any degraded devices (use monitoring systems such as Zabbix for that). zpool_monitor () { # If the pool is not imported, then we can't monitor its health if ! 
zpool_is_imported; then return $OCF_NOT_RUNNING fi # Check the pool status # Since version 0.7.10 status can be obtained without locks # https://github.com/zfsonlinux/zfs/pull/7563 if [ -f /proc/spl/kstat/zfs/$OCF_RESKEY_pool/state ] ; then HEALTH=$( /dev/null; then return $OCF_ERR_INSTALLED fi # If the pool is imported, then it is obviously valid if zpool_is_imported; then return $OCF_SUCCESS fi # Check that the pool can be imported if zpool import $OCF_RESKEY_importargs | grep 'pool:' | grep "\\<$OCF_RESKEY_pool\\>" > /dev/null; then return $OCF_SUCCESS else return $OCF_ERR_CONFIGURED fi } usage () { echo "$USAGE" >&2 return $1 } if [ $# -ne 1 ]; then usage $OCF_ERR_ARGS fi case $1 in meta-data) meta_data;; start) zpool_import;; stop) zpool_export;; status|monitor) zpool_monitor;; validate-all) zpool_validate;; usage) usage $OCF_SUCCESS;; *) usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? diff --git a/heartbeat/aliyun-vpc-move-ip b/heartbeat/aliyun-vpc-move-ip index 3091a6d96..a57bbfe34 100755 --- a/heartbeat/aliyun-vpc-move-ip +++ b/heartbeat/aliyun-vpc-move-ip @@ -1,298 +1,311 @@ #!/bin/sh # # OCF resource agent to move an IP address within a VPC in the Aliyun # Based on code of Markus Guertler (GitHub AWS-VPC-move-IP) # Based on code of Adam Gandelman (GitHub ec2-resource-agents/elasticip) # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_address_default="" +OCF_RESKEY_routing_table_default="" +OCF_RESKEY_interface_default="eth0" +OCF_RESKEY_profile_default="default" + +: ${OCF_RESKEY_address=${OCF_RESKEY_address_default}} +: ${OCF_RESKEY_routing_table=${OCF_RESKEY_routing_table_default}} +: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}} +: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}} + ####################################################################### # aliyuncli doesnt work without HOME parameter export HOME="/root" USAGE="usage: $0 {start|stop|status|meta-data}"; ############################################################################### ############################################################################### # # Functions # ############################################################################### ip_get_and_configure() { ocf_log debug "function: ip_get_and_configure" ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" if [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then if [ -n "$ROUTE_TO_INSTANCE" ]; then ip_drop fi cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text" ocf_log debug "executing command: $cmd" $cmd rc=$? while [ $rc -ne 0 ]; do sleep 1 cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text" ocf_log debug "executing command: $cmd" $cmd rc=$? done wait_for_started fi # Reconfigure the local ip address ip addr add "${OCF_RESKEY_address}/32" dev $OCF_RESKEY_interface rc=$? 
if [ $rc -ne 0 ]; then ocf_log err "command failed, rc: $rc" return $OCF_ERR_GENERIC fi ocf_log debug "IP added" return $OCF_SUCCESS } ip_drop() { ocf_log debug "function: ip_drop" cmd="ip addr delete ${OCF_RESKEY_address}/32 dev $OCF_RESKEY_interface" ocf_log debug "executing command: $cmd" $cmd rc=$? if [ $rc -ne 0 ] && [ $rc -ne 2 ]; then ocf_log err "command failed, rc $rc" return $OCF_ERR_GENERIC fi cmd="aliyuncli vpc DeleteRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ROUTE_TO_INSTANCE --output text" ocf_log debug "executing command: $cmd" $cmd if [ $? -ne 0 ]; then ocf_log err "command failed, rc: $rc" return $OCF_ERR_GENERIC fi wait_for_deleted ocf_log debug "IP dropped" return $OCF_SUCCESS } wait_for_started() { cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" ocf_log debug "executing command: $cmd" ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_address | awk '{ print $3 }')" while [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; do sleep 3 cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" ocf_log debug "executing command: $cmd" ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_address | awk '{ print $3 }')" done } wait_for_deleted() { ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" while [ ! -z "$ROUTE_TO_INSTANCE" ]; do sleep 1 cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" ocf_log debug "executing command: $cmd" ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" done } ecs_ip_metadata() { cat < 2.0 Resource Agent to move IP addresses within a VPC of the Aliyun Webservices ECS by changing an entry in an specific routing table Move IP within a VPC of the Aliyun ECS VPC private IP address vpc ip - + Name of the routing table, where the route for the IP address should be changed, i.e. vtb-... 
routing table name - + Name of the network interface, i.e. eth0 network interface name - + Valid Aliyun CLI profile name (see 'aliyuncli configure'). See https://www.alibabacloud.com/help/doc-detail/43039.htm?spm=a2c63.p38356.b99.16.38a914abRZtOU3 for more information about aliyuncli. profile name - + END } ecs_ip_validate() { ocf_log debug "function: validate" # IP address if [ -z "$OCF_RESKEY_address" ]; then ocf_log err "IP address parameter not set $OCF_RESKEY_ADDRESS!" exit $OCF_ERR_CONFIGURED fi # Network Interface if [ -z "$OCF_RESKEY_interface" ]; then ocf_log err "Network interface parameter not set $OCF_RESKEY_INTERFACE!" exit $OCF_ERR_CONFIGURED fi # Routing Table if [ -z "$OCF_RESKEY_routing_table" ]; then ocf_log err "Routing table parameter not set $OCF_RESKEY_ROUTING_TABLE!" exit $OCF_ERR_CONFIGURED fi if [ -z "${ECS_INSTANCE_ID}" ]; then ocf_exit_reason "Instance ID not found. Is this a ECS instance?" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } ecs_ip_start() { ocf_log info "ECS: Moving IP address $OCF_RESKEY_address to this host by adjusting routing table $OCF_RESKEY_routing_table" ecs_ip_monitor if [ $? = $OCF_SUCCESS ]; then ocf_log info "ECS: $OCF_RESKEY_address already started" return $OCF_SUCCESS fi ocf_log info "ECS: Adjusting routing table and locally configuring IP address" ip_get_and_configure rc=$? if [ $rc -ne 0 ]; then ocf_log err "Received $rc from 'aliyun cli'" return $OCF_ERR_GENERIC fi ecs_ip_monitor rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_log err "IP address couldn't be configured on this host (IP: $OCF_RESKEY_address, Interface: $OCF_RESKEY_interface)" return $rc fi return $OCF_SUCCESS } ecs_ip_stop() { ocf_log info "ECS: Bringing down IP address $OCF_RESKEY_address" ecs_ip_monitor if [ $? = $OCF_NOT_RUNNING ]; then ocf_log info "ECS: Address $OCF_RESKEY_address already down" return $OCF_SUCCESS fi ip_drop if [ $? 
-ne $OCF_SUCCESS ]; then ocf_log err "ECS: Couldn't drop IP address $OCF_RESKEY_address on interface $OCF_RESKEY_interface." return $OCF_ERR_GENERIC fi ecs_ip_monitor if [ $? = $OCF_NOT_RUNNING ]; then ocf_log info "ECS: Successfully brought down $OCF_RESKEY_address" return $OCF_SUCCESS fi ocf_log err "ECS: Couldn't bring down IP address $OCF_RESKEY_address on interface $OCF_RESKEY_interface." return $OCF_ERR_GENERIC } ecs_ip_monitor() { ocf_log debug "function: ecsip_monitor: check routing table" cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" ocf_log debug "executing command: $cmd" ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" if [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then ocf_log debug "not routed to this instance ($ECS_INSTANCE_ID) but to instance $ROUTE_TO_INSTANCE" return $OCF_NOT_RUNNING fi cmd="ping -W 1 -c 1 $OCF_RESKEY_address" ocf_log debug "executing command: $cmd" $cmd > /dev/null if [ $? -ne 0 ]; then ocf_log debug "IP $OCF_RESKEY_address not locally reachable via ping on this system" return $OCF_NOT_RUNNING fi ocf_log debug "routed in VPC and locally reachable" return $OCF_SUCCESS } ############################################################################### # # MAIN # ############################################################################### case $__OCF_ACTION in meta-data) ecs_ip_metadata exit $OCF_SUCCESS;; validate-all) ecs_ip_validate;; esac ECS_INSTANCE_ID="$(curl -s http://100.100.100.200/latest/meta-data/instance-id)" case $__OCF_ACTION in start) ecs_ip_validate ecs_ip_start;; stop) ecs_ip_stop;; monitor) ecs_ip_monitor;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/anything b/heartbeat/anything index d8f4d2dd6..a20c42fa1 100755 --- a/heartbeat/anything +++ b/heartbeat/anything @@ -1,328 +1,344 @@ #!/bin/sh # # OCF Resource Agent compliant resource script. # # Copyright (c) 2009 IN-telegence GmbH & Co. 
KG, Dominik Klein # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # OCF instance parameters # OCF_RESKEY_binfile # OCF_RESKEY_cmdline_options # OCF_RESKEY_workdir # OCF_RESKEY_pidfile # OCF_RESKEY_logfile # OCF_RESKEY_errlogfile # OCF_RESKEY_user # OCF_RESKEY_monitor_hook # OCF_RESKEY_stop_timeout # # This RA starts $binfile with $cmdline_options as $user in $workdir and writes a $pidfile from that. # If you want it to, it logs: # - stdout to $logfile, stderr to $errlogfile or # - stdout and stderr to $logfile # - or to will be captured by lrmd if these options are omitted. # Monitoring is done through $pidfile or your custom $monitor_hook script. # The RA expects the program to keep running "daemon-like" and # not just quit and exit. So this is NOT (yet - feel free to # enhance) a way to just run a single one-shot command which just # does something and then exits. # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_binfile_default="" +OCF_RESKEY_workdir_default="" +OCF_RESKEY_pidfile_default="${HA_VARRUN}/anything_${OCF_RESOURCE_INSTANCE}.pid" +OCF_RESKEY_logfile_default="/dev/null" +OCF_RESKEY_user_default="root" +OCF_RESKEY_stop_timeout_default="" + +: ${OCF_RESKEY_binfile=${OCF_RESKEY_binfile_default}} +: ${OCF_RESKEY_workdir=${OCF_RESKEY_workdir_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_stop_timeout=${OCF_RESKEY_stop_timeout_default}} + getpid() { grep -o '[0-9]*' $1 } anything_status() { if test -f "$pidfile" then if pid=`getpid $pidfile` && [ "$pid" ] && kill -s 0 $pid then return $OCF_SUCCESS else # pidfile w/o process means the process died return $OCF_ERR_GENERIC fi else return $OCF_NOT_RUNNING fi } anything_start() { if ! anything_status then #Make sure that PID Directory exists and is writable by proper user piddir=`dirname $pidfile` if ! su -s /bin/sh - $user -c "test -w $piddir"; then #PID Directory is not writeable by user ocf_log warn "Directory $piddir is not writable by $user, attempting to fix." ocf_log info "Creating directory $piddir" mkdir -p $piddir ocf_log info "Changing permissions for $piddir for user $user" chown $user: $piddir else ocf_log debug "Directory $piddir exists, and is writeable by $user. All fine" fi if [ -n "$logfile" -a -n "$errlogfile" ] then # We have logfile and errlogfile, so redirect STDOUT und STDERR to different files cmd="su - $user -c \"cd $workdir; nohup $binfile $cmdline_options >> $logfile 2>> $errlogfile & \"'echo \$!' " else # We only have logfile so redirect STDOUT and STDERR to the same file cmd="su - $user -c \"cd $workdir; nohup $binfile $cmdline_options >> $logfile 2>&1 & \"'echo \$!' 
" fi ocf_log debug "Starting $process: $cmd" # Execute the command as created above eval $cmd > $pidfile if anything_status then ocf_log debug "$process: $cmd started successfully, calling monitor" anything_monitor myres=$? return $myres else ocf_log err "$process: $cmd could not be started" return $OCF_ERR_GENERIC fi else # If already running, consider start successful ocf_log debug "$process: $cmd is already running" return $OCF_SUCCESS fi } anything_stop() { local rc=$OCF_SUCCESS if [ -n "$OCF_RESKEY_stop_timeout" ] then stop_timeout=$OCF_RESKEY_stop_timeout elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) stop_timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else stop_timeout=10 fi if anything_status then pid=`getpid $pidfile` kill $pid i=0 while [ $i -lt $stop_timeout ] do if ! anything_status then rm -f $pidfile return $OCF_SUCCESS fi sleep 1 i=$((i+1)) done ocf_log warn "Stop with SIGTERM failed/timed out, now sending SIGKILL." kill -s 9 $pid while : do if ! anything_status then ocf_log warn "SIGKILL did the job." rc=$OCF_SUCCESS break fi ocf_log info "The job still hasn't stopped yet. Waiting..." sleep 1 done fi rm -f $pidfile return $rc } anything_monitor() { anything_status ret=$? if [ $ret -eq $OCF_SUCCESS ] then if [ -n "$OCF_RESKEY_monitor_hook" ]; then eval "$OCF_RESKEY_monitor_hook" if [ $? -ne $OCF_SUCCESS ]; then return ${OCF_ERR_GENERIC} fi return $OCF_SUCCESS else true fi else return $ret fi } # FIXME: Attributes special meaning to the resource id process="$OCF_RESOURCE_INSTANCE" binfile="$OCF_RESKEY_binfile" cmdline_options="$OCF_RESKEY_cmdline_options" workdir="$OCF_RESKEY_workdir" pidfile="$OCF_RESKEY_pidfile" [ -z "$pidfile" ] && pidfile=${HA_VARRUN}/anything_${process}.pid logfile="${OCF_RESKEY_logfile:-/dev/null}" errlogfile="$OCF_RESKEY_errlogfile" user="$OCF_RESKEY_user" [ -z "$user" ] && user=root anything_validate() { if ! 
su - $user -c "test -x $binfile" then ocf_log err "binfile $binfile does not exist or is not executable by $user." exit $OCF_ERR_INSTALLED fi if ! getent passwd $user >/dev/null 2>&1 then ocf_log err "user $user does not exist." exit $OCF_ERR_INSTALLED fi for logfilename in "$logfile" "$errlogfile" do if [ -n "$logfilename" ]; then mkdir -p `dirname $logfilename` || { ocf_log err "cannot create $(dirname $logfilename)" exit $OCF_ERR_INSTALLED } fi done [ "x$workdir" != x -a ! -d "$workdir" ] && { ocf_log err "working directory $workdir doesn't exist" exit $OCF_ERR_INSTALLED } return $OCF_SUCCESS } anything_meta() { cat < 1.0 This is a generic OCF RA to manage almost anything. Manages an arbitrary service The full name of the binary to be executed. This is expected to keep running with the same pid and not just do something and exit. Full path name of the binary to be executed - + Command line options to pass to the binary Command line options The path from where the binfile will be executed. Full path name of the work directory - + File to read/write the PID from/to. File to write STDOUT to - + File to write STDOUT to File to write STDOUT to - + File to write STDERR to File to write STDERR to User to run the command as User to run the command as - + Command to run in monitor operation Command to run in monitor operation In the stop operation: Seconds to wait for kill -TERM to succeed before sending kill -SIGKILL. Defaults to 2/3 of the stop operation timeout. 
Seconds to wait after having sent SIGTERM before sending SIGKILL in stop operation - + END exit 0 } case "$1" in meta-data|metadata|meta_data) anything_meta ;; start) anything_start ;; stop) anything_stop ;; monitor) anything_monitor ;; validate-all) anything_validate ;; *) ocf_log err "$0 was called with unsupported arguments: $*" exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/apache b/heartbeat/apache index d7004cd7a..95e448593 100755 --- a/heartbeat/apache +++ b/heartbeat/apache @@ -1,719 +1,725 @@ #!/bin/sh # # High-Availability Apache/IBMhttp control script # # apache (aka IBMhttpd) # # Description: starts/stops apache web servers. # # Author: Alan Robertson # Sun Jiang Dong # # Support: users@clusterlabs.org # # License: GNU General Public License (GPL) # # Copyright: (C) 2002-2005 International Business Machines # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf # node1 10.0.0.170 IBMhttpd # # Our parsing of the Apache config files is very rudimentary. # It'll work with lots of different configurations - but not every # possible configuration. # # Patches are being accepted ;-) # # OCF parameters: # OCF_RESKEY_configfile # OCF_RESKEY_httpd # OCF_RESKEY_port # OCF_RESKEY_statusurl # OCF_RESKEY_options # OCF_RESKEY_testregex # OCF_RESKEY_client # OCF_RESKEY_testurl # OCF_RESKEY_testregex10 # OCF_RESKEY_testconffile # OCF_RESKEY_testname # OCF_RESKEY_envfiles : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/apache-conf.sh . 
${OCF_FUNCTIONS_DIR}/http-mon.sh HA_VARRUNDIR=${HA_VARRUN} +# Parameter defaults + +OCF_RESKEY_httpd_default="/usr/sbin/httpd" +OCF_RESKEY_envfiles_default="/etc/apache2/envvars" +OCF_RESKEY_use_ipv6_default="false" + ####################################################################### # # Configuration options - usually you don't need to change these # ####################################################################### # IBMHTTPD=/opt/IBMHTTPServer/bin/httpd HTTPDLIST="/sbin/httpd2 /usr/sbin/httpd2 /usr/sbin/apache2 /sbin/httpd /usr/sbin/httpd /usr/sbin/apache $IBMHTTPD" MPM=/usr/share/apache2/find_mpm if [ -x $MPM ]; then HTTPDLIST="$HTTPDLIST `$MPM 2>/dev/null`" fi LOCALHOST="http://localhost" HTTPDOPTS="-DSTATUS" DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf DEFAULT_SUSECONFIG="/etc/apache2/httpd.conf" DEFAULT_RHELCONFIG="/etc/httpd/conf/httpd.conf" DEFAULT_DEBIANCONFIG="/etc/apache2/apache2.conf" # # You can also set # HTTPD # PORT # STATUSURL # CONFIGFILE # in this section if what we're doing doesn't work for you... # # End of Configuration options ####################################################################### CMD=`basename $0` # The config-file-pathname is the pathname to the configuration # file for this web server. Various appropriate defaults are # assumed if no config file is specified. If this command is # invoked as *IBM*, then the default config file name is # $DEFAULT_IBMCONFIG, otherwise the default config file # will be either $DEFAULT_RHELCONFIG or $DEFAULT_SUSECONFIG depending # on which is detected. usage() { cat <<-END usage: $0 action action: start start the web server stop stop the web server status return the status of web server, run or down monitor return TRUE if the web server appears to be working. For this to be supported you must configure mod_status and give it a server-status URL. You have to have installed either curl or wget for this to work. 
meta-data show meta data message validate-all validate the instance parameters END } get_pid() { if [ -f $PidFile ]; then cat $PidFile else false fi } # # return TRUE if a process with given PID is running # ProcessRunning() { local pid=$1 # Use /proc if it looks like it's here... if [ -d /proc -a -d /proc/1 ]; then [ -d /proc/$pid ] else # This assumes we're running as root... kill -s 0 "$pid" >/dev/null 2>&1 fi } silent_status() { local pid local rc=$OCF_ERR_GENERIC local retries=0 # Set a retry when apache's Graceful restart is applied and the pid file can not be acquired. if [ "$__OCF_ACTION" = "monitor" ] && ! ocf_is_probe; then retries=5 fi while true; do pid=`get_pid` if [ -n "$pid" ]; then ProcessRunning $pid rc=$? break fi : No pid file if [ $retries -le 0 ]; then break fi sleep 1 retries=`expr $retries - 1` done return $rc } # May be useful to add other distros in future validate_default_config() { if [ -e /etc/SuSE-release ]; then validate_default_suse_config elif [ -e /etc/debian_version ]; then validate_default_debian_config else return 0 fi } # When using the default /etc/apache2/httpd.conf on SUSE, the file # /etc/apache2/sysconfig.d/include.conf is required to be present, # but this is only generated if you run the apache init script # (with contents derived from /etc/sysconfig/apache2). So, here, # if we're using the default system config file and it requires # that include, we run "/etc/init.d/apache2 configtest" to ensure # the relevant config is generated and valid. We're also taking # this opportunity to enable mod_status if it's not present. 
validate_default_suse_config() { if [ "$CONFIGFILE" = "$DEFAULT_SUSECONFIG" ] && \ grep -Eq '^Include[[:space:]]+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE" then [ -x "/usr/sbin/a2enmod" ] && ocf_run -q /usr/sbin/a2enmod status # init script style, for crusty old SUSE if [ -e "/etc/init.d/apache2" ]; then ocf_run -q /etc/init.d/apache2 configtest || return 1 # systemd style, for shiny new SUSE elif [ -e "/usr/sbin/start_apache2" ]; then ocf_run -q /usr/sbin/start_apache2 -t || return 1 fi fi return 0 } # Debian's Default configuration uses a lock directory /var/lock/apache2 # which is only generated using the lsb init script issues configtest. To # ensure these default directories are present it's useful to run a configtest # prior to the resource startup which will create the needed directories # # To support multiple apache instances the debian scripts and configs # obey apache2/envvars. (copy /etc/apache2 -> /etc/apache2-instance) # adjust (SUFFIX) envvars and set OCF_RESKEY_envfiles validate_default_debian_config() { if find /etc/apache2* -name apache2.conf | grep -q "$CONFIGFILE" then export APACHE_CONFDIR=$(dirname $CONFIGFILE) [ -x "/usr/sbin/a2enmod" ] && ocf_run -q /usr/sbin/a2enmod status ocf_run -q /usr/sbin/apache2ctl configtest || return 1 fi return 0 } apache_start() { if silent_status then ocf_log info "$CMD already running (pid `get_pid`)" return $OCF_SUCCESS fi validate_default_config || return $OCF_ERR_CONFIGURED if [ -z $PIDFILE_DIRECTIVE ]; then ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE else ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE -c "PidFile $PidFile" fi tries=0 while : # wait until the user set timeout do apache_monitor ec=$? 
if [ $ec -eq $OCF_NOT_RUNNING ] then tries=`expr $tries + 1` ocf_log info "waiting for apache $CONFIGFILE to come up" sleep 1 else break fi done if [ $ec -ne 0 ] && silent_status; then apache_stop fi return $ec } signal_children() { for sig in SIGTERM SIGHUP SIGKILL ; do if pgrep -f $HTTPD.*$CONFIGFILE >/dev/null ; then pkill -$sig -f $HTTPD.*$CONFIGFILE >/dev/null ocf_log info "signal $sig sent to apache children" sleep 1 else break fi done } graceful_stop() { local tries=10 local pid=$1 # Try graceful stop for half timeout period if timeout period is present if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then tries=$((($OCF_RESKEY_CRM_meta_timeout/1000) / 2)) fi ocf_log info "Attempting graceful stop of apache PID $pid" kill -WINCH $pid >/dev/null while ProcessRunning $pid && [ $tries -gt 0 ] do sleep 1 tries=`expr $tries - 1` done if [ $tries -eq 0 ]; then # graceful stop didn't work, process still up. return 1 fi return 0 } kill_stop() { local tries=0 local pid=$1 ocf_log info "Killing apache PID $pid" if ProcessRunning $pid; then kill $pid >/dev/null while [ $tries -lt 10 ] do if ProcessRunning $pid; then tries=`expr $tries + 1` sleep 1 else break fi done fi } apache_stop() { local ret=$OCF_SUCCESS local pid if ! silent_status; then ocf_log info "$CMD is not running." signal_children return $ret fi pid=`get_pid` graceful_stop $pid if [ $? -ne 0 ]; then kill_stop $pid fi signal_children if ProcessRunning $pid; then ocf_exit_reason "$CMD still running ($pid). Killing pid failed." ret=$OCF_ERR_GENERIC fi if [ $ret -eq 0 ]; then ocf_log info "$CMD stopped." fi return $ret } apache_monitor_10() { if [ -f "$TESTCONFFILE" ] && [ -r "$TESTCONFFILE" ]; then readtestconf < $TESTCONFFILE else test_url="$TESTURL" test_regex="$TESTREGEX10" fi whattorun=`gethttpclient` fixtesturl is_testconf_sane || return $OCF_ERR_CONFIGURED if $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null then return $OCF_SUCCESS else if ! 
ocf_is_probe; then ocf_exit_reason "Failed to access httpd status page." fi return $OCF_ERR_GENERIC fi } # If the user has not provided any basic monitoring # information, allow the agent to verify the server is # healthy and capable of processing requests by requesting # the http header of website's index attempt_index_monitor_request() { local indexpage="" if [ -n "$OCF_RESKEY_testregex" ]; then return 1; fi if [ -n "$OCF_RESKEY_testregex10" ]; then return 1; fi if [ -n "$OCF_RESKEY_testurl" ]; then return 1; fi if [ -n "$OCF_RESKEY_statusurl" ]; then return 1; fi if [ -n "$OCF_RESKEY_testconffile" ]; then return 1; fi indexpage=$(buildlocalurl) request_url_header $indexpage if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi ocf_log debug "Successfully retrieved http header at $indexpage" return 0 } apache_monitor_basic() { if ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null then return $OCF_SUCCESS fi attempt_index_monitor_request if [ $? -eq 0 ]; then return $OCF_SUCCESS fi if ! ocf_is_probe; then ocf_exit_reason "Failed to access httpd status page." fi return $OCF_ERR_GENERIC } apache_monitor() { silent_status if [ $? -ne 0 ]; then ocf_log info "$CMD not running" return $OCF_NOT_RUNNING fi ourhttpclient=`findhttpclient` # we'll need one if [ -z "$ourhttpclient" ]; then ocf_exit_reason "could not find a http client; make sure that either wget or curl is available" return $OCF_ERR_INSTALLED fi case `ocf_check_level 10` in 0) apache_monitor_basic;; 10) apache_monitor_10;; esac } detect_default_config() { if [ -f $DEFAULT_SUSECONFIG ]; then echo $DEFAULT_SUSECONFIG elif [ -f $DEFAULT_DEBIANCONFIG ]; then echo $DEFAULT_DEBIANCONFIG else echo $DEFAULT_RHELCONFIG fi } apache_meta_data(){ cat < 1.0 This is the resource agent for the Apache Web server. This resource agent operates both version 1.x and version 2.x Apache servers. 
The start operation ends with a loop in which monitor is repeatedly called to make sure that the server started and that it is operational. Hence, if the monitor operation does not succeed within the start operation timeout, the apache resource will end with an error status. The monitor operation by default loads the server status page which depends on the mod_status module and the corresponding configuration file (usually /etc/apache2/mod_status.conf). Make sure that the server status page works and that the access is allowed *only* from localhost (address 127.0.0.1). See the statusurl and testregex attributes for more details. See also http://httpd.apache.org/ Manages an Apache Web server instance The full pathname of the Apache configuration file. This file is parsed to provide defaults for various other resource agent parameters. configuration file path The full pathname of the httpd binary (optional). httpd binary path - + A port number that we can probe for status information using the statusurl. This will default to the port number found in the configuration file, or 80, if none can be found in the configuration file. httpd port The URL to monitor (the apache server status page by default). If left unspecified, it will be inferred from the apache configuration file. If you set this, make sure that it succeeds *only* from the localhost (127.0.0.1). Otherwise, it may happen that the cluster complains about the resource being active on multiple nodes. url name Regular expression to match in the output of statusurl. Case insensitive. monitor regular expression Client to use to query to Apache. If not specified, the RA will try to find one on the system. Currently, wget and curl are supported. For example, you can set this parameter to "curl" if you prefer that to wget. http client URL to test. If it does not start with "http", then it's considered to be relative to the Listen address. test url Regular expression to match in the output of testurl. 
Case insensitive. extended monitor regular expression A file which contains test configuration. Could be useful if you have to check more than one web application or in case sensitive info should be passed as arguments (passwords). Furthermore, using a config file is the only way to specify certain parameters. Please see README.webapps for examples and file description. test configuration file Name of the test within the test configuration file. test name Extra options to apply when starting apache. See man httpd(8). command line options Files (one or more) which contain extra environment variables. If you want to prevent script from reading the default file, set this parameter to empty string. environment settings files - + We will try to detect if the URL (for monitor) is IPv6, but if that doesn't work set this to true to enforce IPv6. use ipv6 with http clients - + END return $OCF_SUCCESS } apache_validate_all() { if [ -z "$HTTPD" ]; then ocf_exit_reason "apache httpd program not found" return $OCF_ERR_INSTALLED fi if [ ! -x "$HTTPD" ]; then ocf_exit_reason "HTTPD $HTTPD not found or is not an executable!" return $OCF_ERR_INSTALLED fi if [ ! -f $CONFIGFILE ]; then ocf_exit_reason "Configuration file $CONFIGFILE not found!" return $OCF_ERR_INSTALLED fi # validate testconffile/testurl before apache_monitor_10() if [ -n "$TESTCONFFILE" ]; then if [ ! -f "$TESTCONFFILE" ] || [ ! -r "$TESTCONFFILE" ]; then ocf_exit_reason "Configuration file $TESTCONFFILE not found, or not readable." return $OCF_ERR_INSTALLED fi else if [ -n "$TESTURL" ]; then # remove leading or trailing spaces/tabs local temp=$(printf '%s' "$TESTURL" | sed -e 's/^[ \t]*//g' -e 's/[ \t]*$//g') if [ -z "$temp" ]; then ocf_exit_reason "testurl: \"$TESTURL\" seems to be an empty string?" return $OCF_ERR_CONFIGURED fi fi # FIXME: validate TESTREGEX10 will be needed if empty regex is not allow.
fi ocf_mkstatedir root 755 `dirname $PidFile` || return $OCF_ERR_INSTALLED return $OCF_SUCCESS } find_httpd_prog() { case $0 in *IBM*) HTTPD=$IBMHTTPD DefaultConfig=$DEFAULT_IBMCONFIG;; *) HTTPD= for h in $HTTPDLIST do if [ -f $h -a -x $h ]; then HTTPD=$h break fi done # Let the user know that the $HTTPD used is not the one (s)he specified via $OCF_RESKEY_httpd if [ "X$OCF_RESKEY_httpd" != X -a "X$HTTPD" != X ]; then ocf_log info "Using $HTTPD as HTTPD" fi DefaultConfig=$(detect_default_config) ;; esac } apache_getconfig() { # these variables are global HTTPD="$OCF_RESKEY_httpd" PORT="$OCF_RESKEY_port" STATUSURL="$OCF_RESKEY_statusurl" CONFIGFILE="$OCF_RESKEY_configfile" OPTIONS="$OCF_RESKEY_options" CLIENT=${OCF_RESKEY_client} TESTREGEX=${OCF_RESKEY_testregex:-''} TESTURL="$OCF_RESKEY_testurl" TESTREGEX10=${OCF_RESKEY_testregex10} TESTCONFFILE="$OCF_RESKEY_testconffile" TESTNAME="$OCF_RESKEY_testname" - : ${OCF_RESKEY_envfiles="/etc/apache2/envvars"} + : ${OCF_RESKEY_envfiles=${OCF_RESKEY_envfiles_default}} source_envfiles $OCF_RESKEY_envfiles if [ "X$HTTPD" = X -o ! -f "$HTTPD" -o ! -x "$HTTPD" ]; then find_httpd_prog fi CONFIGFILE=${CONFIGFILE:-$DefaultConfig} if [ -n "$HTTPD" ]; then httpd_basename=`basename $HTTPD` case $httpd_basename in *-*) httpd_basename=`echo "$httpd_basename" | sed -e 's%\-.*%%'`;; esac fi GetParams $CONFIGFILE } OCF_REQUIRED_PARAMS="" OCF_REQUIRED_BINARIES="" ocf_rarun $* diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip index 2757c27d0..b27d0bfcd 100755 --- a/heartbeat/aws-vpc-move-ip +++ b/heartbeat/aws-vpc-move-ip @@ -1,362 +1,370 @@ #!/bin/sh # # # OCF resource agent to move an IP address within a VPC in the AWS # # Copyright (c) 2017 Markus Guertler (SUSE) # Based on code of Adam Gandelman (GitHub ec2-resource-agents/elasticip) # All Rights Reserved. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_awscli_default="/usr/bin/aws" OCF_RESKEY_profile_default="default" +OCF_RESKEY_ip_default="" +OCF_RESKEY_address_default="" +OCF_RESKEY_routing_table_default="" +OCF_RESKEY_interface_default="eth0" OCF_RESKEY_monapi_default="false" : ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}} : ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}} +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_address=${OCF_RESKEY_address_default}} +: ${OCF_RESKEY_routing_table=${OCF_RESKEY_routing_table_default}} +: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}} : ${OCF_RESKEY_monapi=${OCF_RESKEY_monapi_default}} ####################################################################### USAGE="usage: $0 {start|stop|status|meta-data}"; ############################################################################### ############################################################################### # # Functions # ############################################################################### metadata() { cat < 2.0 Resource Agent to move IP addresses within a VPC of the Amazon Webservices EC2 by changing an entry in an specific routing table Move IP within a VPC of the AWS EC2 Path to command line tools for AWS Path to AWS CLI tools Valid AWS CLI profile name (see ~/.aws/config and 'aws configure') profile name VPC private IP address VPC private IP - + Deprecated IP address param. Use the ip param instead. Deprecated VPC private IP Address - + Name of the routing table(s), where the route for the IP address should be changed. If declaring multiple routing tables they should be separated by comma. Example: rtb-XXXXXXXX,rtb-YYYYYYYYY routing table name(s) - + Name of the network interface, i.e. 
eth0 network interface name - + Enable enhanced monitoring using AWS API calls to check route table entry Enhanced Monitoring END } ec2ip_set_address_param_compat(){ # Include backward compatibility for the deprecated address parameter if [ -z "$OCF_RESKEY_ip" ] && [ -n "$OCF_RESKEY_address" ]; then OCF_RESKEY_ip="$OCF_RESKEY_address" fi } ec2ip_validate() { for cmd in aws ip curl; do check_binary "$cmd" done if [ -z "$OCF_RESKEY_profile" ]; then ocf_exit_reason "profile parameter not set" return $OCF_ERR_CONFIGURED fi EC2_INSTANCE_ID="$(curl -s http://169.254.169.254/latest/meta-data/instance-id)" if [ -z "${EC2_INSTANCE_ID}" ]; then ocf_exit_reason "Instance ID not found. Is this a EC2 instance?" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } ec2ip_monitor() { MON_RES="" if ocf_is_true ${OCF_RESKEY_monapi} || [ "$__OCF_ACTION" = "start" ] || ocf_is_probe; then for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do ocf_log info "monitor: check routing table (API call) - $rtb" cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $rtb --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].InstanceId" ocf_log debug "executing command: $cmd" ROUTE_TO_INSTANCE="$($cmd)" ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}" if [ -z "$ROUTE_TO_INSTANCE" ]; then ROUTE_TO_INSTANCE="" fi if [ "$EC2_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then ocf_log warn "not routed to this instance ($EC2_INSTANCE_ID) but to instance $ROUTE_TO_INSTANCE on $rtb" MON_RES="$MON_RES $rtb" fi sleep 1 done if [ ! 
-z "$MON_RES" ]; then return $OCF_NOT_RUNNING fi else ocf_log debug "monitor: Enhanced Monitoring disabled - omitting API call" fi cmd="ip addr show to $OCF_RESKEY_ip up" ocf_log debug "executing command: $cmd" RESULT=$($cmd | grep "$OCF_RESKEY_ip") if [ -z "$RESULT" ]; then ocf_log warn "IP $OCF_RESKEY_ip not assigned to running interface" return $OCF_NOT_RUNNING fi ocf_log debug "route in VPC and address assigned" return $OCF_SUCCESS } ec2ip_drop() { cmd="ip addr delete ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface" ocf_log debug "executing command: $cmd" $cmd rc=$? if [ "$rc" -gt 0 ]; then ocf_log warn "command failed, rc $rc" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } ec2ip_get_and_configure() { MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address" if [ -f $MAC_FILE ]; then cmd="cat ${MAC_FILE}" else cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3" fi ocf_log debug "executing command: $cmd" MAC_ADDR="$(eval $cmd)" rc=$? if [ $rc != 0 ]; then ocf_log warn "command failed, rc: $rc" return $OCF_ERR_GENERIC fi ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}" cmd="curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id" ocf_log debug "executing command: $cmd" EC2_NETWORK_INTERFACE_ID="$(eval $cmd)" rc=$? if [ $rc != 0 ]; then ocf_log warn "command failed, rc: $rc" return $OCF_ERR_GENERIC fi ocf_log debug "network interface id associated MAC address ${MAC_ADDR}: ${EC2_NETWORK_INTERFACE_ID}" for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --network-interface-id $EC2_NETWORK_INTERFACE_ID" ocf_log debug "executing command: $cmd" $cmd rc=$? 
if [ "$rc" != 0 ]; then ocf_log warn "command failed, rc: $rc" return $OCF_ERR_GENERIC fi sleep 1 done # Reconfigure the local ip address ec2ip_drop cmd="ip addr add ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface" ocf_log debug "executing command: $cmd" $cmd rc=$? if [ $rc != 0 ]; then ocf_log warn "command failed, rc: $rc" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } ec2ip_stop() { ocf_log info "EC2: Bringing down IP address $OCF_RESKEY_ip" ec2ip_monitor if [ $? = $OCF_NOT_RUNNING ]; then ocf_log info "EC2: Address $OCF_RESKEY_ip already down" return $OCF_SUCCESS fi ec2ip_drop if [ $? != $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi ec2ip_monitor if [ $? != $OCF_NOT_RUNNING ]; then ocf_log error "EC2: Couldn't bring down IP address $OCF_RESKEY_ip on interface $OCF_RESKEY_interface." return $OCF_ERR_GENERIC fi ocf_log info "EC2: Successfully brought down $OCF_RESKEY_ip" return $OCF_SUCCESS } ec2ip_start() { ocf_log info "EC2: Moving IP address $OCF_RESKEY_ip to this host by adjusting routing table $OCF_RESKEY_routing_table" ec2ip_monitor if [ $? = $OCF_SUCCESS ]; then ocf_log info "EC2: $OCF_RESKEY_ip already started" return $OCF_SUCCESS fi ocf_log info "EC2: Adjusting routing table and locally configuring IP address" ec2ip_get_and_configure rc=$? if [ $rc != $OCF_SUCCESS ]; then ocf_log error "Received $rc from 'aws'" return $OCF_ERR_GENERIC fi ec2ip_monitor if [ $? != $OCF_SUCCESS ]; then ocf_log error "EC2: IP address couldn't be configured on this host (IP: $OCF_RESKEY_ip, Interface: $OCF_RESKEY_interface)" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } ############################################################################### # # MAIN # ############################################################################### case $__OCF_ACTION in meta-data) metadata exit $OCF_SUCCESS ;; usage|help) echo $USAGE exit $OCF_SUCCESS ;; esac if ! ocf_is_root; then ocf_log err "You must be root for $__OCF_ACTION operation." 
exit $OCF_ERR_PERM fi ec2ip_set_address_param_compat ec2ip_validate case $__OCF_ACTION in start) ec2ip_start;; stop) ec2ip_stop;; monitor) ec2ip_monitor;; validate-all) exit $?;; *) echo $USAGE exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/clvm.in b/heartbeat/clvm.in index 3ab196199..e63721a5a 100644 --- a/heartbeat/clvm.in +++ b/heartbeat/clvm.in @@ -1,448 +1,457 @@ #!@BASH_SHELL@ # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . 
${OCF_FUNCTIONS_DIR}/ocf-directories +# Parameter defaults + +OCF_RESKEY_with_cmirrord_default="false" +OCF_RESKEY_daemon_options_default="-d0" +OCF_RESKEY_activate_vgs_default="true" +OCF_RESKEY_exclusive_default="false" + +: ${OCF_RESKEY_with_cmirrord=${OCF_RESKEY_with_cmirrord_default}} +: ${OCF_RESKEY_daemon_options=${OCF_RESKEY_daemon_options_default}} +: ${OCF_RESKEY_activate_vgs=${OCF_RESKEY_activate_vgs_default}} +: ${OCF_RESKEY_exclusive=${OCF_RESKEY_exclusive_default}} + ####################################################################### meta_data() { cat < 1.0 This agent manages the clvmd daemon. clvmd Start with cmirrord (cluster mirror log daemon). activate cmirrord - + Options to clvmd. Refer to clvmd.8 for detailed descriptions. Daemon Options - + Whether or not to activate all cluster volume groups after starting the clvmd or not. Note that clustered volume groups will always be deactivated before the clvmd stops regardless of what this option is set to. Activate volume groups - + If set, only exclusive volume groups will be monitored. Only monitor exclusive volume groups - + END } ####################################################################### -: ${OCF_RESKEY_daemon_options:="-d0"} -: ${OCF_RESKEY_activate_vgs:="true"} - sbindir=$HA_SBIN_DIR if [ -z $sbindir ]; then sbindir=/usr/sbin fi DAEMON="clvmd" CMIRROR="cmirrord" DAEMON_PATH="${sbindir}/clvmd" CMIRROR_PATH="${sbindir}/cmirrord" LVMCONF="${sbindir}/lvmconf" LOCK_FILE="/var/lock/subsys/$DAEMON" # attempt to detect where the vg tools are located # for some reason this isn't consistent with sbindir # in some distros. vgtoolsdir=$(dirname $(which vgchange 2> /dev/null) 2> /dev/null) if [ -z "$vgtoolsdir" ]; then vgtoolsdir="$sbindir" fi LVM_VGCHANGE=${vgtoolsdir}/vgchange LVM_VGDISPLAY=${vgtoolsdir}/vgdisplay LVM_VGSCAN=${vgtoolsdir}/vgscan # Leaving this in for legacy.
We do not want to advertise # the ability to set options in the sysconfig exists, we want # to expand the OCF style options as necessary instead. [ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster [ -f /etc/sysconfig/$DAEMON ] && . /etc/sysconfig/$DAEMON CLVMD_TIMEOUT="90" if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then CLVMD_TIMEOUT=$(($OCF_RESKEY_CRM_meta_timeout/1000)) fi clvmd_usage() { cat </dev/null | grep -a "${binary}" > /dev/null 2>&1 if [ $? -eq 0 ];then # shortcut without requiring pgrep to search through all procs return $OCF_SUCCESS fi fi pid=$(pgrep ${binary}) case $? in 0) ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}." echo "$pid" > $pidfile return $OCF_SUCCESS;; 1) rm -f "$pidfile" > /dev/null 2>&1 ocf_log info "$binary is not running" return $OCF_NOT_RUNNING;; *) rm -f "$pidfile" > /dev/null 2>&1 ocf_exit_reason "Error encountered detecting pid status of $binary" return $OCF_ERR_GENERIC;; esac } clvmd_status() { local rc local mirror_rc clvmd_validate rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "Unable to monitor, Environment validation failed." return $rc fi check_process $DAEMON rc=$? mirror_rc=$rc if ocf_is_true $OCF_RESKEY_with_cmirrord; then check_process $CMIRROR mirror_rc=$? fi # If these ever don't match, return error to force recovery if [ $mirror_rc -ne $rc ]; then return $OCF_ERR_GENERIC fi return $rc } # NOTE: replace this with vgs, once display filter per attr is implemented. clustered_vgs() { if ! ocf_is_true "$OCF_RESKEY_exclusive"; then ${LVM_VGDISPLAY} 2>/dev/null | awk 'BEGIN {RS="VG Name"} {if (/Clustered/) print $1;}' else for vg in $(vgs --select "clustered=yes" -o name --noheadings); do lvs --select lv_active=~'local.*exclusive' -o vg_name --noheadings $vg 2> /dev/null | awk '!seen[$1]++ {print $1}' done fi } wait_for_process() { local binary=$1 local timeout=$2 local count=0 ocf_log info "Waiting for $binary to exit" while [ $count -le $timeout ]; do check_process $binary if [ $?
-eq $OCF_NOT_RUNNING ]; then ocf_log info "$binary terminated" return $OCF_SUCCESS fi sleep 1 count=$((count+1)) done return $OCF_ERR_GENERIC } time_left() { local end=$1 local default=$2 local now=$SECONDS local result=0 result=$(( $end - $now )) if [ $result -lt $default ]; then return $default fi if [ $result -gt 255 ]; then result=255 fi return $result } clvmd_stop() { local LVM_VGS local rc=$OCF_SUCCESS local end=$(( $SECONDS + $CLVMD_TIMEOUT )) clvmd_status if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi check_process $DAEMON if [ $? -ne $OCF_NOT_RUNNING ]; then LVM_VGS="$(clustered_vgs)" if [ -n "$LVM_VGS" ]; then ocf_log info "Deactivating clustered VG(s):" ocf_run ${LVM_VGCHANGE} -anl $LVM_VGS if [ $? -ne 0 ]; then ocf_exit_reason "Failed to deactivate volume groups, cluster vglist = $LVM_VGS" return $OCF_ERR_GENERIC fi fi ocf_log info "Signaling $DAEMON to exit" killall -TERM $DAEMON if [ $? != 0 ]; then ocf_exit_reason "Failed to signal -TERM to $DAEMON" return $OCF_ERR_GENERIC fi wait_for_process $DAEMON $CLVMD_TIMEOUT rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "$DAEMON failed to exit" return $rc fi rm -f $LOCK_FILE fi check_process $CMIRROR if [ $? -ne $OCF_NOT_RUNNING ] && ocf_is_true $OCF_RESKEY_with_cmirrord; then local timeout ocf_log info "Signaling $CMIRROR to exit" killall -INT $CMIRROR time_left $end 10; timeout=$? wait_for_process $CMIRROR $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then killall -KILL $CMIRROR time_left $end 10; timeout=$? wait_for_process $CMIRROR $timeout rc=$? fi fi return $rc } start_process() { local binary_path=$1 local opts=$2 check_process "$(basename $binary_path)" if [ $? -ne $OCF_SUCCESS ]; then ocf_log info "Starting $binary_path: " ocf_run $binary_path $opts rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to launch $binary_path, exit code $rc" exit $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } clvmd_activate_all() { if !
ocf_is_true "$OCF_RESKEY_activate_vgs"; then
        ocf_log info "skipping vg activation, activate_vgs is set to $OCF_RESKEY_activate_vgs"
        return $OCF_SUCCESS
    fi

    # Activate all volume groups by leaving the
    # "volume group name" parameter empty
    ocf_run ${LVM_VGCHANGE} -aay
    if [ $? -ne 0 ]; then
        ocf_log info "Failed to activate VG(s):"
        clvmd_stop
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}

#
# Start action: validate the environment, start cmirrord (when
# with_cmirrord is true) and clvmd, refresh the local VG cache, activate
# the volume groups and report the resulting clvmd_status.
#
# Returns the clvmd_validate status when validation fails; otherwise the
# status of clvmd_activate_all (already running) or clvmd_status.
#
clvmd_start()
{
    local rc=0
    local CLVMDOPTS="-T${CLVMD_TIMEOUT} $OCF_RESKEY_daemon_options"

    clvmd_validate
    # Capture the status right away: after ocf_exit_reason has run, $?
    # holds the status of the logging call, not the validation result.
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        ocf_exit_reason "Unable to start, Environment validation failed."
        return $rc
    fi

    # systemd drop-in to stop process before storage services during
    # shutdown/reboot
    if systemd_is_running ; then
        systemd_drop_in "99-clvmd" "After" "blk-availability.service"
    fi

    clvmd_status
    if [ $? -eq $OCF_SUCCESS ]; then
        ocf_log debug "$DAEMON already started"
        clvmd_activate_all
        return $?;
    fi

    # autoset locking type to clustered when lvmconf tool is available
    if [ -x "$LVMCONF" ]; then
        $LVMCONF --enable-cluster > /dev/null 2>&1
    fi

    # if either of these fail, script will exit OCF_ERR_GENERIC
    if ocf_is_true $OCF_RESKEY_with_cmirrord; then
        start_process $CMIRROR_PATH
    fi
    start_process $DAEMON_PATH "$CLVMDOPTS"

    # Refresh local cache.
    #
    # It's possible that new PVs were added to this, or other VGs
    # while this node was down. So we run vgscan here to avoid
    # any potential "Missing UUID" messages with subsequent
    # LVM commands.

    # The following step would be better and more informative to the user:
    # 'action "Refreshing VG(s) local cache:" ${LVM_VGSCAN}'
    # but it could show warnings such as:
    # 'clvmd not running on node x-y-z Unable to obtain global lock.'
    # and the action would be shown as FAILED when in reality it didn't.
    # Ideally vgscan should have a startup mode that would not print
    # unnecessary warnings.

    ${LVM_VGSCAN} > /dev/null 2>&1
    touch $LOCK_FILE

    clvmd_activate_all

    clvmd_status
    return $?
} case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; start) clvmd_start;; stop) clvmd_stop;; monitor) clvmd_status;; validate-all) clvmd_validate;; usage|help) clvmd_usage;; *) clvmd_usage exit $OCF_ERR_UNIMPLEMENTED;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/db2 b/heartbeat/db2 index 033005f94..62b288d46 100755 --- a/heartbeat/db2 +++ b/heartbeat/db2 @@ -1,902 +1,912 @@ #!/bin/sh # # db2 # # Resource agent that manages a DB2 LUW database in Standard role # or HADR configuration in master/slave configuration. # Multi partition is supported as well. # # Copyright (c) 2011 Holger Teutsch # # This agent incoporates code of a previous release created by # Alan Robertson and the community. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_instance_default="" +OCF_RESKEY_admin_default="" +OCF_RESKEY_dbpartitionnum_default="0" + +: ${OCF_RESKEY_instance=${OCF_RESKEY_instance_default}} +: ${OCF_RESKEY_admin=${OCF_RESKEY_admin_default}} +: ${OCF_RESKEY_dbpartitionnum=${OCF_RESKEY_dbpartitionnum_default}} + ####################################################################### db2_usage() { echo "db2 start|stop|monitor|promote|demote|notify|validate-all|meta-data" } db2_meta_data() { cat < 1.0 Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles in master/slave configuration. Multiple partitions are supported. Standard mode: An instance including all or selected databases is made highly available. Configure each partition as a separate primitive resource. HADR mode: A single database in HADR configuration is made highly available by automating takeover operations. Configure a master / slave resource with notifications enabled and an additional monitoring operation with role "Master". In case of HADR be very deliberate in specifying intervals/timeouts. The detection of a failure including promote must complete within HADR_PEER_WINDOW. In addition to honoring requirements for crash recovery etc. for your specific database use the following relations as guidance: "monitor interval" < HADR_PEER_WINDOW - (appr 30 sec) "promote timeout" < HADR_PEER_WINDOW + (appr 20 sec) For further information and examples consult http://www.linux-ha.org/wiki/db2_(resource_agent) Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles as master/slave configuration. Multiple partitions are supported. The instance of the database(s). instance - + List of databases to be managed, e.g "db1 db2". Defaults to all databases in the instance. Specify one db for HADR mode. List of databases to be managed DEPRECATED: The admin user of the instance. 
DEPRECATED: admin - + The number of the partition (DBPARTITIONNUM) to be managed. database partition number (DBPARTITIONNUM) - + END } # # validate # .. and set global variables # # exit on error # db2_validate() { local db2home db2sql db2instance # db2 uses korn shell check_binary "ksh" # check required instance vars if [ -z "$OCF_RESKEY_instance" ] then ocf_log err "DB2 required parameter instance is not set!" return $OCF_ERR_CONFIGURED fi instance=$OCF_RESKEY_instance if [ -n "$OCF_RESKEY_admin" ] then ocf_log warn "DB2 deprecated parameter admin is set, using $OCF_RESKEY_admin as instance." instance=$OCF_RESKEY_admin fi db2node=${OCF_RESKEY_dbpartitionnum:-0} db2home=$(sh -c "echo ~$instance") db2sql=$db2home/sqllib db2profile=$db2sql/db2profile db2bin=$db2sql/bin STATE_FILE=${HA_RSCTMP}/db2-${OCF_RESOURCE_INSTANCE}.state # Let's make sure a few important things are there... if ! [ -d "$db2sql" -a -d "$db2bin" -a -f "$db2profile" -a \ -x "$db2profile" -a -x "$db2bin/db2" ] then ocf_is_probe && exit $OCF_NOT_RUNNING ocf_log err "DB2 required directories and/or files not found" exit $OCF_ERR_INSTALLED fi db2instance=$(runasdb2 'echo $DB2INSTANCE') if [ "$db2instance" != "$instance" ] then ocf_is_probe && exit $OCF_NOT_RUNNING ocf_log err "DB2 parameter instance \"$instance\" != DB2INSTANCE \"$db2instance\"" exit $OCF_ERR_CONFIGURED fi # enough checking for stop to succeed [ $__OCF_ACTION = stop ] && return $OCF_SUCCESS dblist=$OCF_RESKEY_dblist if [ -n "$dblist" ] then # support , as separator as well dblist=$(echo "$dblist" | sed -e 's/[,]/ /g') else if ! 
dblist=$(db2_dblist) then ocf_log err "DB2 $instance($db2node): cannot retrieve db directory" exit $OCF_ERR_INSTALLED fi fi # check requirements for the HADR case if ocf_is_ms then set -- $dblist if [ $# != 1 ] then ocf_log err "DB2 resource $OCF_RESOURCE_INSTANCE must have exactly one name in dblist" exit $OCF_ERR_CONFIGURED fi if [ $db2node != 0 ] then ocf_log err "DB2 resource $OCF_RESOURCE_INSTANCE must have dbpartitionnum=0" exit $OCF_ERR_CONFIGURED fi fi return $OCF_SUCCESS } master_score() { if ! have_binary "crm_master"; then return fi crm_master $* } # # Run the given command as db2 instance user # runasdb2() { su $instance -c ". $db2profile; $*" } # # Run a command as the DB2 admin, and log the output # logasdb2() { local output rc output=$(runasdb2 $*) rc=$? if [ $rc -eq 0 ] then ocf_log info "$output" else ocf_log err "$output" fi return $rc } # # maintain the fal (first active log) attribute # db2_fal_attrib DB {set val|get} # db2_fal_attrib() { local db=$1 local attr val rc id node member me attr=db2hadr_${instance}_${db}_fal case "$2" in set) me=$(uname -n) # loop over all member nodes and set attribute crm_node -l | while read id node member do [ "$member" = member -a "$node" != "$me" ] || continue crm_attribute -t nodes -l reboot --node=$node -n $attr -v "$3" rc=$? ocf_log info "DB2 instance $instance($db2node/$db: setting attrib for FAL to $FIRST_ACTIVE_LOG @ $node" [ $rc != 0 ] && break done ;; get) crm_attribute -t nodes -l reboot -n $attr -G --quiet 2>&1 rc=$? if [ $rc != 0 ] then ocf_log warn "DB2 instance $instance($db2node/$db: can't retrieve attribute $attr, are you sure notifications are enabled ?" fi ;; *) exit $OCF_ERR_CONFIGURED esac return $rc } # # unfortunately a first connect after a crash may need several minutes # for some internal cleanup stuff in DB2. # We run a connect in background so other connects (i.e. monitoring!) may proceed. 
# db2_run_connect() { local db=$1 logasdb2 "db2 connect to $db; db2 terminate" } # # get some data from the database config # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW # db2_get_cfg() { local db=$1 local output hadr_vars output=$(runasdb2 db2 get db cfg for $db) [ $? != 0 ] && return $OCF_ERR_GENERIC hadr_vars=$(echo "$output" | awk '/HADR database role/ {printf "HADR_ROLE='%s'; ", $NF;} /HADR_TIMEOUT/ {printf "HADR_TIMEOUT='%s'; ", $NF;} /First active log file/ {printf "FIRST_ACTIVE_LOG='%s'\n", $NF;} /HADR_PEER_WINDOW/ {printf "HADR_PEER_WINDOW='%s'\n", $NF;}') # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW eval $hadr_vars # HADR_PEER_WINDOW comes with V9 and is checked later if [ -z "$HADR_ROLE" -o -z "$HADR_TIMEOUT" ] then ocf_log error "DB2 cfg values invalid for $instance($db2node)/$db: $hadr_vars" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # return the list of databases in the instance # db2_dblist() { local output output=$(runasdb2 db2 list database directory) || return $OCF_ERR_GENERIC echo "$output" | grep -i 'Database name.*=' | sed 's%.*= *%%' } # # Delayed check of the compatibility of DB2 instance and pacemaker # config. # Logically this belongs to validate but certain parameters can only # be retrieved once the instance is started. # db2_check_config_compatibility() { local db=$1 local is_ms ocf_is_ms is_ms=$? case "$HADR_ROLE/$is_ms" in STANDARD/0) ocf_log err "DB2 database $instance/$db is not in a HADR configuration but I am a M/S resource" exit $OCF_ERR_INSTALLED ;; STANDARD/1) # OK ;; */0) if [ -z "$HADR_PEER_WINDOW" ] then ocf_log err "DB2 database $instance: release to old, need HADR_PEER_WINDOW (>=V9)" exit $OCF_ERR_INSTALLED fi ;; */1) ocf_log err "DB2 database $instance/$db is in a HADR configuration but I must be a M/S resource" esac } # # Start instance and DB. # Standard mode is through "db2 activate" in order to start in previous # mode (Standy/Primary). 
# If the database is a primary AND we can determine that the running master # has a higher "first active log" we conclude that we come up after a crash # an the previous Standby is now Primary. # The db is then started as Standby. # # Other cases: danger of split brain, log error and do nothing. # db2_start() { local output start_cmd db local start_opts="dbpartitionnum $db2node" # If we detect that db partitions are not in use, and no # partition is explicitly specified, activate without # partition information. This allows db2 instances without # partition support to be managed. if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -a "$db2sql/db2nodes.cfg" ]; then start_opts="" fi if output=$(runasdb2 db2start $start_opts) then ocf_log info "DB2 instance $instance($db2node) started: $output" else case $output in *SQL1026N*) ocf_log info "DB2 instance $instance($db2node) already running: $output" ;; *) ocf_log err "$output" return $OCF_ERR_GENERIC esac fi if ! db2_instance_status then ocf_log err "DB2 instance $instance($db2node) is not active!" return $OCF_ERR_GENERIC fi [ $db2node = 0 ] || return $OCF_SUCCESS # activate DB only on node 0 for db in $dblist do # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW FIRST_ACTIVE_LOG db2_get_cfg $db || return $? # Better late than never: can only check this when the instance is already up db2_check_config_compatibility $db start_cmd="db2 activate db $db" if [ $HADR_ROLE = PRIMARY ] then local master_fal # communicate our FAL to other nodes the might start concurrently db2_fal_attrib $db set $FIRST_ACTIVE_LOG # ignore false positive: # error: Can't use > in [ ]. Escape it or use [[..]]. 
[SC2073] # see https://github.com/koalaman/shellcheck/issues/691 # shellcheck disable=SC2073 if master_fal=$(db2_fal_attrib $db get) && [ "$master_fal" '>' $FIRST_ACTIVE_LOG ] then ocf_log info "DB2 database $instance($db2node)/$db is Primary and outdated, starting as secondary" start_cmd="db2 start hadr on db $db as standby" HADR_ROLE=STANDBY fi fi if output=$(runasdb2 $start_cmd) then ocf_log info "DB2 database $instance($db2node)/$db started/activated" [ $HADR_ROLE != STANDBY ] && db2_run_connect $db & else case $output in SQL1490W*|SQL1494W*|SQL1497W*|SQL1777N*) ocf_log info "DB2 database $instance($db2node)/$db already activated: $output" ;; SQL1768N*"Reason code = \"7\""*) ocf_log err "DB2 database $instance($db2node)/$db is a Primary and the Standby is down" ocf_log err "Possible split brain ! Manual intervention required." ocf_log err "If this DB is outdated use \"db2 start hadr on db $db as standby\"" ocf_log err "If this DB is the surviving primary use \"db2 start hadr on db $db as primary by force\"" # might be the Standby is not yet there # might be a timing problem because "First active log" is delayed # on the next start attempt we might succeed when FAL was advanced # might be manual intervention is required # ... so let pacemaker give it another try and we will succeed then return $OCF_ERR_GENERIC ;; *) ocf_log err "DB2 database $instance($db2node)/$db didn't start: $output" return $OCF_ERR_GENERIC esac fi done # come here with success # Even if we are a db2 Primary pacemaker requires start to end up in slave mode echo SLAVE > $STATE_FILE return $OCF_SUCCESS } # # helper function to be spawned # so we can detect a hang of the db2stop command # db2_stop_bg() { local rc output local stop_opts="dbpartitionnum $db2node" rc=$OCF_SUCCESS if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! 
[ -a "$db2sql/db2nodes.cfg" ]; then stop_opts="" fi if output=$(runasdb2 db2stop force $stop_opts) then ocf_log info "DB2 instance $instance($db2node) stopped: $output" else case $output in *SQL1032N*) #SQL1032N No start database manager command was issued ocf_log info "$output" ;; *) ocf_log err "DB2 instance $instance($db2node) stop failed: $output" rc=$OCF_ERR_GENERIC esac fi return $rc } # # Stop the given db2 database instance # db2_stop() { local stop_timeout grace_timeout stop_bg_pid i must_kill # remove master score master_score -D -l reboot # be very early here in order to avoid stale data rm -f $STATE_FILE db2_instance_status if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log info "DB2 instance $instance already stopped" return $OCF_SUCCESS fi stop_timeout=${OCF_RESKEY_CRM_meta_timeout:-20000} # grace_time is 4/5 (unit is ms) grace_timeout=$((stop_timeout/1250)) # start db2stop in background as this may hang db2_stop_bg & stop_bg_pid=$! # wait for grace_timeout i=0 while [ $i -lt $grace_timeout ] do kill -0 $stop_bg_pid 2>/dev/null || break; sleep 1 i=$((i+1)) done # collect exit status but don't hang if kill -0 $stop_bg_pid 2>/dev/null then stoprc=1 kill -9 $stop_bg_pid 2>/dev/null else wait $stop_bg_pid stoprc=$? fi must_kill=0 if [ $stoprc -ne 0 ] then ocf_log warn "DB2 instance $instance($db2node): db2stop failed, using db2nkill" must_kill=1 elif ! db2_instance_dead then ocf_log warn "DB2 instance $instance($db2node): db2stop indicated success but there a still processes, using db2nkill" must_kill=1 fi if [ $must_kill -eq 1 ] then # db2nkill kills *all* partitions on the node if [ -x $db2bin/db2nkill ] then logasdb2 $db2bin/db2nkill $db2node elif [ -x $db2bin/db2_kill ] then logasdb2 $db2bin/db2_kill fi # loop forever (or lrmd kills us due to timeout) until the # instance is dead while ! 
db2_instance_dead do ocf_log info "DB2 instance $instance($db2node): waiting for processes to exit" sleep 1 done ocf_log info "DB2 instance $instance($db2node) is now dead" fi return $OCF_SUCCESS } # # check whether `enough´ processes for a healthy instance are up # db2_instance_status() { local pscount pscount=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- | grep ' db2[^ ]' | wc -l) if [ $pscount -ge 4 ]; then return $OCF_SUCCESS; elif [ $pscount -ge 1 ]; then return $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } # # is the given db2 instance dead? # db2_instance_dead() { local pscount pscount=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- | grep ' db2[^ ]' | wc -l) test $pscount -eq 0 } # # return the status of the db as "Role/Status" # e.g. Primary/Peer, Standby/RemoteCatchupPending # # If not in HADR configuration return "Standard/Standalone" # db2_hadr_status() { local db=$1 local output output=$(runasdb2 db2pd -hadr -db $db) if [ $? != 0 ] then echo "Down/Off" return 1 fi echo "$output" | awk '/^\s+HADR_(ROLE|STATE) =/ {printf $3"/"} /^\s+HADR_CONNECT_STATUS =/ {print $3; exit; } /^HADR is not active/ {print "Standard/Standalone"; exit; } /^Role *State */ {getline; printf "%s/%s\n", $1, $2; exit; }' } # # Monitor the db # And as side effect set crm_master / FAL attribute # db2_monitor() { local CMD output hadr db local rc db2_instance_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then # instance is dead remove master score master_score -D -l reboot exit $rc fi [ $db2node = 0 ] || return 0 # monitoring only for partition 0 for db in $dblist do hadr=$(db2_hadr_status $db) || return $OCF_ERR_GENERIC ocf_log debug "Monitor: DB2 database $instance($db2node)/$db has HADR status $hadr" # set master preference accordingly case "$hadr" in PRIMARY/*|Primary/*|Standard/*) # perform a basic health check CMD="if db2 connect to $db; then db2 select \* from sysibm.sysversions ; rc=\$?; db2 terminate; else rc=\$?; fi; exit \$rc" if ! 
output=$(runasdb2 $CMD) then case "$output" in SQL1776N*) # can't connect/select on standby, may be spurious turing takeover ;; *) ocf_log err "DB2 database $instance($db2node)/$db is not working" ocf_log err "DB2 message: $output" # dead primary, remove master score master_score -D -l reboot return $OCF_ERR_GENERIC esac fi ocf_log debug "DB2 database $instance($db2node)/$db appears to be working" ocf_is_ms && master_score -v 10000 -l reboot ;; STANDBY/*PEER/*|Standby/*Peer) master_score -v 8000 -l reboot ;; STANDBY/*|Standby/*) ocf_log warn "DB2 database $instance($db2node)/$db in status $hadr can never be promoted" master_score -D -l reboot ;; *) return $OCF_ERR_GENERIC esac done # everything OK, return if running as slave grep MASTER $STATE_FILE >/dev/null 2>&1 || return $OCF_SUCCESS return $OCF_RUNNING_MASTER } # # Promote db to Primary # db2_promote() { # validate ensured that dblist contains only one entry local db=$dblist local i hadr output force # we run this twice as after a crash of the other node # within HADR_TIMEOUT the status may be still reported as Peer # although a connection no longer exists for i in 1 2 do hadr=$(db2_hadr_status $db) || return $OCF_ERR_GENERIC ocf_log info "DB2 database $instance($db2node)/$db has HADR status $hadr and will be promoted" case "$hadr" in Standard/Standalone) # this case only to keep ocf-tester happy return $OCF_SUCCESS ;; PRIMARY/PEER/*|PRIMARY/REMOTE_CATCHUP/*|Primary/Peer) # nothing to do, only update pacemaker's view echo MASTER > $STATE_FILE return $OCF_SUCCESS ;; STANDBY/PEER/CONNECTED|Standby/Peer) # must take over ;; STANDBY/*PEER/DISCONNECTED|Standby/DisconnectedPeer) # must take over forced force="by force peer window only" ;; *) return $OCF_ERR_GENERIC esac if output=$(runasdb2 db2 takeover hadr on db $db $force) then # update pacemaker's view echo MASTER > $STATE_FILE # turn the log so we rapidly get a new FAL logasdb2 "db2 archive log for db $db" return $OCF_SUCCESS fi case "$output" in 
SQL1770N*"Reason code = \"7\""*)
                # expected, HADR_TIMEOUT is now expired
                # go for the second try
                continue
                ;;

            *)
                ocf_log err "DB2 database $instance($db2node)/$db promote failed: $output"
                return $OCF_ERR_GENERIC
        esac
    done

    return $OCF_ERR_GENERIC
}

#
# Demote db to standby
#
# Only pacemaker's view (the state file) is switched to SLAVE; the
# result is whatever db2_monitor reports afterwards.
#
db2_demote() {
    # validate ensured that dblist contains only one entry
    local db=$dblist
    local hadr

    # house keeping, set pacemaker's view to slave
    echo SLAVE > $STATE_FILE

    hadr=$(db2_hadr_status $dblist) || return $OCF_ERR_GENERIC
    ocf_log info "DB2 database $instance($db2node)/$db has HADR status $hadr and will be demoted"

    db2_monitor
    return $?
}

#
# handle pre start notification
# We record our first active log on the other nodes.
# If two primaries come up after a crash they can safely determine who is
# the outdated one.
#
# Returns OCF_SUCCESS for every notification other than pre-start, and
# an error code when the configuration cannot be read or the attribute
# cannot be set; exits with OCF_SUCCESS once the attribute is recorded.
#
db2_notify() {
    # only interested in pre-start
    # (operands are quoted so an unset notify variable cannot break the test)
    [ "$OCF_RESKEY_CRM_meta_notify_type" = pre \
        -a "$OCF_RESKEY_CRM_meta_notify_operation" = start ] || return $OCF_SUCCESS

    # gets FIRST_ACTIVE_LOG
    db2_get_cfg $dblist || return $?

    db2_fal_attrib $dblist set $FIRST_ACTIVE_LOG || return $OCF_ERR_GENERIC
    exit $OCF_SUCCESS
}

########
# Main #
########
case "$__OCF_ACTION" in
    meta-data)
    db2_meta_data
    exit $OCF_SUCCESS
    ;;

    usage)
    db2_usage
    exit $OCF_SUCCESS
    ;;

    start)
    db2_validate
    db2_start || exit $?
    db2_monitor
    exit $?
    ;;

    stop)
    db2_validate
    db2_stop
    exit $?
    ;;

    promote)
    db2_validate
    db2_promote
    exit $?
    ;;

    demote)
    db2_validate
    db2_demote
    exit $?
    ;;

    notify)
    db2_validate
    db2_notify
    exit $?
    ;;

    monitor)
    db2_validate
    db2_monitor
    exit $?
    ;;

    validate-all)
    db2_validate
    exit $?
    ;;

    *)
    db2_usage
    exit $OCF_ERR_UNIMPLEMENTED
esac
diff --git a/heartbeat/dnsupdate.in b/heartbeat/dnsupdate.in index 1ecbadf18..34a6c56f3 100644 --- a/heartbeat/dnsupdate.in +++ b/heartbeat/dnsupdate.in @@ -1,276 +1,297 @@ #!@BASH_SHELL@ # # # Support: users@clusterlabs.org # License: GNU General Public License v2 # # Copyright (c) 2014 SUSE Linux Products GmbH, Lars Marowsky-Brée # All Rights Reserved.
# ####################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_hostname_default="" +OCF_RESKEY_ip_default="" +OCF_RESKEY_ttl_default="300" +OCF_RESKEY_keyfile_default="" +OCF_RESKEY_server_default="" +OCF_RESKEY_serverport_default="53" +OCF_RESKEY_nsupdate_opts_default="" +OCF_RESKEY_unregister_on_stop_default="false" + +: ${OCF_RESKEY_hostname=${OCF_RESKEY_hostname_default}} +: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} +: ${OCF_RESKEY_ttl=${OCF_RESKEY_ttl_default}} +: ${OCF_RESKEY_keyfile=${OCF_RESKEY_keyfile_default}} +: ${OCF_RESKEY_server=${OCF_RESKEY_server_default}} +: ${OCF_RESKEY_serverport=${OCF_RESKEY_serverport_default}} +: ${OCF_RESKEY_nsupdate_opts=${OCF_RESKEY_nsupdate_opts_default}} +: ${OCF_RESKEY_unregister_on_stop=${OCF_RESKEY_unregister_on_stop_default}} + ####################################################################### # TODO: # - Should setting CNAMEs be supported? # - Should multiple A records be supported? usage() { cat <<-! usage: $0 {start|stop|status|monitor|meta-data|validate-all} ! } meta_data() { cat < 1.0 This resource agent manages IP take-over via dynamic DNS updates. IP take-over via dynamic DNS update The hostname whose IP address will need to be updated. Hostname to update - + IP address to set. IP address to set - + Time to live, in seconds, for the DNS record. This affects how soon DNS updates propagate. It should be a reasonable compromise between update speed and DNS server load. If using booth, the ticket timeout is a good start. TTL for the DNS record - + The file containing the shared secret needed to update the DNS record. Please see the nsupdate man page for the exact syntax. nsupdate key file - + Which DNS server to send these updates for. When no server is provided, this defaults to the master server for the correct zone. DNS server to contact - + Port number on the DNS server. 
Note: due to a limitation in the nsupdate command, this option will only take effect if you also specify the DNS server! Port number on the DNS server - + Additional options to be passed to nsupdate. Additional nsupdate options - + Whether or not to actively remove records on stop. This is not needed for normal operation, since the site taking over the IP address will delete all previous records. Remove A record on stop - + END } dnsupdate_status() { # The resource is considered active if the current IP # address is returned as the only response. local record=$(dig ${dig_opts} ${hostname}. A +short 2>/dev/null) if [ "$record" = "$ip" ]; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } dnsupdate_monitor() { if ocf_is_probe ; then # return $OCF_NOT_RUNNING fi dnsupdate_status } dnsupdate_start() { if dnsupdate_status ; then ocf_log info "$hostname already resolves to $ip" return $OCF_SUCCESS fi ocf_log info "Updating DNS records for $hostname" ( if [ -n "$dns_server" ]; then echo "server ${dns_server} ${dns_serverport}" fi echo "update delete $hostname A" echo "update add $hostname ${OCF_RESKEY_ttl} A $ip" echo "send" ) | nsupdate ${nsupdate_opts} dnsupdate_monitor return $? } dnsupdate_stop() { if ocf_is_true "${OCF_RESKEY_unregister_on_stop}" && dnsupdate_status ; then ocf_log info "Unregistering $hostname with $ip from DNS server" ( if [ -n "$dns_server" ]; then echo "server ${dns_server} ${dns_serverport}" fi echo "update delete $hostname A $ip" echo "send" ) | nsupdate ${nsupdate_opts} dnsupdate_monitor if [ $? -ne $OCF_NOT_RUNNING ]; then ocf_log warn "Unregistering failed!" # There's no point in invoking a stop failure # here. If another site takes over the record, # it'll delete all previous entries anyway. 
fi fi return $OCF_SUCCESS } dnsupdate_validate() { hostname=${OCF_RESKEY_hostname} ip=${OCF_RESKEY_ip} dig_opts="" dns_server=${OCF_RESKEY_server} : ${OCF_RESKEY_serverport:="53"} dns_serverport=${OCF_RESKEY_serverport} : ${OCF_RESKEY_ttl:="300"} nsupdate_opts=${OCF_RESKEY_nsupdate_opts} if [ -z "$nsupdate_opts" -a -n "$OCF_RESKEY_opts" ]; then nsupdate_opts=${OCF_RESKEY_opts} ocf_log warn "opts was never an advertised parameter, please use nsupdate_opts" fi if [ -z "$hostname" ]; then ocf_log err "No hostname specified." exit $OCF_ERR_CONFIGURED fi if [ -z "$ip" ]; then ocf_log err "No IP specified." exit $OCF_ERR_CONFIGURED fi if ! ocf_is_decimal $OCF_RESKEY_ttl ; then ocf_log err "ttl $OCF_RESKEY_ttl is not valid" exit $OCF_ERR_CONFIGURED fi if ! ocf_is_decimal $dns_serverport ; then ocf_log err "serverport $dns_serverport is not valid" exit $OCF_ERR_CONFIGURED fi dig_opts+=" -p ${dns_serverport}" if [ -n "$dns_server" ]; then dig_opts+=" @${dns_server}" fi if [ -n "$OCF_RESKEY_keyfile" ]; then if [ ! -f ${OCF_RESKEY_keyfile} ]; then ocf_log err "keyfile $OCF_RESKEY_keyfile does not exist" exit $OCF_ERR_CONFIGURED fi nsupdate_opts+=" -k $OCF_RESKEY_keyfile" fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac check_binary dig check_binary nsupdate dnsupdate_validate case $1 in start) dnsupdate_start ;; stop) dnsupdate_stop ;; monitor) dnsupdate_monitor ;; status) dnsupdate_status ;; validate-all) # We've already run this exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/docker b/heartbeat/docker index 250714613..60e163bda 100755 --- a/heartbeat/docker +++ b/heartbeat/docker @@ -1,606 +1,609 @@ #!/bin/sh # # The docker HA resource agent creates and launches a docker container # based off a supplied docker image. Containers managed by this agent # are both created and removed upon the agent's start and stop actions. 
# # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Parameter defaults +OCF_RESKEY_reuse_default="0" OCF_RESKEY_daemon_pidfile_default="/var/run/docker.pid" + +: ${OCF_RESKEY_reuse=${OCF_RESKEY_reuse_default}} : ${OCF_RESKEY_daemon_pidfile=${OCF_RESKEY_daemon_pidfile_default}} ####################################################################### meta_data() { cat < 1.0 The docker HA resource agent creates and launches a docker container based off a supplied docker image. Containers managed by this agent are both created and removed upon the agent's start and stop actions. Docker container resource agent. The docker image to base this container off of. docker image The name to give the created container. By default this will be that resource's instance name. 
docker container name Allow the image to be pulled from the configured docker registry when the image does not exist locally. NOTE, this can drastically increase the time required to start the container if the image repository is pulled over the network. Allow pulling non-local images Add options to be appended to the 'docker run' command which is used when creating the container during the start action. This option allows users to do things such as setting a custom entry point and injecting environment variables into the newly created container. Note the '-d' option is supplied regardless of this value to force containers to run in the background. NOTE: Do not explicitly specify the --name argument in the run_opts. This agent will set --name using either the resource's instance or the name provided in the 'name' argument of this agent. run options Specify a command to launch within the container once it has initialized. run command A comma separated list of directories that the container is expecting to use. The agent will ensure they exist by running 'mkdir -p' Required mount points Specify the full path of a command to launch within the container to check the health of the container. This command must return 0 to indicate that the container is healthy. A non-zero return code will indicate that the container has failed and should be recovered. If 'docker exec' is supported, it is used to execute the command. If not, nsenter is used. Note: Using this method for monitoring processes inside a container is not recommended, as containerd tries to track processes running inside the container and does not deal well with many short-lived processes being spawned. Ensure that your container monitors its own processes and terminates on fatal error rather than invoking a command from the outside. monitor command Kill a container immediately rather than waiting for it to gracefully shutdown force kill Allow the container to be reused once it is stopped. 
By default, containers get removed once they are stopped. Enable this option to have the particular one persist when this happens. reuse container - + Query the builtin healthcheck of docker (v1.12+) to determine health of the container. If left empty or set to false it will not be used. The healthcheck itself has to be configured within docker, e.g. via HEALTHCHECK in Dockerfile. This option just queries in what condition docker considers the container to be and lets ocf do its thing accordingly. Note that the time a container is in "starting" state counts against the monitor timeout. This is an additional check besides the standard check for the container to be running, and the optional monitor_cmd check. It doesn't disable or override them, so all of them (if used) have to come back healthy for the container to be considered healthy. use healthcheck The RA will report not running status on hosts where the docker daemon is not running. Name of the docker daemon pid file END } ####################################################################### REQUIRE_IMAGE_PULL=0 docker_usage() { cat </dev/null 2>&1; then out=$(docker exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1) rc=$? else out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --type=container --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1) rc=$? fi if [ $rc -eq 127 ]; then ocf_log err "monitor cmd failed (rc=$rc), output: $out" ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container." # there is no recovering from this, exit immediately exit $OCF_ERR_ARGS elif [ $rc -ne 0 ]; then ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out" rc=$OCF_ERR_GENERIC else ocf_log debug "monitor cmd passed: exit code = $rc" fi return $rc } container_exists() { local err err=$(docker inspect --type=container $CONTAINER 2>&1 >/dev/null) if [ $? 
-ne $OCF_SUCCESS ]; then case $err in *"No such container"*) # Return failure instead of exiting if container does not exist return 1 ;; *) # Exit if error running command ocf_exit_reason "$err" exit $OCF_ERR_GENERIC ;; esac fi return $OCF_SUCCESS } remove_container() { if ocf_is_true "$OCF_RESKEY_reuse"; then # never remove the container if we have reuse enabled. return 0 fi container_exists if [ $? -ne 0 ]; then # don't attempt to remove a container that doesn't exist return 0 fi ocf_log notice "Cleaning up inactive container, ${CONTAINER}." ocf_run docker rm $CONTAINER } docker_simple_status() { local val if [ ! -x "$(command -v docker)" ]; then ocf_log err "docker is not installed on this host" return $OCF_ERR_INSTALLED fi if [ ! -e "$OCF_RESKEY_daemon_pidfile" ]; then ocf_log err "docker daemon is not running, pid file $OCF_RESKEY_daemon_pidfile not exists" return $OCF_NOT_RUNNING fi container_exists if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING fi # retrieve the 'Running' attribute for the container val=$(docker inspect --type=container --format {{.State.Running}} $CONTAINER 2>/dev/null) if [ $? -ne 0 ]; then #not running as a result of container not being found return $OCF_NOT_RUNNING fi if ocf_is_true "$val"; then # container exists and is running return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } docker_health_status() { if ocf_is_true "$OCF_RESKEY_query_docker_health"; then local val container_exists if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING fi # retrieve the 'Health' attribute for the container # This is a bash-style do-while loop to wait until instance is started. # if starting takes longer than monitor timeout then upstream will make this fail. while val=$(docker inspect --type=container --format {{.State.Health.Status}} $CONTAINER 2>/dev/null) if [ $? 
-ne 0 ]; then #not healthy as a result of container not being found return $OCF_NOT_RUNNING fi test "$val" = "starting" do sleep 1 done if [ "$val" = "healthy" ]; then # container exists and is healthy return $OCF_SUCCESS fi return $OCF_NOT_RUNNING fi return 0 } docker_monitor() { local rc=0 docker_simple_status rc=$? if [ $rc -ne 0 ]; then return $rc fi docker_health_status rc=$? if [ $rc -ne 0 ]; then return $rc fi monitor_cmd_exec } docker_create_mounts() { oldIFS="$IFS" IFS="," for directory in $OCF_RESKEY_mount_points; do mkdir -p "$directory" done IFS="$oldIFS" } docker_start() { docker_create_mounts local run_opts="-d --name=${CONTAINER}" # check to see if the container has already started docker_simple_status if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi if [ -n "$OCF_RESKEY_run_opts" ]; then run_opts="$run_opts $OCF_RESKEY_run_opts" fi if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}" docker pull "${OCF_RESKEY_image}" if [ $? -ne 0 ]; then ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}" return $OCF_ERR_GENERIC fi fi if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then ocf_log info "starting existing container $CONTAINER." ocf_run docker start $CONTAINER else # make sure any previous container matching our container name is cleaned up first. # we already know at this point it wouldn't be running remove_container ocf_log info "running container $CONTAINER for the first time" ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd fi if [ $? -ne 0 ]; then ocf_exit_reason "docker failed to launch container" return $OCF_ERR_GENERIC fi # wait for monitor to pass before declaring that the container is started while true; do docker_simple_status if [ $? -ne $OCF_SUCCESS ]; then ocf_exit_reason "Newly created docker container exited after start" return $OCF_ERR_GENERIC fi monitor_cmd_exec if [ $? 
-eq $OCF_SUCCESS ]; then ocf_log notice "Container $CONTAINER started successfully" return $OCF_SUCCESS fi ocf_exit_reason "waiting on monitor_cmd to pass after start" sleep 1 done } docker_stop() { local timeout=60 docker_simple_status if [ $? -eq $OCF_NOT_RUNNING ]; then remove_container return $OCF_SUCCESS fi if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 )) if [ $timeout -lt 10 ]; then timeout=10 fi fi if ocf_is_true "$OCF_RESKEY_force_kill"; then ocf_run docker kill $CONTAINER else ocf_log debug "waiting $timeout second[s] before killing container" ocf_run docker stop -t=$timeout $CONTAINER fi if [ $? -ne 0 ]; then ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." return $OCF_ERR_GENERIC fi remove_container if [ $? -ne 0 ]; then ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } image_exists() { # if no tag was specified, use default "latest" local COLON_FOUND=0 local SLASH_FOUND=0 local SERVER_NAME="" local IMAGE_NAME="${OCF_RESKEY_image}" local IMAGE_TAG="latest" SLASH_FOUND="$(echo "${OCF_RESKEY_image}" | grep -o '/' | grep -c .)" if [ ${SLASH_FOUND} -ge 1 ]; then SERVER_NAME="$(echo ${IMAGE_NAME} | cut -d / -f 1-${SLASH_FOUND})" IMAGE_NAME="$(echo ${IMAGE_NAME} | awk -F'/' '{print $NF}')" fi COLON_FOUND="$(echo "${IMAGE_NAME}" | grep -o ':' | grep -c .)" if [ ${COLON_FOUND} -ge 1 ]; then IMAGE_TAG="$(echo ${IMAGE_NAME} | awk -F':' '{print $NF}')" IMAGE_NAME="$(echo ${IMAGE_NAME} | cut -d : -f 1-${COLON_FOUND})" fi # IMAGE_NAME might be following formats: # - image # - repository:port/image # - docker.io/image (some distro will display "docker.io/" as prefix) docker images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/|${SERVER_NAME}\/)?${IMAGE_NAME}:${IMAGE_TAG}\$" if [ $? 
-eq 0 ]; then # image found return 0 fi if ocf_is_true "$OCF_RESKEY_allow_pull"; then REQUIRE_IMAGE_PULL=1 ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start" return 0 fi # image not found. return 1 } docker_validate() { check_binary docker if [ -z "$OCF_RESKEY_image" ]; then ocf_exit_reason "'image' option is required" exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_monitor_cmd" ]; then docker exec --help >/dev/null 2>&1 if [ ! $? ]; then ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified" check_binary nsenter fi fi image_exists if [ $? -ne 0 ]; then ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found." exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } # TODO : # When a user starts plural clones in a node in globally-unique, a user cannot appoint plural name parameters. # When a user appoints reuse, the resource agent cannot connect plural clones with a container. if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then if [ -n "$OCF_RESKEY_name" ]; then if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] then ocf_exit_reason "Cannot make plural clones from the same name parameter." exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] then ocf_exit_reason "Cannot make plural master from the same name parameter." exit $OCF_ERR_CONFIGURED fi fi : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`} else : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}} fi if [ -n "$OCF_RESKEY_container" ]; then # we'll keep the container attribute around for a bit in order not to break # any existing deployments. The 'name' attribute is prefered now though. 
CONTAINER=$OCF_RESKEY_container ocf_log warn "The 'container' attribute is depreciated" else CONTAINER=$OCF_RESKEY_name fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; start) docker_validate docker_start;; stop) docker_stop;; monitor) docker_monitor;; validate-all) docker_validate;; usage|help) docker_usage exit $OCF_SUCCESS ;; *) docker_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/eDir88.in b/heartbeat/eDir88.in index eb740afcf..2ef8bbd7c 100644 --- a/heartbeat/eDir88.in +++ b/heartbeat/eDir88.in @@ -1,460 +1,476 @@ #!@BASH_SHELL@ # # eDirectory Resource Agent (RA) for Heartbeat. # This script is only compatible with eDirectory 8.8 and later # # Copyright (c) 2007 Novell Inc, Yan Fitterer # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# # # OCF parameters: # OCF_RESKEY_eDir_config_file - full filename to instance configuration file # OCF_RESKEY_eDir_monitor_ldap - Should we monitor LDAP (0/1 - 1 is true) # OCF_RESKEY_eDir_monitor_idm - Should we monitor IDM (0/1 - 1 is true) # OCF_RESKEY_eDir_jvm_initial_heap - Value of the DHOST_INITIAL_HEAP java env var # OCF_RESKEY_eDir_jvm_max_heap - Value of the DHOST_MAX_HEAP java env var # OCF_RESKEY_eDir_jvm_options - Value of the DHOST_OPTIONS java env var ############################################################################### ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs test -f /opt/novell/eDirectory/bin/ndspath && . /opt/novell/eDirectory/bin/ndspath 2>/dev/null >/dev/null +# Parameter defaults + +OCF_RESKEY_eDir_config_file_default="/etc/opt/novell/eDirectory/conf/nds.conf" +OCF_RESKEY_eDir_monitor_ldap_default="0" +OCF_RESKEY_eDir_monitor_idm_default="0" +OCF_RESKEY_eDir_jvm_initial_heap_default="" +OCF_RESKEY_eDir_jvm_max_heap_default="" +OCF_RESKEY_eDir_jvm_options_default="" + +: ${OCF_RESKEY_eDir_config_file=${OCF_RESKEY_eDir_config_file_default}} +: ${OCF_RESKEY_eDir_monitor_ldap=${OCF_RESKEY_eDir_monitor_ldap_default}} +: ${OCF_RESKEY_eDir_monitor_idm=${OCF_RESKEY_eDir_monitor_idm_default}} +: ${OCF_RESKEY_eDir_jvm_initial_heap=${OCF_RESKEY_eDir_jvm_initial_heap_default}} +: ${OCF_RESKEY_eDir_jvm_max_heap=${OCF_RESKEY_eDir_jvm_max_heap_default}} +: ${OCF_RESKEY_eDir_jvm_options=${OCF_RESKEY_eDir_jvm_options_default}} + ####################################################################### usage() { ME=$(basename "$0") cat <<-EOFA usage: $ME start|stop|status|monitor|validate-all $ME manages an eDirectory instance as an HA resource. The 'start' operation starts the instance. The 'stop' operation stops the instance. The 'status' operation reports if the instance is running. 
The 'monitor' operation reports if the instance is running, and runs additional checks. The 'validate-all' operation checks the validity of the arguments (environment variables). EOFA } eDir_meta_data() { cat <<-EOFB 1.0 Resource script for managing an eDirectory instance. Manages a single instance of eDirectory as an HA resource. The "multiple instances" feature or eDirectory has been added in version 8.8. This script will not work for any version of eDirectory prior to 8.8. This RA can be used to load multiple eDirectory instances on the same host. It is very strongly recommended to put eDir configuration files (as per the eDir_config_file parameter) on local storage on each node. This is necessary for this RA to be able to handle situations where the shared storage has become unavailable. If the eDir configuration file is not available, this RA will fail, and heartbeat will be unable to manage the resource. Side effects include STONITH actions, unmanageable resources, etc... Setting a high action timeout value is _very_ _strongly_ recommended. eDir with IDM can take in excess of 10 minutes to start. If heartbeat times out before eDir has had a chance to start properly, mayhem _WILL ENSUE_. The LDAP module seems to be one of the very last to start. So this script will take even longer to start on installations with IDM and LDAP if the monitoring of IDM and/or LDAP is enabled, as the start command will wait for IDM and LDAP to be available. Manages a Novell eDirectory directory server Path to configuration file for eDirectory instance. eDir config file - + Should we monitor if LDAP is running for the eDirectory instance? eDir monitor ldap - + Should we monitor if IDM is running for the eDirectory instance? eDir monitor IDM - + Value for the DHOST_INITIAL_HEAP java environment variable. If unset, java defaults will be used. DHOST_INITIAL_HEAP value - + Value for the DHOST_MAX_HEAP java environment variable. If unset, java defaults will be used. 
DHOST_MAX_HEAP value - + Value for the DHOST_OPTIONS java environment variable. If unset, original values will be used. DHOST_OPTIONS value - + EOFB return $OCF_SUCCESS } # # eDir_start: Start eDirectory instance # eDir_start() { if eDir_status ; then ocf_log info "eDirectory is already running ($NDSCONF)." return $OCF_SUCCESS fi # Start eDirectory instance if [ -n "$OCF_RESKEY_eDir_jvm_initial_heap" ]; then DHOST_JVM_INITIAL_HEAP=$OCF_RESKEY_eDir_jvm_initial_heap export DHOST_JVM_INITIAL_HEAP fi if [ -n "$OCF_RESKEY_eDir_jvm_max_heap" ]; then DHOST_JVM_MAX_HEAP=$OCF_RESKEY_eDir_jvm_max_heap export DHOST_JVM_MAX_HEAP fi if [ -n "$OCF_RESKEY_eDir_jvm_options" ]; then DHOST_JVM_OPTIONS=$OCF_RESKEY_eDir_jvm_options export DHOST_JVM_OPTIONS fi $NDSMANAGE start --config-file "$NDSCONF" > /dev/null 2>&1 if [ $? -eq 0 ]; then ocf_log info "eDir start command sent for $NDSCONF." else echo "ERROR: Can't start eDirectory for $NDSCONF." return $OCF_ERR_GENERIC fi CNT=0 while ! eDir_monitor ; do # Apparently, LDAP will only start after all other services # Startup time can be in excess of 10 minutes. # Leave a very long heartbeat timeout on the start action # We're relying on heartbeat to bail us out... let CNT=$CNT+1 ocf_log info "eDirectory start waiting for ${CNT}th retry for $NDSCONF." sleep 10 done ocf_log info "eDirectory start verified for $NDSCONF." return $OCF_SUCCESS } # # eDir_stop: Stop eDirectory instance # This action is written in such a way that even when run # on a node were things are broken (no binaries, no config # etc...) it will try to stop any running ndsd processes # and report success if none are running. # eDir_stop() { if ! eDir_status ; then return $OCF_SUCCESS fi $NDSMANAGE stop --config-file "$NDSCONF" >/dev/null 2>&1 if eDir_status ; then # eDir failed to stop. ocf_log err "eDirectory instance failed to stop for $NDSCONF" return $OCF_ERR_GENERIC else ocf_log info "eDirectory stop verified for $NDSCONF." 
return $OCF_SUCCESS fi } # # eDir_status: is eDirectory instance up ? # eDir_status() { if [ ! -r "$NDSCONF" ] ; then ocf_log err "Config file missing ($NDSCONF)." exit $OCF_ERR_GENERIC fi # Find how many ndsd processes have open listening sockets # with the IP of this eDir instance IFACE=$(grep -i "n4u.server.interfaces" $NDSCONF | cut -f2 -d= | tr '@' ':') if [ -z "$IFACE" ] ; then ocf_log err "Cannot retrieve interfaces from $NDSCONF. eDirectory may not be correctly configured." exit $OCF_ERR_GENERIC fi # In case of multiple IP's split into an array # and check all of them IFS=', ' read -a IFACE2 <<< "$IFACE" ocf_log debug "Found ${#IFACE2[@]} interfaces from $NDSCONF." counter=${#IFACE2[@]} for IFACE in "${IFACE2[@]}" do ocf_log debug "Checking ndsd instance for $IFACE" NDSD_SOCKS=$(netstat -ntlp | grep -ce "$IFACE.*ndsd") if [ "$NDSD_SOCKS" -eq 1 ] ; then let counter=counter-1 ocf_log debug "Found ndsd instance for $IFACE" elif [ "$NDSD_SOCKS" -gt 1 ] ; then ocf_log err "More than 1 ndsd listening socket matched. Likely misconfiguration of eDirectory." exit $OCF_ERR_GENERIC fi done if [ $counter -eq 0 ] ; then # Correct ndsd instance is definitely running ocf_log debug "All ndsd instances found." return 0; elif [ $counter -lt ${#IFACE2[@]} ]; then ocf_log err "Only some ndsd listening sockets matched, something is very wrong." exit $OCF_ERR_GENERIC fi # No listening socket. Make sure we don't have the process running... PIDDIR=$(grep -i "n4u.server.vardir" "$NDSCONF" | cut -f2 -d=) if [ -z "$PIDDIR" ] ; then ocf_log err "Cannot get vardir from nds config ($NDSCONF). Probable eDir configuration error." exit $OCF_ERR_GENERIC fi NDSD_PID=$(cat $PIDDIR/ndsd.pid 2>/dev/null) if [ -z "$NDSD_PID" ] ; then # PID file unavailable or empty. # This will happen if the PIDDIR is not available # on this node at this time. return 1 fi RC=$(ps -p "$NDSD_PID" | grep -c ndsd) if [ "$RC" -gt 0 ] ; then # process found but no listening socket. 
ndsd likely not operational ocf_log err "ndsd process found, but no listening socket. Something's gone wrong ($NDSCONF)" exit $OCF_ERR_GENERIC fi ocf_log debug "ndsd instance is not running, but no other error detected." return 1 } # # eDir_monitor: Do more in-depth checks to ensure that eDirectory is fully functional # LDAP and IDM checks are only done if reqested. # # eDir_monitor() { if ! eDir_status ; then ocf_log info "eDirectory instance is down ($NDSCONF)" return $OCF_NOT_RUNNING fi # We know the right ndsd is running locally, check health $NDSSTAT --config-file "$NDSCONF" >/dev/null 2>&1 if [ $? -ne 0 ] ; then return 1 fi # Monitor IDM first, as it will start before LDAP if [ $MONITOR_IDM -eq 1 ]; then RET=$($NDSTRACE --config-file "$NDSCONF" -c modules | egrep -i '^vrdim.*Running' | awk '{print $1}') if [ "$RET" != "vrdim" ]; then ocf_log err "eDirectory IDM engine isn't running ($NDSCONF)." return $OCF_ERR_GENERIC fi fi if [ $MONITOR_LDAP -eq 1 ] ; then $NDSNLDAP -c --config-file "$NDSCONF" >/dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log err "eDirectory LDAP server isn't running ($NDSCONF)." return $OCF_ERR_GENERIC fi fi ocf_log debug "eDirectory monitor success ($NDSCONF)" return $OCF_SUCCESS } # # eDir_validate: Validate environment # eDir_validate() { declare rc=$OCF_SUCCESS # Script must be run as root if ! ocf_is_root ; then ocf_log err "$0 must be run as root" rc=$OCF_ERR_GENERIC fi # ndsmanage must be available and runnable check_binary $NDSMANAGE # ndsstat must be available and runnable check_binary $NDSSTAT # Config file must be readable if [ ! 
-r "$NDSCONF" ] ; then ocf_log err "eDirectory configuration file [$NDSCONF] is not readable" rc=$OCF_ERR_ARGS fi # monitor_ldap must be unambiguously resolvable to a truth value MONITOR_LDAP=$(echo "$MONITOR_LDAP" | tr [A-Z] [a-z]) case "$MONITOR_LDAP" in yes|true|1) MONITOR_LDAP=1;; no|false|0) MONITOR_LDAP=0;; *) ocf_log err "Configuration parameter eDir_monitor_ldap has invalid value [$MONITOR_LDAP]" rc=$OCF_ERR_ARGS;; esac # monitor_idm must be unambiguously resolvable to a truth value MONITOR_IDM=$(echo "$MONITOR_IDM" | tr [A-Z] [a-z]) case "$MONITOR_IDM" in yes|true|1) MONITOR_IDM=1;; no|false|0) MONITOR_IDM=0;; *) ocf_log err "Configuration parameter eDir_monitor_idm has invalid value [$MONITOR_IDM]" rc=$OCF_ERR_ARGS;; esac # eDir_jvm_initial_heap must be blank or numeric if [ -n "$OCF_RESKEY_eDir_jvm_initial_heap" ] ; then if ! ocf_is_decimal "$OCF_RESKEY_eDir_jvm_initial_heap" ; then ocf_log err "Configuration parameter eDir_jvm_initial_heap has invalid" \ "value [$OCF_RESKEY_eDir_jvm_initial_heap]" rc=$OCF_ERR_ARGS fi fi # eDir_jvm_max_heap must be blank or numeric if [ -n "$OCF_RESKEY_eDir_jvm_max_heap" ] ; then if ! ocf_is_decimal "$OCF_RESKEY_eDir_jvm_max_heap" ; then ocf_log err "Configuration parameter eDir_jvm_max_heap has invalid" \ "value [$OCF_RESKEY_eDir_jvm_max_heap]" rc=$OCF_ERR_ARGS fi fi if [ $rc -ne $OCF_SUCCESS ] ; then ocf_log err "Invalid environment" fi return $rc } # # Start of main logic # ocf_log debug "$0 started with arguments \"$*\"" NDSBASE=/opt/novell/eDirectory NDSNLDAP=$NDSBASE/sbin/nldap NDSMANAGE=$NDSBASE/bin/ndsmanage NDSSTAT=$NDSBASE/bin/ndsstat NDSTRACE=$NDSBASE/bin/ndstrace NDSCONF=${OCF_RESKEY_eDir_config_file:-/etc/opt/novell/eDirectory/conf/nds.conf} MONITOR_LDAP=${OCF_RESKEY_eDir_monitor_ldap:-0} MONITOR_IDM=${OCF_RESKEY_eDir_monitor_idm:-0} # What kind of method was invoked? 
case "$1" in validate-all) eDir_validate; exit $?;; meta-data) eDir_meta_data; exit $OCF_SUCCESS;; status) if eDir_status ; then ocf_log info "eDirectory instance is up ($NDSCONF)" exit $OCF_SUCCESS else ocf_log info "eDirectory instance is down ($NDSCONF)" exit $OCF_NOT_RUNNING fi;; start) : skip;; stop) : skip;; monitor) : skip;; usage) usage; exit $OCF_SUCCESS;; *) ocf_log err "Invalid argument [$1]" usage; exit $OCF_ERR_ARGS;; esac # From now on we must have a valid environment to continue. # stop goes in the list above as it should ideally be able to # clean up after a start that failed due to bad args eDir_validate RC=$? if [ $RC -ne $OCF_SUCCESS ]; then exit $RC fi case "$1" in start) eDir_start;; stop) eDir_stop;; monitor) eDir_monitor;; esac exit $? diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor index e835a6894..e791fbe9d 100755 --- a/heartbeat/ethmonitor +++ b/heartbeat/ethmonitor @@ -1,557 +1,576 @@ #!/bin/sh # # OCF Resource Agent compliant script. # Monitor the vitality of a local network interface. # # Based on the work by Robert Euhus and Lars Marowsky-Bree. # # Transfered from Ipaddr2 into ethmonitor by Alexander Krauth # # Copyright (c) 2011 Robert Euhus, Alexander Krauth, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. 
Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # OCF parameters are as below # # OCF_RESKEY_interface # OCF_RESKEY_multiplicator # OCF_RESKEY_name # OCF_RESKEY_repeat_count # OCF_RESKEY_repeat_interval # OCF_RESKEY_pktcnt_timeout # OCF_RESKEY_arping_count # OCF_RESKEY_arping_timeout # OCF_RESKEY_arping_cache_entries # # TODO: Check against IPv6 # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_interface_default="" +OCF_RESKEY_name_default="" +OCF_RESKEY_multiplier_default="1" +OCF_RESKEY_repeat_count_default="5" +OCF_RESKEY_repeat_interval_default="10" +OCF_RESKEY_pktcnt_timeout_default="5" +OCF_RESKEY_arping_count_default="1" +OCF_RESKEY_arping_timeout_default="1" +OCF_RESKEY_arping_cache_entries_default="5" +OCF_RESKEY_link_status_only_default="false" + +: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}} +: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +: ${OCF_RESKEY_multiplier=${OCF_RESKEY_multiplier_default}} +: ${OCF_RESKEY_repeat_count=${OCF_RESKEY_repeat_count_default}} +: ${OCF_RESKEY_repeat_interval=${OCF_RESKEY_repeat_interval_default}} +: ${OCF_RESKEY_pktcnt_timeout=${OCF_RESKEY_pktcnt_timeout_default}} +: ${OCF_RESKEY_arping_count=${OCF_RESKEY_arping_count_default}} +: ${OCF_RESKEY_arping_timeout=${OCF_RESKEY_arping_timeout_default}} +: ${OCF_RESKEY_arping_cache_entries=${OCF_RESKEY_arping_cache_entries_default}} +: ${OCF_RESKEY_link_status_only=${OCF_RESKEY_link_status_only_default}} + ####################################################################### meta_data() { cat < 1.2 
Monitor the vitality of a local network interface. You may set up this RA as a clone resource to monitor the network interfaces on different nodes, with the same interface name. This is not related to the IP address or the network on which a interface is configured. You may use this RA to move resources away from a node, which has a faulty interface or prevent moving resources to such a node. This gives you independent control of the resources, without involving cluster intercommunication. But it requires your nodes to have more than one network interface. The resource configuration requires a monitor operation, because the monitor does the main part of the work. In addition to the resource configuration, you need to configure some location constraints, based on a CIB attribute value. The name of the attribute value is configured in the 'name' option of this RA. Example constraint configuration using crmsh location loc_connected_node my_resource_grp \ rule $id="rule_loc_connected_node" -INF: ethmonitor eq 0 Example constraint configuration using pcs. Only allow 'my_resource' to run on nodes where eth0 ethernet device is available. pcs constraint location my_resource rule score=-INFINITY ethmonitor-eth0 ne 1 The ethmonitor works in 3 different modes to test the interface vitality. 1. call ip to see if the link status is up (if link is down -> error) 2. call ip and watch the RX counter (if packages come around in a certain time -> success) 3. call arping to check whether any of the IPs found in the local ARP cache answers an ARP REQUEST (one answer -> success) 4. return error Monitors network interfaces The name of the network interface which should be monitored (e.g. eth0). Network interface name - + The name of the CIB attribute to set. This is the name to be used in the constraints. Defaults to "ethmonitor-'interface_name'". Attribute name - + Multiplier for the value of the CIB attriobute specified in parameter name. 
Multiplier for result variable - + Specify how often the interface will be monitored, before the status is set to failed. You need to set the timeout of the monitoring operation to at least repeat_count * repeat_interval Monitor repeat count - + Specify how long to wait in seconds between the repeat_counts. Monitor repeat interval in seconds - + Timeout for the RX packet counter. Stop listening for packet counter changes after the given number of seconds. packet counter timeout - + Number of ARP REQUEST packets to send for every IP. Usually one ARP REQUEST (arping) is send Number of arpings per IP - + Time in seconds to wait for ARP REQUESTs (all packets of arping_count). This is to limit the time for arp requests, to be able to send requests to more than one node, without running in the monitor operation timeout. Timeout for arpings per IP - + Maximum number of IPs from ARP cache list to check for ARP REQUEST (arping) answers. Newest entries are tried first. Number of ARP cache entries to try - + For interfaces that are infiniband devices. infiniband device For infiniband devices, this is the port to monitor. infiniband port Only report success based on link status. Do not perform RX counter or arping related connectivity tests. 
link status check only - + END exit $OCF_SUCCESS } # # Return true, if the interface exists # is_interface() { # # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces # local iface=`$IP2UTIL -o -f link addr show | grep " $1:" \ | cut -d ' ' -f2 | tr -d ':' | sort -u | grep -v '^ipsec[0-9][0-9]*$'` [ "$iface" != "" ] } infiniband_status() { local device="$OCF_RESKEY_infiniband_device" if [ -n "$OCF_RESKEY_infiniband_port" ]; then device="${OCF_RESKEY_infiniband_device}:${OCF_RESKEY_infiniband_port}" fi case "${OCF_RESKEY_infiniband_device}" in *ib*|*mlx*) ibstatus ${device} | grep -q ACTIVE ;; *hfi*) opainfo | grep -q Active ;; esac } if_init() { local rc if [ X"$OCF_RESKEY_interface" = "X" ]; then ocf_exit_reason "Interface name (the interface parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi NIC="$OCF_RESKEY_interface" if is_interface $NIC then case "$NIC" in *:*) ocf_exit_reason "Do not specify a virtual interface : $OCF_RESKEY_interface" exit $OCF_ERR_CONFIGURED;; *) ;; esac else case $__OCF_ACTION in validate-all) ocf_exit_reason "Interface $NIC does not exist" exit $OCF_ERR_CONFIGURED;; *) ## It might be a bond interface which is temporarily not available, therefore we want to continue here ocf_log warn "Interface $NIC does not exist" ;; esac fi - : ${OCF_RESKEY_multiplier:="1"} if ! ocf_is_decimal "$OCF_RESKEY_multiplier"; then ocf_exit_reason "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]" exit $OCF_ERR_CONFIGURED fi ATTRNAME=${OCF_RESKEY_name:-"ethmonitor-$NIC"} REP_COUNT=${OCF_RESKEY_repeat_count:-5} if ! ocf_is_decimal "$REP_COUNT" -o [ $REP_COUNT -lt 1 ]; then ocf_exit_reason "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]" exit $OCF_ERR_CONFIGURED fi REP_INTERVAL_S=${OCF_RESKEY_repeat_interval:-10} if ! ocf_is_decimal "$REP_INTERVAL_S"; then ocf_exit_reason "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]" exit $OCF_ERR_CONFIGURED fi - : ${OCF_RESKEY_pktcnt_timeout:="5"} if ! 
ocf_is_decimal "$OCF_RESKEY_pktcnt_timeout"; then ocf_exit_reason "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]" exit $OCF_ERR_CONFIGURED fi - : ${OCF_RESKEY_arping_count:="1"} if ! ocf_is_decimal "$OCF_RESKEY_arping_count"; then ocf_exit_reason "Invalid OCF_RESKEY_arping_count [$OCF_RESKEY_arping_count]" exit $OCF_ERR_CONFIGURED fi - : ${OCF_RESKEY_arping_timeout:="1"} if ! ocf_is_decimal "$OCF_RESKEY_arping_timeout"; then ocf_exit_reason "Invalid OCF_RESKEY_arping_timeout [$OCF_RESKEY_arping_count]" exit $OCF_ERR_CONFIGURED fi - : ${OCF_RESKEY_arping_cache_entries:="5"} if ! ocf_is_decimal "$OCF_RESKEY_arping_cache_entries"; then ocf_exit_reason "Invalid OCF_RESKEY_arping_cache_entries [$OCF_RESKEY_arping_cache_entries]" exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_infiniband_device" ]; then #ibstatus or opainfo is required if an infiniband_device is provided case "${OCF_RESKEY_infiniband_device}" in *ib*|*mlx*) check_binary ibstatus ;; *hfi*) check_binary opainfo ;; esac fi return $OCF_SUCCESS } # get the link status on $NIC # asks ip about running (up) interfaces, returns the number of matching interface names that are up get_link_status () { $IP2UTIL -o link show up dev "$NIC" | grep -v 'NO-CARRIER' | grep -c "$NIC" } # returns the number of received rx packets on $NIC get_rx_packets () { ocf_log debug "$IP2UTIL -o -s link show dev $NIC" $IP2UTIL -o -s link show dev "$NIC" \ | sed 's/.* RX: [^0-9]*[0-9]* *\([0-9]*\) .*/\1/' # the first number after RX: is the # of bytes , # the second is the # of packets received } # watch for packet counter changes for max. 
OCF_RESKEY_pktcnt_timeout seconds # returns immediately with return code 0 if any packets were received # otherwise 1 is returned watch_pkt_counter () { local RX_PACKETS_NEW local RX_PACKETS_OLD RX_PACKETS_OLD="`get_rx_packets`" for n in `seq $(( $OCF_RESKEY_pktcnt_timeout * 10 ))`; do sleep 0.1 RX_PACKETS_NEW="`get_rx_packets`" ocf_log debug "RX_PACKETS_OLD: $RX_PACKETS_OLD RX_PACKETS_NEW: $RX_PACKETS_NEW" if [ "$RX_PACKETS_OLD" -ne "$RX_PACKETS_NEW" ]; then ocf_log debug "we received some packets." return 0 fi done return 1 } # returns list of cached ARP entries for $NIC # sorted by age ("last confirmed") # max. OCF_RESKEY_arping_cache_entries entries get_arp_list () { $IP2UTIL -s neighbour show dev $NIC \ | sort -t/ -k2,2n | cut -d' ' -f1 \ | head -n $OCF_RESKEY_arping_cache_entries # the "used" entries in `ip -s neighbour show` are: # "last used"/"last confirmed"/"last updated" } # arping the IP given as argument $1 on $NIC # until OCF_RESKEY_arping_count answers are received do_arping () { # TODO: add the source IP # TODO: check for different arping versions out there arping -q -c $OCF_RESKEY_arping_count -w $OCF_RESKEY_arping_timeout -I $NIC $1 # return with the exit code of the arping command return $? } # # Check the interface depending on the level given as parameter: $OCF_RESKEY_check_level # # 09: check for nonempty ARP cache # 10: watch for packet counter changes # # 19: check arping_ip_list # 20: check arping ARP cache entries # # 30: watch for packet counter changes in promiscuous mode # # If unsuccessful in levels 18 and above, # the tests for higher check levels are run.
# if_check () { local arp_list # always check link status first link_status="`get_link_status`" ocf_log debug "link_status: $link_status (1=up, 0=down)" if [ $link_status -eq 0 ]; then ocf_log notice "link_status: DOWN" return $OCF_NOT_RUNNING fi # if this is an infiniband device, try ibstatus script if [ -n "$OCF_RESKEY_infiniband_device" ]; then if infiniband_status; then return $OCF_SUCCESS fi ocf_log info "Infiniband device $OCF_RESKEY_infiniband_device is not available, check ibstatus for more information" return $OCF_NOT_RUNNING fi # if using link_status_only, skip RX count and arping related tests if ocf_is_true "$OCF_RESKEY_link_status_only"; then return $OCF_SUCCESS fi # watch for packet counter changes ocf_log debug "watch for packet counter changes" watch_pkt_counter if [ $? -eq 0 ]; then return $OCF_SUCCESS else ocf_log debug "No packets received during packet watch timeout" fi # check arping ARP cache entries ocf_log debug "check arping ARP cache entries" arp_list=`get_arp_list` for ip in `echo $arp_list`; do do_arping $ip && return $OCF_SUCCESS done # if we get here, the ethernet device is considered not running. # provide some logging information if [ -z "$arp_list" ]; then ocf_log info "No ARP cache entries found to arping" fi # watch for packet counter changes in promiscuous mode # ocf_log debug "watch for packet counter changes in promiscios mode" # be sure switch off promiscuous mode in any case # TODO: check first, whether promisc is already on and leave it untouched.
# trap "$IP2UTIL link set dev $NIC promisc off; exit" INT TERM EXIT # $IP2UTIL link set dev $NIC promisc on # watch_pkt_counter && return $OCF_SUCCESS # $IP2UTIL link set dev $NIC promisc off # trap - INT TERM EXIT # looks like it's not working (for whatever reason) return $OCF_NOT_RUNNING } ####################################################################### if_usage() { cat < /dev/null` sleep $sleep_time 2> /dev/null runs=$(($runs + 1)) fi if [ $mon_rc -eq $OCF_SUCCESS -a $runs -ne 0 ]; then ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error" fi done ocf_log debug "Monitoring return code: $mon_rc" if [ $mon_rc -eq $OCF_SUCCESS ]; then set_cib_value 1 attr_rc=$? else ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed." set_cib_value 0 attr_rc=$? fi ## The resource should not fail, if the interface is down. It should fail, if the update of the CIB variable has errors. ## To react on the interface failure you must use constraints based on the CIB variable value, not on the resource itself. exit $attr_rc } if_stop() { attrd_updater -D -n $ATTRNAME ha_pseudo_resource $OCF_RESOURCE_INSTANCE stop } if_start() { local rc ha_pseudo_resource $OCF_RESOURCE_INSTANCE start rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "Failure to create ethmonitor state file" return $rc fi # perform the first monitor during the start operation if_monitor return $? } if_validate() { check_binary $IP2UTIL check_binary arping if_init } case $__OCF_ACTION in meta-data) meta_data ;; usage|help) if_usage exit $OCF_SUCCESS ;; esac if_validate case $__OCF_ACTION in start) if_start exit $? ;; stop) if_stop exit $? ;; monitor|status) if_monitor exit $? ;; validate-all) exit $? 
;; *) if_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/fio.in b/heartbeat/fio.in index 68a123983..0347c1ca6 100644 --- a/heartbeat/fio.in +++ b/heartbeat/fio.in @@ -1,172 +1,178 @@ #!@BASH_SHELL@ # # fio RA # # Copyright (c) 2010 SUSE Linux Products GmbH, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_args_default="" + +: ${OCF_RESKEY_args=${OCF_RESKEY_args_default}} + ####################################################################### meta_data() { cat < 1.0 fio is a generic I/O load generator. This RA allows start/stop of fio instances to simulate load on a cluster without configuring complex services. fio IO load generator Arguments to the fio client. Minimally, this should be a (list of) job descriptions to run. 
fio arguments - + END } ####################################################################### fio_usage() { cat </dev/null 2>&1 ${fio_state_file} ocf_log info "fio started as pid=$fio_pid" exit $OCF_SUCCESS } fio_stop() { for sig in SIGINT SIGTERM SIGKILL ; do fio_monitor ; rc=$? case $rc in $OCF_NOT_RUNNING) ocf_log info "fio already stopped." exit $OCF_SUCCESS ;; $OCF_ERR_GENERIC) rm $fio_state_file ocf_log info "fio stopped and cleaned up." exit $OCF_SUCCESS ;; $OCF_SUCCESS) if [ -n "$fio_pid" ]; then ocf_log info "Sending $sig to fio (pid=$fio_pid)" kill -$sig $fio_pid sleep 3 continue fi ocf_log err "Internal logic failure in fio RA." ;; *) ocf_log err "Internal logic failure in fio RA." ;; esac done ocf_log err "fio did not stop! Perhaps hung on IO?" exit $OCF_ERR_GENERIC } fio_monitor() { fio_state_file="${HA_RSCTMP}/fio-${OCF_RESOURCE_INSTANCE}.state" if [ ! -e $fio_state_file ]; then return $OCF_NOT_RUNNING fi fio_pid=`cat $fio_state_file` if [ -z "$fio_pid" ]; then ocf_log err "State file found, but empty. Assuming stopped." return $OCF_NOT_RUNNING fi ps=`ps h -o comm $fio_pid 2>&1` if [ "$ps" != "fio" ]; then fio_pid="" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } fio_validate() { return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; validate-all) fio_validate;; usage|help) fio_usage exit $OCF_SUCCESS ;; esac ocf_is_probe || check_binary fio case $__OCF_ACTION in start) fio_start;; stop) fio_stop;; monitor) fio_monitor;; *) fio_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/galera b/heartbeat/galera index ed8b464b6..9b9fe5569 100755 --- a/heartbeat/galera +++ b/heartbeat/galera @@ -1,1010 +1,1022 @@ #!/bin/sh # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ## # README. # # This agent only supports being configured as a multistate Master # resource. # # Slave vs Master role: # # During the 'Slave' role, galera instances are in read-only mode and # will not attempt to connect to the cluster. This role exists only as # a means to determine which galera instance is the most up-to-date. The # most up-to-date node will be used to bootstrap a galera cluster that # has no current members. # # The galera instances will only begin to be promoted to the Master role # once all the nodes in the 'wsrep_cluster_address' connection address # have entered read-only mode. At that point the node containing the # database that is most current will be promoted to Master. Once the first # Master instance bootstraps the galera cluster, the other nodes will be # promoted to Master as well. # # Example: Create a galera cluster using nodes rhel7-node1 rhel7-node2 rhel7-node3 # # pcs resource create db galera enable_creation=true \ # wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master # # By setting the 'enable_creation' option, the database will be automatically # generated at startup. 
The meta attribute 'master-max=3' means that all 3 # nodes listed in the wsrep_cluster_address list will be allowed to connect # to the galera cluster and perform replication. # # NOTE: If you have more nodes in the pacemaker cluster then you wish # to have in the galera cluster, make sure to use location contraints to prevent # pacemaker from attempting to place a galera instance on a node that is # not in the 'wsrep_cluster_address" list. # ## ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs if [ "$__OCF_ACTION" != "meta-data" ]; then . ${OCF_FUNCTIONS_DIR}/mysql-common.sh NODENAME=$(ocf_attribute_target) fi # It is common for some galera instances to store # check user that can be used to query status # in this file if [ -f "/etc/sysconfig/clustercheck" ]; then . /etc/sysconfig/clustercheck elif [ -f "/etc/default/clustercheck" ]; then . /etc/default/clustercheck fi +# Parameter defaults + +OCF_RESKEY_wsrep_cluster_address_default="" +OCF_RESKEY_cluster_host_map_default="" +OCF_RESKEY_check_user_default="root" +OCF_RESKEY_check_passwd_default="" + +: ${OCF_RESKEY_wsrep_cluster_address=${OCF_RESKEY_wsrep_cluster_address_default}} +: ${OCF_RESKEY_cluster_host_map=${OCF_RESKEY_cluster_host_map_default}} +: ${OCF_RESKEY_check_user=${OCF_RESKEY_check_user_default}} +: ${OCF_RESKEY_check_passwd=${OCF_RESKEY_check_passwd_default}} + ####################################################################### # Defaults: OCF_RESKEY_check_passwd_use_empty_default=0 : ${OCF_RESKEY_check_passwd_use_empty=${OCF_RESKEY_check_passwd_use_empty_default}} ####################################################################### usage() { cat < 1.0 Resource script for managing galara database. 
Manages a galara instance Location of the MySQL server binary MySQL server binary Location of the MySQL client binary MySQL client binary Configuration file MySQL config Directory containing databases MySQL datadir User running MySQL daemon MySQL user Group running MySQL daemon (for logfile and directory permissions) MySQL group The logfile to be used for mysqld. MySQL log file The pidfile to be used for mysqld. MySQL pid file The socket to be used for mysqld. MySQL socket If the MySQL database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld The galera cluster address. This takes the form of: gcomm://node,node,node Only nodes present in this node list will be allowed to start a galera instance. The galera node names listed in this address are expected to match valid pacemaker node names. If both names need to differ, you must provide a mapping in option cluster_host_map. Galera cluster address - + A mapping of pacemaker node names to galera node names. To be used when both pacemaker and galera names need to differ, (e.g. when galera names map to IP from a specific network interface) This takes the form of: pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera where the galera resource started on node pcmk1 would be named node.1.galera in the wsrep_cluster_address Pacemaker to Galera name mapping - + Cluster check user. MySQL test user - + Cluster check user password. Empty passwords are ignored unless the parameter "check_passwd_use_empty" is set to 1. check password - + Use an empty "check_passwd" password. If this parameter is set to 1, "check_passwd" will be ignored and an empty password is used when calling the "mysql" client binary. 
check password use empty END } get_option_variable() { local key=$1 $MYSQL $MYSQL_OPTIONS_CHECK -e "SHOW VARIABLES like '$key';" | tail -1 } get_status_variable() { local key=$1 $MYSQL $MYSQL_OPTIONS_CHECK -e "show status like '$key';" | tail -1 } set_bootstrap_node() { local node=$(ocf_attribute_target $1) ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true" } clear_bootstrap_node() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -D } is_bootstrap() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" --quiet 2>/dev/null } set_no_grastate() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -v "true" } clear_no_grastate() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -D } is_no_grastate() { local node=$(ocf_attribute_target $1) ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" --quiet 2>/dev/null } clear_last_commit() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D } set_last_commit() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -v $1 } get_last_commit() { local node=$(ocf_attribute_target $1) if [ -z "$node" ]; then ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null else ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null fi } clear_safe_to_bootstrap() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" -D } set_safe_to_bootstrap() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" -v $1 } get_safe_to_bootstrap() { local node=$(ocf_attribute_target $1) if [ -z 
"$node" ]; then ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null else ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null fi } wait_for_sync() { local state=$(get_status_variable "wsrep_local_state") ocf_log info "Waiting for database to sync with the cluster. " while [ "$state" != "4" ]; do sleep 1 state=$(get_status_variable "wsrep_local_state") done ocf_log info "Database synced." } is_primary() { cluster_status=$(get_status_variable "wsrep_cluster_status") if [ "$cluster_status" = "Primary" ]; then return 0 fi if [ -z "$cluster_status" ]; then ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status" else ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}" fi return 1 } is_readonly() { local res=$(get_option_variable "read_only") if ! ocf_is_true "$res"; then return 1 fi cluster_status=$(get_status_variable "wsrep_cluster_status") if ! [ "$cluster_status" = "Disconnected" ]; then return 1 fi return 0 } master_exists() { if [ "$__OCF_ACTION" = "demote" ]; then # We don't want to detect master instances during demote. # 1. we could be detecting ourselves as being master, which is no longer the case. # 2. we could be detecting other master instances that are in the process of shutting down. # by not detecting other master instances in "demote" we are deferring this check # to the next recurring monitor operation which will be much more accurate return 1 fi # determine if a master instance is already up and is healthy ${HA_SBIN_DIR}/crm_mon --as-xml | grep "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1 return $? 
} clear_master_score() { local node=$(ocf_attribute_target $1) if [ -z "$node" ]; then $CRM_MASTER -D else $CRM_MASTER -D -N $node fi } set_master_score() { local node=$(ocf_attribute_target $1) if [ -z "$node" ]; then $CRM_MASTER -v 100 else $CRM_MASTER -N $node -v 100 fi } promote_everyone() { for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do local pcmk_node=$(galera_to_pcmk_name $node) if [ -z "$pcmk_node" ]; then ocf_log err "Could not determine pacemaker node from galera name <${node}>." return else node=$pcmk_node fi set_master_score $node done } greater_than_equal_long() { # there are values we need to compare in this script # that are too large for shell -gt to process echo | awk -v n1="$1" -v n2="$2" '{if (n1>=n2) printf ("true"); else printf ("false");}' | grep -q "true" } galera_to_pcmk_name() { local galera=$1 if [ -z "$OCF_RESKEY_cluster_host_map" ]; then echo $galera else echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$2=="'"$galera"'" {print $1;exit}' fi } pcmk_to_galera_name() { local pcmk=$1 if [ -z "$OCF_RESKEY_cluster_host_map" ]; then echo $pcmk else echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$pcmk"'" {print $2;exit}' fi } detect_first_master() { local best_commit=0 local last_commit=0 local missing_nodes=0 local nodes="" local nodes_recovered="" local all_nodes local best_node_gcomm local best_node local safe_to_bootstrap all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ') best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/') best_node=$(galera_to_pcmk_name $best_node_gcomm) if [ -z "$best_node" ]; then ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>." 
return fi # avoid selecting a recovered node as bootstrap if possible for node in $all_nodes; do local pcmk_node=$(galera_to_pcmk_name $node) if [ -z "$pcmk_node" ]; then ocf_log err "Could not determine pacemaker node from galera name <${node}>." return else node=$pcmk_node fi if is_no_grastate $node; then nodes_recovered="$nodes_recovered $node" else nodes="$nodes $node" fi done for node in $nodes_recovered $nodes; do safe_to_bootstrap=$(get_safe_to_bootstrap $node) if [ "$safe_to_bootstrap" = "1" ]; then # Galera marked the node as safe to boostrap during shutdown. Let's just # pick it as our bootstrap node. ocf_log info "Node <${node}> is marked as safe to bootstrap." best_node=$node # We don't need to wait for the other nodes to report state in this case missing_nodes=0 break fi last_commit=$(get_last_commit $node) if [ -z "$last_commit" ]; then ocf_log info "Waiting on node <${node}> to report database status before Master instances can start." missing_nodes=1 continue fi # this means -1, or that no commit has occured yet. if [ "$last_commit" = "18446744073709551615" ]; then last_commit="0" fi greater_than_equal_long "$last_commit" "$best_commit" if [ $? 
-eq 0 ]; then best_node=$(ocf_attribute_target $node) best_commit=$last_commit fi done if [ $missing_nodes -eq 1 ]; then return fi ocf_log info "Promoting $best_node to be our bootstrap node" set_master_score $best_node set_bootstrap_node $best_node } detect_safe_to_bootstrap() { local safe_to_bootstrap="" local uuid="" local seqno="" if [ -f ${OCF_RESKEY_datadir}/grastate.dat ]; then ocf_log info "attempting to read safe_to_bootstrap flag from ${OCF_RESKEY_datadir}/grastate.dat" safe_to_bootstrap=$(sed -n 's/^safe_to_bootstrap:\s*\(.*\)$/\1/p' < ${OCF_RESKEY_datadir}/grastate.dat) uuid=$(sed -n 's/^uuid:\s*\(.*\)$/\1/p' < ${OCF_RESKEY_datadir}/grastate.dat) seqno=$(sed -n 's/^seqno:\s*\(.*\)$/\1/p' < ${OCF_RESKEY_datadir}/grastate.dat) fi if [ -z "$uuid" ] || [ -z "$seqno" ] || \ [ "$uuid" = "00000000-0000-0000-0000-000000000000" ] || \ [ "$seqno" = "-1" ]; then clear_safe_to_bootstrap return fi if [ "$safe_to_bootstrap" = "1" ] || [ "$safe_to_bootstrap" = "0" ]; then set_safe_to_bootstrap $safe_to_bootstrap else clear_safe_to_bootstrap fi } detect_last_commit() { local last_commit local recover_args="--defaults-file=$OCF_RESKEY_config \ --pid-file=$OCF_RESKEY_pid \ --socket=$OCF_RESKEY_socket \ --datadir=$OCF_RESKEY_datadir \ --user=$OCF_RESKEY_user" local recovery_file_regex='s/.*WSREP\:.*position\s*recovery.*--log_error='\''\([^'\'']*\)'\''.*/\1/p' local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p' # codership/galera#354 # Some ungraceful shutdowns can leave an empty gvwstate.dat on # disk. This will prevent galera to join the cluster if it is # configured to attempt PC recovery. Removing that file makes the # node fall back to the normal, unoptimized joining process. if [ -f ${OCF_RESKEY_datadir}/gvwstate.dat ] && \ [ ! 
-s ${OCF_RESKEY_datadir}/gvwstate.dat ]; then ocf_log warn "empty ${OCF_RESKEY_datadir}/gvwstate.dat detected, removing it to prevent PC recovery failure at next restart" rm -f ${OCF_RESKEY_datadir}/gvwstate.dat fi ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then local tmp=$(mktemp) chown $OCF_RESKEY_user:$OCF_RESKEY_group $tmp # if we pass here because grastate.dat doesn't exist, # try not to bootstrap from this node if possible if [ ! -f ${OCF_RESKEY_datadir}/grastate.dat ]; then set_no_grastate fi ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" ${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error=$tmp 2>/dev/null last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)" if [ -z "$last_commit" ]; then # Galera uses InnoDB's 2pc transactions internally. If # server was stopped in the middle of a replication, the # recovery may find a "prepared" XA transaction in the # redo log, and mysql won't recover automatically local recovery_file="$(cat $tmp | sed -n $recovery_file_regex)" if [ -e $recovery_file ]; then cat $recovery_file | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null if [ $? -eq 0 ]; then # we can only rollback the transaction, but that's OK # since the DB will get resynchronized anyway ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover" ${OCF_RESKEY_binary} $recover_args --wsrep-recover \ --tc-heuristic-recover=rollback --log-error=$tmp 2>/dev/null last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)" if [ ! -z "$last_commit" ]; then ocf_log warn "State recovered. 
force SST at next restart for full resynchronization" rm -f ${OCF_RESKEY_datadir}/grastate.dat # try not to bootstrap from this node if possible set_no_grastate fi fi fi fi rm -f $tmp fi if [ ! -z "$last_commit" ]; then ocf_log info "Last commit version found: $last_commit" set_last_commit $last_commit return $OCF_SUCCESS else ocf_exit_reason "Unable to detect last known write sequence number" clear_last_commit return $OCF_ERR_GENERIC fi } # For galera, promote is really start galera_promote() { local rc local extra_opts local bootstrap local safe_to_bootstrap master_exists if [ $? -eq 0 ]; then # join without bootstrapping extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}" else bootstrap=$(is_bootstrap) if ocf_is_true $bootstrap; then # The best node for bootstrapping wasn't cleanly shutdown. Allow # bootstrapping anyways if [ "$(get_safe_to_bootstrap)" = "0" ]; then sed -ie 's/^\(safe_to_bootstrap:\) 0/\1 1/' ${OCF_RESKEY_datadir}/grastate.dat ocf_log info "safe_to_bootstrap in ${OCF_RESKEY_datadir}/grastate.dat set to 1 on node ${NODENAME}" fi ocf_log info "Node <${NODENAME}> is bootstrapping the cluster" extra_opts="--wsrep-cluster-address=gcomm://" else ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected." clear_last_commit return $OCF_ERR_GENERIC fi fi galera_monitor if [ $? -eq $OCF_RUNNING_MASTER ]; then if ocf_is_true $bootstrap; then promote_everyone clear_bootstrap_node ocf_log info "boostrap node already up, promoting the rest of the galera instances." fi clear_safe_to_bootstrap clear_last_commit return $OCF_SUCCESS fi # last commit/safe_to_bootstrap flag are no longer relevant once promoted clear_last_commit clear_safe_to_bootstrap mysql_common_prepare_dirs mysql_common_start "$extra_opts" rc=$? if [ $rc != $OCF_SUCCESS ]; then return $rc fi galera_monitor rc=$? 
if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then ocf_exit_reason "Failed initial monitor action" return $rc fi is_readonly if [ $? -eq 0 ]; then ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration." return $OCF_ERR_GENERIC fi is_primary if [ $? -ne 0 ]; then ocf_exit_reason "Failure. Master instance started, but is not in Primary mode." return $OCF_ERR_GENERIC fi if ocf_is_true $bootstrap; then promote_everyone clear_bootstrap_node # clear attribute no-grastate. if last shutdown was # not clean, we cannot be extra-cautious by requesting a SST # since this is the bootstrap node clear_no_grastate ocf_log info "Bootstrap complete, promoting the rest of the galera instances." else # if this is not the bootstrap node, make sure this instance # syncs with the rest of the cluster before promotion returns. wait_for_sync # sync is done, clear info about last startup clear_no_grastate fi ocf_log info "Galera started" return $OCF_SUCCESS } galera_demote() { mysql_common_stop rc=$? if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then ocf_exit_reason "Failed to stop Master galera instance during demotion to Master" return $rc fi # if this node was previously a bootstrap node, that is no longer the case. clear_bootstrap_node clear_last_commit clear_no_grastate clear_safe_to_bootstrap # Clear master score here rather than letting pacemaker do so once # demote finishes. This way a promote cannot take place right # after this demote even if pacemaker is requested to do so. It # will first have to run a start/monitor op, to reprobe the state # of the other galera nodes and act accordingly. clear_master_score # record last commit for next promotion detect_safe_to_bootstrap detect_last_commit rc=$? return $rc } galera_start() { local rc local galera_node galera_node=$(pcmk_to_galera_name $NODENAME) if [ -z "$galera_node" ]; then ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>." 
return $OCF_ERR_CONFIGURED fi echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node if [ $? -ne 0 ]; then ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}> to start this galera instance" return $OCF_ERR_CONFIGURED fi galera_monitor if [ $? -eq $OCF_RUNNING_MASTER ]; then ocf_exit_reason "master galera instance started outside of the cluster's control" return $OCF_ERR_GENERIC fi mysql_common_prepare_dirs detect_safe_to_bootstrap detect_last_commit rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi master_exists if [ $? -eq 0 ]; then ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster." set_master_score $NODENAME else clear_master_score detect_first_master fi return $OCF_SUCCESS } galera_monitor() { local rc local galera_node local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi mysql_common_status $status_loglevel rc=$? if [ $rc -eq $OCF_NOT_RUNNING ]; then last_commit=$(get_last_commit $node) if [ -n "$last_commit" ]; then # if last commit is set, this instance is considered started in slave mode rc=$OCF_SUCCESS master_exists if [ $? -ne 0 ]; then detect_first_master else # a master instance exists and is healthy, promote this # local read only instance # so it can join the master galera cluster. set_master_score fi fi return $rc elif [ $rc -ne $OCF_SUCCESS ]; then return $rc fi # if we make it here, mysql is running. Check cluster status now. galera_node=$(pcmk_to_galera_name $NODENAME) if [ -z "$galera_node" ]; then ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>." return $OCF_ERR_CONFIGURED fi echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node if [ $? 
-ne 0 ]; then ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>" return $OCF_ERR_GENERIC fi is_primary if [ $? -eq 0 ]; then if ocf_is_probe; then # restore master score during probe # if we detect this is a master instance set_master_score fi rc=$OCF_RUNNING_MASTER else ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state." rc=$OCF_ERR_GENERIC fi return $rc } galera_stop() { local rc # make sure the process is stopped, and remember its exit status mysql_common_stop rc=$? clear_safe_to_bootstrap clear_last_commit clear_master_score clear_bootstrap_node clear_no_grastate return $rc } galera_validate() { if ! ocf_is_ms; then ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource." return $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value." return $OCF_ERR_CONFIGURED fi mysql_common_validate } case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac galera_validate rc=$?
LSB_STATUS_STOPPED=3 if [ $rc -ne 0 ]; then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi if [ -z "${OCF_RESKEY_check_passwd}" ]; then # This value is automatically sourced from /etc/sysconfig/checkcluster if available OCF_RESKEY_check_passwd=${MYSQL_PASSWORD} fi if [ -z "${OCF_RESKEY_check_user}" ]; then # This value is automatically sourced from /etc/sysconfig/checkcluster if available OCF_RESKEY_check_user=${MYSQL_USERNAME} fi : ${OCF_RESKEY_check_user="root"} MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}" if ocf_is_true "${OCF_RESKEY_check_passwd_use_empty}"; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=''" elif [ -n "${OCF_RESKEY_check_passwd}" ]; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}" fi # This value is automatically sourced from /etc/sysconfig/checkcluster if available if [ -n "${MYSQL_HOST}" ]; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}" fi # This value is automatically sourced from /etc/sysconfig/checkcluster if available if [ -n "${MYSQL_PORT}" ]; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}" fi # What kind of method was invoked? case "$1" in start) galera_start;; stop) galera_stop;; status) mysql_common_status err;; monitor) galera_monitor;; promote) galera_promote;; demote) galera_demote;; validate-all) exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac # vi:sw=4:ts=4:et: diff --git a/heartbeat/garbd b/heartbeat/garbd index b88d448fb..f2a7266dc 100755 --- a/heartbeat/garbd +++ b/heartbeat/garbd @@ -1,430 +1,436 @@ #!/bin/sh # # Copyright (c) 2015 Damien Ciabrini # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ## # README. # # Resource agent for garbd, the Galera arbitrator # # You can use this agent if you run an even number of galera nodes, # and you want an additional node to avoid split-brain situations. # # garbd requires that a Galera cluster is running, so make sure to # add a proper ordering constraint to the cluster, e.g.: # # pcs constraint order galera-master then garbd # # If you add garbd to the cluster while Galera is not running, you # might want to disable it before setting up ordering constraint, e.g.: # # pcs resource create garbd garbd \ # wsrep_cluster_address=gcomm://node1:4567,node2:4567 \ # meta target-role=stopped # # Use location constraints to avoid running galera and garbd on # the same node, e.g.: # # pcs constraint colocation add garbd with galera-master -INFINITY # pcs constraint location garbd prefers node3=INFINITY # ## ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Set default paramenter values OCF_RESKEY_binary_default="/usr/sbin/garbd" OCF_RESKEY_log_default="/var/log/garbd.log" OCF_RESKEY_pid_default="/var/run/garbd.pid" OCF_RESKEY_user_default="mysql" if [ "X${HOSTOS}" = "XOpenBSD" ];then OCF_RESKEY_group_default="_mysql" else OCF_RESKEY_group_default="mysql" fi +OCF_RESKEY_options_default="" +OCF_RESKEY_wsrep_cluster_address_default="" +OCF_RESKEY_wsrep_cluster_name_default="" : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} +: ${OCF_RESKEY_options=${OCF_RESKEY_options_default}} +: ${OCF_RESKEY_wsrep_cluster_address=${OCF_RESKEY_wsrep_cluster_address_default}} +: ${OCF_RESKEY_wsrep_cluster_name=${OCF_RESKEY_wsrep_cluster_name_default}} usage() { cat < 1.0 Resource script for managing Galera arbitrator. Manages a galera arbitrator instance Location of the Galera arbitrator binary garbd server binary User running the garbd process garbd user Group running garbd (for logfile permissions) garbd group The logfile to be used for garbd. Galera arbitrator log file The pidfile to be used for garbd. Galera arbitrator pidfile Additional parameters which are passed to garbd on startup. Additional parameters to pass to garbd - + The galera cluster address. This takes the form of: gcomm://node:port,node:port,node:port Unlike Galera servers, port is mandatory for garbd. Galera cluster address - + The group name of the Galera cluster to connect to. Galera cluster name - + END } garbd_start() { local rc local pid local start_wait local garbd_params garbd_status info rc=$? 
if [ $rc -eq $OCF_SUCCESS ]; then ocf_exit_reason "garbd started outside of the cluster's control" return $OCF_ERR_GENERIC; fi touch $OCF_RESKEY_log chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log chmod 0640 $OCF_RESKEY_log [ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log garbd_params="--address=${OCF_RESKEY_wsrep_cluster_address} \ --group ${OCF_RESKEY_wsrep_cluster_name} \ --log ${OCF_RESKEY_log}" if [ ! -z "${OCF_RESKEY_options}" ]; then garbd_params="${garbd_params} --options=${OCF_RESKEY_options}" fi # garbd has no parameter to run as a specific user, # so we need to start it by our own means pid=$(su - -s /bin/sh $OCF_RESKEY_user -c "${OCF_RESKEY_binary} ${garbd_params} >/dev/null 2>&1 & echo \$!") # garbd doesn't create a pidfile either, so we create our own echo $pid > $OCF_RESKEY_pid if [ $? -ne 0 ]; then ocf_exit_reason "Cannot create pidfile for garbd at $OCF_RESKEY_pid (rc=$?), please check your installation" return $OCF_ERR_GENERIC fi # Spin waiting for garbd to connect to the cluster. # Let the CRM/LRM time us out if required. start_wait=1 while [ $start_wait -eq 1 ]; do garbd_monitor info rc=$? if [ $rc -eq $OCF_NOT_RUNNING ]; then ocf_exit_reason "garbd failed to start (pid=$pid), check logs in ${OCF_RESKEY_log}" return $OCF_ERR_GENERIC elif [ $rc -eq $OCF_SUCCESS ]; then start_wait=0 fi sleep 2 done ocf_log info "garbd connected to cluster \"${OCF_RESKEY_wsrep_cluster_name}\"" return $OCF_SUCCESS } garbd_status() { local loglevel=$1 local rc ocf_pidfile_status $OCF_RESKEY_pid rc=$? 
if [ $rc -eq 0 ]; then return $OCF_SUCCESS elif [ $rc -eq 2 ]; then return $OCF_NOT_RUNNING else # clean up if pidfile is stale if [ $rc -eq 1 ]; then ocf_log $loglevel "garbd not running: removing old PID file" rm -f $OCF_RESKEY_pid fi return $OCF_ERR_GENERIC fi } _port_by_pid() { local pid pid="$1" if have_binary "netstat"; then netstat -tnp 2>/dev/null | grep -s -q "ESTABLISHED.*${pid}/" else ss -Htnp 2>/dev/null | grep -s -q "^ESTAB.*pid=${pid}" fi } garbd_monitor() { local rc local pid local loglevel=$1 # Set loglevel to info during probe if ocf_is_probe; then loglevel="info" fi garbd_status $loglevel rc=$? # probe just wants to know if garbd is running or not if ocf_is_probe && [ $rc -ne $OCF_SUCCESS ]; then rc=$OCF_NOT_RUNNING fi # Consider garbd is working if it's connected to at least # one node in the galera cluster. # Note: a Galera node in Non-Primary state will be # stopped by the galera RA. So we can assume that # garbd will always be connected to the right partition if [ $rc -eq $OCF_SUCCESS ]; then pid=`cat $OCF_RESKEY_pid 2> /dev/null ` _port_by_pid $pid if [ $? -ne 0 ]; then ocf_log $loglevel "garbd disconnected from cluster \"${OCF_RESKEY_wsrep_cluster_name}\"" rc=$OCF_ERR_GENERIC fi fi return $rc } garbd_stop() { local rc local pid if [ ! -f $OCF_RESKEY_pid ]; then ocf_log info "garbd is not running" return $OCF_SUCCESS fi pid=`cat $OCF_RESKEY_pid 2> /dev/null ` ocf_log info "stopping garbd" # make sure the process is stopped ocf_stop_processes TERM 10 $pid rc=$? if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC else rm -f $OCF_RESKEY_pid ocf_log info "garbd stopped" return $OCF_SUCCESS fi } garbd_validate() { if ! have_binary "$OCF_RESKEY_binary"; then ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_binary" return $OCF_ERR_INSTALLED; fi if ! have_binary "netstat"; then if ! 
have_binary "ss"; then
            ocf_exit_reason "Setup problem: couldn't find command: netstat or ss"
            return $OCF_ERR_INSTALLED;
        fi
    fi

    # The cluster address is mandatory.
    if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then
        ocf_exit_reason "garbd must be configured with a wsrep_cluster_address value."
        return $OCF_ERR_CONFIGURED
    fi

    # unlike galera RA, ports must be set in cluster address for garbd
    # https://github.com/codership/galera/issues/98
    for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
        # Every comma-separated entry has to end in ":<port>".
        if ! echo $node | grep -s -q ':[1-9][0-9]*$'; then
            ocf_exit_reason "wsrep_cluster_address must specify ports (gcomm://node1:port,node2:port)."
            return $OCF_ERR_CONFIGURED
        fi
    done

    # Ensure that the encryption method is set if garbd is configured
    # to use SSL.
    if echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_(key|cert)='; then
        if ! echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_cipher='; then
            ocf_exit_reason "option socket.ssl_cipher must be set if SSL is enabled."
            return $OCF_ERR_CONFIGURED
        fi
    fi

    # The cluster (group) name is mandatory as well.
    if [ -z "$OCF_RESKEY_wsrep_cluster_name" ]; then
        ocf_exit_reason "garbd must be configured with a wsrep_cluster_name value."
        return $OCF_ERR_CONFIGURED
    fi

    # The configured user and group must be known to the system.
    if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then
        ocf_exit_reason "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent group $OCF_RESKEY_group >/dev/null 2>&1; then
        ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}

# meta-data and usage never require a valid configuration.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

garbd_validate
rc=$?

# trap configuration errors early, but don't block stop in such cases
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
    case "$1" in
        stop)
            exit $OCF_SUCCESS
            ;;
        status)
            exit $LSB_STATUS_STOPPED
            ;;
        *)
            exit $rc
            ;;
    esac
fi

# What kind of method was invoked?
case "$1" in
    start) garbd_start;;
    stop) garbd_stop;;
    status) garbd_status err;;
    monitor) garbd_monitor err;;
    # This agent defines no garbd_promote/garbd_demote functions, so
    # calling them would end the script with a shell "command not found"
    # status (127) rather than an OCF return code.
    promote|demote) exit $OCF_ERR_UNIMPLEMENTED;;
    validate-all) exit $OCF_SUCCESS;;

    *) usage
        exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/iSCSILogicalUnit.in b/heartbeat/iSCSILogicalUnit.in
index 5838c8738..0fe85b593 100644
--- a/heartbeat/iSCSILogicalUnit.in
+++ b/heartbeat/iSCSILogicalUnit.in
@@ -1,739 +1,741 @@
#!@BASH_SHELL@
#
#
# iSCSILogicalUnit OCF RA. Exports and manages iSCSI Logical Units.
#
# (c) 2013 LINBIT, Lars Ellenberg
# (c) 2009-2010 Florian Haas, Dejan Muhamedagic,
# and Linux-HA contributors
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#

#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
.
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults # Set a default implementation based on software installed if have_binary ietadm; then OCF_RESKEY_implementation_default="iet" elif have_binary tgtadm; then OCF_RESKEY_implementation_default="tgt" elif have_binary lio_node; then OCF_RESKEY_implementation_default="lio" elif have_binary targetcli; then OCF_RESKEY_implementation_default="lio-t" fi : ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}} # Use a default SCSI ID and SCSI SN that is unique across the cluster, # and persistent in the event of resource migration. # SCSI IDs are limited to 24 bytes, but only 16 bytes are known to be # supported by all iSCSI implementations this RA cares about. Thus, # for a default, use the first 16 characters of # $OCF_RESOURCE_INSTANCE. OCF_RESKEY_scsi_id_default="${OCF_RESOURCE_INSTANCE:0:16}" : ${OCF_RESKEY_scsi_id=${OCF_RESKEY_scsi_id_default}} # To have a reasonably unique default SCSI SN, use the first 8 bytes # of an MD5 hash of of $OCF_RESOURCE_INSTANCE sn=`echo -n "${OCF_RESOURCE_INSTANCE}" | md5sum | sed -e 's/ .*//'` OCF_RESKEY_scsi_sn_default=${sn:0:8} : ${OCF_RESKEY_scsi_sn=${OCF_RESKEY_scsi_sn_default}} +OCF_RESKEY_allowed_initiators_default="" +: ${OCF_RESKEY_allowed_initiators=${OCF_RESKEY_allowed_initiators_default}} # set 0 as a default value for lio iblock device number OCF_RESKEY_lio_iblock_default=0 OCF_RESKEY_lio_iblock=${OCF_RESKEY_lio_iblock:-$OCF_RESKEY_lio_iblock_default} ## tgt specifics # tgt has "backing store type" and "backing store open flags", # as well as device-type. # # suggestions how to make this generic accross all supported implementations? # how should they be named, how should they be mapped to implementation specifics? 
# # OCF_RESKEY_tgt_bstype # OCF_RESKEY_tgt_bsoflags # OCF_RESKEY_tgt_bsopts # OCF_RESKEY_tgt_device_type # targetcli: iSCSITarget and iSCSILogicalUnit must use the same lockfile TARGETLOCKFILE=${HA_RSCTMP}/targetcli.lock ####################################################################### meta_data() { cat < 0.9 Manages iSCSI Logical Unit. An iSCSI Logical unit is a subdivision of an SCSI Target, exported via a daemon that speaks the iSCSI protocol. Manages iSCSI Logical Units (LUs) The iSCSI target daemon implementation. Must be one of "iet", "tgt", "lio", or "lio-t". If unspecified, an implementation is selected based on the availability of management utilities, with "iet" being tried first, then "tgt", then "lio", then "lio-t". iSCSI target daemon implementation The iSCSI Qualified Name (IQN) that this Logical Unit belongs to. iSCSI target IQN The Logical Unit number (LUN) exposed to initiators. Logical Unit number (LUN) The path to the block device exposed. Some implementations allow this to be a regular file, too. Block device (or file) path The SCSI ID to be configured for this Logical Unit. The default is the resource name, truncated to 24 bytes. SCSI ID The SCSI serial number to be configured for this Logical Unit. The default is a hash of the resource name, truncated to 8 bytes. SCSI serial number The SCSI UNMAP command to be configured for this Logical Unit. Setting this integer to 1 will enable TPU IOCTL emulation. SCSI UNMAP (for TRIM / DISCARD) The SCSI EXTENDED COPY command to be configured for this Logical Unit. Setting this integer to 1 will enable 3PC IOCTL emulation. SCSI extended write The SCSI Compare and Write command to be configured for this Logical Unit. Setting this integer to 1 will enable CAW IOCTL emulation. SCSI compare and write The SCSI vendor ID to be configured for this Logical Unit. SCSI vendor ID The SCSI product ID to be configured for this Logical Unit. SCSI product ID TGT specific backing store type. 
If you want to use aio, make sure your tgtadm is built against libaio. See tgtadm(8). TGT backing store type TGT specific backing store open flags (direct|sync). See tgtadm(8). TGT backing store open flags TGT specific backing store options. See tgtadm(8). TGT backing store options TGT specific device type. See tgtadm(8). TGT device type Additional LU parameters. A space-separated list of "name=value" pairs which will be passed through to the iSCSI daemon's management interface. The supported parameters are implementation dependent. Neither the name nor the value may contain whitespace. List of iSCSI LU parameters Allowed initiators. A space-separated list of initiators allowed to connect to this lun. Initiators may be listed in any syntax the target implementation allows. If this parameter is empty or not set, access to this lun will not be allowed from any initiator, if target is not in demo mode. This parameter is only necessary when using LIO. List of iSCSI initiators allowed to connect to this lun. - + LIO iblock device name, a number starting from 0. Using distinct values here avoids a warning in LIO "LEGACY: SHARED HBA"; and it is necessary when using multiple LUNs started at the same time (eg. on node failover) to prevent a race condition in tcm_core on mkdir() in /sys/kernel/config/target/core/. 
LIO iblock device number - + END } ####################################################################### iSCSILogicalUnit_usage() { cat < /sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/wwn/vpd_unit_serial fi ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns create /backstores/block/${OCF_RESOURCE_INSTANCE} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC if $(ip a | grep -q inet6); then ocf_run -q targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/portals delete 0.0.0.0 3260 ocf_run -q targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/portals create ::0 fi if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then for initiator in ${OCF_RESKEY_allowed_initiators}; do ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls create ${initiator} add_mapped_luns=False || exit $OCF_ERR_GENERIC ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} create ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC done fi if [ -n "${OCF_RESKEY_emulate_tpu}" ]; then echo ${OCF_RESKEY_emulate_tpu} > ${iblock_attrib_path}/emulate_tpu || exit $OCF_ERR_GENERIC fi if [ -n "${OCF_RESKEY_emulate_3pc}" ]; then echo ${OCF_RESKEY_emulate_3pc} > ${iblock_attrib_path}/emulate_3pc || exit $OCF_ERR_GENERIC fi if [ -n "${OCF_RESKEY_emulate_caw}" ]; then echo ${OCF_RESKEY_emulate_caw} > ${iblock_attrib_path}/emulate_caw || exit $OCF_ERR_GENERIC fi ;; esac # Force the monitor operation to pass before start is considered a success. iSCSILogicalUnit_monitor } iSCSILogicalUnit_stop() { iSCSILogicalUnit_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi case $OCF_RESKEY_implementation in iet) # IET allows us to remove LUs while they are in use ocf_run ietadm --op delete \ --tid=${TID} \ --lun=${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC ;; tgt) # tgt will fail to remove an LU while it is in use, # but at the same time does not allow us to # selectively shut down a connection that is using a # specific LU. 
Thus, we need to loop here until tgtd # decides that the LU is no longer in use, or we get # timed out by the LRM. while ! ocf_run -warn tgtadm --lld iscsi --op delete --mode logicalunit \ --tid ${TID} \ --lun=${OCF_RESKEY_lun}; do sleep 1 done ;; lio) acls_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/acls" for initiatorpath in ${acls_configfs_path}/*; do initiator=$(basename "${initiatorpath}") if [ -e "${initiatorpath}/lun_${OCF_RESKEY_lun}" ]; then ocf_log info "deleting acl at ${initiatorpath}/lun_${OCF_RESKEY_lun}" ocf_run lio_node --dellunacl=${OCF_RESKEY_target_iqn} 1 \ ${initiator} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC fi done lun_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/" if [ -e "${lun_configfs_path}" ]; then ocf_run lio_node --dellun=${OCF_RESKEY_target_iqn} 1 ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC fi block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path" if [ -e "${block_configfs_path}" ]; then ocf_run tcm_node --freedev=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC fi ;; lio-t) ocf_take_lock $TARGETLOCKFILE ocf_release_lock_on_exit $TARGETLOCKFILE # "targetcli delete" will fail if the LUN is already # gone. Log a warning and still push ahead. 
ocf_run -warn targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns delete ${OCF_RESKEY_lun} if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then for initiator in ${OCF_RESKEY_allowed_initiators}; do if targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} status | grep "Mapped LUNs: 0" >/dev/null ; then ocf_run -warn targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/ delete ${initiator} fi done fi # If we've proceeded down to here and we're unable to # delete the backstore, then something is seriously # wrong and we need to fail the stop operation # (potentially causing fencing) ocf_run targetcli /backstores/block delete ${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC ;; esac return $OCF_SUCCESS } iSCSILogicalUnit_monitor() { if [ x"${OCF_RESKEY_tgt_bstype}" != x"rbd" ]; then # If our backing device (or file) doesn't even exist, we're not running [ -e ${OCF_RESKEY_path} ] || return $OCF_NOT_RUNNING fi case $OCF_RESKEY_implementation in iet) # Figure out and set the target ID TID=`sed -ne "s/tid:\([[:digit:]]\+\) name:${OCF_RESKEY_target_iqn}$/\1/p" < /proc/net/iet/volume` if [ -z "${TID}" ]; then # Our target is not configured, thus we're not # running. return $OCF_NOT_RUNNING fi # FIXME: this looks for a matching LUN and path, but does # not actually test for the correct target ID. grep -E -q "[[:space:]]+lun:${OCF_RESKEY_lun}.*path:${OCF_RESKEY_path}$" /proc/net/iet/volume && return $OCF_SUCCESS ;; tgt) # Figure out and set the target ID TID=`tgtadm --lld iscsi --op show --mode target \ | sed -ne "s/^Target \([[:digit:]]\+\): ${OCF_RESKEY_target_iqn}$/\1/p"` if [ -z "$TID" ]; then # Our target is not configured, thus we're not # running. return $OCF_NOT_RUNNING fi # This only looks for the backing store, but does not test # for the correct target ID and LUN. 
tgtadm --lld iscsi --op show --mode target \ | grep -E -q "[[:space:]]+Backing store.*: ${OCF_RESKEY_path}$" && return $OCF_SUCCESS ;; lio) configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/${OCF_RESOURCE_INSTANCE}/udev_path" [ -e ${configfs_path} ] && [ `cat ${configfs_path}` = "${OCF_RESKEY_path}" ] && return $OCF_SUCCESS # if we aren't activated, is a block device still left over? block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path" [ -e ${block_configfs_path} ] && ocf_log warn "existing block without an active lun: ${block_configfs_path}" [ -e ${block_configfs_path} ] && return $OCF_ERR_GENERIC ;; lio-t) configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/*/udev_path" [ -e ${configfs_path} ] && [ `cat ${configfs_path}` = "${OCF_RESKEY_path}" ] && return $OCF_SUCCESS # if we aren't activated, is a block device still left over? block_configfs_path="/sys/kernel/config/target/core/iblock_*/${OCF_RESOURCE_INSTANCE}/udev_path" [ -e ${block_configfs_path} ] && ocf_log warn "existing block without an active lun: ${block_configfs_path}" [ -e ${block_configfs_path} ] && return $OCF_ERR_GENERIC ;; esac return $OCF_NOT_RUNNING } iSCSILogicalUnit_validate() { # Do we have all required variables? for var in target_iqn lun path; do param="OCF_RESKEY_${var}" if [ -z "${!param}" ]; then ocf_exit_reason "Missing resource parameter \"$var\"!" exit $OCF_ERR_CONFIGURED fi done # Is the configured implementation supported? 
case "$OCF_RESKEY_implementation" in "iet"|"tgt"|"lio"|"lio-t") ;; "") # The user didn't specify an implementation, and we were # unable to determine one from installed binaries (in # other words: no binaries for any supported # implementation could be found) ocf_exit_reason "Undefined iSCSI target implementation" exit $OCF_ERR_INSTALLED ;; *) ocf_exit_reason "Unsupported iSCSI target implementation \"$OCF_RESKEY_implementation\"!" exit $OCF_ERR_CONFIGURED ;; esac # Do we have a valid LUN? case $OCF_RESKEY_implementation in iet) # IET allows LUN 0 and up [ $OCF_RESKEY_lun -ge 0 ] case $? in 0) # OK ;; 1) ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be a non-negative integer)." exit $OCF_ERR_CONFIGURED ;; *) ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be an integer)." exit $OCF_ERR_CONFIGURED ;; esac ;; tgt) # tgt reserves LUN 0 for its own purposes [ $OCF_RESKEY_lun -ge 1 ] case $? in 0) # OK ;; 1) ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be greater than 0)." exit $OCF_ERR_CONFIGURED ;; *) ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be an integer)." exit $OCF_ERR_CONFIGURED ;; esac ;; esac # Do we have any configuration parameters that the current # implementation does not support? 
local unsupported_params local var local envar case $OCF_RESKEY_implementation in iet) # IET does not support setting the vendor and product ID # (it always uses "IET" and "VIRTUAL-DISK") unsupported_params="vendor_id product_id allowed_initiators lio_iblock tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type emulate_tpu emulate_3pc emulate_caw" ;; tgt) unsupported_params="allowed_initiators lio_iblock emulate_tpu emulate_3pc emulate_caw" ;; lio) unsupported_params="scsi_id vendor_id product_id tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type emulate_tpu emulate_3pc emulate_caw" ;; lio-t) unsupported_params="scsi_id vendor_id product_id tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type lio_iblock" ;; esac for var in ${unsupported_params}; do envar=OCF_RESKEY_${var} defvar=OCF_RESKEY_${var}_default if [ -n "${!envar}" ]; then if [[ "${!envar}" != "${!defvar}" ]];then case "$__OCF_ACTION" in start|validate-all) ocf_log warn "Configuration parameter \"${var}\"" \ "is not supported by the iSCSI implementation" \ "and will be ignored." ;; esac fi fi done if ! ocf_is_probe; then # Do we have all required binaries? case $OCF_RESKEY_implementation in iet) check_binary ietadm ;; tgt) check_binary tgtadm ;; lio) check_binary tcm_node check_binary lio_node ;; lio-t) check_binary targetcli ;; esac # Is the required kernel functionality available? case $OCF_RESKEY_implementation in iet) [ -d /proc/net/iet ] if [ $? -ne 0 ]; then ocf_log err "/proc/net/iet does not exist or is not a directory -- check if required modules are loaded." 
exit $OCF_ERR_INSTALLED fi ;; tgt) # tgt is userland only ;; esac fi return $OCF_SUCCESS } case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) iSCSILogicalUnit_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test iSCSILogicalUnit_validate case $__OCF_ACTION in start) iSCSILogicalUnit_start;; stop) iSCSILogicalUnit_stop;; monitor|status) iSCSILogicalUnit_monitor;; reload) ocf_log err "Reloading..." iSCSILogicalUnit_start ;; validate-all) ;; *) iSCSILogicalUnit_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/iSCSITarget.in b/heartbeat/iSCSITarget.in index 9128fdc55..9e6e0b5fa 100644 --- a/heartbeat/iSCSITarget.in +++ b/heartbeat/iSCSITarget.in @@ -1,695 +1,698 @@ #!@BASH_SHELL@ # # # iSCSITarget OCF RA. Exports and manages iSCSI targets. # # (c) 2009-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults # Set a default implementation based on software installed if have_binary ietadm; then OCF_RESKEY_implementation_default="iet" elif have_binary tgtadm; then OCF_RESKEY_implementation_default="tgt" elif have_binary lio_node; then OCF_RESKEY_implementation_default="lio" elif have_binary targetcli; then OCF_RESKEY_implementation_default="lio-t" fi : ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}} # Listen on 0.0.0.0:3260 by default OCF_RESKEY_portals_default="0.0.0.0:3260" : ${OCF_RESKEY_portals=${OCF_RESKEY_portals_default}} +OCF_RESKEY_allowed_initiators_default="" +: ${OCF_RESKEY_allowed_initiators=${OCF_RESKEY_allowed_initiators_default}} + # Lockfile, used for selecting a target ID LOCKFILE=${HA_RSCTMP}/iSCSITarget-${OCF_RESKEY_implementation}.lock # targetcli: iSCSITarget and iSCSILogicalUnit must use the same lockfile TARGETLOCKFILE=${HA_RSCTMP}/targetcli.lock ####################################################################### meta_data() { cat < 0.9 Manages iSCSI targets. An iSCSI target is a collection of SCSI Logical Units (LUs) exported via a daemon that speaks the iSCSI protocol. iSCSI target export agent The iSCSI target daemon implementation. Must be one of "iet", "tgt", "lio", or "lio-t". If unspecified, an implementation is selected based on the availability of management utilities, with "iet" being tried first, then "tgt", then "lio", then "lio-t". Specifies the iSCSI target implementation ("iet", "tgt", "lio", or "lio-t"). The target iSCSI Qualified Name (IQN). Should follow the conventional "iqn.yyyy-mm.<reversed domain name>[:identifier]" syntax. iSCSI target IQN The iSCSI target ID. Required for tgt. iSCSI target ID iSCSI network portal addresses. Not supported by all implementations. 
If unset, the default is to create one portal that listens on ${OCF_RESKEY_portal_default}. iSCSI portal addresses iSCSI iSER network portal addresses. Not supported by all implementations. iSCSI iSER enabled portal addresses Allowed initiators. A space-separated list of initiators allowed to connect to this target. Initiators may be listed in any syntax the target implementation allows. If this parameter is empty or not set, access to this target will be allowed from any initiator. List of iSCSI initiators allowed to connect to this target - + A username used for incoming initiator authentication. If unspecified, allowed initiators will be able to log in without authentication. This is a unique parameter, as it not allowed to re-use a single username across multiple target instances. Incoming account username A password used for incoming initiator authentication. Incoming account password Additional target parameters. A space-separated list of "name=value" pairs which will be passed through to the iSCSI daemon's management interface. The supported parameters are implementation dependent. Neither the name nor the value may contain whitespace. List of iSCSI target parameters END } ####################################################################### iSCSITarget_usage() { cat <> /etc/initiators.deny echo "${OCF_RESKEY_iqn} ${OCF_RESKEY_allowed_initiators// /,}" >> /etc/initiators.allow else echo "${OCF_RESKEY_iqn} ALL" >> /etc/initiators.allow fi # In iet, adding a new user and assigning it to a target # is one operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run ietadm --op new --user \ --tid=${tid} \ --params=IncomingUser=${OCF_RESKEY_incoming_username},Password=${OCF_RESKEY_incoming_password} \ || exit $OCF_ERR_GENERIC fi ;; tgt) local tid tid="${OCF_RESKEY_tid}" # Create the target. ocf_run tgtadm --lld iscsi --op new --mode target \ --tid=${tid} \ --targetname ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC # Set parameters. 
for param in ${OCF_RESKEY_additional_parameters}; do name=${param%=*} value=${param#*=} ocf_run tgtadm --lld iscsi --op update --mode target \ --tid=${tid} \ --name=${name} --value=${value} || exit $OCF_ERR_GENERIC done # For tgt, we always have to add access per initiator; # access to targets is denied by default. If # "allowed_initiators" is unset, we must use the special # keyword ALL. for initiator in ${OCF_RESKEY_allowed_initiators=ALL}; do ocf_run tgtadm --lld iscsi --op bind --mode target \ --tid=${tid} \ --initiator-address=${initiator} || exit $OCF_ERR_GENERIC done # In tgt, we must first create a user account, then assign # it to a target using the "bind" operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run tgtadm --lld iscsi --mode account --op new \ --user=${OCF_RESKEY_incoming_username} \ --password=${OCF_RESKEY_incoming_password} || exit $OCF_ERR_GENERIC ocf_run tgtadm --lld iscsi --mode account --op bind \ --tid=${tid} \ --user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC fi ;; lio) # lio distinguishes between targets and target portal # groups (TPGs). We will always create one TPG, with the # number 1. In lio, creating a network portal # automatically creates the corresponding target if it # doesn't already exist. for portal in ${OCF_RESKEY_portals}; do ocf_run lio_node --addnp ${OCF_RESKEY_iqn} 1 \ ${portal} || exit $OCF_ERR_GENERIC done # in lio, we can set target parameters by manipulating # the appropriate configfs entries for param in ${OCF_RESKEY_additional_parameters}; do name=${param%=*} value=${param#*=} configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/param/${name}" if [ -e ${configfs_path} ]; then echo ${value} > ${configfs_path} || exit $OCF_ERR_GENERIC else ocf_log warn "Unsupported iSCSI target parameter ${name}: will be ignored." fi done # lio does per-initiator filtering by default. To disable # this, we need to switch the target to "permissive mode". 
if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then for initiator in ${OCF_RESKEY_allowed_initiators}; do ocf_run lio_node --addnodeacl ${OCF_RESKEY_iqn} 1 \ ${initiator} || exit $OCF_ERR_GENERIC done else ocf_run lio_node --permissive ${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC # permissive mode enables read-only access by default, # so we need to change that to RW to be in line with # the other implementations. echo 0 > "/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/attrib/demo_mode_write_protect" if [ `cat /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/attrib/demo_mode_write_protect` -ne 0 ]; then ocf_log err "Failed to disable write protection for target ${OCF_RESKEY_iqn}." exit $OCF_ERR_GENERIC fi fi # TODO: add CHAP authentication support when it gets added # back into LIO ocf_run lio_node --disableauth ${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC # Finally, we need to enable the target to allow # initiators to connect ocf_run lio_node --enabletpg=${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC ;; lio-t) # lio distinguishes between targets and target portal # groups (TPGs). We will always create one TPG, with the # number 1. In lio, creating a network portal # automatically creates the corresponding target if it # doesn't already exist. ocf_take_lock $TARGETLOCKFILE ocf_release_lock_on_exit $TARGETLOCKFILE ocf_run targetcli /iscsi set global auto_add_default_portal=false || exit $OCF_ERR_GENERIC if ! 
[ -d /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn} ] ; then ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC fi for portal in ${OCF_RESKEY_portals}; do if [ $portal != ${OCF_RESKEY_portals_default} ] ; then IFS=':' read -a sep_portal <<< "$portal" ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/portals create "${sep_portal[0]}" "${sep_portal[1]}" || exit $OCF_ERR_GENERIC fi done # in lio, we can set target parameters by manipulating # the appropriate configfs entries for param in ${OCF_RESKEY_additional_parameters}; do name=${param%=*} value=${param#*=} configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/param/${name}" if [ -e ${configfs_path} ]; then echo ${value} > ${configfs_path} || exit $OCF_ERR_GENERIC else ocf_log warn "Unsupported iSCSI target parameter ${name}: will be ignored." fi done # allow iSER enabled portal for iser_portal in ${OCF_RESKEY_iser_portals}; do configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/np/${iser_portal}\:*/iser" if [ -f ${configfs_path} ]; then echo "1" > ${configfs_path} || exit $OCF_ERR_GENERIC else ocf_log warn "Unable to set iSER on: $iser_portal" fi done # lio does per-initiator filtering by default. To disable # this, we need to switch the target to "permissive mode". 
if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then # enable authentication for tpg1 if incoming_username # is defined if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/ set attribute authentication=1 || exit $OCF_ERR_GENERIC ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/ set attribute generate_node_acls=0 || exit $OCF_ERR_GENERIC fi for initiator in ${OCF_RESKEY_allowed_initiators}; do ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/acls create ${initiator} || exit $OCF_ERR_GENERIC # enable chap if incoming_username is defined if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/acls/${initiator}/ set auth userid=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/acls/${initiator}/ set auth password=${OCF_RESKEY_incoming_password} || exit $OCF_ERR_GENERIC fi done else ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/ set attribute authentication=0 demo_mode_write_protect=0 generate_node_acls=1 cache_dynamic_acls=1 || exit $OCF_ERR_GENERIC fi ;; esac iSCSITarget_monitor } iSCSITarget_stop() { iSCSITarget_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi local tid case $OCF_RESKEY_implementation in iet) # Figure out the target ID tid=`sed -ne "s/tid:\([[:digit:]]\+\) name:${OCF_RESKEY_iqn}/\1/p" < /proc/net/iet/volume` if [ -z "${tid}" ]; then ocf_log err "Failed to retrieve target ID for IQN ${OCF_RESKEY_iqn}" exit $OCF_ERR_GENERIC fi # Close existing connections. There is no other way to # do this in IET than to parse the contents of # /proc/net/iet/session. 
set -- $(sed -ne '/^tid:'${tid}' /,/^tid/ { /^[[:space:]]*sid:\([0-9]\+\)/ { s/^[[:space:]]*sid:\([0-9]*\).*/--sid=\1/; h; }; /^[[:space:]]*cid:\([0-9]\+\)/ { s/^[[:space:]]*cid:\([0-9]*\).*/--cid=\1/; G; p; }; }' < /proc/net/iet/session) while [[ -n $2 ]]; do # $2 $1 looks like "--sid=X --cid=Y" ocf_run ietadm --op delete \ --tid=${tid} $2 $1 shift 2 done # In iet, unassigning a user from a target and # deleting the user account is one operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run ietadm --op delete --user \ --tid=${tid} \ --params=IncomingUser=${OCF_RESKEY_incoming_username} \ || exit $OCF_ERR_GENERIC fi # Loop on delete. Keep trying until we time out, if # necessary. while true; do if ietadm --op delete --tid=${tid}; then ocf_log debug "Removed target ${OCF_RESKEY_iqn}." break else ocf_log warn "Failed to remove target ${OCF_RESKEY_iqn}, retrying." sleep 1 fi done # Avoid stale /etc/initiators.{allow,deny} entries # for this target if [ -e /etc/initiators.deny ]; then ocf_run sed -e "/^${OCF_RESKEY_iqn}[[:space:]]/d" \ -i /etc/initiators.deny fi if [ -e /etc/initiators.allow ]; then ocf_run sed -e "/^${OCF_RESKEY_iqn}[[:space:]]/d" \ -i /etc/initiators.allow fi ;; tgt) tid="${OCF_RESKEY_tid}" # Close existing connections. There is no other way to # do this in tgt than to parse the output of "tgtadm --op # show". set -- $(tgtadm --lld iscsi --op show --mode target \ | sed -ne '/^Target '${tid}':/,/^Target/ { /^[[:space:]]*I_T nexus: \([0-9]\+\)/ { s/^.*: \([0-9]*\).*/--sid=\1/; h; }; /^[[:space:]]*Connection: \([0-9]\+\)/ { s/^.*: \([0-9]*\).*/--cid=\1/; G; p; }; /^[[:space:]]*LUN information:/ q; }') while [[ -n $2 ]]; do # $2 $1 looks like "--sid=X --cid=Y" ocf_run tgtadm --lld iscsi --op delete --mode connection \ --tid=${tid} $2 $1 shift 2 done # In tgt, we must first unbind the user account from # the target, then remove the account itself. 
if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run tgtadm --lld iscsi --mode account --op unbind \ --tid=${tid} \ --user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC ocf_run tgtadm --lld iscsi --mode account --op delete \ --user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC fi # Loop on delete. Keep trying until we time out, if # necessary. while true; do if tgtadm --lld iscsi --op delete --mode target --tid=${tid}; then ocf_log debug "Removed target ${OCF_RESKEY_iqn}." break else ocf_log warn "Failed to remove target ${OCF_RESKEY_iqn}, retrying." sleep 1 fi done # In tgt, we don't have to worry about our ACL # entries. They are automatically removed upon target # deletion. ;; lio) # In lio, removing a target automatically removes all # associated TPGs, network portals, and LUNs. ocf_run lio_node --deliqn ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC ;; lio-t) ocf_take_lock $TARGETLOCKFILE ocf_release_lock_on_exit $TARGETLOCKFILE ocf_run targetcli /iscsi delete ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC ;; esac return $OCF_SUCCESS } iSCSITarget_monitor() { case $OCF_RESKEY_implementation in iet) grep -Eq "tid:[0-9]+ name:${OCF_RESKEY_iqn}" /proc/net/iet/volume && return $OCF_SUCCESS ;; tgt) tgtadm --lld iscsi --op show --mode target \ | grep -Eq "Target [0-9]+: ${OCF_RESKEY_iqn}" && return $OCF_SUCCESS ;; lio | lio-t) # if we have no configfs entry for the target, it's # definitely stopped [ -d /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn} ] || return $OCF_NOT_RUNNING # if the target is there, but its TPG is not enabled, then # we also consider it stopped [ `cat /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/enable` -eq 1 ] || return $OCF_NOT_RUNNING return $OCF_SUCCESS ;; esac return $OCF_NOT_RUNNING } iSCSITarget_validate() { # Do we have all required variables? 
local required_vars case $OCF_RESKEY_implementation in iet) required_vars="iqn" ;; tgt) required_vars="iqn tid" ;; esac for var in ${required_vars}; do param="OCF_RESKEY_${var}" if [ -z "${!param}" ]; then ocf_exit_reason "Missing resource parameter \"$var\"!" exit $OCF_ERR_CONFIGURED fi done # Is the configured implementation supported? case "$OCF_RESKEY_implementation" in "iet"|"tgt"|"lio"|"lio-t") ;; "") # The user didn't specify an implementation, and we were # unable to determine one from installed binaries (in # other words: no binaries for any supported # implementation could be found) ocf_exit_reason "Undefined iSCSI target implementation" exit $OCF_ERR_INSTALLED ;; *) ocf_exit_reason "Unsupported iSCSI target implementation \"$OCF_RESKEY_implementation\"!" exit $OCF_ERR_CONFIGURED ;; esac # Do we have any configuration parameters that the current # implementation does not support? local unsupported_params local var local envar case $OCF_RESKEY_implementation in iet|tgt) # IET and tgt do not support binding a target portal to a # specific IP address. unsupported_params="portals" ;; lio|lio-t) unsupported_params="tid" ;; esac for var in ${unsupported_params}; do envar=OCF_RESKEY_${var} defvar=OCF_RESKEY_${var}_default if [ -n "${!envar}" ]; then if [[ "${!envar}" != "${!defvar}" ]];then case "$__OCF_ACTION" in start|validate-all) ocf_log warn "Configuration parameter \"${var}\"" \ "is not supported by the iSCSI implementation" \ "and will be ignored." ;; esac fi fi done if ! ocf_is_probe; then # Do we have all required binaries? case $OCF_RESKEY_implementation in iet) check_binary ietadm ;; tgt) check_binary tgtadm ;; lio) check_binary tcm_node check_binary lio_node ;; lio-t) check_binary targetcli ;; esac # Is the required kernel functionality available? case $OCF_RESKEY_implementation in iet) [ -d /proc/net/iet ] if [ $? -ne 0 ]; then ocf_log err "/proc/net/iet does not exist or is not a directory -- check if required modules are loaded." 
exit $OCF_ERR_INSTALLED fi ;; tgt) # tgt is userland only ;; lio) # lio needs configfs to be mounted if ! grep -Eq "^.*/sys/kernel/config[[:space:]]+configfs" /proc/mounts; then ocf_log err "configfs not mounted at /sys/kernel/config -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi # check for configfs entries created by target_core_mod if [ ! -d /sys/kernel/config/target ]; then ocf_log err "/sys/kernel/config/target does not exist or is not a directory -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi ;; lio-t) #targetcli loads the needed kernel modules ;; esac fi return $OCF_SUCCESS } case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) iSCSITarget_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test iSCSITarget_validate case $__OCF_ACTION in start) iSCSITarget_start;; stop) iSCSITarget_stop;; monitor|status) iSCSITarget_monitor;; reload) ocf_log err "Reloading..." iSCSITarget_start ;; validate-all) ;; *) iSCSITarget_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/ids b/heartbeat/ids index f2a573b74..8300f69a5 100755 --- a/heartbeat/ids +++ b/heartbeat/ids @@ -1,738 +1,751 @@ #!/bin/sh # # # ids # # Description: # # OCF resource agent that manages an # IBM Informix Dynamic Server (IDS) instance # as an High-Availability resource. #### # # Author: Lars D. Forseth, or # Created: May 25th 2007 # Last Modified: July 30th 2007 # Support: users@clusterlabs.org # License: GNU General Public License (GPL), Version 2 or later # Copyright: (c) 2002 - 2007 International Business Machines, Inc. 
# # This code is inspired by the db2 OCF resource agent # written by Alan Robertson, #### # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. #### # # Example usage as it would appear in /etc/ha.d/haresources: # node1 192.168.0.1 ids::/informix::ids1::onconfig.ids1 # # # --> Note that passing dbname and sqltestquery in heartbeat version 1 style is not supported! # # See usage() function below for more details... #### # # OCF instance parameters: # OCF_RESKEY_informixdir # OCF_RESKEY_informixserver # OCF_RESKEY_onconfig # OCF_RESKEY_dbname # OCF_RESKEY_sqltestquery #### # # Include general OCF functions and variables (such as OCF return codes). # : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_informixdir_default="" +OCF_RESKEY_informixserver_default="" +OCF_RESKEY_onconfig_default="" +OCF_RESKEY_dbname_default="sysmaster" +OCF_RESKEY_sqltestquery_default="SELECT COUNT(*) FROM systables;" + +: ${OCF_RESKEY_informixdir=${OCF_RESKEY_informixdir_default}} +: ${OCF_RESKEY_informixserver=${OCF_RESKEY_informixserver_default}} +: ${OCF_RESKEY_onconfig=${OCF_RESKEY_onconfig_default}} +: ${OCF_RESKEY_dbname=${OCF_RESKEY_dbname_default}} +: ${OCF_RESKEY_sqltestquery=${OCF_RESKEY_sqltestquery_default}} # # Function that displays the usage of this script. # ids_usage() { methods=`ids_methods` methods=`echo $methods | tr ' ' '|'` echo " usage: $0 ($methods) $0 manages an IBM Informix Dynamic Server (IDS) instance as an High-Availability resource. The 'start' operation starts the database. The 'stop' operation stops the database. The 'status' operation reports whether the database is running The 'monitor' operation reports whether the database seems to be working The 'validate-all' operation reports whether the parameters are valid The 'methods' operation lists the methods $0 supports The 'usage' operation displays this text The 'meta-data' operation returns the meta-data (in XML) of this resource script " } # # Function that displays the possible methods this script supports. # ids_methods() { echo " start stop status monitor validate-all methods usage meta-data " } # # Function that displays the meta-data of this OCF resource agent. # ids_meta_data() { cat <<-! 1.0 OCF resource agent to manage an IBM Informix Dynamic Server (IDS) instance as an High-Availability resource. Manages an Informix Dynamic Server (IDS) instance The value the environment variable INFORMIXDIR has after a typical installation of IDS. Or in other words: the path (without trailing '/') where IDS was installed to. If this parameter is unspecified the script will try to get the value from the shell environment. 
INFORMIXDIR environment variable - + The value the environment variable INFORMIXSERVER has after a typical installation of IDS. Or in other words: the name of the IDS server instance to manage. If this parameter is unspecified the script will try to get the value from the shell environment. INFORMIXSERVER environment variable - + The value the environment variable ONCONFIG has after a typical installation of IDS. Or in other words: the name of the configuration file for the IDS instance specified in INFORMIXSERVER. The specified configuration file will be searched at '$INFORMIXDIR/etc/$ONCONFIG'. If this parameter is unspecified the script will try to get the value from the shell environment. ONCONFIG environment variable - + This parameter defines which database to use in order to monitor the IDS instance. If this parameter is unspecified the script will use the 'sysmaster' database as a default. database to use for monitoring, defaults to 'sysmaster' - + SQL test query to run on the database specified by the parameter 'dbname' in order to monitor the IDS instance and determine if it's functional or not. If this parameter is unspecified the script will use 'SELECT COUNT(*) FROM systables;' as a default. SQL test query to use for monitoring, defaults to 'SELECT COUNT(*) FROM systables;' - + ! } # # Function that either forwards log messages to the ocf_log function # provided by heartbeat or simply prints them to standard out via echo. # This is determined by setting the variable "idslogger" to "echo" or "ocf". # The default for "idslogger" is "ocf". # ids_log() { # Where should the passed log messages be passed to, # to the standard output via the echo command ("echo") # or to the ocf_log function provided by heartbeat ("ocf") ? # Default is "ocf". idslogger="ocf" # When the variable "idsdebug" is not set to "true" # this function (ids_log) will not print any info message # that has been forwarded to it! 
# This is done in order to spare if-statements within the # other functions in this script and to centralize the decision # whether to have a chatty resource script or not... ;) # Nevertheless, error messages will always be printed! idsdebug=false # Only continue if the two expected parameters # are not empty and "idsdebug" is set to "true" # or the message is of type "error". if [ $# -eq 2 -a -n "$1" -a -n "$2" ]; then if [ "$idsdebug" = "true" -o "$1" = "error" ]; then case $idslogger in # Print messages to stdout via echo command. echo) echo "`date +'%b %d %H:%M:%S'`: [$1] $2";; # Pass messages to ocf_log function. ocf|*) ocf_log "$1" "$2";; esac fi fi } # # Function that prints the current values of important environment variables # needed by the script and the IDS instance itself. The just mentioned variables are: # - INFORMIXDIR # - INFORMIXSERVER # - ONCONFIG # - PATH # - LD_LIBRARY_PATH # ids_debug() { ids_log info "called ids_debug" ids_log info "INFORMIXDIR=$INFORMIXDIR" ids_log info "OCF_RESKEY_informixdir=$OCF_RESKEY_informixdir" ids_log info "INFORMIXSERVER=$INFORMIXSERVER" ids_log info "OCF_RESKEY_informixserver=$OCF_RESKEY_informixserver" ids_log info "ONCONFIG=$ONCONFIG" ids_log info "OCF_RESKEY_onconfig=$OCF_RESKEY_onconfig" ids_log info "PATH=$PATH" ids_log info "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" ids_log info "dbname=$OCF_RESKEY_dbname" ids_log info "sqltestquery=$OCF_RESKEY_sqltestquery" ids_log info "this script is run as user: `id`" ids_log info "...in the current working directory: `pwd`" } # # Function that validates if the passed parameters are valid and sets them if valid. # If the first three parameters have not been passed, # this function checks whether they have been already set in the parent's shell environment. 
# The variables that are checked and set (only the capitalized ones are set) are:
# - INFORMIXDIR
# - INFORMIXSERVER
# - ONCONFIG
# - PATH
# - LD_LIBRARY_PATH
# - dbname
# - sqltestquery
#
# The checks run in order and stop at the first failure, so the returned
# code always describes the first problem that was found:
#   OCF_SUCCESS   - the configuration is usable
#   OCF_ERR_ARGS  - INFORMIXDIR, INFORMIXSERVER or ONCONFIG is invalid
#   OCF_ERR_PERM  - a required IDS command is missing or not executable
# Empty dbname/sqltestquery values are replaced by their defaults; this
# never influences the return code.
#
ids_validate() {

    ids_log info "called ids_validate"
    rc=$OCF_SUCCESS

    # When all three resource parameters were passed they override whatever
    # is already present in the shell environment. Otherwise the inherited
    # INFORMIXDIR, INFORMIXSERVER and ONCONFIG values are validated as-is.
    if [ -n "$OCF_RESKEY_informixdir" -a -n "$OCF_RESKEY_informixserver" -a -n "$OCF_RESKEY_onconfig" ]; then
        ids_log info "ids_validate: passed vars not empty"

        INFORMIXDIR=$OCF_RESKEY_informixdir
        export INFORMIXDIR

        INFORMIXSERVER=$OCF_RESKEY_informixserver
        export INFORMIXSERVER

        ONCONFIG=$OCF_RESKEY_onconfig
        export ONCONFIG
    fi

    # INFORMIXDIR has to be non-empty and an existing directory.
    if [ -n "$INFORMIXDIR" -a -d "$INFORMIXDIR" ]; then
        ids_log info "ids_validate: INFORMIXDIR is valid: $INFORMIXDIR"
    else
        ids_log error "ids_validate: INFORMIXDIR is invalid: $INFORMIXDIR"
        rc=$OCF_ERR_ARGS
    fi

    # INFORMIXSERVER has to be non-empty.
    if [ $rc -eq $OCF_SUCCESS ]; then
        if [ -n "$INFORMIXSERVER" ]; then
            ids_log info "ids_validate: INFORMIXSERVER is valid: $INFORMIXSERVER"
        else
            ids_log error "ids_validate: INFORMIXSERVER is invalid: $INFORMIXSERVER"
            rc=$OCF_ERR_ARGS
        fi
    fi

    # ONCONFIG has to name a non-empty file below $INFORMIXDIR/etc.
    # If ONCONFIG is empty, fall back to "onconfig" and then "onconfig.std".
    if [ $rc -eq $OCF_SUCCESS ]; then
        if [ -n "$ONCONFIG" ]; then
            if [ -s "$INFORMIXDIR/etc/$ONCONFIG" ]; then
                ids_log info "ids_validate: ONCONFIG is a non-empty file in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
            else
                ids_log error "ids_validate: ONCONFIG is invalid, searched for it in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
                rc=$OCF_ERR_ARGS
            fi
        elif [ -s "$INFORMIXDIR/etc/onconfig" ]; then
            ONCONFIG="onconfig"
            export ONCONFIG
            ids_log info "ids_validate: ONCONFIG is a non-empty file in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
        elif [ -s "$INFORMIXDIR/etc/onconfig.std" ]; then
            ONCONFIG="onconfig.std"
            export ONCONFIG
            ids_log info "ids_validate: ONCONFIG is a non-empty file in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
        else
            ids_log error "ids_validate: ONCONFIG is invalid, searched for it in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
            rc=$OCF_ERR_ARGS
        fi
    fi

    # The commands oninit, onstat, onmode and dbaccess have to exist in
    # $INFORMIXDIR/bin and be executable.
    if [ $rc -eq $OCF_SUCCESS ]; then
        if [ -x "$INFORMIXDIR/bin/oninit" -a -x "$INFORMIXDIR/bin/onstat" -a -x "$INFORMIXDIR/bin/onmode" -a -x "$INFORMIXDIR/bin/dbaccess" ]; then
            ids_log info "ids_validate: oninit, onstat, onmode and dbaccess exist and are executable in: \$INFORMIXDIR/bin/"
        else
            ids_log error "ids_validate: oninit, onstat, onmode or dbaccess don't exist or they are not executable in: \$INFORMIXDIR/bin/"
            rc=$OCF_ERR_PERM
        fi
    fi

    # Extend PATH and LD_LIBRARY_PATH as needed for the IDS instance to run
    # properly, but only if that has not been done before. Otherwise both
    # variables would keep growing every time the resource agent is called.
    # The quoted pattern makes the match literal: INFORMIXDIR is a path, not
    # a regular expression.
    if [ $rc -eq $OCF_SUCCESS ]; then
        case "$PATH" in
            *"$INFORMIXDIR"*)
                ids_log info "ids_validate: INFORMIXDIR already in PATH, where PATH=$PATH"
                ;;
            *)
                PATH="${INFORMIXDIR}/bin:${PATH}"
                export PATH
                ids_log info "ids_validate: PATH did not contain INFORMIXDIR, added \$INFORMIXDIR/bin"
                ;;
        esac

        case "$LD_LIBRARY_PATH" in
            *"$INFORMIXDIR"*)
                ids_log info "ids_validate: INFORMIXDIR already in LD_LIBRARY_PATH, where LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
                ;;
            *)
                # Prepend instead of overwrite so that library directories
                # already configured in the environment are not lost.
                LD_LIBRARY_PATH="${INFORMIXDIR}/lib:${INFORMIXDIR}/lib/esql${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
                export LD_LIBRARY_PATH
                ids_log info "ids_validate: LD_LIBRARY_PATH did not contain INFORMIXDIR, added \$INFORMIXDIR/lib and \$INFORMIXDIR/lib/esql, added them"
                ;;
        esac
    fi

    # An empty dbname is replaced by its default. This must not touch rc,
    # otherwise a failure detected above would be reported as success.
    if [ -n "$OCF_RESKEY_dbname" ]; then
        ids_log info "ids_validate: dbname is valid: $OCF_RESKEY_dbname"
    else
        ids_log info "ids_validate: dbname is invalid: $OCF_RESKEY_dbname"
        ids_log info "ids_validate: using '${OCF_RESKEY_dbname_default}' as default..."
        OCF_RESKEY_dbname="${OCF_RESKEY_dbname_default}"
        export OCF_RESKEY_dbname
    fi

    # Same for an empty sqltestquery.
    if [ -n "$OCF_RESKEY_sqltestquery" ]; then
        ids_log info "ids_validate: sqltestquery is valid: $OCF_RESKEY_sqltestquery"
    else
        ids_log info "ids_validate: sqltestquery is invalid: $OCF_RESKEY_sqltestquery"
        ids_log info "ids_validate: using '${OCF_RESKEY_sqltestquery_default}' as default..."
        OCF_RESKEY_sqltestquery="${OCF_RESKEY_sqltestquery_default}"
        export OCF_RESKEY_sqltestquery
    fi

    # Return exit status code.
    return $rc
}


#
# Function that start the IDS instance and reports any error that
# may occur while starting.
#
# Returns OCF_SUCCESS if the instance is (or already was) running,
# OCF_ERR_GENERIC if it is in an undefined state or 'oninit' failed, and
# OCF_ERR_UNIMPLEMENTED if ids_status returned an unexpected code.
#
ids_start() {

    ids_log info "called ids_start"

    # Get current status of IDS instance.
    ids_status
    stat=$?

    case $stat in

        # IDS instance already running - exit with success.
        $OCF_SUCCESS)
            ids_log info "ids_start: IDS instance already running: $stat"
            rc=$OCF_SUCCESS
            ;;

        # IDS instance in undefined state - exit with error.
        $OCF_ERR_GENERIC)
            ids_log error "ids_start: IDS instance in undefined state: $stat"
            ids_debug
            rc=$OCF_ERR_GENERIC
            ;;

        # IDS instance not running - try to start it.
        $OCF_NOT_RUNNING)
            ids_log info "ids_start: executing 'oninit' now..."
            oninit
            stat=$?
            ids_log info "ids_start: done executing 'oninit': $stat"

            if [ $stat -eq 0 ]; then
                # The oninit command terminated successfully - wait until
                # IDS is completely online. There is deliberately no upper
                # bound here: if IDS takes too long or hangs, the operation
                # timeout of the cluster manager cancels the start.
                stat=$OCF_ERR_GENERIC
                while [ $stat -ne $OCF_SUCCESS ]; do
                    ids_status
                    stat=$?
                    # Pause between polls instead of spinning on onstat.
                    if [ $stat -ne $OCF_SUCCESS ]; then
                        sleep 1
                    fi
                done

                # IDS is running now - success.
                ids_log info "ids_start: IDS instance successfully started: $stat"
                rc=$OCF_SUCCESS
            else
                # The oninit command terminated with an error - starting
                # the IDS resource failed!
                ids_log error "ids_start: starting IDS instance failed: $stat"
                ids_debug
                rc=$OCF_ERR_GENERIC
            fi
            ;;

        # Unexpected state - return OCF_ERR_UNIMPLEMENTED error.
        *)
            ids_log error "ids_start: unexpected state returned from ids_status: $stat"
            ids_debug
            rc=$OCF_ERR_UNIMPLEMENTED
            ;;
    esac

    # Return exit status code.
    return $rc
}


#
# Function that stops the IDS instance and reports any error that
# may occur while stopping.
#
# Returns OCF_SUCCESS if the instance is (or already was) stopped,
# OCF_ERR_GENERIC if it is in an undefined state or could not be stopped,
# and OCF_ERR_UNIMPLEMENTED if ids_status returned an unexpected code.
#
ids_stop() {

    ids_log info "called ids_stop"

    # Get current status of IDS instance.
    ids_status
    stat=$?

    case $stat in

        # IDS instance is not running - success stopping it.
        $OCF_NOT_RUNNING)
            ids_log info "ids_stop: IDS instance is not running: $stat"
            rc=$OCF_SUCCESS
            ;;

        # IDS instance is in an undefined state - exit with error.
        $OCF_ERR_GENERIC)
            ids_log error "ids_stop: IDS instance in undefined state: $stat"
            ids_debug
            rc=$OCF_ERR_GENERIC
            ;;

        # IDS instance is running - try to stop it.
        $OCF_SUCCESS)
            ids_log info "ids_stop: running 'onmode -kuy' now..."
            onmode -kuy
            stat=$?
            ids_log info "ids_stop: done running 'onmode -kuy' now: $stat"

            if [ $stat -ne 0 ]; then
                # The onmode command terminated with an error - stopping
                # the IDS resource failed!
                ids_log error "ids_stop: stopping IDS instance (by executing 'onmode -kuy') failed: $stat"
                ids_debug
                rc=$OCF_ERR_GENERIC
            else
                # The onmode command terminated successfully - the stop only
                # counts as successful if the instance now reports
                # "not running".
                ids_status
                stat=$?

                if [ $stat -eq $OCF_NOT_RUNNING ]; then
                    ids_log info "ids_stop: IDS instance successfully stopped: $stat"
                    rc=$OCF_SUCCESS
                else
                    # New state is: running or even undefined - failure!
                    ids_log error "ids_stop: stopping IDS instance failed: $stat"
                    ids_debug
                    rc=$OCF_ERR_GENERIC
                fi
            fi
            ;;

        # Unexpected state - return OCF_ERR_UNIMPLEMENTED error.
        *)
            ids_log error "ids_stop: unexpected state returned from ids_status: $stat"
            ids_debug
            rc=$OCF_ERR_UNIMPLEMENTED
            ;;
    esac

    # Return exit status code indicating whether IDS was successfully stopped or not.
    return $rc
}


#
# Function that determines the current status/state of the IDS instance,
# meaning whether it is running (the case when output of "onstat -" contains "On-Line"),
# not running (the case when output of "onstat -" contains "shared memory not initialized")
# or in an undefined state (the case output of "onstat -" contains "Quiescent", "Single-User", or other).
# If the IDS instance is declared running the exit status code will indicate success, otherwise failure of course.
#
# Returns OCF_SUCCESS ("On-Line"), OCF_NOT_RUNNING ("shared memory not
# initialized") or OCF_ERR_GENERIC (any other onstat output).
#
ids_status() {

    ids_log info "called ids_status"

    # Get current status from the onstat tool and store it.
    stat=`onstat -`

    case $stat in

        # IDS instance is running.
        *"On-Line"*)
            ids_log info "ids_status: IDS instance running: $stat"
            rc=$OCF_SUCCESS
            ;;

        # IDS instance is not running.
        *"shared memory not initialized"*)
            ids_log info "ids_status: IDS instance not running: $stat"
            rc=$OCF_NOT_RUNNING
            ;;

        # IDS instance is in an undefined state!
        *)
            ids_log error "ids_status: IDS instance status undefined: $stat"
            rc=$OCF_ERR_GENERIC
            ;;
    esac

    # Return exit status code (ergo current status of the IDS instance) to caller.
    return $rc
}


#
# Function that monitors the current status _and_ functionality of the IDS instance.
# First the state of the instance is determined. If it is running, a sql test query is
# executed on the database. If the sql test query executes successfully, the instance's
# status is rechecked and if it is still running, the script terminates with an exit
# status code indicating success. If any of the above described steps fails,
# the script terminates with an error.
#
ids_monitor() {

    ids_log info "called ids_monitor"

    ids_status
    stat=$?

    case $stat in

        # IDS instance is not running - monitoring failed.
        $OCF_NOT_RUNNING)
            ids_log info "ids_monitor: IDS instance is not running: $stat"
            rc=$OCF_NOT_RUNNING
            ;;

        # IDS instance in an undefined state - exit with error.
        $OCF_ERR_GENERIC)
            ids_log error "ids_monitor: IDS instance in undefined state: $stat"
            ids_debug
            rc=$OCF_ERR_GENERIC
            ;;

        # IDS instance is running - try to execute the sql test query and recheck state.
        $OCF_SUCCESS)
            ids_log info "ids_monitor: IDS instance is running (before executing sql test query)"
            ids_log info "ids_monitor: running sql test query now..."
            # The query and database name are quoted on purpose: unquoted,
            # a '*' in the SQL text would be expanded by the shell against
            # the files of the current working directory.
            printf '%s\n' "$OCF_RESKEY_sqltestquery" | dbaccess "$OCF_RESKEY_dbname" - > /dev/null 2>&1
            stat=$?
            ids_log info "ids_monitor: done running sql test query now: $stat"

            if [ $stat -ne 0 ]; then
                # The sql test query terminated with an error - exit with error!
                ids_log error "ids_monitor: running sql test query on IDS instance failed: $stat"
                ids_debug
                rc=$OCF_ERR_GENERIC
            else
                # The sql test query terminated successfully - check the
                # new state of the IDS instance.
                ids_status
                stat=$?

                if [ $stat -eq $OCF_SUCCESS ]; then
                    # New state is: running - success.
                    ids_log info "ids_monitor: successfully ran sql test query on IDS instance: $stat"
                    rc=$OCF_SUCCESS
                else
                    # New state is: not running or even undefined - failure!
                    ids_log error "ids_monitor: running sql test query on IDS instance failed: $stat"
                    ids_debug
                    rc=$OCF_ERR_GENERIC
                fi
            fi
            ;;

        # Unexpected state - return OCF_ERR_UNIMPLEMENTED error!
        *)
            ids_log error "ids_monitor: unexpected state returned from ids_status: $stat"
            ids_debug
            rc=$OCF_ERR_UNIMPLEMENTED
            ;;
    esac

    # Return exit status code indicating whether IDS is running and functional or not.
    return $rc
}


###
#
# M A I N   S E C T I O N
#
###

# Purely informational methods have to work without a valid configuration,
# so they are answered before any validation takes place.
case "$1" in
    usage)
        ids_usage
        exit $?;;
    meta-data)
        ids_meta_data
        exit $?;;
    methods)
        ids_methods
        exit $?;;
esac

# Validate configuration (parameters and such)
# passed to this script and only process the method parameter
# if the configuration is valid! Otherwise exit with OCF_ERR_ARGS error code.

# Only check configuration when given method is not "validate-all",
# as in case of "validate-all" the configuration will be checked anyway! ;)
if [ "$1" != "validate-all" ]; then
    ids_validate
    valid=$?
    ids_log info "main section: validated ids RA configuration, result: $valid"

    # Configuration invalid - terminate with error message.
    if [ $valid -ne $OCF_SUCCESS ]; then
        ids_log error "main section: terminating script due to invalid configuration"
        ids_debug
        exit $OCF_ERR_ARGS
    fi
fi

# Configuration valid or method equals to "validate-all" - react depending on called method.
case "$1" in start) ids_start exit $?;; stop) ids_stop exit $?;; status) ids_status exit $?;; monitor) ids_monitor exit $?;; validate-all) ids_validate exit $?;; methods) ids_methods exit $?;; *) ids_log error "mainsection: no or invalid command supplied: $1" exit $OCF_ERR_UNIMPLEMENTED;; esac ############################################################################### diff --git a/heartbeat/iface-vlan b/heartbeat/iface-vlan index 783fa5b11..cbe7e86da 100755 --- a/heartbeat/iface-vlan +++ b/heartbeat/iface-vlan @@ -1,475 +1,475 @@ #!/bin/sh # # OCF Resource Agent compliant iface-vlan script. # # Implements network VLAN interface management # # Copyright (C) 2013 Red Hat, Inc. All rights reserved. # Author: Fabio M. Di Nitto # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# # # TODO: # # OCF parameters are as below # OCF_RESKEY_vlan_interface # OCF_RESKEY_vlan_id # OCF_RESKEY_vlan_name # OCF_RESKEY_vlan_reorder_hdr # OCF_RESKEY_vlan_gvrp # OCF_RESKEY_vlan_mvrp # OCF_RESKEY_vlan_loose_binding # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_vlan_reorder_hdr_default=1 OCF_RESKEY_vlan_gvrp_default=0 OCF_RESKEY_vlan_mvrp_default=0 OCF_RESKEY_vlan_loose_binding_default=0 OCF_RESKEY_vlan_name_default=${OCF_RESKEY_vlan_interface}.${OCF_RESKEY_vlan_id} : ${OCF_RESKEY_vlan_name=${OCF_RESKEY_vlan_name_default}} : ${OCF_RESKEY_vlan_reorder_hdr=${OCF_RESKEY_vlan_reorder_hdr_default}} : ${OCF_RESKEY_vlan_gvrp=${OCF_RESKEY_vlan_gvrp_default}} # don't set defaults for mvrp or loose binding since both # are rather new kernel features and they might not be supported #: ${OCF_RESKEY_vlan_mvrp=${OCF_RESKEY_vlan_mvrp_default}} #: ${OCF_RESKEY_vlan_loose_binding=${OCF_RESKEY_vlan_loose_binding_default}} ####################################################################### vlan_usage() { cat < 1.0 This resource manages VLAN network interfaces. It can add, remove, configure VLANs. Manages VLAN network interfaces. Define the interface where VLAN should be attached. Network interface. Define the VLAN ID. It has to be a value between 0 and 4094. Define the VLAN ID. Define the name of the VLAN interface (max 15 charaters). Name of the VLAN. - + Enable or disable header reordering. Enable or disable header reordering. Enable or disable GARP VLAN registration protocol. Enable or disable gvrp. Enable or disable Multiple VLAN Registration Protocol. Please note that most distributions do not ship a version of iproute2 that supports mvrp yet, even if the kernel has support for it. Check output of $IPADDR2 link add type vlan --help in the FLAG section to verify if mvrp support is available. Enable or disable mvrp. 
Enable or disable VLAN loose bind. By default the VLAN interface admin status (UP/DOWN) follows the underneath interface status. Enabling loose bind allows the VLAN to disconnect from the interface status. Be very careful that enabling loose binding could invalidate this agent monitor operations. Please note that most distributions do not ship a version of iproute2 that supports loose_binding yet, even if the kernel has support for it. Check output of $IPADDR2 link add type vlan --help in the FLAG section to verify if loose_binding support is available. Enable or disable loose binding. END } # check if the interface is admin up/down iface_is_up() { if ! $IP2UTIL -o link show $1 | \ sed -e 's#.*<##g' -e 's#>.*##' -e 's#LOWER_UP##g' | \ grep -q UP; then return 1 fi return 0 } # check if the slaves have link layer up/down # see kernel network documentation on meaning of LOWER_UP flag # for more in depth explanation on how it works # NOTE: this check is not reliable in virt environment # since interfaces are always LOWER_UP. There is no way # from the guest to know if the host has disconnected somehow iface_lower_is_up() { if ! $IP2UTIL -o link show $1 | \ grep -q LOWER_UP; then return 1 fi return 0 } vlan_validate() { check_binary $IP2UTIL if [ -z "$OCF_RESKEY_vlan_interface" ]; then ocf_log err "Invalid OCF_RESKEY_vlan_interface: value cannot be empty" return 1 fi # the echo .. is the equivalent of strlen in bash # # /usr/include/linux/if.h:#define IFNAMSIZ 16 # needs to include 0 byte end string if [ "${#OCF_RESKEY_vlan_interface}" -gt 15 ]; then ocf_log err "Invalid OCF_RESKEY_vlan_interface: name is too long" return 1 fi if [ ! -d "/sys/class/net" ]; then ocf_log err "Unable to find sysfs network class in /sys" return 1 fi if [ ! 
-e "/sys/class/net/$OCF_RESKEY_vlan_interface" ]; then ocf_log err "Invalid OCF_RESKEY_vlan_interface: $OCF_RESKEY_vlan_interface does not exists" return 1 fi if [ -z "$OCF_RESKEY_vlan_id" ]; then ocf_log err "Invalid OCF_RESKEY_vlan_id: value cannot be empty" return 1 fi if ! ocf_is_decimal "$OCF_RESKEY_vlan_id" || \ [ "$OCF_RESKEY_vlan_id" -gt "4094" ]; then ocf_log err "Invalid OCF_RESKEY_vlan_id: must be a decimal value (0 to 4094 included)" return 1 fi if [ "${#OCF_RESKEY_vlan_name}" -gt 15 ]; then ocf_log err "Invalid OCF_RESKEY_vlan_name: name is too long" return 1 fi return 0 } vlan_check() { if [ -e "/sys/class/net/$OCF_RESKEY_vlan_name" ]; then if [ ! -e "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" ]; then return $OCF_ERR_GENERIC fi else if [ -e "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" ]; then error="$(rm -f "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" 2>&1)" if [ "$?" != "0" ]; then ocf_log err "Unable to remove stale lock file for vlan $OCF_RESKEY_vlan_name: $error" return $OCF_ERR_GENERIC fi fi return $OCF_NOT_RUNNING fi if ! iface_is_up $OCF_RESKEY_vlan_interface; then if ocf_is_true "$OCF_RESKEY_vlan_loose_binding"; then ocf_log warn "Interface $OCF_RESKEY_vlan_interface is administratively down" else ocf_log err "Interface $OCF_RESKEY_vlan_interface is administratively down" return $OCF_ERR_GENERIC fi fi if ! iface_is_up $OCF_RESKEY_vlan_name; then ocf_log err "VLAN $OCF_RESKEY_vlan_name is administratively down" return $OCF_ERR_GENERIC fi if ! 
iface_lower_is_up $OCF_RESKEY_vlan_name; then ocf_log err "VLAN $OCF_RESKEY_vlan_name has no active link-layer" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # we need a simpler stop version to clean after us if start fails # without involving any error checking # rolling back in case of failure is otherwise complex vlan_force_stop() { $IP2UTIL link delete "$OCF_RESKEY_vlan_name" >/dev/null 2>&1 rm -f "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" 2>&1 } vlan_start() { # check if the vlan already exists vlan_check ret=$? if [ "$ret" != "$OCF_NOT_RUNNING" ]; then return $ret fi # make sure kernel module is loaded if [ ! -e /proc/net/vlan ]; then error="$(modprobe 8021q 2>&1)" if [ "$?" != "0" ]; then ocf_log err "Unable to load kernel 8021q driver: $error" return $OCF_ERR_GENERIC fi fi # generate options VLANOPTS="" if [ -n "$OCF_RESKEY_vlan_reorder_hdr" ]; then if ocf_is_true "$OCF_RESKEY_vlan_reorder_hdr"; then VLANOPTS="reorder_hdr on" else VLANOPTS="reorder_hdr off" fi fi if [ -n "$OCF_RESKEY_vlan_gvrp" ]; then if ocf_is_true "$OCF_RESKEY_vlan_gvrp"; then VLANOPTS="$VLANOPTS gvrp on" else VLANOPTS="$VLANOPTS gvrp off" fi fi if [ -n "$OCF_RESKEY_vlan_mvrp" ]; then if ocf_is_true "$OCF_RESKEY_vlan_mvrp"; then VLANOPTS="$VLANOPTS mvrp on" else VLANOPTS="$VLANOPTS mvrp off" fi fi if [ -n "$OCF_RESKEY_vlan_loose_binding" ]; then if ocf_is_true "$OCF_RESKEY_vlan_loose_binding"; then VLANOPTS="$VLANOPTS loose_binding on" else VLANOPTS="$VLANOPTS loose_binding off" fi fi # create the VLAN error="$($IP2UTIL link add link "$OCF_RESKEY_vlan_interface" name "$OCF_RESKEY_vlan_name" type vlan id "$OCF_RESKEY_vlan_id" $VLANOPTS 2>&1)" if [ "$?" != "0" ]; then ocf_log err "Unable to create VLAN $OCF_RESKEY_vlan_name: $error" return $OCF_ERR_GENERIC fi # set the interface up error="$($IP2UTIL link set dev "$OCF_RESKEY_vlan_interface" up 2>&1)" if [ "$?" 
!= "0" ]; then ocf_log err "Unable to set VLAN $OCF_RESKEY_vlan_interface up: $error" return $OCF_ERR_GENERIC fi # set the vlan up error="$($IP2UTIL link set dev "$OCF_RESKEY_vlan_name" up 2>&1)" if [ "$?" != "0" ]; then ocf_log err "Unable to set VLAN $OCF_RESKEY_vlan_name up: $error" return $OCF_ERR_GENERIC fi error="$(touch "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" 2>&1)" if [ "$?" != "0" ]; then ocf_log err "Unable to create lock file for VLAN $OCF_RESKEY_vlan_name: $error" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } vlan_stop() { vlan_check ret=$? if [ "$ret" = "$OCF_NOT_RUNNING" ]; then return $OCF_SUCCESS fi if [ "$ret" != "$OCF_SUCCESS" ]; then return $ret fi # set vlan down error="$($IP2UTIL link set dev "$OCF_RESKEY_vlan_name" down 2>&1)" if [ "$?" != "0" ]; then ocf_log err "Unable to set VLAN $OCF_RESKEY_vlan_name down: $error" return $OCF_ERR_GENERIC fi # delete vlan error="$($IP2UTIL link delete "$OCF_RESKEY_vlan_name" 2>&1)" if [ "$?" != "0" ]; then ocf_log err "Unable to delete VLAN $OCF_RESKEY_vlan_name: $error" return $OCF_ERR_GENERIC fi error="$(rm -f "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" 2>&1)" if [ "$?" != "0" ]; then ocf_log err "Unable to remove lock file for VLAN $OCF_RESKEY_vlan_name: $error" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) vlan_meta_data exit $OCF_SUCCESS ;; usage|help) vlan_usage exit $OCF_SUCCESS ;; esac if [ ! -d "$HA_RSCTMP" ]; then ocf_log debug "$HA_RSCTMP not found, we are probably being executed manually" mkdir -p "$HA_RSCTMP" fi if [ -n "$__OCF_ACTION" ] && ! vlan_validate; then exit $OCF_ERR_CONFIGURED fi case $__OCF_ACTION in start|stop) if ! ocf_is_root; then ocf_log err "You must be root for $__OCF_ACTION operation." exit $OCF_ERR_PERM fi ;; esac case $__OCF_ACTION in start) vlan_start ret=$? if [ "$ret" != "$OCF_SUCCESS" ]; then vlan_force_stop fi exit $ret ;; stop) vlan_stop exit $? ;; status|monitor) vlan_check exit $? 
;; validate-all) # vlan_validate above does the trick ;; *) vlan_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac # vi:sw=4:ts=8: diff --git a/heartbeat/ipsec b/heartbeat/ipsec index fe26631ca..ba3e38bb6 100755 --- a/heartbeat/ipsec +++ b/heartbeat/ipsec @@ -1,189 +1,196 @@ #!/bin/sh # # # IPSEC OCF RA. Handles IPSEC tunnels associated with a VIP # # Copyright (c) 2017 Red Hat Inc. # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Defaults +OCF_RESKEY_tunnel_default="" +OCF_RESKEY_vip_default="" OCF_RESKEY_confdir_default="/etc/ipsec.d/" +OCF_RESKEY_fallbacktunnel_default="" + +: ${OCF_RESKEY_tunnel=${OCF_RESKEY_tunnel_default}} +: ${OCF_RESKEY_vip=${OCF_RESKEY_vip_default}} : ${OCF_RESKEY_confdir=${OCF_RESKEY_confdir_default}} +: ${OCF_RESKEY_fallbacktunnel=${OCF_RESKEY_fallbacktunnel_default}} meta_data() { cat < 1.0 This is a Resource Agent to manage IPSEC tunnels associated with a Virtual IP Address. It's meant to be collocated with a specific VIP, and will manage setting up or down a specific tunnel. Handles IPSEC tunnels for VIPs The name of the tunnel to be monitored. Tunnel name - + Virtual IP address that the tunnel is using. VIP - + The directory where the IPSEC tunnel configurations can be found. Tunnel name The name of the tunnel to fall back to when the main tunnel is put down. Tunnel name to fall back to - + END } ####################################################################### ipsec_usage() { cat </dev/null fi rc=$? if [ $rc -eq 0 ]; then return $OCF_SUCCESS fi # JBoss service error return $OCF_ERR_GENERIC } monitor_rotatelogs() { pgrep -f "$ROTATELOGS.*$CONSOLE$ROTATELOG_SUFFIX" > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log warn "A rotatelogs command for $CONSOLE is not running. Restarting it." start_rotatelogs if [ $? -eq 0 ]; then ocf_log info "Restart rotatelogs process succeeded." else ocf_log warn "Restart rotatelogs process failed." fi fi } monitor_jboss() { if ! pgrep -f "$PSTRING" > /dev/null; then return $OCF_NOT_RUNNING fi isrunning_jboss $1 rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if ocf_is_true $ROTATELOG_FLG; then # Monitor rotatelogs process and restart it if it is stopped. # And never consider rotatelogs process failure to be a monitor failure # as long as JBoss process works fine. 
monitor_rotatelogs fi return $OCF_SUCCESS } start_rotatelogs() { su - -s /bin/sh $JBOSS_USER \ -c "$ROTATELOGS -l \"$CONSOLE$ROTATELOG_SUFFIX\" $ROTATEVALUE" \ < "$CONSOLE" > /dev/null 2>&1 & } rotate_console() { # Check $CONSOLE$ROTATELOG_SUFFIX is writable or not. CURRENT_ROTATELOG_SUFFIX=`date +"$ROTATELOG_SUFFIX"` su - -s /bin/sh $JBOSS_USER \ -c "touch \"$CONSOLE$CURRENT_ROTATELOG_SUFFIX\"" > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log err "$CONSOLE$CURRENT_ROTATELOG_SUFFIX is not writable." return $OCF_ERR_GENERIC fi # Clean up and set permissions on required files if [ -p "$CONSOLE" ]; then rm -rf "$CONSOLE" elif [ -e "$CONSOLE" ]; then DATE=`date +"%F-%H%M%S"` ocf_log warn "$CONSOLE already exists. It is saved as $CONSOLE-$DATE" mv "$CONSOLE" "$CONSOLE-$DATE" fi mkfifo -m700 "$CONSOLE" chown --dereference "$JBOSS_USER" "$CONSOLE" || true start_rotatelogs } start_jboss() { monitor_jboss start if [ $? -eq $OCF_SUCCESS ]; then ocf_log info "JBoss already running." return $OCF_SUCCESS fi if ocf_is_true $ROTATELOG_FLG; then rotate_console if [ $? -eq 0 ]; then ocf_log debug "Rotate console log succeeded." else ocf_log err "Rotate console log failed. Avoid starting jboss without console log rotation." return $OCF_ERR_GENERIC fi fi ocf_log info "Starting JBoss[$RESOURCE_NAME]" if [ "$JBOSS_USER" = root ]; then "$RUN_COMMAND" $RUN_OPTS \ >> "$CONSOLE" 2>&1 & else su - -s /bin/sh "$JBOSS_USER" \ -c "export JAVA_HOME=\"${JAVA_HOME}\"; \ export JAVA_OPTS=\"${JAVA_OPTS}\"; \ export JBOSS_HOME=\"${JBOSS_HOME}\"; \ export JBOSS_BASE_DIR=\"${JBOSS_BASE_DIR}\"; \ \"$RUN_COMMAND\" $RUN_OPTS" \ >> "$CONSOLE" 2>&1 & fi while true; do monitor_jboss start if [ $? -eq $OCF_SUCCESS ]; then break fi ocf_log info "start_jboss[$RESOURCE_NAME]: retry monitor_jboss" sleep 3 done ocf_log info "JBoss[$RESOURCE_NAME] is started." 
return $OCF_SUCCESS } output_thread_dump() { ocf_log info "stop_jboss[$RESOURCE_NAME]: output a JVM thread dump to $CONSOLE" pkill -QUIT -f "$PSTRING" } # arg1 : timeout # arg2 : send specified signal wait_process_exit() { local lapse_sec=0 local timeout=$1 local signal=$2 while pgrep -f "$PSTRING" > /dev/null; do sleep 1 lapse_sec=`expr $lapse_sec + 1` if [ -n "$signal" ]; then ocf_log info "stop_jboss[$RESOURCE_NAME]: kill jboss by SIG$signal ($lapse_sec/$timeout)" pkill -$signal -f "$PSTRING" else ocf_log info "stop_jboss[$RESOURCE_NAME]: stop NORM $lapse_sec/$timeout" fi if [ "$timeout" -ne 0 -a $lapse_sec -ge $timeout ]; then return 1 fi done return 0 } stop_jboss5() { if [ "$JBOSS_USER" = root ]; then "$JBOSS_HOME/bin/shutdown.sh" $SHUTDOWN_OPTS -S \ >> "$CONSOLE" 2>&1 & else su - -s /bin/sh "$JBOSS_USER" \ -c "export JAVA_HOME=\"${JAVA_HOME}\"; \ export JBOSS_HOME=\"${JBOSS_HOME}\"; \ \"$JBOSS_HOME/bin/shutdown.sh\" $SHUTDOWN_OPTS -S" \ >> "$CONSOLE" 2>&1 & fi if ! wait_process_exit $SHUTDOWN_TIMEOUT; then output_thread_dump if ! wait_process_exit $KILL_TIMEOUT TERM; then return 1 fi fi return 0 } stop_jboss6() { pkill -TERM -f "$PSTRING" if ! wait_process_exit $SHUTDOWN_TIMEOUT; then output_thread_dump return 1 fi return 0 } stop_jboss() { local rc if ! pgrep -f "$PSTRING" > /dev/null; then ocf_log info "JBoss[$RESOURCE_NAME] is already stopped." else ocf_log info "Stopping JBoss[$RESOURCE_NAME]" # JBoss5 : shutdonw.sh -> SIGQUIT(output thread dump) -> SIGTERM # If the JBoss process hangs, JBoss RA waits $SHUTDOWN_TIMEOUT # seconds and tries kill TERM and QUIT for $KILL_TIMEOUT seconds. # JBoss6 : SIGTERM -> SIGQUIT(output thread dump) # If the JBoss process hangs, JBoss RA waits $SHUTDOWN_TIMEOUT # seconds and tries kill QUIT. if [ "$JBOSS_VERSION" -le 5 ]; then stop_jboss5 rc=$? else stop_jboss6 rc=$? fi if [ $rc -ne 0 ]; then # JBoss5 # The stop timeout of RA should be # longer than $SHUTDOWN_TIMEOUT + $KILL_TIMEOUT. 
# JBoss6 # The stop timeout of RA should be longer than $SHUTDOWN_TIMEOUT. wait_process_exit 0 KILL fi ocf_log info "JBoss[$RESOURCE_NAME] is stopped." fi if ocf_is_true $ROTATELOG_FLG; then rm -f "${CONSOLE}" fi return $OCF_SUCCESS } status_jboss() { if ! pgrep -f "$PSTRING" > /dev/null; then echo "JBoss process[$RESOURCE_NAME] is not running." return $OCF_NOT_RUNNING fi if isrunning_jboss; then echo "JBoss[$RESOURCE_NAME] is running." return $OCF_SUCCESS else echo "JBoss process[$RESOURCE_NAME] is running." echo "But, we can not access JBoss web service." return $OCF_NOT_RUNNING fi } metadata_jboss() { cat < 1.0 Resource script for Jboss. It manages a Jboss instance as an HA resource. Manages a JBoss application server instance -The version of JBoss. Default is 5. +The version of JBoss. Default is ${OCF_RESKEY_jboss_version_default}. The usage of JBoss was greatly changed as of JBoss 6. Specify "6" when you use JBoss 6. The version of JBoss - + The name of the resource. Defaults to the name of the resource instance. The name of the resource A destination of the log of jboss run and shutdown script. jboss log path - + Timeout for jboss bin/shutdown.sh. We wait for this timeout to expire, then send the TERM and QUIT signals. Finally, the KILL signal is used to terminate the jboss process. You should set the timeout for the stop operation to a value bigger than the sum of the timeout parameters. See also kill_timeout. shutdown timeout - + If bin/shutdown.sh doesn't stop the jboss process, then we send it TERM and QUIT signals, intermittently and once a second. After this timeout expires, if the process is still live, we use the KILL signal. See also shutdown_timeout. stop by signal timeout - + A user name to start a JBoss. A user name to start a resource. - + URL to test in the monitor operation. URL to test in the monitor operation. Home directory of Java. Defaults to the environment variable JAVA_HOME. If it is not set, then define this parameter. 
Home directory of Java. Java options. Java options. - + Home directory of Jboss. Home directory of Jboss. - + Base directory of JBoss. This parameter is not used in JBoss5. Base directory of JBoss. With this string heartbeat matches for the right process to kill. pkill/pgrep search string JBoss start command. JBoss start command. Start options to start Jboss with, defaults are from the Jboss-Doku. options for jboss run.sh Stop options to stop Jboss with. options for jboss shutdown.sh - + Rotate console log flag. Rotate console log flag - + Console log rotation value (default is 86400 seconds). Console log rotation value (default is 86400 seconds) - + Rotate console log suffix. Rotate console log suffix - + END return $OCF_SUCCESS } validate_all_jboss() { if [ ! -d "$JAVA_HOME" ]; then ocf_log err "JAVA_HOME does not exist." return $OCF_ERR_INSTALLED fi if [ ! -d "$JBOSS_HOME" ]; then ocf_log err "JBOSS_HOME does not exist." return $OCF_ERR_INSTALLED fi if [ "$JBOSS_VERSION" -gt 5 ]; then if [ ! -d "$JBOSS_BASE_DIR" ]; then ocf_log err "JBOSS_BASE_DIR does not exist." return $OCF_ERR_INSTALLED fi fi if [ ! -x "$JAVA" ]; then ocf_log err "java command does not exist." return $OCF_ERR_INSTALLED fi if ocf_is_true $ROTATELOG_FLG; then if [ ! -x "$ROTATELOGS" ]; then ocf_log err "rotatelogs command does not exist." 
return $OCF_ERR_INSTALLED fi fi return $OCF_SUCCESS } +# Parameter defaults + +OCF_RESKEY_jboss_version_default="5" +OCF_RESKEY_console_default="/var/log/${OCF_RESOURCE_INSTANCE}.log" +OCF_RESKEY_shutdown_timeout_default="5" +OCF_RESKEY_kill_timeout_default="10" +OCF_RESKEY_user_default="root" +OCF_RESKEY_java_opts_default="" +OCF_RESKEY_jboss_home_default="" +OCF_RESKEY_shutdown_opts_default="-s 127.0.0.1:1099" +OCF_RESKEY_rotate_consolelog_default="false" +OCF_RESKEY_rotate_value_default="86400" +OCF_RESKEY_rotate_logsuffix_default=".%F" + COMMAND=$1 -JBOSS_VERSION="${OCF_RESKEY_jboss_version-5}" +JBOSS_VERSION="${OCF_RESKEY_jboss_version-${OCF_RESKEY_jboss_version_default}}" if ! ocf_is_decimal $JBOSS_VERSION; then ocf_log err "Invalid parameter value: jboss_version [$JBOSS_VERSION]" return $OCF_ERR_ARGS fi # Setting of the default value if [ "$JBOSS_VERSION" -le 5 ]; then OCF_RESKEY_statusurl_default="http://127.0.0.1:8080" OCF_RESKEY_pstring_default="java -Dprogram.name=run.sh" OCF_RESKEY_run_command_default="${OCF_RESKEY_jboss_home}/bin/run.sh" OCF_RESKEY_run_opts_default="-c default" else OCF_RESKEY_jboss_base_dir_default="${OCF_RESKEY_jboss_home}/standalone" JBOSS_BASE_DIR="${OCF_RESKEY_jboss_base_dir-${OCF_RESKEY_jboss_base_dir_default}}" OCF_RESKEY_statusurl_default="http://127.0.0.1:9990" OCF_RESKEY_pstring_default="java.*-Djboss.server.base.dir=${JBOSS_BASE_DIR}( .*)?$" OCF_RESKEY_run_command_default="${OCF_RESKEY_jboss_home}/bin/standalone.sh" OCF_RESKEY_run_opts_default="" fi RESOURCE_NAME="${OCF_RESKEY_resource_name-${OCF_RESOURCE_INSTANCE}}" CONSOLE="${OCF_RESKEY_console-/var/log/${RESOURCE_NAME}.log}" -SHUTDOWN_TIMEOUT="${OCF_RESKEY_shutdown_timeout-5}" -KILL_TIMEOUT="${OCF_RESKEY_kill_timeout-10}" -JBOSS_USER="${OCF_RESKEY_user-root}" +SHUTDOWN_TIMEOUT="${OCF_RESKEY_shutdown_timeout-${OCF_RESKEY_shutdown_timeout_default}}" +KILL_TIMEOUT="${OCF_RESKEY_kill_timeout-${OCF_RESKEY_kill_timeout_default}}" 
+JBOSS_USER="${OCF_RESKEY_user-${OCF_RESKEY_user_default}}" STATUSURL="${OCF_RESKEY_statusurl-${OCF_RESKEY_statusurl_default}}" PSTRING="${OCF_RESKEY_pstring-${OCF_RESKEY_pstring_default}}" RUN_OPTS="${OCF_RESKEY_run_opts-${OCF_RESKEY_run_opts_default}}" -SHUTDOWN_OPTS="${OCF_RESKEY_shutdown_opts--s 127.0.0.1:1099}" -ROTATELOG_FLG="${OCF_RESKEY_rotate_consolelog-false}" -ROTATEVALUE="${OCF_RESKEY_rotate_value-86400}" -ROTATELOG_SUFFIX="${OCF_RESKEY_rotate_logsuffix-.%F}" +SHUTDOWN_OPTS="${OCF_RESKEY_shutdown_opts-${OCF_RESKEY_shutdown_opts_default}}" +ROTATELOG_FLG="${OCF_RESKEY_rotate_consolelog-${OCF_RESKEY_rotate_consolelog_default}}" +ROTATEVALUE="${OCF_RESKEY_rotate_value-${OCF_RESKEY_rotate_value_default}}" +ROTATELOG_SUFFIX="${OCF_RESKEY_rotate_logsuffix-${OCF_RESKEY_rotate_logsuffix_default}}" if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi if [ "$COMMAND" = "meta-data" ]; then metadata_jboss exit $OCF_SUCCESS fi if [ "$COMMAND" = "help" -o "$COMMAND" = "usage" ]; then usage exit $OCF_SUCCESS fi # test if these two are set and if directories exist and if the # required scripts/binaries exist; use OCF_ERR_INSTALLED JAVA_HOME="${OCF_RESKEY_java_home-${JAVA_HOME}}" JAVA_OPTS="${OCF_RESKEY_java_opts}" JBOSS_HOME="${OCF_RESKEY_jboss_home}" RUN_COMMAND="${OCF_RESKEY_run_command-${OCF_RESKEY_run_command_default}}" LSB_STATUS_STOPPED=3 export JAVA_HOME JAVA_OPTS JBOSS_HOME JBOSS_BASE_DIR JAVA=${JAVA_HOME}/bin/java ROTATELOGS="" if ocf_is_true $ROTATELOG_FLG; then # Look for rotatelogs/rotatelogs2 if [ -x /usr/sbin/rotatelogs ]; then ROTATELOGS=/usr/sbin/rotatelogs elif [ -x /usr/sbin/rotatelogs2 ]; then ROTATELOGS=/usr/sbin/rotatelogs2 fi fi validate_all_jboss rc=$? [ "$COMMAND" = "validate-all" ] && exit $rc if [ $rc -ne 0 ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi case "$COMMAND" in start) start_jboss func_status=$? 
exit $func_status ;; stop) stop_jboss func_status=$? exit $func_status ;; status) status_jboss exit $? ;; monitor) monitor_jboss func_status=$? exit $func_status ;; validate-all) validate_all_jboss exit $? ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/jira.in b/heartbeat/jira.in index b9d6abed3..6a4d9c570 100644 --- a/heartbeat/jira.in +++ b/heartbeat/jira.in @@ -1,281 +1,291 @@ #!@BASH_SHELL@ # #################################################################### # Description: OCF Resource Agent to manage JIRA software. # Author : Saleh A. (saleh.abbas.saber@gmail.com) # # License : WTFPL 2 # # DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE # Version 2, December 2004 # # Copyright (C) 2004 Sam Hocevar # # Everyone is permitted to copy and distribute verbatim or modified # copies of this license document, and changing it is allowed as long # as the name is changed. # # DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE # TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION # # 0. You just DO WHAT THE FUCK YOU WANT TO. # #################################################################### # Parameters: # OCF_RESKEY_statusurl : Status URL to monitor JIRA # (default: http://localhost:8080/status) # OCF_RESKEY_java_home : Java Home # (default: /usr/lib/jvm/jre) # OCF_RESKEY_jira_installation : Jira installtion directory # OCF_RESKEY_jira_user : User running Jira software # (by default: jira) #################################################################### # Initialization # Source ocf-shellfuncs : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_statusurl_default="http://localhost:8080/status" +OCF_RESKEY_java_home_default="/usr/lib/jvm/jre" +OCF_RESKEY_jira_user_default="jira" + +: ${OCF_RESKEY_statusurl=${OCF_RESKEY_statusurl_default}} +: ${OCF_RESKEY_java_home=${OCF_RESKEY_java_home_default}} +: ${OCF_RESKEY_jira_user=${OCF_RESKEY_jira_user_default}} + # Usage jira_usage() { cat <<_EOF Usage: $0 action Supported Actions: start : start jira stop : stop jira monitor : show jira status meta-data : show the meta-data validate-all: validate the RA configuration _EOF } # Start jira_start() { # exit immediately if configuration is not valid jira_validate_all || exit $? # if resource is already running, bail out early if jira_monitor; then ocf_log info "Resource is already running" return $OCF_SUCCESS fi # Starting Jira waittime=300 su -m $jira_user -c "$jira_installation/bin/startup.sh &> /dev/null" while [[ $waittime -gt 0 ]]; do if $(curl --connect-timeout 1 --max-time 3 -s ${statusurl} | grep '{"state":"RUNNING"}' > /dev/null); then waittime=0 else sleep 1 waittime=$(($waittime - 1)) fi done # Verify jira is running jira_monitor rc=$? return $? } # Stop jira_stop() { local rc # exit immediately if configuration is not valid jira_validate_all || exit $? jira_monitor rc=$? case "$rc" in "$OCF_SUCCESS") # Currently running. Normal, expected behavior. ocf_log debug "Resource is currently running" ;; "$OCF_NOT_RUNNING") # Currently not running. Nothing to do. 
ocf_log info "Resource is already stopped" return $OCF_SUCCESS ;; esac # Stopping Jira waittime=300 su -m $jira_user -c "$jira_installation/bin/shutdown.sh &> /dev/null" while [[ $waittime -gt 0 ]]; do if $(kill -0 $(cat ${jira_installation}/work/catalina.pid 2> /dev/null) 2> /dev/null) ; then sleep 1 waittime=$(($waittime - 1)) else waittime=0 fi done # Stop JIRA forcely if it failed if $(kill -0 $(cat ${jira_installation}/work/catalina.pid 2> /dev/null) 2> /dev/null) ; then kill -9 $(cat ${jira_installation}/work/catalina.pid) sleep 1 fi # Verify jira is stopped jira_monitor rc=$? return $rc } # Monitor jira_monitor() { local rc # exit immediately if configuration is not valid jira_validate_all || exit $? if $(kill -0 $(cat ${jira_installation}/work/catalina.pid 2> /dev/null) 2> /dev/null) ; then # Is jira working if $(curl --connect-timeout 1 --max-time 3 -s ${statusurl} | grep '{"state":"RUNNING"}' > /dev/null) ; then rc=0 else # Jira has a problem rc=2 fi else # Tomcat is stopped (and Jira) rc=1 fi case "$rc" in 0) rc=$OCF_SUCCESS ocf_log debug "Resource is running" ;; 1) rc=$OCF_NOT_RUNNING ocf_log debug "Resource is not running" ;; *) ocf_log err "Resource has failed" exit $OCF_ERR_GENERIC esac return $rc } # Validat All jira_validate_all() { # Check if java is installed if ! [ -d $OCF_RESKEY_java_home ]; then ocf_log err "$OCF_RESKEY_java_home does not exist. \ Please ensure that Java is installed and configured correctly" exit $OCF_ERR_INSTALLED fi # Check if JIRA installation directory exists if ! [ -d $OCF_RESKEY_jira_installation ]; then ocf_log err "$OCF_RESKEY_jira_installation does not exist." exit $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } # Meta-data jira_meta_data(){ cat < 0.1 OCF Resource Agent to manage JIRA software JIRA OCF RA Status URL for JIRA monitoring JIRA status url - + Java Home in the Linux instance Java Home - + JIRA installation directory (binaries, ... 
etc) JIRA installation directory User to run Jira software with Jira user - + EOF } # Execution # Set vars from defined OCF env vars -statusurl=${OCF_RESKEY_statusurl-http://localhost:8080/status} -java_home=${OCF_RESKEY_java_home-/usr/lib/jvm/jre} +statusurl=${OCF_RESKEY_statusurl-${OCF_RESKEY_statusurl_default}} +java_home=${OCF_RESKEY_java_home-${OCF_RESKEY_java_home_default}} jira_installation=${OCF_RESKEY_jira_installation} -jira_user=${OCF_RESKEY_jira_user-jira} +jira_user=${OCF_RESKEY_jira_user-${OCF_RESKEY_jira_user_default}} # Export JAVA_HOME env variable export JAVA_HOME=${OCF_RESKEY_java_home} # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) jira_meta_data exit $OCF_SUCCESS ;; usage|help) jira_usage exit $OCF_SUCCESS ;; esac # Anything other than meta-data and usage must pass validation jira_validate_all || exit $? # Translate each action into the appropriate function call case $__OCF_ACTION in start) jira_start;; stop) jira_stop;; status|monitor) jira_monitor;; validate-all) ;; *) jira_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? exit $rc diff --git a/heartbeat/lvmlockd b/heartbeat/lvmlockd index 57f7fdc76..3c8c69cc9 100755 --- a/heartbeat/lvmlockd +++ b/heartbeat/lvmlockd @@ -1,399 +1,411 @@ #!/bin/sh # # # lvmlockd OCF Resource Agent # # Copyright (c) 2017 SUSE LINUX, Eric Ren # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. 
Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_with_cmirrord_default="false" +OCF_RESKEY_pidfile_default="/run/lvmlockd.pid" +OCF_RESKEY_socket_path_default="/run/lvm/lvmlockd.socket" +OCF_RESKEY_syslog_priority_default="warning" +OCF_RESKEY_adopt_default="1" + +: ${OCF_RESKEY_with_cmirrord=${OCF_RESKEY_with_cmirrord_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_socket_path=${OCF_RESKEY_socket_path_default}} +: ${OCF_RESKEY_syslog_priority=${OCF_RESKEY_syslog_priority_default}} +: ${OCF_RESKEY_adopt=${OCF_RESKEY_adopt_default}} + ####################################################################### meta_data() { cat < 1.0 This agent manages the lvmlockd daemon. "lvmlockd" is like "clvmd". Both are used by LVM commands to coordinate access to shared storage, but with different design and implementations. "lvmlockd" can use two lock managers: dlm and sanlock. This agent only supports "dlm + lvmlockd". If dlm (or corosync) are already being used by other cluster software, you are advised to select dlm, then configure "controld" resource agent for dlm and this agent for "lvmlockd". Otherwise, consider sanlock for "lvmlockd" if dlm/corosync is not required. Using lvmlockd requires the settings in LVM configuration file (/etc/lvm/lvm.conf): "locking_type = 1" and "use_lvmlockd = 1". 
This RA will change the settings respectively if needed. For more information, refer to manpage lvmlockd.8. This agent manages the lvmlockd daemon Start with cmirrord (cluster mirror log daemon). activate cmirrord - + pid file pid file - + Set the socket path to listen on. socket path - + Write log messages from this level up to syslog. syslog priority - + Adopt locks from a previous instance of lvmlockd. Adopt locks from a previous instance of lvmlockd - + END } ####################################################################### -: ${OCF_RESKEY_pidfile:="/run/lvmlockd.pid"} - LOCKD="lvmlockd" CMIRRORD="cmirrord" # 0.5s sleep each count TIMEOUT_COUNT=20 usage() { cat </dev/null 2>&1 fi } silent_status() { local pid=$(get_pid) if [ -n "$pid" ] ; then daemon_is_running "$pid" rc=$? mirror_rc=$rc if ocf_is_true $OCF_RESKEY_with_cmirrord; then pid=$(pgrep $CMIRRORD | head -n1) daemon_is_running "$pid" mirror_rc=$? fi # If these ever don't match, return error to force recovery if [ $mirror_rc -ne $rc ]; then return $OCF_ERR_GENERIC fi return $rc else # No pid file false fi } # change /etc/lvm/lvm.conf to use lvmlockd setup_lvm_config() { local out="" local use_lvmlockd="" local lock_type="" # To use lvmlockd, ensure configure lvm.conf: # locking_type = 1 # use_lvmlockd = 1 out=$(lvmconfig 'global/use_lvmlockd') use_lvmlockd=$(echo "$out" | cut -d'=' -f2) out=$(lvmconfig 'global/locking_type') lock_type=$(echo "$out" | cut -d'=' -f2) if [ "$use_lvmlockd" -ne 1 ] ; then ocf_log info "setting \"use_lvmlockd=1\" in /etc/lvm/lvm.conf ..." sed -i 's,^[[:blank:]]*use_lvmlockd[[:blank:]]*=.*,\ \ \ \ use_lvmlockd = 1,g' /etc/lvm/lvm.conf fi if [ "$lock_type" -ne 1 ] ; then ocf_log info "setting \"locking_type=1\" in /etc/lvm/lvm.conf ..." sed -i 's,^[[:blank:]]*locking_type[[:blank:]]*=.*,\ \ \ \ locking_type = 1,g' /etc/lvm/lvm.conf fi return $OCF_SUCCESS } check_dlm_controld() { local pid="" # dlm daemon should have only one instance, but for safe... 
pid=$(pgrep dlm_controld | head -n1) if ! daemon_is_running $pid ; then ocf_exit_reason "DLM is not running. Is it configured?" exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } lvmlockd_start() { local extras="" setup_lvm_config ocf_log info "checking if DLM is started first..." check_dlm_controld if silent_status ; then ocf_log info "${LOCKD} already started (pid=$(get_pid))" return $OCF_SUCCESS fi if ocf_is_true $OCF_RESKEY_with_cmirrord; then ocf_log info "starting ${CMIRRORD}..." $CMIRRORD rc=$? if [ $rc -ne $OCF_SUCCESS ] ; then ocf_exit_reason "Failed to start ${CMIRRORD}, exit code: $rc" return $OCF_ERR_GENERIC fi fi if [ ! -z "$OCF_RESKEY_socket_path" ] ; then extras="$extras -s ${OCF_RESKEY_socket_path}" fi if [ ! -z "$OCF_RESKEY_syslog_priority" ] ; then extras="$extras -S ${OCF_RESKEY_syslog_priority}" fi if [ ! -z "$OCF_RESKEY_adopt" ] ; then extras="$extras -A ${OCF_RESKEY_adopt}" else # Inside lvmlockd daemon, this option defaults to 0. But, we # want it defaults to 1 for resource agent. When RA monitor pulls # this daemon up, we expect it to adopt locks from a previous # instance of lvmlockd. extras="$extras -A 1" fi # This client only support "dlm" lock manager extras="$extras -g dlm" ocf_log info "starting ${LOCKD}..." ocf_run ${LOCKD} -p ${OCF_RESKEY_pidfile} $extras rc=$? if [ $rc -ne $OCF_SUCCESS ] ; then ocf_exit_reason "Failed to start ${LOCKD}, exit code: $rc" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # Each shared VG has its own lockspace. Besides, lvm_global lockspace # is for global use, and it should be the last one to close. It should # be enough to only check on lvm_global. wait_lockspaces_close() { local retries=0 ocf_log info "Waiting for all lockspaces to be closed" while [ $retries -lt "$TIMEOUT_COUNT" ] do if ! 
dlm_tool ls lvm_global | grep -Eqs "^name[[:space:]]+lvm_global" ; then return $OCF_SUCCESS fi sleep 0.5 retries=$((retries + 1)) done ocf_exit_reason "Failed to close all lockspaces clearly" exit $OCF_ERR_GENERIC } kill_stop() { local proc=$1 local pid=$2 local retries=0 ocf_log info "Killing $proc (pid=$pid)" while daemon_is_running $pid && [ $retries -lt "$TIMEOUT_COUNT" ] do if [ $retries -ne 0 ] ; then # don't sleep on the first try sleep 0.5 fi kill -s TERM $pid >/dev/null 2>&1 retries=$((retries + 1)) done } lvmlockd_stop() { local pid="" if ! silent_status ; then ocf_log info "${LOCKD} is not running" return $OCF_SUCCESS fi if [ -n "$(dlm_tool ls)" ]; then # We are going to stop lvmlockd, at this moment, we hope all shared VG have # been deactivated, otherwise we are in trouble: the stop action will fail! ocf_log info "stop the lockspaces of shared VG(s)..." ocf_run lvmlockctl --stop-lockspaces rc=$? if [ $rc -ne $OCF_SUCCESS ] ; then ocf_exit_reason "Failed to close lockspace, exit code: $rc" return $OCF_ERR_GENERIC fi fi wait_lockspaces_close pid=$(get_pid) kill_stop $LOCKD $pid if ocf_is_true $OCF_RESKEY_with_cmirrord; then pid=$(pgrep $CMIRRORD) kill_stop $CMIRRORD $pid fi if silent_status ; then ocf_exit_reason "Failed to stop, ${LOCKD} or ${CMIRRORD} still running." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } lvmlockd_monitor() { if silent_status ; then return $OCF_SUCCESS fi ocf_log info "${LOCKD} not running" return $OCF_NOT_RUNNING } lvmlockd_validate() { check_binary ${LOCKD} check_binary lvm check_binary dlm_tool check_binary pgrep check_binary lvmlockctl if ocf_is_true $OCF_RESKEY_with_cmirrord; then check_binary $CMIRRORD fi return $OCF_SUCCESS } # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) usage exit $OCF_SUCCESS ;; esac # Anything other than meta-data and usage must pass validation lvmlockd_validate || exit $? 
# Translate each action into the appropriate function call case $__OCF_ACTION in start) lvmlockd_start ;; stop) lvmlockd_stop ;; monitor) lvmlockd_monitor ;; validate-all) lvmlockd_validate ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/lxc.in b/heartbeat/lxc.in index 7a1c62253..b6c076b32 100644 --- a/heartbeat/lxc.in +++ b/heartbeat/lxc.in @@ -1,354 +1,358 @@ #!@BASH_SHELL@ # Should now conform to guidelines: # https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc # # LXC (Linux Containers) OCF RA. # Used to cluster enable the start, stop and monitoring of a LXC container. # # Copyright (c) 2011 AkurIT.com.au, Darren Thompson # All Rights Reserved. # # Without limiting the rights of the original copyright holders # This resource is licensed under GPL version 2 # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# OCF instance parameters # OCF_RESKEY_container # OCF_RESKEY_config # OCF_RESKEY_log # OCF_RESKEY_use_screen # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults +OCF_RESKEY_container_default="" +OCF_RESKEY_config_default="" OCF_RESKEY_log_default="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.log" OCF_RESKEY_use_screen_default="false" +: ${OCF_RESKEY_container=${OCF_RESKEY_container_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_use_screen=${OCF_RESKEY_use_screen_default}} # Set default TRANS_RES_STATE (temporary file to "flag" if resource was stated but not stopped) TRANS_RES_STATE="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.state" meta_data() { cat < 0.1 Allows LXC containers to be managed by the cluster. Notes for lxc Versions before 1.0.0, where the Container is stopped using kill -PWR instead of lxc-stop: It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" Manages LXC containers The unique name for this 'Container Instance' e.g. 'test1'. Container Name - + Absolute path to the file holding the specific configuration for this container e.g. '/etc/lxc/test1/config'. The LXC config file. - + Absolute path to the container log file Container log file Provides the option of capturing the 'root console' from the container and showing it on a separate screen. To see the screen output run 'screen -r {container name}' The default value is set to 'false', change to 'true' to activate this option Use 'screen' for container 'root console' output END } LXC_usage() { cat <${CGROUP_MOUNT_POINT}/notify_on_release return 0 } LXC_start() { # put this here as it's so long it gets messy later!!! 
if ocf_is_true $OCF_RESKEY_use_screen; then STARTCMD="screen -dmS ${OCF_RESKEY_container} lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log}" else STARTCMD="lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log} -d" fi LXC_status if [ $? -eq $OCF_SUCCESS ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already running" ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi ocf_log info "Starting" ${OCF_RESKEY_container} ocf_run ${STARTCMD} || exit $OCF_ERR_GENERIC # Spin on status, wait for the cluster manager to time us out if # we fail while ! LXC_status; do ocf_log info "Container ${OCF_RESKEY_container} has not started, waiting" sleep 1 done ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } LXC_stop() { LXC_status if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi if ! ocf_version_cmp "`lxc_version`" 1.0.0 ; then # Use lxc-stop if we are newer than 1.0.0 timeout=$(( ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) ocf_log info "Stopping Container ${OCF_RESKEY_container} using lxc-stop" # lxc-stop will return failure even if it reached the timeout and sucessfully hard-stopped the # Container so we check below if the Container is really stopped instead of using || exit $OCF_ERR_GENERIC ocf_run lxc-stop -n "${OCF_RESKEY_container}" -t ${timeout} LXC_status if [ $? 
-eq $OCF_SUCCESS ]; then # Try to manually hard-stop if the Container is still running ocf_run lxc-stop -n "${OCF_RESKEY_container}" -k || exit $OCF_ERR_GENERIC fi else # Use kill -PWR # If the container is running "init" and is able to perform and orderly shutdown, then it should be done. # It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. # On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" local shutdown_timeout local now declare -i PID=0 declare CMD= # This should work for traditional 'sysvinit' and 'upstart' lxc-ps --name "${OCF_RESKEY_container}" -- -C init -o pid,comm |while read CN PID CMD ;do [ $PID -gt 1 ] || continue [ "$CMD" = "init" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\"" kill -PWR $PID done # This should work for containers using 'systemd' instead of 'init' lxc-ps --name "${OCF_RESKEY_container}" -- -C systemd -o pid,comm |while read CN PID CMD ;do [ $PID -gt 1 ] || continue [ "$CMD" = "systemd" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\"" kill -PWR $PID done # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds now=$(date +%s) shutdown_timeout=$(( $now + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $now -lt $shutdown_timeout ]; do LXC_status status=$? case $status in "$OCF_NOT_RUNNING") ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS ;; "$OCF_SUCCESS") # Container is still running, keep waiting (until # shutdown_timeout expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac now=$(date +%s) done # If the container is still running, it will be stopped now. regardless of state! 
ocf_run lxc-stop -n ${OCF_RESKEY_container} || exit $OCF_ERR_GENERIC fi ocf_log info "Container" ${OCF_RESKEY_container} "stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS } LXC_status() { # run lxc-info with -s option for LXC-0.7.5 or later local lxc_info_opt="-s" ocf_version_cmp "`lxc_version`" 0.7.5 && lxc_info_opt="" S=`lxc-info $lxc_info_opt -n ${OCF_RESKEY_container}` ocf_log debug "State of ${OCF_RESKEY_container}: $S" if [[ "${S##* }" = "RUNNING" ]] ; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } LXC_monitor() { LXC_status && return $OCF_SUCCESS if [ -f $TRANS_RES_STATE ]; then ocf_log err "${OCF_RESKEY_container} is not running, but state file ${TRANS_RES_STATE} exists." exit $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } LXC_validate() { # Quick check that all required attributes are set if [ -z "${OCF_RESKEY_container}" ]; then ocf_log err "LXC container name not set!" exit $OCF_ERR_CONFIGURED fi if [ -z "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration filename name not set!" exit $OCF_ERR_CONFIGURED fi # Tests that apply only to non-probes if ! ocf_is_probe; then if ! [ -f "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration file \"${OCF_RESKEY_config}\" missing or not found!" exit $OCF_ERR_INSTALLED fi if ocf_is_true $OCF_RESKEY_use_screen; then check_binary screen fi check_binary lxc-start check_binary lxc-stop if ocf_version_cmp "`lxc_version`" 1.0.0 ; then check_binary lxc-ps fi check_binary lxc-info fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then LXC_usage exit $OCF_ERR_ARGS fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) LXC_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test LXC_validate case $__OCF_ACTION in start) LXC_start;; stop) LXC_stop;; status) LXC_status;; monitor) LXC_monitor;; validate-all) ;; *) LXC_usage ocf_log err "$0 was called with unsupported arguments: $*" exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? 
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/lxd-info.in b/heartbeat/lxd-info.in index 44ec49948..5fc928aff 100644 --- a/heartbeat/lxd-info.in +++ b/heartbeat/lxd-info.in @@ -1,148 +1,156 @@ #!@BASH_SHELL@ # # # LXD Registration Service OCF Resource Agent # It records (in the CIB) various attributes of a node # # Copyright (c) 2017 Mathieu Grzybek # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_pidfile_default="$HA_RSCTMP/LXDInfo-${OCF_RESOURCE_INSTANCE}" +OCF_RESKEY_delay_default="0s" +OCF_RESKEY_clone_default="0" + +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_delay=${OCF_RESKEY_delay_default}} +: ${OCF_RESKEY_clone=${OCF_RESKEY_clone_default}} + ####################################################################### meta_data() { cat < 1.0 This is a LXD Registration Service Resource Agent. It records (in the CIB) attributes about the number of running LXD containers running on the node. Sample output: lxd_containers: 5 Records various node attributes in the CIB PID file PID file - + Interval to allow values to stabilize Dampening Delay - + END } ####################################################################### LXDInfoStats() { value=$(lxc list|grep -ci RUNNING) echo -e "lxd_containers:\t$value" ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n lxd_containers -v $value } LXDInfo_usage() { cat < $OCF_RESKEY_pidfile LXDInfoStats exit $OCF_SUCCESS } LXDInfo_stop() { rm -f $OCF_RESKEY_pidfile ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -S state -n lxd_containers exit $OCF_SUCCESS } LXDInfo_monitor() { if [ -f "$OCF_RESKEY_pidfile" ] ; then LXDInfoStats exit $OCF_RUNNING fi exit $OCF_NOT_RUNNING } LXDInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then LXDInfo_usage exit $OCF_ERR_ARGS fi -: ${OCF_RESKEY_pidfile:="$HA_RSCTMP/LXDInfo-${OCF_RESOURCE_INSTANCE}"} -: ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) LXDInfo_start ;; stop) LXDInfo_stop ;; monitor) LXDInfo_monitor ;; validate-all) LXDInfo_validate ;; usage|help) LXDInfo_usage exit $OCF_SUCCESS ;; *) LXDInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
diff --git a/heartbeat/machine-info.in b/heartbeat/machine-info.in index 0622b86a0..aa9bbd4c4 100644 --- a/heartbeat/machine-info.in +++ b/heartbeat/machine-info.in @@ -1,149 +1,157 @@ #!@BASH_SHELL@ # # # Virtual Machine and Container Registration Service OCF Resource Agent # It records (in the CIB) various attributes of a node # # Copyright (c) 2017 Mathieu Grzybek # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_pidfile_default="$HA_RSCTMP/MachineInfo-${OCF_RESOURCE_INSTANCE}" +OCF_RESKEY_delay_default="0s" +OCF_RESKEY_clone_default="0" + +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_delay=${OCF_RESKEY_delay_default}} +: ${OCF_RESKEY_clone=${OCF_RESKEY_clone_default}} + ####################################################################### meta_data() { cat < 1.0 This is a Virtual Machine and Container Registration Service Resource Agent. It records (in the CIB) attributes about the number of running virtual machines and containers running on the node. It uses systemd machinectl. Sample output: machines: 5 Records various node attributes in the CIB PID file PID file - + Interval to allow values to stabilize Dampening Delay - + END } ####################################################################### MachineInfoStats() { value=$(machinectl|awk '/machines listed/ {print $1}') echo -e "machines:\t$value" ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n machines -v $value } MachineInfo_usage() { cat < $OCF_RESKEY_pidfile MachineInfoStats exit $OCF_SUCCESS } MachineInfo_stop() { rm -f $OCF_RESKEY_pidfile ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -S state -n machines exit $OCF_SUCCESS } MachineInfo_monitor() { if [ -f "$OCF_RESKEY_pidfile" ] ; then MachineInfoStats exit $OCF_RUNNING fi exit $OCF_NOT_RUNNING } MachineInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then MachineInfo_usage exit $OCF_ERR_ARGS fi -: ${OCF_RESKEY_pidfile:="$HA_RSCTMP/MachineInfo-${OCF_RESOURCE_INSTANCE}"} -: ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) MachineInfo_start ;; stop) MachineInfo_stop ;; monitor) MachineInfo_monitor ;; validate-all) MachineInfo_validate ;; usage|help) MachineInfo_usage exit $OCF_SUCCESS ;; *) MachineInfo_usage 
exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/mysql-proxy b/heartbeat/mysql-proxy index 4cb0bc9d9..b79f31125 100755 --- a/heartbeat/mysql-proxy +++ b/heartbeat/mysql-proxy @@ -1,719 +1,741 @@ #!/bin/sh # # Resource script for MySQL Proxy # # Description: Manages MySQL Proxy as an OCF resource in # an high-availability setup. # # Tested with MySQL Proxy 0.8.1 and 0.8.3 on Debian 6.0. # # Based on the mysql and Pure-Ftpd OCF resource agents. # # Author: Raoul Bhatia : Original Author # License: GNU General Public License (GPL) # # # usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data} # # The "start" arg starts a MySQL Proxy instance # # The "stop" arg stops it. # # TODO # * add in-depth monitoring by querying the mysql-proxy admin port # # Test via # (note: this did not work with MySQL Proxy 0.8.1 and ocf-tester from resource-agents 3.9.2 on Debian 6.0) # # * /usr/sbin/ocf-tester -n mp -o binary="/usr/sbin/mysql-proxy" -o defaults_file="" -o parameters="--proxy-skip-profiling" \ # -o admin_address="127.0.0.1:4041" -o admin_username="root" -o admin_password="la" -o admin_lua_script="/usr/lib/mysql-proxy/lua/admin.lua" \ # -o proxy_backend_addresses="192.168.100.200:42006" -o proxy_address="/var/run/mysqld/mysqld.sock" /usr/lib/ocf/resource.d/heartbeat/mysql-proxy # # # OCF parameters: # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_defaults_file # OCF_RESKEY_proxy_backend_addresses # OCF_RESKEY_proxy_read_only_backend_addresses # OCF_RESKEY_proxy_address # OCF_RESKEY_log_level # OCF_RESKEY_keepalive # OCF_RESKEY_plugins # OCF_RESKEY_admin_address # OCF_RESKEY_admin_username # OCF_RESKEY_admin_password # OCF_RESKEY_admin_lua_script # OCF_RESKEY_test_table # OCF_RESKEY_test_user # OCF_RESKEY_test_passwd # OCF_RESKEY_parameters # OCF_RESKEY_pidfile # ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -: ${OCF_RESKEY_binary="/usr/sbin/mysql-proxy"} -: ${OCF_RESKEY_client_binary="mysql"} -: ${OCF_RESKEY_defaults_file=""} -: ${OCF_RESKEY_proxy_backend_addresses="127.0.0.1:3306"} -: ${OCF_RESKEY_proxy_read_only_backend_addresses=""} -: ${OCF_RESKEY_proxy_address=":4040"} -: ${OCF_RESKEY_log_level=""} -: ${OCF_RESKEY_keepalive=""} -: ${OCF_RESKEY_plugins=""} -: ${OCF_RESKEY_admin_address="127.0.0.1:4041"} -: ${OCF_RESKEY_admin_username=""} -: ${OCF_RESKEY_admin_password=""} -: ${OCF_RESKEY_admin_lua_script=""} -: ${OCF_RESKEY_test_table="mysql.user"} -: ${OCF_RESKEY_test_user=""} -: ${OCF_RESKEY_test_passwd=""} -: ${OCF_RESKEY_parameters=""} -: ${OCF_RESKEY_pidfile="${HA_RSCTMP}/mysql-proxy-${OCF_RESOURCE_INSTANCE}.pid"} +# Parameter defaults + +OCF_RESKEY_binary_default="/usr/sbin/mysql-proxy" +OCF_RESKEY_client_binary_default="mysql" +OCF_RESKEY_defaults_file_default="" +OCF_RESKEY_proxy_backend_addresses_default="127.0.0.1:3306" +OCF_RESKEY_proxy_read_only_backend_addresses_default="" +OCF_RESKEY_proxy_address_default=":4040" +OCF_RESKEY_log_level_default="" +OCF_RESKEY_keepalive_default="" +OCF_RESKEY_plugins_default="" +OCF_RESKEY_admin_address_default="127.0.0.1:4041" +OCF_RESKEY_admin_username_default="" +OCF_RESKEY_admin_password_default="" +OCF_RESKEY_admin_lua_script_default="" +OCF_RESKEY_test_table_default="mysql.user" +OCF_RESKEY_test_user_default="" +OCF_RESKEY_test_passwd_default="" +OCF_RESKEY_parameters_default="" +OCF_RESKEY_pidfile_default="${HA_RSCTMP}/mysql-proxy-${OCF_RESOURCE_INSTANCE}.pid" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}} +: ${OCF_RESKEY_defaults_file=${OCF_RESKEY_defaults_file_default}} +: ${OCF_RESKEY_proxy_backend_addresses=${OCF_RESKEY_proxy_backend_addresses_default}} +: ${OCF_RESKEY_proxy_read_only_backend_addresses=${OCF_RESKEY_proxy_read_only_backend_addresses_default}} +: 
${OCF_RESKEY_proxy_address=${OCF_RESKEY_proxy_address_default}} +: ${OCF_RESKEY_log_level=${OCF_RESKEY_log_level_default}} +: ${OCF_RESKEY_keepalive=${OCF_RESKEY_keepalive_default}} +: ${OCF_RESKEY_plugins=${OCF_RESKEY_plugins_default}} +: ${OCF_RESKEY_admin_address=${OCF_RESKEY_admin_address_default}} +: ${OCF_RESKEY_admin_username=${OCF_RESKEY_admin_username_default}} +: ${OCF_RESKEY_admin_password=${OCF_RESKEY_admin_password_default}} +: ${OCF_RESKEY_admin_lua_script=${OCF_RESKEY_admin_lua_script_default}} +: ${OCF_RESKEY_test_table=${OCF_RESKEY_test_table_default}} +: ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}} +: ${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}} +: ${OCF_RESKEY_parameters=${OCF_RESKEY_parameters_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} + USAGE="Usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data}" ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 0.1 This script manages MySQL Proxy as an OCF resource in a high-availability setup. The default monitor operation will verify that mysql-proxy is running. The level 10 monitor operation is left out intentionally for possible future enhancements in conjunction with the admin plugin. The level 20 monitor operation will perform a SELECT on a given table to verify that the connection to a back-end server is actually working. Tested with MySQL Proxy 0.8.1 and 0.8.3 on Debian 6.0. Manages a MySQL Proxy instance Full path to the MySQL Proxy binary. For example, "/usr/sbin/mysql-proxy". Full path to MySQL Proxy binary - + Location of the MySQL client binary. MySQL client binary - + Full path to a MySQL Proxy configuration file. For example, "/etc/mysql-proxy.conf". Full path to configuration file - + Address:port of the remote back-end servers (default: 127.0.0.1:3306). MySQL Proxy back-end servers - + Address:port of the remote (read only) slave-server (default: ). 
MySql Proxy read only back-end servers - + Listening address:port of the proxy server (default: :4040). You can also specify a socket like "/tmp/mysql-proxy.sock". MySQL Proxy listening address - + Log all messages of level (error|warning|info|message|debug|) or higher. An empty value disables logging. MySQL Proxy log level. - + Try to restart the proxy if it crashed (default: ). Valid values: true or false. An empty value equals "false". Use keepalive option - + Whitespace separated list of plugins to load (default: ). Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy plugins - + Listening address:port of the admin plugin (default: 127.0.0.1:4041). Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy admin plugin listening address - + Username for the admin plugin (default: ). Required since MySQL Proxy 0.8.1, if the admin plugin is loaded. Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy admin plugin username - + Password for the admin plugin (default: ). Required since MySQL Proxy 0.8.1, if the admin plugin is loaded. Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy admin plugin password - + Script to execute by the admin plugin. Required since MySQL Proxy 0.8.1, if the admin plugin is loaded. Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy admin plugin lua script - + Table to be tested in monitor statement (in database.table notation) MySQL test table - + MySQL test user MySQL test user - + MySQL test user password MySQL test user password - + The MySQL Proxy daemon may be called with additional parameters. Specify any of them here. 
MySQL Proxy additional parameters - + PID file PID file - + END } isRunning() { kill -s 0 "$1" 2>/dev/null } mysqlproxy_status() { local PID if [ -f "${pidfile}" ]; then # MySQL Proxy is probably running PID=`head -n 1 "${pidfile}"` if [ ! -z "$PID" ] ; then isRunning "$PID" return $? fi fi # MySQL Proxy is not running false } mysqlproxy_start() { local PARAM_PREFIX OPTIONS local p pa pba proba local pid_dir socket_dir # if MySQL Proxy is running return success if mysqlproxy_status ; then ocf_log info "MySQL Proxy already running." return $OCF_SUCCESS fi PARAM_PREFIX='' # MySQL Proxy plugins to load # @TODO check if the plugins are actually available? if ocf_is_true $plugin_support; then for p in $plugins; do PARAM_PREFIX="$PARAM_PREFIX --plugins=$p" done fi # check if the MySQL Proxy defaults-file exist if [ -f "$defaults_file" ]; then PARAM_PREFIX="$PARAM_PREFIX --defaults-file=$defaults_file" fi # set log-level if [ ! -z "$log_level" ]; then PARAM_PREFIX="$PARAM_PREFIX --log-level=$log_level" fi # set keepalive if [ "$keepalive" = "true" ]; then PARAM_PREFIX="$PARAM_PREFIX --keepalive" fi # honor admin_* options if [ ! -z "$admin_username" ]; then PARAM_PREFIX="$PARAM_PREFIX --admin-username=$admin_username" fi if [ ! -z "$admin_password" ]; then PARAM_PREFIX="$PARAM_PREFIX --admin-password=$admin_password" fi if [ ! -z "$admin_lua_script" ]; then PARAM_PREFIX="$PARAM_PREFIX --admin-lua-script=$admin_lua_script" fi # make sure that the pid directory exists pid_dir=`dirname $pidfile` if [ ! -d $pid_dir ] ; then ocf_log info "Creating PID directory '$pid_dir'." mkdir -p $pid_dir #chown $OCF_RESKEY_user:$OCF_RESKEY_group $pid_dir # c/p from mysql ra; currently not needed fi # split multiple proxy-address options. 
# currently unsupported but let us hope for the future ;) for pa in $proxy_address; do [ -z "$pa" ] && continue OPTIONS=" $OPTIONS --proxy-address=$pa" # if $pa contains a slash, we are dealing with a socket # make sure that the socket directory exists if echo "$pa" | grep -q '/' ; then socket_dir=`dirname $pa` if [ ! -d $socket_dir ] ; then ocf_log info "Creating socket directory '$socket_dir'." mkdir -p $socket_dir #chown $OCF_RESKEY_user:$OCF_RESKEY_group $socket_dir # c/p from mysql ra; currently not needed fi fi done # split multiple proxy-backend-addresses options. for pba in $proxy_backend_addresses; do [ -z "$pba" ] && continue OPTIONS=" $OPTIONS --proxy-backend-addresses=$pba" done # split multiple proxy-backend-addresses options. for proba in $proxy_read_only_backend_addresses; do [ -z "$proba" ] && continue OPTIONS=" $OPTIONS --proxy-read-only-backend-addresses=$proba" done # build $OPTIONS and add admin-address and pidfile OPTIONS="$PARAM_PREFIX $OPTIONS --admin-address=$admin_address --pid-file=${pidfile}" # add additional parameters if [ -n "$parameters" ]; then OPTIONS="$OPTIONS $parameters" fi # start MySQL Proxy #start-stop-daemon --start --quiet --pidfile $pidfile --make-pidfile --name mysql-proxy --startas $binary -b -- $OPTIONS $binary --daemon $OPTIONS ret=$? if [ $ret -ne 0 ]; then ocf_log err "MySQL Proxy returned error: " $ret return $OCF_ERR_GENERIC fi # @TODO add an initial monitoring action? return $OCF_SUCCESS } mysqlproxy_stop() { local ret local pa if mysqlproxy_status ; then #start-stop-daemon --stop --quiet --retry 3 --exec $binary --pidfile $pidfile /bin/kill `cat "${pidfile}"` ret=$? if [ $ret -ne 0 ]; then ocf_log err "MySQL Proxy returned an error while stopping: " $ret return $OCF_ERR_GENERIC fi # grant some time for shutdown and recheck sleep 1 if mysqlproxy_status ; then ocf_log err "MySQL Proxy failed to stop." 
return $OCF_ERR_GENERIC fi # remove dangling socketfile, if specified for pa in $proxy_address; do if [ -S "$pa" ]; then ocf_log info "Removing dangling socket file '$pa'." rm -f "$pa" fi done # remove dangling pidfile if [ -f "${pidfile}" ]; then ocf_log info "Removing dangling pidfile '${pidfile}'." rm -f "${pidfile}" fi fi return $OCF_SUCCESS } mysqlproxy_reload() { # @TODO check if pidfile is empty # PID=`head -n 1 "${pidfile}"` # if [ ! -z "$PID" ] ; then if mysqlproxy_status; then ocf_log info "Reloading MySQL Proxy." kill -s HUP `cat ${pidfile}` fi } mysqlproxy_monitor() { local rc if [ "${OCF_RESKEY_CRM_meta_interval:-0}" -eq "0" ]; then # in case of probe, monitor operation is surely treated as # under suspension. This will call start operation. # (c/p from ocf:heartbeat:sfex) mysqlproxy_validate_all rc=$? [ $rc -ne 0 ] && return $rc fi if ! mysqlproxy_status ; then return $OCF_NOT_RUNNING fi if [ $OCF_CHECK_LEVEL -eq 20 ]; then mysqlproxy_monitor_20 rc=$? [ $rc -ne 0 ] && return $rc fi return $OCF_SUCCESS } mysqlproxy_monitor_20() { local rc local mysql_options pa local mysql_server_parameter mysql_server_host mysql_server_port if [ -z "$OCF_RESKEY_test_table" -o -z "$OCF_RESKEY_test_user" -a -z "$OCF_RESKEY_test_passwd" ]; then ocf_log warn "Missing proper configuration for OCF_CHECK_LEVEL=20 (test_table=[$OCF_RESKEY_test_table] test_user=[$OCF_RESKEY_test_user] test_password=[$OCF_RESKEY_test_passwd]). Not running in-depth monitoring." 
return $OCF_SUCCESS fi mysql_options="--connect_timeout=10 --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd" # cycle each address for pa in $proxy_address; do # build correct connect parameter if [ -S "$pa" ]; then # we need to monitor a mysql socket mysql_server_parameter="--socket=$pa" else # we need to monitor a host address mysql_server_parameter="" # split host:port # @TODO correctly handle IPv6 address # @TODO correctly handle 0.0.0.0 address mysql_server_host=`echo $pa | cut -d : -f 1` mysql_server_port=`echo $pa | cut -d : -f 2` if [ -n "$mysql_server_host" ]; then mysql_server_parameter="$mysql_server_parameter --host=$mysql_server_host" fi if [ -n "$mysql_server_port" ]; then mysql_server_parameter="$mysql_server_parameter --port=$mysql_server_port" fi fi # Check for test table ocf_run $mysql $mysql_server_parameter $mysql_options \ -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table" rc=$? if [ $rc -ne 0 ]; then ocf_log err "Failed to select from $OCF_RESKEY_test_table: " $rc return $OCF_ERR_GENERIC fi done return $OCF_SUCCESS } mysqlproxy_validate_all() { # local variables local config_error=0 # check that the MySQL Proxy binary exists and can be executed check_binary $binary # check MySQL client binary only if in-depth monitoring is requested # do not break backwards compatibility otherwise if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary $mysql fi # check for valid log-level echo $log_level | egrep -q "^(error|warning|info|message|debug|)$" if [ $? -ne 0 ]; then ocf_log err "MySQL Proxy log level '$log_level' not in valid range error|warning|info|message|debug" return $OCF_ERR_CONFIGURED fi # if we're running MySQL Proxy > 0.8.1 and there is any admin parameter set, # explicitly load the admin (and the proxy) plugin. # (version 0.8.2 does not load the admin plugin by default anymore) ocf_version_cmp "$version" "0.8.1" ret=$? 
if [ $ret -eq 2 ]; then # simple check: concat all parameters and check if the string has non-zero length if [ -n "$admin_username$admin_password$admin_lua_script$admin_address" ]; then plugins="proxy admin" has_plugin_admin=1 else has_plugin_admin=0 fi fi # check for required admin_* parameters for 0.8.1 and 0.8.2 (with admin module) # translated: if (version == 0.8.1 or (version > 0.8.1 and has_plugin_admin)) if [ $ret -eq 1 -o \( $ret -eq 2 -a $has_plugin_admin -eq 1 \) ]; then if [ -z "$admin_username" ]; then ocf_log err "Missing required parameter \"admin_username\"" config_error=1 fi if [ -z "$admin_password" ]; then ocf_log err "Missing required parameter \"admin_password\"" config_error=1 fi if [ -z "$admin_lua_script" ]; then ocf_log err "Missing required parameter \"admin_lua_script\"" config_error=1 fi # check if the admin_lua_script, if specified, exists if [ -n "$admin_lua_script" -a ! -e "$admin_lua_script" ]; then ocf_log err "MySQL Proxy admin lua script '$admin_lua_script' does not exist or is not readable." fi fi # issue a warning during start if the user wants to load a plugin # but this version of MySQL Proxy does not support the plugin architecture. if [ -n "$plugins" ] && ocf_is_false "$plugin_support" && [ $__OCF_ACTION = 'start' ]; then ocf_log warn "You are running MySQL Proxy version '$version'. This version does not support the plugin architecture. Please use version 0.7.0 or later to load the plugins '$plugins'." 
fi # exit in case we have found relevant config errors if [ $config_error -eq 1 ]; then exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } # # Main # if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi pidfile=$OCF_RESKEY_pidfile binary=$OCF_RESKEY_binary defaults_file=$OCF_RESKEY_defaults_file proxy_backend_addresses=$OCF_RESKEY_proxy_backend_addresses proxy_read_only_backend_addresses=$OCF_RESKEY_proxy_read_only_backend_addresses admin_address=$OCF_RESKEY_admin_address admin_username=$OCF_RESKEY_admin_username admin_password=$OCF_RESKEY_admin_password admin_lua_script=$OCF_RESKEY_admin_lua_script proxy_address=$OCF_RESKEY_proxy_address log_level=$OCF_RESKEY_log_level keepalive=$OCF_RESKEY_keepalive plugins=`echo $OCF_RESKEY_plugins | tr "[:space:]" "\n" | sort -u` mysql=$OCF_RESKEY_client_binary parameters=$OCF_RESKEY_parameters plugin_support=false has_plugin_admin=0 # 0 because this simplifies the if statements # debugging stuff #echo OCF_RESKEY_binary=$OCF_RESKEY_binary >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_defaults_file=$OCF_RESKEY_defaults_file >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_proxy_backend_addresses=$OCF_RESKEY_proxy_backend_addresses >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_proxy_read_only_backend_addresses=$OCF_RESKEY_proxy_read_only_backend_addresses >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_proxy_address=$OCF_RESKEY_proxy_address >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_log_level=$OCF_RESKEY_log_level >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_keepalive=$OCF_RESKEY_keepalive >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_admin_address=$OCF_RESKEY_admin_address >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_admin_username=$OCF_RESKEY_admin_username >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_admin_password=$OCF_RESKEY_admin_password >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo 
OCF_RESKEY_admin_lua_script=$OCF_RESKEY_admin_lua_script >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_parameters=$OCF_RESKEY_parameters >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_pidfile=$OCF_RESKEY_pidfile >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE # handle some parameters before performing any additional checks case $1 in meta-data) meta_data exit $? ;; usage) usage exit $OCF_SUCCESS ;; esac # determine MySQL Proxy version check_binary $binary version=`$binary --version | grep ^mysql-proxy | awk '{print $NF}'` # version 0.7.0 (and later) support the plugin architecture and load the admin plugin by default # version 0.8.1 loads admin plugin by default and requires the admin parameters to be set # version 0.8.2 does not load the admin plugin by default anymore ocf_version_cmp "$version" "0.7.0" ret=$? if [ $ret -eq 1 -o $ret -eq 2 ]; then plugin_support=true has_plugin_admin=1 fi # perform action case $1 in start) mysqlproxy_validate_all && mysqlproxy_start exit $? ;; stop) mysqlproxy_validate_all && mysqlproxy_stop exit $? ;; reload) mysqlproxy_reload exit $? ;; status) if mysqlproxy_status; then ocf_log info "MySQL Proxy is running." exit $OCF_SUCCESS else ocf_log info "MySQL Proxy is stopped." exit $OCF_NOT_RUNNING fi ;; monitor) mysqlproxy_monitor exit $? ;; validate-all) mysqlproxy_validate_all exit $? ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/nfsnotify.in b/heartbeat/nfsnotify.in index 6e3a8de35..7f710bca7 100644 --- a/heartbeat/nfsnotify.in +++ b/heartbeat/nfsnotify.in @@ -1,315 +1,323 @@ #!@BASH_SHELL@ # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/ocf-directories +# Parameter defaults + +OCF_RESKEY_source_host_default="" +OCF_RESKEY_notify_args_default="false" + +: ${OCF_RESKEY_source_host=${OCF_RESKEY_source_host_default}} +: ${OCF_RESKEY_notify_args=${OCF_RESKEY_notify_args_default}} + ####################################################################### sbindir=$HA_SBIN_DIR if [ -z "$sbindir" ]; then sbindir=/usr/sbin fi SELINUX_ENABLED=-1 NFSNOTIFY_TMP_DIR="${HA_RSCTMP}/nfsnotify_${OCF_RESOURCE_INSTANCE}/" HA_STATD_PIDFILE="$NFSNOTIFY_TMP_DIR/rpc.statd_${OCF_RESOURCE_INSTANCE}.pid" HA_STATD_PIDFILE_PREV="$NFSNOTIFY_TMP_DIR/rpc.statd_${OCF_RESOURCE_INSTANCE}.pid.prev" STATD_PATH="/var/lib/nfs/statd" SM_NOTIFY_BINARY="${sbindir}/sm-notify" IS_RENOTIFY=0 meta_data() { cat < 1.0 This agent sends NFSv3 reboot notifications to clients which informs clients to reclaim locks. sm-notify reboot notifications Comma separated list of floating IP addresses or host names that clients use to access the nfs service. 
This will be used to set the source address and mon_name of the SN_NOTIFY reboot notifications. source IP addresses - + Additional arguments to send to the sm-notify command. By default this agent will always set sm-notify's '-f' option. When the source_host option is set, the '-v' option will be used automatically to set the proper source address. Any additional sm-notify arguments set with this option will be used in addition to the previous default arguments. sm-notify arguments - + END } v3notify_usage() { cat < /dev/null 2>&1 if [ $? -eq 0 ]; then # it is useful to know if sm-notify processes were actually left around # or not during the stop/start operation. Whether this condition is true # or false does not indicate a failure. It does indicate that # there are probably some unresponsive nfs clients out there that are keeping # the sm-notify processes retrying. ocf_log info "previous sm-notify processes terminated before $__OCF_ACTION action." fi } v3notify_stop() { killall_smnotify rm -f $HA_STATD_PIDFILE_PREV > /dev/null 2>&1 mv $HA_STATD_PIDFILE $HA_STATD_PIDFILE_PREV > /dev/null 2>&1 return $OCF_SUCCESS } check_statd_pidfile() { local binary="rpc.statd" local pidfile="$HA_STATD_PIDFILE" ocf_log debug "Checking status for ${binary}." if [ -e "$pidfile" ]; then cat /proc/$(cat $pidfile)/cmdline 2>/dev/null | grep -a "${binary}" > /dev/null 2>&1 if [ $? -eq 0 ]; then return $OCF_SUCCESS fi ocf_exit_reason "$(cat $pidfile) for $binary is no longer running, sm-notify needs to re-notify clients" return $OCF_ERR_GENERIC fi # if we don't have a pid file for rpc.statd, we have not yet sent the notifications return $OCF_NOT_RUNNING } write_statd_pid() { local binary="rpc.statd" local pidfile="$HA_STATD_PIDFILE" local pid pid=$(pgrep ${binary}) case $? in 0) ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}." 
mkdir -p $(dirname $pidfile) echo "$pid" > $pidfile return $OCF_SUCCESS;; 1) rm -f "$pidfile" > /dev/null 2>&1 ocf_log info "$binary is not running" return $OCF_NOT_RUNNING;; *) rm -f "$pidfile" > /dev/null 2>&1 ocf_exit_reason "Error encountered detecting pid status of $binary" return $OCF_ERR_GENERIC;; esac } copy_statd() { local src=$1 local dest=$2 if ! [ -d "$dest" ]; then mkdir -p "$dest" fi cp -rpfn $src/sm $src/sm.bak $src/state $dest > /dev/null 2>&1 # make sure folder ownership and selinux lables stay consistent [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$dest" [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$dest" } v3notify_start() { local rc=$OCF_SUCCESS local cur_statd local statd_backup local is_renotify=0 # monitor, see if we need to notify or not v3notify_monitor if [ $? -eq 0 ]; then return $OCF_SUCCESS fi # kill off any other sm-notify processes that might already be running. killall_smnotify # record the pid of rpc.statd. if this pid ever changes, we have to re-notify write_statd_pid rc=$? if [ $rc -ne 0 ]; then return $rc fi # if the last time we ran nfs-notify, it was with the same statd process, # consider this a re-notification. During re-notifications we do not let the # sm-notify binary have access to the real statd directory. if [ "$(cat $HA_STATD_PIDFILE)" = "$(cat $HA_STATD_PIDFILE_PREV 2>/dev/null)" ]; then ocf_log info "Renotifying clients" is_renotify=1 fi statd_backup="$STATD_PATH/nfsnotify.bu" copy_statd "$STATD_PATH" "$statd_backup" if [ -z "$OCF_RESKEY_source_host" ]; then if [ "$is_renotify" -eq 0 ]; then cur_statd="$STATD_PATH" else cur_statd="$statd_backup" fi ocf_log info "sending notifications on default source address." $SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -P $cur_statd if [ $? 
-ne 0 ]; then ocf_exit_reason "sm-notify execution failed, view syslog for more information" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS fi # do sm-notify for each ip for ip in `echo ${OCF_RESKEY_source_host} | sed 's/,/ /g'`; do # have the first sm-notify use the actual statd directory so the # notify list can be managed properly. if [ "$is_renotify" -eq 0 ]; then cur_statd="$STATD_PATH" # everything after the first notify we are considering a renotification # which means we don't use the real statd directory. is_renotify=1 else # use our copied statd directory for the remaining ip addresses cur_statd="$STATD_PATH/nfsnotify_${OCF_RESOURCE_INSTANCE}_${ip}" copy_statd "$statd_backup" "$cur_statd" fi ocf_log info "sending notifications with source address $ip" $SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -v $ip -P "$cur_statd" if [ $? -ne 0 ]; then ocf_exit_reason "sm-notify with source host set to [ $ip ] failed. view syslog for more information" return $OCF_ERR_GENERIC fi done return $OCF_SUCCESS } v3notify_monitor() { # verify rpc.statd is up, and that the rpc.statd pid is the same one we # found during the start. otherwise rpc.statd recovered and we need to notify # again. check_statd_pidfile } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) v3notify_usage exit $OCF_SUCCESS;; *) ;; esac which restorecon > /dev/null 2>&1 && selinuxenabled SELINUX_ENABLED=$? if [ $SELINUX_ENABLED -eq 0 ]; then export SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')" fi case $__OCF_ACTION in start) v3notify_start;; stop) v3notify_stop;; monitor) v3notify_monitor;; validate-all) v3notify_validate;; *) v3notify_usage exit $OCF_ERR_UNIMPLEMENTED;; esac rc=$? 
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/nginx b/heartbeat/nginx index 020c79744..9a778c7f5 100755 --- a/heartbeat/nginx +++ b/heartbeat/nginx @@ -1,947 +1,956 @@ #!/bin/sh # # High-Availability nginx OCF resource agent # # nginx # # Description: starts/stops nginx servers. # # Author: Alan Robertson # Dejan Muhamedagic # This code is based significantly on the apache resource agent # # Support: users@clusterlabs.org # # License: GNU General Public License (GPL) # # Copyright: (C) 2002-2010 International Business Machines # # # Our parsing of the nginx config files is very rudimentary. # It'll work with lots of different configurations - but not every # possible configuration. # # Patches are being accepted ;-) # # OCF parameters: # OCF_RESKEY_configfile # OCF_RESKEY_nginx # OCF_RESKEY_port # OCF_RESKEY_options # OCF_RESKEY_status10regex # OCF_RESKEY_status10url # OCF_RESKEY_client # OCF_RESKEY_test20url # OCF_RESKEY_test20regex # OCF_RESKEY_test20conffile # OCF_RESKEY_test20name # OCF_RESKEY_external_monitor30_cmd # # # TO DO: # More extensive tests of extended monitor actions # Look at the --with-http_stub_status_module for validating # the configuration? (or is that automatically done?) # Checking could certainly result in better error # messages. # Allow for the fact that the config file and so on might all be # on shared disks - this affects the validate-all option. : ${OCF_FUNCTIONS_DIR=$OCF_ROOT/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_httpd_default="/usr/sbin/httpd" +OCF_RESKEY_status10regex_default="Reading: [0-9]+ Writing: [0-9]+ Waiting: [0-9]+" + +: ${OCF_RESKEY_httpd=${OCF_RESKEY_httpd_default}} +: ${OCF_RESKEY_status10regex=${OCF_RESKEY_status10regex_default}} + HA_VARRUNDIR=${HA_VARRUN} ####################################################################### # # Configuration options - usually you don't need to change these # ####################################################################### # NGINXDLIST="/usr/sbin/nginx /usr/local/sbin/nginx" # default options for http clients # NB: We _always_ test a local resource, so it should be # safe to connect from the local interface. WGETOPTS="-O- -q -L --no-proxy --bind-address=127.0.0.1" CURLOPTS="-o - -Ss -L --interface lo" LOCALHOST="http://localhost" NGINXDOPTS="" # # # End of Configuration options ####################################################################### CMD=`basename $0` # The config-file-pathname is the pathname to the configuration # file for this web server. Various appropriate defaults are # assumed if no config file is specified. usage() { cat <<-EOF usage: $0 action action: start start nginx stop stop nginx reload reload the nginx configuration status return the status of web server, running or stopped monitor return TRUE if the web server appears to be working. For this to be supported you must configure mod_status and give it a server-status URL - or configure what URL you wish to be monitored. You have to have installed either curl or wget for this to work. 
meta-data show meta data message validate-all validate the instance parameters EOF exit $1 } # # run the http client # curl_func() { cl_opts="$CURLOPTS $test_httpclient_opts" if [ x != "x$test_user" ] then echo "-u $test_user:$test_password" | curl -K - $cl_opts "$1" else curl $cl_opts "$1" fi } wget_func() { auth="" cl_opts="$WGETOPTS $test_httpclient_opts" [ x != "x$test_user" ] && auth="--http-user=$test_user --http-passwd=$test_password" wget $auth $cl_opts "$1" } # # rely on whatever the user provided userdefined() { $test_httpclient $test_httpclient_opts "$1" } # # find a good http client # findhttpclient() { # prefer curl if present... if [ "x$CLIENT" != x ] then echo "$CLIENT" elif which curl >/dev/null 2>&1 then echo "curl" elif which wget >/dev/null 2>&1 then echo "wget" else return 1 fi } gethttpclient() { [ -z "$test_httpclient" ] && test_httpclient=$ourhttpclient case "$test_httpclient" in curl|wget) echo ${test_httpclient}_func;; #these are supported *) echo userdefined;; esac } # test configuration good? 
is_testconf_sane() {
	# Sanity-check the level-20 test definition held in the shell variables
	# test_regex, test_url, test_user and test_password.
	# Returns 0 when the definition is usable, 1 (after logging) otherwise.

	# Both the URL to fetch and the regex to match against are mandatory.
	if [ "x$test_regex" = x -o "x$test_url" = x ]
	then
		ocf_log err "test regular expression or test url empty"
		return 1
	fi
	# Credentials are optional, but user and password must be given together.
	if [ "x$test_user$test_password" != x -a \( "x$test_user" = x -o "x$test_password" = x \) ]
	then
		ocf_log err "bad user authentication for extended test"
		return 1
	fi
	return 0
}
#
# read the test definition from the config
#
readtestconf() {
	# Reads "key value" lines from stdin and fills the test_* shell variables
	# from the first "test <name>" section whose name equals $1 (or from the
	# first section at all when $1 is empty). Parsing stops at that
	# section's "end" keyword. Returns 1 on an unknown keyword inside the
	# selected section.
	test_name="$1" # we look for this one or the first one if empty
	lcnt=0
	readdef=""
	test_url=""
	test_regex=""
	test_user=""
	test_password=""
	test_httpclient=""
	test_httpclient_opts=""

	while read key value
	do
		lcnt=$((lcnt+1))
		if [ "$readdef" ]
		then
			# inside the selected section: collect its settings
			case "$key" in
				"url") test_url="$value" ;;
				"user") test_user="$value" ;;
				"password") test_password="$value" ;;
				"client") test_httpclient="$value" ;;
				"client_opts") test_httpclient_opts="$value" ;;
				"match") test_regex="$value" ;;
				"end") break ;;
				"#"*|"") ;;
				*) ocf_log err "$lcnt: $key: unknown keyword"; return 1 ;;
			esac
		else
			# still looking for the start of the wanted section
			[ "$key" = "test" ] &&
			[ -z "$test_name" -o "$test_name" = "$value" ] &&
				readdef=1
		fi
	done
}

nginxcat() {
	# Print the nginx configuration file $1 with every "include" directive
	# replaced by the contents of the files it names (directories are walked
	# with find, anything else is expanded with ls so wildcards work).
	# Relative include paths are resolved against the most recent "root"
	# value seen. Comments, leading/trailing blanks and empty lines are
	# removed from the result.
	#
	# printfile() must read from the included file ("getline < infile") and
	# loop only while a record was actually read ("> 0"); without the
	# redirection and comparison the awk program does not even parse.
	awk '
	function procline() {
		split($0,a);
		if( a[1]~/^[Ii]nclude$/ ) {
			procinclude(a[2]);
		} else {
			if( a[1]=="root" ) {
				rootdir=a[2];
				gsub("\"","",rootdir);
			}
			print;
		}
	}
	function printfile(infile, a) {
		while( (getline < infile) > 0 ) {
			procline();
		}
		close(infile);
	}
	function allfiles(dir, cmd,f) {
		cmd="find -L "dir" -type f";
		while( ( cmd | getline f ) > 0 ) {
			printfile(f);
		}
		close(cmd);
	}
	function listfiles(pattern, cmd,f) {
		cmd="ls "pattern" 2>/dev/null";
		while( ( cmd | getline f ) > 0 ) {
			printfile(f);
		}
		close(cmd);
	}
	function procinclude(spec) {
		if( rootdir!="" && spec!~/^\// ) {
			spec=rootdir"/"spec;
		}
		if( isdir(spec) ) {
			allfiles(spec); # read all files in a directory (and subdirs)
		} else {
			listfiles(spec); # there could be jokers
		}
	}
	function isdir(s) {
		return !system("test -d \""s"\"");
	}
	{ procline(); }
	' $1 |
	sed 's/#.*//;s/[[:blank:]]*$//;s/^[[:blank:]]*//' |
	grep -v '^$'
}
#
# set parameters (as shell vars) from our nginx config
file # get_nginx_params() { configfile=$1 shift 1 vars=`echo "$@" | sed 's/ /,/g'` eval ` nginxcat $configfile | awk -v vars="$vars" ' BEGIN{ split(vars,v,","); for( i in v ) vl[i]=tolower(v[i]); } { for( i in v ) if( tolower($1)==vl[i] ) { print v[i]"="$2 delete vl[i] break } } '` } # # Return the location(s) that are handled by the given handler # FindLocationForHandler() { PerlScript='while (<>) { /^\s*location\s+([^ \s{]+)\s*{/i && ($loc=$1); /^\s*stub_status\s+on\s*;$2/i && print "$loc\n"; }' nginxcat $1 | perl -e "$PerlScript" } # # Check if the port is valid # CheckPort() { lclport="$1" case "$lclport" in *:[0-9]*) lclport=`echo "$lclport" | sed 's%^[^:][^:]*:%%'` esac ocf_is_decimal "$lclport" && [ $lclport -gt 0 -a $lclport -lt 65537 ] } buildlocalurl() { [ "x$listen" != "x" ] && echo "http://${listen}" || echo "${LOCALHOST}:${PORT}" } # # Get all the parameters we need from the Nginx config file # GetParams() { ConfigFile=$1 DEFAULT_PID=`echo "$NGINX_CONFIGURATION" | sed -e 's%.*--pid-path=%%' -e 's% *--.*%%'` if [ ! -f $ConfigFile ] then return 1 fi get_nginx_params $ConfigFile root pid listen PidFile="$pid" case $PidFile in "") PidFile=$DEFAULT_PID ;; *) ;; esac for p in "$PORT" "$listen" 80 do if CheckPort "$p" then PORT="$p" break fi done echo $listen | grep ':' >/dev/null || # Listen could be just port spec listen="localhost:$listen" # # It's difficult to figure out whether the server supports # the status operation. # (we start our server with -DSTATUS - just in case :-)) # # Typically (but not necessarily) the status URL is /nginx_status # # For us to think status will work, we have to have the following things: # # - The server-status handler has to be mapped to some URL somewhere # # We assume that: # # - the "main" web server at $PORT will also support it if we can find it # somewhere in the file # - it will be supported at the same URL as the one we find in the file # # If this doesn't work for you, then set the status10url attribute. 
# if [ "X$STATUSURL" = "X" ] then StatusURL=`FindLocationForHandler $1 nginx_status | tail -1` STATUSURL="`buildlocalurl`$StatusURL" fi test ! -z "$PidFile" } # # return TRUE if a process with given PID is running # ProcessRunning() { NginxPID=$1 # Use /proc if it looks like it's here... if [ -d /proc -a -d /proc/1 ] then [ -d /proc/$NginxPID ] else # This assumes we're running as root... kill -0 "$NginxPID" >/dev/null 2>&1 fi } silent_status() { if [ -f $PidFile -a -s $PidFile ] && ocf_is_decimal "`cat $PidFile`" then ProcessRunning `cat $PidFile` else : No pid file false fi } start_nginx() { if silent_status then ocf_log info "$CMD already running (pid $NginxPID)" return $OCF_SUCCESS fi if ocf_run $NGINXD $OPTIONS -t -c $CONFIGFILE then : Configuration file $CONFIGFILE looks OK else return $OCF_ERR_INSTALLED fi NGINX_VERSION=`$NGINXD -v 2>&1` ocf_log info "Starting $NGINXD - $NGINX_VERSION" ocf_log info "$NGINXD build configuration: $NGINX_CONFIGURATION" if ocf_run $NGINXD $NGINXDOPTS $OPTIONS -c $CONFIGFILE then : $NGINXD started without errors! else return $OCF_ERR_GENERIC fi tries=0 # This looks like a potential infinite loop - but it's not in practice # The LRM will time us out and kill us if nginx never starts working. while monitor_nginx ec=$? if [ $ec -eq $OCF_NOT_RUNNING ] then tries=`expr $tries + 1` ocf_log info "Waiting for $NGINXD $OPTIONS -c $CONFIGFILE to come up (try $tries)" true else false fi do sleep 1 done return $ec } stop_nginx() { if silent_status then if kill $NginxPID then tries=0 while ProcessRunning $NginxPID && [ $tries -lt 10 ] do sleep 1 kill $NginxPID >/dev/null ocf_log info "Killing nginx PID $NginxPID" tries=`expr $tries + 1` done else ocf_log warn "Killing nginx PID $NginxPID FAILED." fi if ProcessRunning $NginxPID then ocf_log info "$CMD still running ($NginxPID)." false else ocf_log info "$CMD stopped." fi else ocf_log info "$CMD is not running." 
fi # # I'm not convinced this is a wonderful idea (AlanR) # for sig in SIGTERM SIGHUP SIGKILL do if pgrep -f "$NGINXD.*$CONFIGFILE" >/dev/null then pkill -$sig -f $NGINXD.*$CONFIGFILE >/dev/null ocf_log info "nginxd children were signalled ($sig)" sleep 1 else break fi done } reload_nginx() { if silent_status then if kill -1 $NginxPID then : $NGINX reload signal to $NginxPID succeeded return $OCF_SUCCESS fi return $OCF_ERR_GENERIC fi start_nginx } status_nginx() { silent_status rc=$? if [ $rc -eq 0 ] then ocf_log info "$CMD is running (pid $NginxPID)." return $OCF_SUCCESS else ocf_log info "$CMD is stopped." return $OCF_NOT_RUNNING fi } fixtesturl() { echo $test_url | grep -qs "^http" && return test_url="`buildlocalurl`$test_url" } monitor_nginx_external() { if [ -z "$EXTMONITOR" ] then ocf_log err "$External level 30 Monitor Command not configured." return $OCF_ERR_CONFIGURED fi extbase=`echo $EXTMONITOR | sed 's% .*%%'` if case "$extbase" in /*) test -f "$extbase" -a -x "$extbase";; *) which "$extbase" >/dev/null 2>&1 esac then : OK - $extbase seems to be there... else ocf_log err "$External monitor command [$extbase] is not installed." 
return $OCF_ERR_CONFIGURED fi if $extbase then : OK - $extbase succeeded else ocf_log err "$extbase reported failure [rc=$?]" return $OCF_NOT_RUNNING fi return $OCF_SUCCESS } monitor_nginx_extended() { if [ -f "$TESTCONFFILE" -a -r "$TESTCONFFILE" ] then readtestconf < $TESTCONFFILE else test_url="$TESTURL" test_regex="$TESTREGEX20" fi whattorun=`gethttpclient` fixtesturl is_testconf_sane || return $OCF_ERR_CONFIGURED $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null } monitor_nginx_basic() { if [ -z "$STATUSURL" ] then ocf_log err "status10url parameter empty" return $OCF_ERR_CONFIGURED elif [ -z "$ourhttpclient" ] then ocf_log err "could not find a http client; make sure that either wget or curl is available" return $OCF_ERR_CONFIGURED fi ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null } monitor_nginx() { silent_status if [ $? -ne 0 ] then ocf_log info "$CMD not running" return $OCF_NOT_RUNNING fi if [ -z "$OCF_CHECK_LEVEL" ] || [ "$OCF_CHECK_LEVEL" -lt 10 ] then return 0 fi ourhttpclient=`findhttpclient` # we'll need one if [ "$OCF_CHECK_LEVEL" -lt 20 ] then monitor_nginx_basic elif [ "$OCF_CHECK_LEVEL" -lt 30 ] then monitor_nginx_extended else monitor_nginx_external fi } metadata_nginx(){ cat < 1.0 This is the resource agent for the Nginx web/proxy server. This resource agent does not monitor POP or IMAP servers, as we don't know how to determine meaningful status for them. The start operation ends with a loop in which monitor is repeatedly called to make sure that the server started and that it is operational. Hence, if the monitor operation does not succeed within the start operation timeout, the nginx resource will end with an error status. The default monitor operation will verify that nginx is running. The level 10 monitor operation by default will try and fetch the /nginx_status page - which is commented out in sample nginx configurations. 
Make sure that the /nginx_status page works and that the access is restricted to localhost (address 127.0.0.1) plus whatever places _outside the cluster_ you want to monitor the server from. See the status10url and status10regex attributes for more details. The level 20 monitor operation will perform a more complex set of tests from a configuration file. The level 30 monitor operation will run an external command to perform an arbitrary monitoring operation. Manages an Nginx web/proxy server instance The full pathname of the Nginx configuration file. This file is parsed to provide defaults for various other resource agent parameters. configuration file path The full pathname of the httpd binary (optional). httpd binary path - + A port number that we can probe for status information using the statusurl. This will default to the port number found in the configuration file, or 80, if none can be found in the configuration file. httpd port The URL to monitor (the nginx server status page by default) when given a level 10 monitor operation. If left unspecified, it will be inferred from the nginx configuration file, or defaulted to /nginx_status. If you set this, make sure that it succeeds *only* from the localhost (127.0.0.1) and no other cluster nodes. Otherwise, the cluster software may complain about it being active on multiple nodes. url name Regular expression to match in the output of status10url. Case insensitive. monitor regular expression - + Client to use to query to Nginx for level 10 and level 20 tests. If not specified, the RA will try to find one on the system. Currently, wget and curl are supported, with curl being preferred. For example, you can set this parameter to "wget" if you prefer that to curl. http client URL to test. If it does not start with "http", then it's considered to be relative to the document root address. Level 20 monitor url Regular expression to match in the output of test20url. Case insensitive. 
Level 20 monitor regular expression A file which contains a more complex test configuration. Could be useful if you have to check more than one web application or in case sensitive info should be passed as arguments (passwords). Furthermore, using a config file is the only way to specify certain parameters. Please see README.webapps for examples and file description. Level 20 test configuration file Name of the test within the test configuration file. Level 20 test name Command string to run which implements level 30 monitoring. Level 30 test string Extra options to apply when starting nginx. nginx start options END exit $OCF_SUCCESS } validate_all_nginx() { if CheckPort $PORT # We are sure to succeed here, since we forced $PORT to be valid in GetParams() then : OK else ocf_log err "Port number $PORT is invalid!" exit $OCF_ERR_ARGS fi if [ -z $STATUSURL ] then : OK to be empty else case $STATUSURL in http://*/*) ;; *) ocf_log err "Invalid STATUSURL $STATUSURL" exit $OCF_ERR_ARGS ;; esac fi if [ ! -x $NGINXD ] then ocf_log err "NGINXD $NGINXD not found or is not an executable!" exit $OCF_ERR_ARGS fi if [ ! -f $CONFIGFILE ] then # We are sure to succeed here, since we have parsed $CONFIGFILE before getting here ocf_log err "Configuration file $CONFIGFILE not found!" exit $OCF_ERR_CONFIGURED fi if ocf_run $NGINXD $OPTIONS -t -c $CONFIGFILE then : Cool $NGINXD likes $CONFIGFILE else ocf_log err "$NGINXD $OPTIONS -t -c $CONFIGFILE reported a configuration error." 
return $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } if [ $# -eq 1 ] then COMMAND=$1 NGINXD="$OCF_RESKEY_httpd" PORT="$OCF_RESKEY_port" STATUSURL="$OCF_RESKEY_status10url" CONFIGFILE="$OCF_RESKEY_configfile" OPTIONS="$OCF_RESKEY_options" CLIENT=${OCF_RESKEY_client} - TESTREGEX=${OCF_RESKEY_status10regex:-'Reading: [0-9]+ Writing: [0-9]+ Waiting: [0-9]+'} + TESTREGEX="${OCF_RESKEY_status10regex}" TESTURL="$OCF_RESKEY_test20url" TESTREGEX20=${OCF_RESKEY_test20regex} TESTCONFFILE="$OCF_RESKEY_test20conffile" TESTNAME="$OCF_RESKEY_test20name" EXTMONITOR="$OCF_RESKEY_external_monitor30_cmd" else usage $OCF_ERR_ARGS fi LSB_STATUS_STOPPED=3 if [ "X$NGINXD" = X -o ! -f "$NGINXD" -o ! -x "$NGINXD" ] then NGINXD= for h in $NGINXDLIST do if [ -f "$h" -a -x "$h" ] then NGINXD="$h" break fi done # It is possible that we still do not have a valid httpd at this stage if [ -z "$NGINXD" ] then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; meta-data) metadata_nginx;; esac ocf_log err "nginx binary not found! Please verify you've installed it" exit $OCF_ERR_INSTALLED fi # Let the user know that the $NGINXD used is the one (s)he specified via $OCF_RESKEY_httpd if [ ! -z "$OCF_RESKEY_httpd" ] then ocf_log info "Using $NGINXD as nginx" fi fi httpd_basename=`basename $NGINXD` case $httpd_basename in *-*) httpd_basename=`echo "$httpd_basename" | sed -e 's%\-.*%%'`;; esac NGINX_CONFIGURATION=`$NGINXD -V 2>&1 |grep 'configure arguments:'` DEFAULT_CONFIG=`echo "$NGINX_CONFIGURATION" | sed -e 's%.*--conf-path=%%' -e 's% *--.*%%'` case "$CONFIGFILE" in "") CONFIGFILE=$DEFAULT_CONFIG;; *) ;; esac if [ ! 
-f "$CONFIGFILE" ] then case $COMMAND in stop) ocf_log warn "$CONFIGFILE not found - nginx considered stopped" exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac fi if [ "X$COMMAND" = Xmeta-data ] || GetParams $CONFIGFILE then : OK else ocf_log err "Cannot parse config file [$CONFIGFILE]" exit $OCF_ERR_CONFIGURED fi case $COMMAND in start) start_nginx;; stop) stop_nginx;; reload) reload_nginx;; status) status_nginx;; monitor) monitor_nginx;; meta-data) metadata_nginx;; validate-all) validate_all_nginx;; *) usage $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/openstack-info b/heartbeat/openstack-info index 209039928..c88c338dd 100755 --- a/heartbeat/openstack-info +++ b/heartbeat/openstack-info @@ -1,260 +1,265 @@ #!/bin/sh # # # OCF resource agent to set attributes from Openstack instance details. # It records (in the CIB) various attributes of a node # # Copyright (c) 2018 Mathieu Grzybek # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults +OCF_RESKEY_pidfile_default="$HA_RSCTMP/OSInfo-${OCF_RESOURCE_HOSTNAME}" +OCF_RESKEY_delay_default="0" +OCF_RESKEY_clone_default="0" OCF_RESKEY_openstackcli_default="/usr/bin/openstack" : ${OCF_RESKEY_openstackcli=${OCF_RESKEY_openstackcli_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_delay=${OCF_RESKEY_delay_default}} +: ${OCF_RESKEY_clone=${OCF_RESKEY_clone_default}} + ####################################################################### meta_data() { cat < 1.0 OCF resource agent to set attributes from Openstack instance details. It records (in the CIB) various attributes of a node. Sample output: openstack_az : nova openstack_flavor : c1.small openstack_id : 60ac4343-5828-49b1-8aac-7c69b1417f31 openstack_ports : 7960d889-9750-4160-bf41-c69a41ad72d9:96530d18-57a3-4718-af32-30f2a74c22a2,b0e55a06-bd75-468d-8baa-22cfeb65799f:a55ae917-8016-4b1e-8ffa-04311b9dc7d6 The layout of openstack_ports is a comma-separated list of tuples "subnet_id:port_id". Records various node attributes in the CIB PID file PID file - + Interval to allow values to stabilize Dampening Delay - + Valid Openstack credentials as openrc file from api_access/openrc. openrc file Path to command line tools for openstack. Path to Openstack CLI tool END } ####################################################################### OSInfoStats() { local result local value local node local node_id . $OCF_RESKEY_openrc node=$(crm_node -n|awk -F. '{print $1}') result=$($OCF_RESKEY_openstackcli server list \ --format value --column ID --column Name \ | grep $node) if [ $? 
-ne 0 ] ; then ocf_exit_reason "cannot find $node in instances list" exit $OCF_ERR_GENERIC fi node_id=$(echo $result|awk '{print $1}') # Nova data: flavor, AZ… result=$($OCF_RESKEY_openstackcli server show \ --format value \ --column flavor \ --column OS-EXT-AZ:availability_zone \ $node_id | tr '\n' ' ') ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_id -v $node_id value=$(echo $result|awk '{print $2}') ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_flavor -v $value value=$(echo $result|awk '{print $1}') ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_az -v $value # Network data: ports result=$($OCF_RESKEY_openstackcli port list \ --format value \ --column device_id \ --column id \ --column "Fixed IP Addresses" \ | awk "/^$node_id/ {gsub(\"subnet_id='\",\"\");gsub(\"'\",\"\");print \$NF\":\"\$2}" | tr '\n' ' ') value="" for p in $result ; do if [ -z "$value" ] ; then value="$p" else value="$value,$p" fi done ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_ports -v "$value" if [ ! -z "$OS_REGION_NAME" ] ; then ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_region -v "$OS_REGION_NAME" fi if [ ! -z "$OS_TENANT_ID" ] ; then ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_tenant_id -v "$OS_TENANT_ID" if [ ! -z "$OS_TENANT_NAME" ] ; then ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_tenant_name -v "$OS_TENANT_NAME" fi else ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_project_id -v "$OS_PROJECT_ID" if [ ! 
-z "$OS_PROJECT_NAME" ] ; then ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_project_name -v "$OS_PROJECT_NAME" fi fi } OSInfo_usage() { cat < $OCF_RESKEY_pidfile OSInfoStats exit $OCF_SUCCESS } OSInfo_stop() { rm -f $OCF_RESKEY_pidfile ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_id ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_flavor ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_az ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_ports ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_region ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_tenant_id ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_tenant_name ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_project_id ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_project_name exit $OCF_SUCCESS } OSInfo_monitor() { if [ -f "$OCF_RESKEY_pidfile" ] ; then OSInfoStats exit $OCF_RUNNING fi exit $OCF_NOT_RUNNING } OSInfo_validate() { check_binary "$OCF_RESKEY_openstackcli" if [ -z "$OCF_RESKEY_openrc" ]; then ocf_exit_reason "openrc parameter not set" return $OCF_ERR_CONFIGURED fi if [ ! -f "$OCF_RESKEY_openrc" ] ; then ocf_exit_reason "openrc file not found" return $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then OSInfo_usage exit $OCF_ERR_ARGS fi -: ${OCF_RESKEY_pidfile:="$HA_RSCTMP/OSInfo-${OCF_RESOURCE_HOSTNAME}"} -: ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) OSInfo_start ;; stop) OSInfo_stop ;; monitor) OSInfo_monitor ;; validate-all) OSInfo_validate ;; usage|help) OSInfo_usage exit $OCF_SUCCESS ;; *) OSInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
diff --git a/heartbeat/oraasm b/heartbeat/oraasm index 82aa16561..3cc334212 100755 --- a/heartbeat/oraasm +++ b/heartbeat/oraasm @@ -1,179 +1,183 @@ #!/bin/sh # # License: GNU General Public License (GPL) # (c) 2017 O. Albrigtsen # and Linux-HA contributors # # ----------------------------------------------------------------------------- # O C F R E S O U R C E S C R I P T S P E C I F I C A T I O N # ----------------------------------------------------------------------------- # # NAME # oraasm : OCF resource agent script for Oracle ASM # # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_user_default="grid" +OCF_RESKEY_diskgroup_default="" +OCF_RESKEY_home_default="" : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_diskgroup=${OCF_RESKEY_diskgroup_default}} +: ${OCF_RESKEY_home=${OCF_RESKEY_home_default}} oraasm_usage() { cat < 0.75 OCF Resource script for Oracle ASM. It uses the ohasd init-script to manage a Oracle ASM Disk Group as a HA resource. Oracle ASM resource agent Oracle Grid user Oracle Grid user The name of the Oracle Disk Group. If not specified, then the Disk Group along with its home should be listed in /etc/oratab. Oracle Disk Group - + The Oracle Grid home directory home - + END } oraasm_methods() { cat <<-! start stop status monitor validate-all methods meta-data usage ! } oraasm_getconfig() { [ x = "x$OCF_RESKEY_home" ] && OCF_RESKEY_home=`awk -F: "/^+$OCF_RESKEY_diskgroup:/"'{print $2}' /etc/oratab` PATH="$OCF_RESKEY_home/bin:$PATH" ORA_ENVF=`mktemp` cat << EOF > $ORA_ENVF PATH="$OCF_RESKEY_home/bin:$PATH" EOF chmod 644 $ORA_ENVF trap "rm -f $ORA_ENVF" EXIT } oraasm_start() { # if resource is already running, no need to continue code after this. if oraasm_monitor; then ocf_log info "Oracle ASM is already running" return $OCF_SUCCESS fi ocf_run -q /etc/init.d/ohasd start while ! 
oraasm_monitor; do sleep 1 done return $OCF_SUCCESS } oraasm_stop() { oraasm_monitor if [ $? -ne $OCF_SUCCESS ]; then # Currently not running. Nothing to do. ocf_log info "Oracle ASM is already stopped" return $OCF_SUCCESS fi ocf_run -q /etc/init.d/ohasd stop # Wait for process to stop while oraasm_monitor; do sleep 1 done return $OCF_SUCCESS } oraasm_monitor() { su - $OCF_RESKEY_user -c ". $ORA_ENVF; crsctl check has | grep -q \"CRS-4638\"" case "$?" in 0) rc=$OCF_SUCCESS ;; 1) rc=$OCF_NOT_RUNNING ocf_log info "Oracle ASM is not running" ;; *) rc=$OCF_ERR_GENERIC ;; esac return $rc } oraasm_status() { rc=$(oraasm_monitor) return $rc } oraasm_validate_all() { if [ x = "x$OCF_RESKEY_home" ]; then ocf_exit_reason "home not set" return $OCF_ERR_CONFIGURED fi } OCF_REQUIRED_PARAMS="user diskgroup" OCF_REQUIRED_BINARIES="/etc/init.d/ohasd crsctl" ocf_rarun $* # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/heartbeat/oracle b/heartbeat/oracle index b34742314..0f464c173 100755 --- a/heartbeat/oracle +++ b/heartbeat/oracle @@ -1,775 +1,781 @@ #!/bin/sh # # # oracle # # Description: Manages an Oracle Database as a High-Availability # resource # # # Author: Dejan Muhamedagic # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2006 International Business Machines, Inc. # # This code inspired by the DB2 resource script # written by Alan Robertson # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 oracle::RK1::/oracle/10.2::orark1 # # See oracle_usage() function below for more details... 
# # OCF instance parameters: # OCF_RESKEY_sid # OCF_RESKEY_home (optional; else read it from /etc/oratab) # OCF_RESKEY_user (optional; figure it out by checking file ownership) # OCF_RESKEY_ipcrm (optional; defaults to "instance") # OCF_RESKEY_clear_backupmode (optional; default to "false") # OCF_RESKEY_shutdown_method (optional; default to "checkpoint/abort") # OCF_RESKEY_monuser (optional; defaults to "OCFMON") # OCF_RESKEY_monpassword (optional; defaults to "OCFMON") # OCF_RESKEY_monprofile (optional; defaults to "OCFMONPROFILE") # # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/ora-common.sh ####################################################################### oracle_usage() { methods=`oracle_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 {$methods} $0 manages an Oracle Database instance as an HA resource. The 'start' operation starts the database. The 'stop' operation stops the database. The 'status' operation reports whether the database is running The 'monitor' operation reports whether the database seems to be working The 'dumpinstipc' operation prints IPC resources used by the instance The 'cleanup' operation tries to clean up after Oracle was brutally stopped The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! } # Defaults +OCF_RESKEY_sid_default="" +OCF_RESKEY_home_default="" +OCF_RESKEY_user_default="" OCF_RESKEY_monuser_default="OCFMON" OCF_RESKEY_monpassword_default="OCFMON" OCF_RESKEY_monprofile_default="OCFMONPROFILE" +OCF_RESKEY_ipcrm_default="instance" +OCF_RESKEY_clear_backupmode_default="false" +OCF_RESKEY_shutdown_method_default="checkpoint/abort" oracle_meta_data() { cat < 1.0 Resource script for oracle. Manages an Oracle Database instance as an HA resource. Manages an Oracle Database instance The Oracle SID (aka ORACLE_SID). 
sid - + The Oracle home directory (aka ORACLE_HOME). If not specified, then the SID along with its home should be listed in /etc/oratab. home - + The Oracle owner (aka ORACLE_OWNER). If not specified, then it is set to the owner of file \$ORACLE_HOME/dbs/*\${ORACLE_SID}.ora. If this does not work for you, just set it explicitely. user - + Monitoring user name. Every connection as sysdba is logged in an audit log. This can result in a large number of new files created. A new user is created (if it doesn't exist) in the start action and subsequently used in monitor. It should have very limited rights. Make sure that the password for this user does not expire. monuser Password for the monitoring user. Make sure that the password for this user does not expire. monpassword Profile used by the monitoring user. If the profile does not exist, it will be created with a non-expiring password. monprofile Sometimes IPC objects (shared memory segments and semaphores) belonging to an Oracle instance might be left behind which prevents the instance from starting. It is not easy to figure out which shared segments belong to which instance, in particular when more instances are running as same user. What we use here is the "oradebug" feature and its "ipc" trace utility. It is not optimal to parse the debugging information, but I am not aware of any other way to find out about the IPC information. In case the format or wording of the trace report changes, parsing might fail. There are some precautions, however, to prevent stepping on other peoples toes. There is also a dumpinstipc option which will make us print the IPC objects which belong to the instance. Use it to see if we parse the trace file correctly. 
Three settings are possible: - none: don't mess with IPC and hope for the best (beware: you'll probably be out of luck, sooner or later) - instance: try to figure out the IPC stuff which belongs to the instance and remove only those (default; should be safe) - orauser: remove all IPC belonging to the user which runs the instance (don't use this if you run more than one instance as same user or if other apps running as this user use IPC) The default setting "instance" should be safe to use, but in that case we cannot guarantee that the instance will start. In case IPC objects were already left around, because, for instance, someone mercilessly killing Oracle processes, there is no way any more to find out which IPC objects should be removed. In that case, human intervention is necessary, and probably _all_ instances running as same user will have to be stopped. The third setting, "orauser", guarantees IPC objects removal, but it does that based only on IPC objects ownership, so you should use that only if every instance runs as separate user. Please report any problems. Suggestions/fixes welcome. ipcrm - + The clear of the backup mode of ORACLE. clear_backupmode - + How to stop Oracle is a matter of taste it seems. The default method ("checkpoint/abort") is: alter system checkpoint; shutdown abort; This should be the fastest safe way bring the instance down. If you find "shutdown abort" distasteful, set this attribute to "immediate" in which case we will shutdown immediate; If you still think that there's even better way to shutdown an Oracle instance we are willing to listen. shutdown_method - + END } # # methods: What methods/operations do we support? # oracle_methods() { cat <<-! start stop status monitor dumpinstipc showdbstat cleanup validate-all methods meta-data usage ! } # # Run commands as the Oracle owner... # execsql() { if [ "$US" = "$ORACLE_OWNER" ]; then sqlplus -S /nolog else su - $ORACLE_OWNER -s /bin/sh -c ". 
$ORA_ENVF; sqlplus -S /nolog" fi } # # Run commands in the oracle admin sqlplus... # common_sql_opts() { cat</dev/null; then return 0 fi output=`dbasql mk_mon_profile show_mon_profile` if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then return 0 elif echo "$output" | grep ORA-65140 >/dev/null 2>&1; then ocf_exit_reason "monprofile must start with C## for container databases" return $OCF_ERR_CONFIGURED else ocf_exit_reason "could not create $MONPROFILE oracle profile" ocf_log err "sqlplus output: $output" return 1 fi } check_mon_user() { local output local output2 output=`dbasql show_mon_user` if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then if echo "$output" | grep -w "EXPIRED" >/dev/null; then dbasql reset_mon_user_password fi output=`dbasql show_mon_user_profile` if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then return 0 else output=`dbasql set_mon_user_profile` output2=`dbasql show_mon_user_profile` if echo "$output2" | grep -iw "^$MONPROFILE" >/dev/null; then return 0 fi ocf_exit_reason "could not set profile for $MONUSR oracle user" ocf_log err "sqlplus output: $output( $output2 )" return 1 fi fi output=`dbasql mk_mon_user show_mon_user` if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then return 0 elif echo "$output" | grep ORA-65096 >/dev/null 2>&1; then ocf_exit_reason "monuser must start with C## for container databases" return $OCF_ERR_CONFIGURED else ocf_exit_reason "could not create $MONUSR oracle user" ocf_log err "sqlplus output: $output" return 1 fi } # # print the output of dbstat (for debugging) # showdbstat() { echo "Full output:" dbstat | execsql echo "Stripped output:" echo "<`dbasql dbstat`>" } # # IPC stuff: not overly complex, but quite involved :-/ # # Part 1: Oracle other_trace_junk() { echo $1 | sed 's/trc$/trm/' } dumpinstipc() { local output tracef output=`dbasql getipc` # filename in the 2nd line tracef=`echo "$output" | awk 'NR==2' | grep '^/.*trc$'` if [ "$tracef" ]; then echo $tracef else ocf_log warn 
"'dbasql getipc' failed: $output" return 1 fi } parseipc() { local inf=$1 if [ ! -f "$1" ]; then ocf_log warn "$1: no such ipc trace file" return 1 fi awk ' $3 == "Shmid" {n=1;next} n { if( $3~/^[0-9]+$/ ) print $3; n=0 } ' $inf | sort -u | sed 's/^/m:/' awk ' /Semaphore List/ {insems=1;next} insems { for( i=1; i<=NF; i++ ) if( $i~/^[0-9]+$/ ) print $i; } /system semaphore information/ {exit} ' $inf | sort -u | sed 's/^/s:/' TMPFILES="$TMPFILES $inf `other_trace_junk $inf`" } # Part 2: OS (ipcs,ipcrm) filteroraipc() { # this portable? grep -w $ORACLE_OWNER | awk '{print $2}' } ipcdesc() { local what=$1 case $what in m) echo "shared memory segment";; s) echo "semaphore";; q) echo "message queue";; esac } rmipc() { local what=$1 id=$2 ipcs -$what | filteroraipc | grep -iw $id >/dev/null 2>&1 || return ocf_log info "Removing `ipcdesc $what` $id." ipcrm -$what $id } ipcrm_orauser() { local what id for what in m s q; do for id in `ipcs -$what | filteroraipc`; do rmipc $what $id done done } ipcrm_instance() { local ipcobj for ipcobj; do rmipc `echo $ipcobj | sed 's/:/ /'` done } # # oracle_status: is the Oracle instance running? # # quick check to see if the instance is up is_proc_running() { ps -ef | grep -wiqs "[^ ]*[_]pmon_${ORACLE_SID}" } # instance in OPEN state? instance_live() { local status=`monsql_one dbstat` [ "$status" = OPEN ] && return 0 ocf_log warn "Unable to login as \"$MONUSR\", using \"sysdba\" user instead" status=`dbasql_one dbstat` if [ "$status" = OPEN ]; then return 0 else ocf_log info "$ORACLE_SID instance state is not OPEN (dbstat output: $status)" return 1 fi } ora_cleanup() { #rm -fr /tmp/.oracle #??? 
rm -f `ls $ORACLE_HOME/dbs/lk* | grep -i "$ORACLE_SID\$"` #return case $IPCRM in none) ;; instance) ipcrm_instance $* ;; orauser) ipcrm_orauser $* ;; esac } oracle_getconfig() { ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" - clear_backupmode=${OCF_RESKEY_clear_backupmode:-"false"} - shutdown_method=${OCF_RESKEY_shutdown_method:-"checkpoint/abort"} - IPCRM=${OCF_RESKEY_ipcrm:-"instance"} + clear_backupmode=${OCF_RESKEY_clear_backupmode:-${OCF_RESKEY_clear_backupmode_default}} + shutdown_method=${OCF_RESKEY_shutdown_method:-${OCF_RESKEY_shutdown_method_default}} + IPCRM=${OCF_RESKEY_ipcrm:-${OCF_RESKEY_ipcrm_default}} } # # oracle_start: Start the Oracle instance # # NOTE: We handle instance in the MOUNTED and STARTED states # efficiently # We *do not* handle instance in the restricted or read-only # mode, i.e. it appears as running, but its availability is # "not for general use" # oracle_start() { local status output if is_proc_running; then status="`dbasql_one dbstat`" case "$status" in "OPEN") : nothing to be done, we can leave right now ocf_log info "Oracle instance $ORACLE_SID already running" return $OCF_SUCCESS ;; "STARTED") output=`dbasql dbmount` ;; "MOUNTED") : we proceed if mounted ;; *) # status unknown output=`dbasql dbstop dbstart_mount` ;; esac else output="`dbasql dbstart_mount`" # try to cleanup in case of # ORA-01081: cannot start already-running ORACLE - shut it down first if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then ocf_log info "ORA-01081 error found, trying to cleanup oracle (dbstart_mount output: $output)" ora_cleanup output=`dbasql dbstop_immediate` output=`dbasql dbstart_mount` fi fi # oracle instance should be mounted. status="`dbasql_one dbstat`" case "$status" in "MOUNTED") ;; *) : error!! ocf_exit_reason "oracle $ORACLE_SID can not be mounted (status: $status)" return $OCF_ERR_GENERIC ;; esac # It is examined whether mode is "online backup mode", # and if it is true, makes clear the mode. 
# Afterwards, DB is opened. if is_clear_backupmode_set && is_instance_in_backup_mode; then clear_backup_mode fi output=`dbasql dbopen` # check/create the monitor profile if ! check_mon_profile; then # dbopen was failed if there is any $output [ -n "$output" ] && ocf_exit_reason "oracle $ORACLE_SID can not be opened: $output" return $OCF_ERR_GENERIC fi # check/create the monitor user if ! check_mon_user; then # dbopen was failed if there is any $output [ -n "$output" ] && ocf_exit_reason "oracle $ORACLE_SID can not be opened: $output" return $OCF_ERR_GENERIC fi if ! is_proc_running; then ocf_exit_reason "oracle process not running: $output" return $OCF_ERR_GENERIC elif ! instance_live; then ocf_exit_reason "oracle instance $ORACLE_SID not started: $output" return $OCF_ERR_GENERIC else : cool, we are up and running ocf_log info "Oracle instance $ORACLE_SID started: $output" return $OCF_SUCCESS fi } # # oracle_stop: Stop the Oracle instance # oracle_stop() { local status output ipc="" if is_proc_running; then [ "$IPCRM" = "instance" ] && ipc=$(parseipc `dumpinstipc`) output=`dbasql dbstop` else ocf_log info "Oracle instance $ORACLE_SID already stopped" return $OCF_SUCCESS fi ocf_stop_processes TERM $PROCS_CLEANUP_TIME `proc_pids` # kill the procs if they hanged if is_proc_running; then ocf_exit_reason "Oracle instance $ORACLE_SID not stopped: $output" return $OCF_ERR_GENERIC else ocf_log info "Oracle instance $ORACLE_SID stopped: $output" sleep 1 # give em a chance to cleanup ocf_log info "Cleaning up for $ORACLE_SID" ora_cleanup "$ipc" return $OCF_SUCCESS fi } # # oracle_monitor: Can the Oracle instance do anything useful? # oracle_monitor() { if ! is_proc_running; then ocf_log info "oracle process not running" return $OCF_NOT_RUNNING fi if ! 
instance_live; then ocf_exit_reason "oracle instance $ORACLE_SID is down" return $OCF_ERR_GENERIC fi #ocf_log info "Oracle instance $ORACLE_SID is alive" return $OCF_SUCCESS } # other supported actions oracle_status() { if is_proc_running then echo Oracle instance $ORACLE_SID is running exit $OCF_SUCCESS else echo Oracle instance $ORACLE_SID is stopped exit $OCF_NOT_RUNNING fi } oracle_dumpinstipc() { is_proc_running && parseipc `dumpinstipc` } oracle_showdbstat() { showdbstat } oracle_cleanup() { if [ "$IPCRM" = "instance" ]; then ora_cleanup $(parseipc `dumpinstipc`) else ora_cleanup fi } oracle_validate_all() { case "${shutdown_method}" in "immediate") ;; "checkpoint/abort") ;; *) ocf_exit_reason "unsupported shutdown_method, please read meta-data" return $OCF_ERR_CONFIGURED ;; esac case "${IPCRM}" in "none"|"instance"|"orauser") ;; *) ocf_exit_reason "unsupported ipcrm setting, please read meta-data" return $OCF_ERR_CONFIGURED ;; esac ora_common_validate_all } # used in ora-common.sh show_procs() { ps -e -o pid,args | grep -i "[o]ra[a-zA-Z0-9_]*$ORACLE_SID$" } proc_pids() { show_procs | awk '{print $1}'; } PROCS_CLEANUP_TIME="30" MONUSR=${OCF_RESKEY_monuser:-$OCF_RESKEY_monuser_default} MONPWD=${OCF_RESKEY_monpassword:-$OCF_RESKEY_monpassword_default} MONPROFILE=${OCF_RESKEY_monprofile:-$OCF_RESKEY_monprofile_default} MONUSR=$(echo "$MONUSR" | awk '{print toupper($0)}') MONPROFILE=$(echo "$MONPROFILE" | awk '{print toupper($0)}') OCF_REQUIRED_PARAMS="sid" OCF_REQUIRED_BINARIES="sqlplus" ocf_rarun $* # # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/heartbeat/oralsnr b/heartbeat/oralsnr index f41595680..08fb8ae11 100755 --- a/heartbeat/oralsnr +++ b/heartbeat/oralsnr @@ -1,281 +1,293 @@ #!/bin/sh # # # oralsnr # # Description: Manages an Oracle Listener as a High-Availability # resource # # # Author: Dejan Muhamedagic # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2006 International Business 
Machines, Inc. # # This code inspired by the DB2 resource script # written by Alan Robertson # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 oralsnr::sid::home::user::listener # # See oralsnr_usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_sid (mandatory; for the monitor op) # OCF_RESKEY_home (optional; else read it from /etc/oratab) # OCF_RESKEY_user (optional; user to run the listener) # OCF_RESKEY_listener (optional; defaults to LISTENER) # # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/ora-common.sh +# Parameter defaults + +OCF_RESKEY_sid_default="" +OCF_RESKEY_home_default="" +OCF_RESKEY_user_default="" +OCF_RESKEY_listener_default="LISTENER" + +: ${OCF_RESKEY_sid=${OCF_RESKEY_sid_default}} +: ${OCF_RESKEY_home=${OCF_RESKEY_home_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_listener=${OCF_RESKEY_listener_default}} + ####################################################################### SH=/bin/sh oralsnr_usage() { methods=`oralsnr_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages an Oracle Database instance as an HA resource. The 'start' operation starts the database. The 'stop' operation stops the database. The 'status' operation reports whether the database is running The 'monitor' operation reports whether the database seems to be working The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! } oralsnr_meta_data() { cat < 1.0 Resource script for Oracle Listener. It manages an Oracle Listener instance as an HA resource. Manages an Oracle TNS listener The Oracle SID (aka ORACLE_SID). Necessary for the monitor op, i.e. to do tnsping SID. sid - + The Oracle home directory (aka ORACLE_HOME). If not specified, then the SID should be listed in /etc/oratab. 
home - + Run the listener as this user. user - + Listener instance to be started (as defined in listener.ora). Defaults to LISTENER. listener - + Full path to the directory that contains the Oracle listener tnsnames.ora configuration file. The shell variable TNS_ADMIN is set to the value provided. Full path to the directory containing tnsnames.ora END } # # methods: What methods/operations do we support? # oralsnr_methods() { cat <<-! start stop status monitor validate-all methods meta-data usage ! } # # Run commands as the Oracle owner... # runasdba() { if [ "$US" = "$ORACLE_OWNER" ]; then $SH else ( echo ". $ORA_ENVF" cat ) | su -s $SH - $ORACLE_OWNER fi } # # oralsnr_start: Start the Oracle listener instance # oralsnr_start() { if is_proc_running && test_tnsping; then : nothing to be done, we can leave right now ocf_log info "Listener $listener already running" return $OCF_SUCCESS fi output=`echo lsnrctl start $listener | runasdba` if test_tnsping; then : cool, we are up and running ocf_log info "Listener $listener running: $output" return $OCF_SUCCESS else ocf_exit_reason "Listener $listener appears to have started, but is not running properly: $output" ocf_log err "Probable Oracle configuration error" return $OCF_ERR_GENERIC fi } # # oralsnr_stop: Stop the Oracle instance # oralsnr_stop() { if is_proc_running; then output=`echo lsnrctl stop $listener | runasdba` else ocf_log info "Listener $listener already stopped" return $OCF_SUCCESS fi ocf_stop_processes TERM $PROCS_CLEANUP_TIME `proc_pids` # kill the procs if they hanged if is_proc_running; then ocf_exit_reason "Listener $listener not stopped: $output" return $OCF_ERR_GENERIC else ocf_log info "Listener $listener stopped: $output" return $OCF_SUCCESS fi } # # is_proc_running: is the listener running? # is_proc_running() { show_procs | grep "." 
> /dev/null } # the following two should be run only if the process is running test_listener() { local output output=`lsnrctl status $listener` if echo "$output" | tail -1 | grep -qs 'completed successfully' then return $OCF_SUCCESS else ocf_exit_reason "$listener status failed: $output" return $OCF_ERR_GENERIC fi } # and does it work? test_tnsping() { local output output=`tnsping $ORACLE_SID` if echo "$output" | tail -1 | grep -qs '^OK'; then return $OCF_SUCCESS else ocf_exit_reason "tnsping $ORACLE_SID failed: $output" return $OCF_ERR_GENERIC fi } # # oralsnr_monitor: Can we connect to the listener? # oralsnr_monitor() { if is_proc_running; then test_listener && test_tnsping else return $OCF_NOT_RUNNING fi } oralsnr_status() { if is_proc_running then echo Listener $listener is running exit $OCF_SUCCESS else echo Listener $listener is stopped exit $OCF_NOT_RUNNING fi } oralsnr_getconfig() { ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" "$OCF_RESKEY_tns_admin" - listener=${OCF_RESKEY_listener:-"LISTENER"} + listener=${OCF_RESKEY_listener} } oralsnr_validate_all() { ora_common_validate_all } # used in ora-common.sh show_procs() { ps -U "$ORACLE_OWNER" -o pid,user,args | grep '[t]nslsnr' | grep -i -w "$listener" } proc_pids() { show_procs | awk '{print $1}'; } PROCS_CLEANUP_TIME="10" OCF_REQUIRED_PARAMS="sid" OCF_REQUIRED_BINARIES="lsnrctl tnsping" ocf_rarun $* # # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/heartbeat/ovsmonitor b/heartbeat/ovsmonitor index 120977462..8a80a8c92 100755 --- a/heartbeat/ovsmonitor +++ b/heartbeat/ovsmonitor @@ -1,450 +1,468 @@ #!/bin/sh # # OCF Resource Agent compliant script. # Monitor the vitality of a local OpenVSwitch bond. # # Based on the work by Alexander Krauth. # # Transfered from ethmonitor into ovsmonitor by Mathieu Grzybek. # # Copyright (c) 2017 Robert Euhus, Alexander Krauth, Lars Marowsky-Bré # Mathieu Grzybek # All Rights Reserved. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # OCF parameters are as below # # OCF_RESKEY_bond # OCF_RESKEY_bridge # OCF_RESKEY_multiplier # OCF_RESKEY_name # OCF_RESKEY_repeat_count # OCF_RESKEY_repeat_interval # OCF_RESKEY_pktcnt_timeout # OCF_RESKEY_link_status_only # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} .
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_bond_default="" +OCF_RESKEY_bridge_default="" +OCF_RESKEY_name_default="" +OCF_RESKEY_multiplier_default="1" +OCF_RESKEY_repeat_count_default="5" +OCF_RESKEY_repeat_interval_default="10" +OCF_RESKEY_pktcnt_timeout_default="5" +OCF_RESKEY_link_status_only_default="false" + +: ${OCF_RESKEY_bond=${OCF_RESKEY_bond_default}} +: ${OCF_RESKEY_bridge=${OCF_RESKEY_bridge_default}} +: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +: ${OCF_RESKEY_multiplier=${OCF_RESKEY_multiplier_default}} +: ${OCF_RESKEY_repeat_count=${OCF_RESKEY_repeat_count_default}} +: ${OCF_RESKEY_repeat_interval=${OCF_RESKEY_repeat_interval_default}} +: ${OCF_RESKEY_pktcnt_timeout=${OCF_RESKEY_pktcnt_timeout_default}} +: ${OCF_RESKEY_link_status_only=${OCF_RESKEY_link_status_only_default}} + ####################################################################### meta_data() { cat < 0.1 Monitor the vitality of a local ovs bond. You may set up this RA as a clone resource to monitor the network bonds on different nodes, with the same bond name. This is not related to the IP address or the network on which a bond is configured. You may use this RA to move resources away from a node, which has a faulty bond or prevent moving resources to such a node. This gives you independent control of the resources, without involving cluster intercommunication. But it requires your nodes to have more than one network bond. The resource configuration requires a monitor operation, because the monitor does the main part of the work. In addition to the resource configuration, you need to configure some location constraints, based on a CIB attribute value. The name of the attribute value is configured in the 'name' option of this RA. Example constraint configuration using crmsh location loc_connected_node my_resource_grp \ rule $id="rule_loc_connected_node" -INF: ovsmonitor-bond-public eq 0 Example constraint configuration using pcs. 
Only allow 'my_resource' to run on nodes where eth0 ethernet device is available. pcs constraint location my_resource rule score=-INFINITY ovsmonitor-bond-public ne 1 The ethmonitor works in 3 different modes to test the bond vitality. 1. call ovs-appctl to see if at least one of the bonding's link status is up (if link is down -> error) 2. call ovs-ofctl and watch the RX counter (if packages come around in a certain time -> success) 3. return error Monitors ovs bonding bonds The name of the network bond which should be monitored (e.g. bond-public). Bond bond name - + The name of the ovs bridge that contains the bridge. ovs bridge - + -The name of the CIB attribute to set. This is the name to be used in the constraints. Defaults to "ethmonitor-'bond_name'". +The name of the CIB attribute to set. This is the name to be used in the constraints. Defaults to "ovsmonitor-'bond_name'". Attribute name - + Multiplier for the value of the CIB attriobute specified in parameter name. Multiplier for result variable - + Specify how often the bond will be monitored, before the status is set to failed. You need to set the timeout of the monitoring operation to at least repeat_count * repeat_interval Monitor repeat count - + Specify how long to wait in seconds between the repeat_counts. Monitor repeat interval in seconds - + Timeout for the RX packet counter. Stop listening for packet counter changes after the given number of seconds. packet counter timeout - + Only report success based on link status. Do not perform RX counter related connectivity tests. 
link status check only - + END exit $OCF_SUCCESS } # # Return true, if the bond exists # is_bond() { # # List bonds but exclude FreeS/WAN ipsecN virtual bonds # ovs-appctl bond/show $OCF_RESKEY_bond 1>/dev/null 2>&1 } # # Return true, if the bridge exists # is_bridge() { # # List bonds but exclude FreeS/WAN ipsecN virtual bonds # #ovs-appctl bond/show $OCF_RESKEY_bond 1>/dev/null 2>&1 ovs-vsctl show|grep Bridge|grep -q $OCF_RESKEY_bridge } # # if_init: check the bond, bridge and numeric parameters and set BOND, BRIDGE, ATTRNAME, REP_COUNT and REP_INTERVAL_S. # Exits with $OCF_ERR_CONFIGURED on an invalid value, otherwise returns $OCF_SUCCESS. # if_init() { local rc if [ X"$OCF_RESKEY_bond" = "X" ]; then ocf_exit_reason "Bond name (the bond parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi if [ X"$OCF_RESKEY_bridge" = "X" ]; then ocf_exit_reason "Bridge name (the bridge parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi BOND="$OCF_RESKEY_bond" BRIDGE="$OCF_RESKEY_bridge" if is_bond then if ! is_bridge then ocf_exit_reason "Bridge $OCF_RESKEY_bridge does not exist" exit $OCF_ERR_CONFIGURED; fi else ocf_exit_reason "Bond $OCF_RESKEY_bond does not exist" exit $OCF_ERR_CONFIGURED; fi - : ${OCF_RESKEY_multiplier:="1"} if ! ocf_is_decimal "$OCF_RESKEY_multiplier"; then ocf_exit_reason "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]" exit $OCF_ERR_CONFIGURED fi ATTRNAME=${OCF_RESKEY_name:-"ovsmonitor-$BOND"} - REP_COUNT=${OCF_RESKEY_repeat_count:-5} + REP_COUNT=${OCF_RESKEY_repeat_count} # the -lt range test must be its own command, not extra arguments to ocf_is_decimal if ! ocf_is_decimal "$REP_COUNT" || [ "$REP_COUNT" -lt 1 ]; then ocf_exit_reason "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]" exit $OCF_ERR_CONFIGURED fi - REP_INTERVAL_S=${OCF_RESKEY_repeat_interval:-10} + REP_INTERVAL_S=${OCF_RESKEY_repeat_interval} if ! ocf_is_decimal "$REP_INTERVAL_S"; then ocf_exit_reason "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]" exit $OCF_ERR_CONFIGURED fi - : ${OCF_RESKEY_pktcnt_timeout:="5"} if !
ocf_is_decimal "$OCF_RESKEY_pktcnt_timeout"; then ocf_exit_reason "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]" exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } # get the link status on $BOND # asks ip about running (up) bonds, returns the number of matching bond names that are up get_link_status () { #$IP2UTIL -o link show up dev "$BOND" | grep -v 'NO-CARRIER' | grep -c "$BOND" ovs-appctl bond/show "$BOND"|awk -F: '/^slave/ {print $2}'|grep -c enabled } # returns the number of received rx packets on $BOND get_rx_packets () { ocf_log debug "bond $BOND - bridge $BRIDGE" #$IP2UTIL -o -s link show dev "$BOND" \ # | sed 's/.* RX: [^0-9]*[0-9]* *\([0-9]*\) .*/\1/' local ovs_port for ovs_port in $(ovs-appctl bond/show $BOND|awk '/^slave/ {gsub(":","");print $2}') ; do ovs-ofctl dump-ports $BRIDGE $ovs_port done \ | awk -F, 'BEGIN{total=0} /rx/ {gsub(".*pkts=","");total=total+int($1)} END{print total}' } # watch for packet counter changes for max. OCF_RESKEY_pktcnt_timeout seconds # returns immedeately with return code 0 if any packets were received # otherwise 1 is returned watch_pkt_counter () { local RX_PACKETS_NEW local RX_PACKETS_OLD RX_PACKETS_OLD="`get_rx_packets`" for n in `seq $(( $OCF_RESKEY_pktcnt_timeout * 10 ))`; do sleep 0.1 RX_PACKETS_NEW="`get_rx_packets`" ocf_log debug "RX_PACKETS_OLD: $RX_PACKETS_OLD RX_PACKETS_NEW: $RX_PACKETS_NEW" if [ "$RX_PACKETS_OLD" -ne "$RX_PACKETS_NEW" ]; then ocf_log debug "we received some packets." return 0 fi done return 1 } # # Check the bond depending on the level given as parameter: $OCF_RESKEY_check_level # # 10: watch for packet counter changes # # # 30: watch for packet counter changes in promiscios mode # # If unsuccessfull in levels 18 and above, # the tests for higher check levels are run.
# if_check () { # always check link status first link_status="`get_link_status`" ocf_log debug "link_status: $link_status (up > 0, down = 0)" if [ $link_status -eq 0 ]; then ocf_log notice "link_status: DOWN" return $OCF_NOT_RUNNING fi # if using link_status_only, skip RX count related test if ocf_is_true "$OCF_RESKEY_link_status_only"; then return $OCF_SUCCESS fi # watch for packet counter changes ocf_log debug "watch for packet counter changes" watch_pkt_counter if [ $? -eq 0 ]; then return $OCF_SUCCESS else ocf_log debug "No packets received during packet watch timeout" fi # watch for packet counter changes in promiscios mode # ocf_log debug "watch for packet counter changes in promiscios mode" # be sure switch off promiscios mode in any case # TODO: check first, wether promisc is already on and leave it untouched. # trap "$IP2UTIL link set dev $BOND promisc off; exit" INT TERM EXIT # $IP2UTIL link set dev $BOND promisc on # watch_pkt_counter && return $OCF_SUCCESS # $IP2UTIL link set dev $BOND promisc off # trap - INT TERM EXIT # looks like it's not working (for whatever reason) return $OCF_NOT_RUNNING } ####################################################################### if_usage() { cat < /dev/null` sleep $sleep_time 2> /dev/null runs=$(($runs + 1)) fi if [ $mon_rc -eq $OCF_SUCCESS -a $runs -ne 0 ]; then ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error" fi done ocf_log debug "Monitoring return code: $mon_rc" if [ $mon_rc -eq $OCF_SUCCESS ]; then set_cib_value 1 attr_rc=$? else ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed." set_cib_value 0 attr_rc=$? fi ## The resource should not fail, if the bond is down. It should fail, if the update of the CIB variable has errors. ## To react on the bond failure you must use constraints based on the CIB variable value, not on the resource itself. 
exit $attr_rc } if_stop() { attrd_updater -D -n $ATTRNAME ha_pseudo_resource $OCF_RESOURCE_INSTANCE stop } if_start() { local rc ha_pseudo_resource $OCF_RESOURCE_INSTANCE start rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "Failure to create ovsmonitor state file" return $rc fi # perform the first monitor during the start operation if_monitor return $? } if_validate() { check_binary ovs-vsctl check_binary ovs-appctl check_binary ovs-ofctl if_init } case $__OCF_ACTION in meta-data) meta_data ;; usage|help) if_usage exit $OCF_SUCCESS ;; esac if_validate case $__OCF_ACTION in start) if_start exit $? ;; stop) if_stop exit $? ;; monitor|status) if_monitor exit $? ;; validate-all) exit $? ;; *) if_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/pgsql b/heartbeat/pgsql index 5d652edaa..b1c070ead 100755 --- a/heartbeat/pgsql +++ b/heartbeat/pgsql @@ -1,2203 +1,2205 @@ #!/bin/sh # # Description: Manages a PostgreSQL Server as an OCF High-Availability # resource # # Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA # Florian Haas (florian@linbit.com) -- makeover # Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication # David Corlette (dcorlette@netiq.com) -- add support for non-standard library locations and non-standard port # # Copyright: 2006-2012 Serge Dubrouski # and other Linux-HA contributors # License: GNU General Public License (GPL) # ############################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Use runuser if available for SELinux. 
if [ -x /sbin/runuser ]; then SU=runuser else SU=su fi # # Get PostgreSQL Configuration parameter # get_pgsql_param() { local param_name param_name=$1 perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) { \$dir=\$1; \$dir =~ s/\s*\#.*//; \$dir =~ s/^'(\S*)'/\$1/; print \$dir;}" perl -ne "$perl_code" < $OCF_RESKEY_config } # Defaults OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl OCF_RESKEY_psql_default=/usr/bin/psql OCF_RESKEY_pgdata_default=/var/lib/pgsql/data OCF_RESKEY_pgdba_default=postgres OCF_RESKEY_pghost_default="" OCF_RESKEY_pgport_default=5432 OCF_RESKEY_pglibs_default=/usr/lib OCF_RESKEY_start_opt_default="" OCF_RESKEY_ctl_opt_default="" OCF_RESKEY_pgdb_default=template1 OCF_RESKEY_logfile_default=/dev/null +OCF_RESKEY_socketdir_default="" OCF_RESKEY_stop_escalate_default=90 OCF_RESKEY_monitor_user_default="" OCF_RESKEY_monitor_password_default="" OCF_RESKEY_monitor_sql_default="select now();" OCF_RESKEY_check_wal_receiver_default="false" # Defaults for replication OCF_RESKEY_rep_mode_default=none OCF_RESKEY_node_list_default="" OCF_RESKEY_restore_command_default="" OCF_RESKEY_archive_cleanup_command_default="" OCF_RESKEY_recovery_end_command_default="" OCF_RESKEY_master_ip_default="" OCF_RESKEY_repuser_default="postgres" OCF_RESKEY_primary_conninfo_opt_default="" OCF_RESKEY_restart_on_promote_default="false" OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp" OCF_RESKEY_xlog_check_count_default="3" OCF_RESKEY_crm_attr_timeout_default="5" OCF_RESKEY_stop_escalate_in_slave_default=90 OCF_RESKEY_replication_slot_name_default="" : ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}} : ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}} : ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}} : ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}} : ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}} : ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}} : ${OCF_RESKEY_pglibs=${OCF_RESKEY_pglibs_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf} : 
${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}} : ${OCF_RESKEY_ctl_opt=${OCF_RESKEY_ctl_opt_default}} : ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}} : ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}} +: ${OCF_RESKEY_socketdir=${OCF_RESKEY_socketdir_default}} : ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}} : ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}} : ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}} : ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}} : ${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}} # for replication : ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}} : ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}} : ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}} : ${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}} : ${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}} : ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}} : ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}} : ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}} : ${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}} : ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}} : ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}} : ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}} : ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}} : ${OCF_RESKEY_replication_slot_name=${OCF_RESKEY_replication_slot_name_default}} usage() { cat < 1.0 Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource. Manages a PostgreSQL database instance Path to pg_ctl command. pgctl Start options (-o start_opt in pg_ctl). "-i -p 5432" for example. start_opt Additional pg_ctl options (-w, -W etc..). ctl_opt Path to psql command. psql Path to PostgreSQL data directory. pgdata User that owns PostgreSQL. 
pgdba Hostname/IP address where PostgreSQL is listening pghost Port where PostgreSQL is listening pgport Custom location of the Postgres libraries. If not set, the standard location will be used. pglibs PostgreSQL user that pgsql RA will user for monitor operations. If it's not set pgdba user will be used. monitor_user Password for monitor user. monitor_password SQL script that will be used for monitor operations. monitor_sql Path to the PostgreSQL configuration file for the instance. Configuration file Database that will be used for monitoring. pgdb Path to PostgreSQL server log output file. logfile Unix socket directory for PostgreSQL. If you use PostgreSQL 9.3 or higher and define unix_socket_directories in the postgresql.conf, then you must set socketdir to determine which directory is used for psql command. socketdir - + Number of seconds to wait for stop (using -m fast) before resorting to -m immediate stop escalation Replication mode may be set to "async" or "sync" or "slave". They require PostgreSQL 9.1 or later. Once set, "async" and "sync" require node_list, master_ip, and restore_command parameters,as well as configuring PostgreSQL for replication (in postgresql.conf and pg_hba.conf). "slave" means that RA only makes recovery.conf before starting to connect to primary which is running somewhere. It doesn't need master/slave setting. It requires master_ip restore_command parameters. rep_mode All node names. Please separate each node name with a space. This is optional for replication. Defaults to all nodes in the cluster node list restore_command for recovery.conf. This is required for replication. restore_command archive_cleanup_command for recovery.conf. This is used for replication and is optional. archive_cleanup_command recovery_end_command for recovery.conf. This is used for replication and is optional. recovery_end_command Master's floating IP address to be connected from hot standby. This parameter is used for "primary_conninfo" in recovery.conf. 
This is required for replication. master ip User used to connect to the master server. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. repuser primary_conninfo options of recovery.conf except host, port, user and application_name. This is optional for replication. primary_conninfo_opt If this is true, RA deletes recovery.conf and restarts PostgreSQL on promote to keep Timeline ID. It probably makes fail-over slower. It's recommended to set on-fail of promote up as fence. This is optional for replication. restart_on_promote Set this option when using replication slots. Can only use lower case letters, numbers and underscore for replication_slot_name. The replication slots would be created for each node, with the name adding the node name as postfix. For example, replication_slot_name is "sample" and 2 slaves which are "node1" and "node2" connect to their slots, the slots names are "sample_node1" and "sample_node2". If the node name contains a upper case letter, hyphen and dot, those characters will be converted to a lower case letter or an underscore. For example, Node-1.example.com to node_1_example_com. pgsql RA doesn't monitor and delete the replication slot. When the slave node has been disconnected in failure or the like, execute one of the following manually. Otherwise it may eventually cause a disk full because the master node will continue to accumulate the unsent WAL. 1. recover and reconnect the slave node to the master node as soon as possible. 2. delete the slot on the master node by following psql command. $ select pg_drop_replication_slot('replication_slot_name'); replication_slot_name Path to temporary directory. This is optional for replication. tmpdir Number of checks of xlog on monitor before promote. This is optional for replication. Note: For backward compatibility, the terms are unified with PostgreSQL 9. If you are using PostgreSQL 10 or later, replace "xlog" with "wal". 
Likewise, replacing "location" with "lsn". xlog check count The timeout of crm_attribute forever update command. Default value is 5 seconds. This is optional for replication. The timeout of crm_attribute forever update command. Number of seconds to wait for stop (using -m fast) before resorting to -m immediate in slave state. This is optional for replication. stop escalation_in_slave If this is true, RA checks wal_receiver process on monitor and notifies its status using "(resource name)-receiver-status" attribute. It's useful for checking whether PostgreSQL (hot standby) connects to primary. The attribute shows status as "normal" or "normal (master)" or "ERROR". Note that if you configure PostgreSQL as master/slave resource, then wal receiver is not running in the master and the attribute shows status as "normal (master)" consistently because it is normal status. check_wal_receiver EOF } # # Run the given command in the Resource owner environment... # runasowner() { local quietrun="" local loglevel="-err" local var for var in 1 2 do case "$1" in "-q") quietrun="-q" shift 1;; "info"|"warn"|"err") loglevel="-$1" shift 1;; *) ;; esac done ocf_run $quietrun $loglevel $SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*" } # # Shell escape # escape_string() { echo "$*" | sed -e "s/'/'\\\\''/g" } # # methods: What methods/operations do we support? # pgsql_methods() { cat </dev/null 2>&1" return $? 
fi # No PID file false } pgsql_wal_receiver_status() { local PID local receiver_parent_pids local pgsql_real_monitor_status=$1 PID=`head -n 1 $PIDFILE` receiver_parent_pids=`ps -ef | tr -s " " | grep "[w]al receiver process" | cut -d " " -f 3` if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" -q return 0 fi if [ $pgsql_real_monitor_status -eq "$OCF_RUNNING_MASTER" ]; then attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal (master)" -q return 0 fi attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" -q ocf_log warn "wal receiver process is not running" return 1 } # # pgsql_real_monitor # pgsql_real_monitor() { local loglevel local rc local output # Set the log level of the error message loglevel=${1:-err} if ! pgsql_status then ocf_log info "PostgreSQL is down" return $OCF_NOT_RUNNING fi if is_replication; then #Check replication state output=`exec_sql "${CHECK_MS_SQL}"` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status." return $OCF_ERR_GENERIC fi case "$output" in f) ocf_log debug "PostgreSQL is running as a primary." if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then if ocf_is_probe; then # Set initial score for primary. exec_with_retry 0 $CRM_MASTER -v $PROMOTE_ME fi return $OCF_RUNNING_MASTER fi ;; t) ocf_log debug "PostgreSQL is running as a hot standby." if ocf_is_probe; then # Set initial score for hot standby. exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE fi return $OCF_SUCCESS;; *) ocf_exit_reason "$CHECK_MS_SQL output is $output" return $OCF_ERR_GENERIC;; esac fi OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"` runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \ -c '$OCF_RESKEY_monitor_sql'" rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running." 
return $OCF_ERR_GENERIC fi if is_replication; then return $OCF_RUNNING_MASTER fi return $OCF_SUCCESS } pgsql_replication_monitor() { local rc rc=$1 if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then return $rc fi # If I am Master if [ $rc -eq $OCF_RUNNING_MASTER ]; then change_data_status "$NODENAME" "LATEST" change_pgsql_status "$NODENAME" "PRI" control_slave_status || return $OCF_ERR_GENERIC if [ "$RE_CONTROL_SLAVE" = "true" ]; then sleep 2 ocf_log info "re-controlling slave status." RE_CONTROL_SLAVE="none" control_slave_status || return $OCF_ERR_GENERIC fi return $rc fi # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor, # so I will get master node name using crm_mon -n print_crm_mon | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):].*Master" if [ $? -ne 0 ] ; then # If I am Slave and Master is not exist ocf_log info "Master does not exist." change_pgsql_status "$NODENAME" "HS:alone" have_master_right if [ $? -eq 0 ]; then rm -f ${XLOG_NOTE_FILE}.* fi else output=`exec_with_retry 0 $CRM_ATTR_FOREVER -N "$NODENAME" \ -n "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$output" = "DISCONNECT" ]; then change_pgsql_status "$NODENAME" "HS:alone" fi fi return $rc } #pgsql_monitor: pgsql_real_monitor() wrapper for replication pgsql_monitor() { local rc pgsql_real_monitor rc=$? if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then pgsql_wal_receiver_status $rc fi if ! is_replication; then return $rc else pgsql_replication_monitor $rc return $? fi } # pgsql_post_demote pgsql_post_demote() { DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE" if [ "$DEMOTE_NODE" != "$NODENAME" ]; then if ! 
echo $OCF_RESKEY_CRM_meta_notify_master_uname | tr '[A-Z]' '[a-z]' | grep $NODENAME; then show_master_baseline change_pgsql_status "$NODENAME" "HS:alone" fi fi return $OCF_SUCCESS } pgsql_pre_promote() { local master_baseline local my_master_baseline local cmp_location local number_of_nodes # If my data is newer than new master's one, I fail my resource. PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` number_of_nodes=`echo $NODE_LIST | wc -w` if [ $number_of_nodes -ge 3 -a \ "$OCF_RESKEY_rep_mode" = "sync" -a \ "$PROMOTE_NODE" != "$NODENAME" ]; then master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` if [ $? -eq 0 ]; then my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` # get older location cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\ sort | head -1` if [ "$cmp_location" != "$my_master_baseline" ]; then # We used to set the failcount to INF for the resource here in # order to move the master to the other node. However, setting # the failcount should be done only by the CRM and so this use # got deprecated in pacemaker version 1.1.17. Now we do the # "ban resource from the node". ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline" exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q return $OCF_ERR_GENERIC fi fi fi return $OCF_SUCCESS } pgsql_notify() { local type="${OCF_RESKEY_CRM_meta_notify_type}" local op="${OCF_RESKEY_CRM_meta_notify_operation}" local rc if ! is_replication; then return $OCF_SUCCESS fi ocf_log debug "notify: ${type} for ${op}" case $type in pre) case $op in promote) pgsql_pre_promote return $? 
;; esac ;; post) case $op in promote) delete_xlog_location PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` if [ "$PROMOTE_NODE" != "$NODENAME" ]; then delete_master_baseline fi return $OCF_SUCCESS ;; demote) pgsql_post_demote return $? ;; start|stop) MASTER_NODE=`echo $OCF_RESKEY_CRM_meta_notify_master_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` if [ "$NODENAME" = "$MASTER_NODE" ]; then control_slave_status fi return $OCF_SUCCESS ;; esac ;; esac return $OCF_SUCCESS } control_slave_status() { local rc local data_status local target local all_data_status local tmp_data_status local number_of_nodes all_data_status=`exec_sql "${CHECK_REPLICATION_STATE_SQL}"` rc=$? if [ $rc -eq 0 ]; then if [ -n "$all_data_status" ]; then all_data_status=`echo $all_data_status | sed "s/\n/ /g"` fi else report_psql_error $rc err "Can't get PostgreSQL replication status." return 1 fi number_of_nodes=`echo $NODE_LIST | wc -w` for target in $NODE_LIST; do if [ "$target" = "$NODENAME" ]; then continue fi data_status="DISCONNECT" if [ -n "$all_data_status" ]; then for tmp_data_status in $all_data_status; do if ! echo $tmp_data_status | grep -q "^${target}|"; then continue fi data_status=`echo $tmp_data_status | cut -d "|" -f 2,3` ocf_log debug "node_name and data_status is $tmp_data_status" break done fi case "$data_status" in "STREAMING|SYNC") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_PROMOTE" change_pgsql_status "$target" "HS:sync" ;; "STREAMING|ASYNC") change_data_status "$target" "$data_status" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then change_master_score "$target" "$CAN_NOT_PROMOTE" set_sync_mode "$target" else if [ $number_of_nodes -le 2 ]; then change_master_score "$target" "$CAN_PROMOTE" else # I can't determine which slave's data is newest in async mode. 
change_master_score "$target" "$CAN_NOT_PROMOTE" fi fi change_pgsql_status "$target" "HS:async" ;; "STREAMING|POTENTIAL") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" change_pgsql_status "$target" "HS:potential" ;; "DISCONNECT") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then set_async_mode "$target" fi ;; *) change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then set_async_mode "$target" fi change_pgsql_status "$target" "HS:connected" ;; esac done return 0 } have_master_right() { local old local new local output local data_status local node local mylocation local count local newestXlog local oldfile local newfile ocf_log debug "Checking if I have a master right." data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \ "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null` if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi else if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "STREAMING|ASYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi fi ocf_log info "My data status=$data_status." show_xlog_location if [ $? -ne 0 ]; then ocf_exit_reason "Failed to show my xlog location." exit $OCF_ERR_GENERIC fi old=0 for count in `seq $OCF_RESKEY_xlog_check_count`; do if [ -f ${XLOG_NOTE_FILE}.$count ]; then old=$count continue fi break done new=`expr $old + 1` # get xlog locations of all nodes for node in ${NODE_LIST}; do output=`$CRM_ATTR_REBOOT -N "$node" -n \ "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null` if [ $? -ne 0 ]; then ocf_log warn "Can't get $node xlog location." 
continue else ocf_log info "$node xlog location : $output" echo "$node $output" >> ${XLOG_NOTE_FILE}.${new} if [ "$node" = "$NODENAME" ]; then mylocation=$output fi fi done oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null` newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null` if [ "$oldfile" != "$newfile" ]; then # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 return 1 fi if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \ head -1 | cut -d " " -f 2` if [ "$newestXlog" = "$mylocation" ]; then ocf_log info "I have a master right." exec_with_retry 5 $CRM_MASTER -v $PROMOTE_ME return 0 fi change_data_status "$NODENAME" "DISCONNECT" ocf_log info "I don't have correct master data." # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 fi return 1 } is_replication() { if [ "$OCF_RESKEY_rep_mode" != "none" -a "$OCF_RESKEY_rep_mode" != "slave" ]; then return 0 fi return 1 } use_replication_slot() { if [ -n "$OCF_RESKEY_replication_slot_name" ]; then return 0 fi return 1 } create_replication_slot_name() { local number_of_nodes=0 local target local replication_slot_name local replication_slot_name_list_tmp local replication_slot_name_list if [ -n "$NODE_LIST" ]; then number_of_nodes=`echo $NODE_LIST | wc -w` fi if [ $number_of_nodes -le 0 ]; then replication_slot_name_list="" # The Master node should have some slots equal to the number of Slaves, and # the Slave nodes connect to their dedicated slot on the Master. # To ensuring that the slots name are each unique, add postfix to $OCF_RESKEY_replication_slot. # The postfix is "_$target". else for target in $NODE_LIST do if [ "$target" != "$NODENAME" ]; then # The Uppercase, "-" and "." don't allow to use in slot_name. # If the NODENAME contains them, convert upper case to lower case and "_" and "." to "_". 
target=`echo "$target" | tr 'A-Z.-' 'a-z__'` replication_slot_name="$OCF_RESKEY_replication_slot_name"_"$target" replication_slot_name_list_tmp="$replication_slot_name_list" replication_slot_name_list="$replication_slot_name_list_tmp $replication_slot_name" fi done fi echo $replication_slot_name_list } delete_replication_slot(){ DELETE_REPLICATION_SLOT_sql="SELECT pg_drop_replication_slot('$1');" output=`exec_sql "$DELETE_REPLICATION_SLOT_sql"` return $? } delete_replication_slots() { local replication_slot_name_list local replication_slot_name replication_slot_name_list=`create_replication_slot_name` ocf_log debug "replication slot names are $replication_slot_name_list." for replication_slot_name in $replication_slot_name_list do if [ `check_replication_slot $replication_slot_name` = "1" ]; then delete_replication_slot $replication_slot_name if [ $? -eq 0 ]; then ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)." else ocf_exit_reason "$output" return $OCF_ERR_GENERIC fi fi done } create_replication_slots() { local replication_slot_name local replication_slot_name_list local output local rc local CREATE_REPLICATION_SLOT_sql local DELETE_REPLICATION_SLOT_sql replication_slot_name_list=`create_replication_slot_name` ocf_log debug "replication slot names are $replication_slot_name_list." for replication_slot_name in $replication_slot_name_list do # If the same name slot is already exists, initialize(delete and create) the slot. if [ `check_replication_slot $replication_slot_name` = "1" ]; then delete_replication_slot $replication_slot_name if [ $? -eq 0 ]; then ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)." else ocf_exit_reason "$output" return $OCF_ERR_GENERIC fi fi CREATE_REPLICATION_SLOT_sql="SELECT pg_create_physical_replication_slot('$replication_slot_name');" output=`exec_sql "$CREATE_REPLICATION_SLOT_sql"` rc=$? 
if [ $rc -eq 0 ]; then ocf_log info "PostgreSQL creates the replication slot($replication_slot_name)." else ocf_exit_reason "$output" return $OCF_ERR_GENERIC fi done return 0 } # This function check the replication slot does exists. check_replication_slot(){ local replication_slot_name=$1 local output local CHECK_REPLICATION_SLOT_sql="SELECT count(*) FROM pg_replication_slots WHERE slot_name = '$replication_slot_name'" output=`exec_sql "$CHECK_REPLICATION_SLOT_sql"` echo "$output" } # On postgreSQL 10 or later, "location" means "lsn". get_my_location() { local rc local output local replay_loc local receive_loc local output1 local output2 local log1 local log2 local newer_location output=`exec_sql "$CHECK_XLOG_LOC_SQL"` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc err "Can't get my xlog location." return 1 fi replay_loc=`echo $output | cut -d "|" -f 1` receive_loc=`echo $output | cut -d "|" -f 2` output1=`echo "$replay_loc" | cut -d "/" -f 1` output2=`echo "$replay_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` replay_loc="${log1}${log2}" output1=`echo "$receive_loc" | cut -d "/" -f 1` output2=`echo "$receive_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` receive_loc="${log1}${log2}" newer_location=`printf "$replay_loc\n$receive_loc" | sort -r | head -1` echo "$newer_location" return 0 } # On postgreSQL 10 or later, "xlog_location" means "wal_lsn". show_xlog_location() { local location location=`get_my_location` || return 1 exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location" } # On postgreSQL 10 or later, "xlog_location" means "wal_lsn". delete_xlog_location() { exec_with_retry 5 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -D } show_master_baseline() { local rc local location location=`get_my_location` ocf_log info "My master baseline : $location." 
exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location" } delete_master_baseline() { exec_with_retry 5 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -D } set_async_mode_all() { [ "$OCF_RESKEY_rep_mode" = "sync" ] || return 0 ocf_log info "Set all nodes into async mode." runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\"" if [ $? -ne 0 ]; then ocf_exit_reason "Can't set all nodes into async mode." return 1 fi return 0 } set_async_mode() { cat $REP_MODE_CONF | grep -q -E "(\"$1\")|([,' ]$1[,' ])" if [ $? -eq 0 ]; then ocf_log info "Setup $1 into async mode." runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\"" else ocf_log debug "$1 is already in async mode." return 0 fi exec_with_retry 0 reload_conf } set_sync_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then ocf_log debug "$sync_node_in_conf is already sync mode." else ocf_log info "Setup $1 into sync mode." runasowner -q err "echo \"synchronous_standby_names = '\\\"$1\\\"'\" > \"$REP_MODE_CONF\"" [ "$RE_CONTROL_SLAVE" = "false" ] && RE_CONTROL_SLAVE="true" exec_with_retry 0 reload_conf fi } reload_conf() { # Invoke pg_ctl runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload" if [ $? -eq 0 ]; then ocf_log info "Reload configuration file." else ocf_exit_reason "Can't reload configuration file." 
return 1 fi return 0 } user_recovery_conf() { local nodename_tmp # put archive_cleanup_command and recovery_end_command only when defined by user if [ -n "$OCF_RESKEY_archive_cleanup_command" ]; then echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'" fi if [ -n "$OCF_RESKEY_recovery_end_command" ]; then echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'" fi if use_replication_slot; then nodename_tmp=`echo "$NODENAME" | tr 'A-Z.-' 'a-z__'` echo "primary_slot_name = '${OCF_RESKEY_replication_slot_name}_$nodename_tmp'" fi } make_recovery_conf() { runasowner "touch $RECOVERY_CONF" if [ $? -ne 0 ]; then ocf_exit_reason "Can't create recovery.conf." return 1 fi cat > $RECOVERY_CONF <> $RECOVERY_CONF ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}" return 0 } # change pgsql-status. # arg1:node, arg2: value change_pgsql_status() { local output if ! is_node_online $1; then return 0 fi output=`$CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null` if [ "$output" != "$2" ]; then # If slave's disk is broken, RA cannot read PID file # and misjudges the PostgreSQL as down while it is running. # It causes overwriting of pgsql-status by Master because replication is still connected. if [ "$output" = "STOP" -o "$output" = "UNKNOWN" ]; then if [ "$1" != "$NODENAME" ]; then ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited." return 0 fi fi ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2." exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2" fi return 0 } # change pgsql-data-status. # arg1:node, arg2: value change_data_status() { local output if ! node_exist $1; then return 0 fi while : do output=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null` if [ "$output" != "$2" ]; then ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2." 
exec_with_retry 0 exec_with_timeout 0 "$CRM_ATTR_FOREVER" -N $1 -n $PGSQL_DATA_STATUS_ATTR -v "$2" else break fi done return 0 } # set master-score # arg1:node, arg2: score, arg3: resoure set_master_score() { local current_score current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null` if [ -n "$current_score" -a "$current_score" != "$2" ]; then ocf_log info "Changing $3 master score on $1 : $current_score->$2." exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2" fi return 0 } # change master-score # arg1:node, arg2: score change_master_score() { local instance if ! is_node_online $1; then return 0 fi if echo $OCF_RESOURCE_INSTANCE | grep -q ":"; then # If Pacemaker version is 1.0.x instance=0 while : do if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then break fi if [ "${RESOURCE_NAME}:${instance}" = "$OCF_RESOURCE_INSTANCE" ]; then instance=`expr $instance + 1` continue fi set_master_score $1 $2 "${RESOURCE_NAME}:${instance}" || return 1 instance=`expr $instance + 1` done else # If globally-unique=false and Pacemaker version is 1.1.8 or higher # Master/Slave resource has no instance number set_master_score $1 $2 ${RESOURCE_NAME} || return 1 fi return 0 } report_psql_error() { local rc local loglevel local message rc=$1 loglevel=${2:-err} message="$3" ocf_log $loglevel "$message rc=$rc" if [ $rc -eq 1 ]; then ocf_exit_reason "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." elif [ $rc -eq 2 ]; then ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command." elif [ $rc -eq 3 ]; then ocf_exit_reason "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." fi } # # timeout management function # arg1 timeout >= 0 (if arg1 is 0, OCF_RESKEY_crm_attr_timeout is used.) 
# arg2 : command
# arg3 : command's args
#
# Runs the command in the background and polls it once per second. When the
# countdown reaches zero the command is killed with SIGKILL and 1 is
# returned; otherwise the exit status of the command itself is returned.
# The command line is deliberately expanded unquoted: callers pass
# multi-word strings such as "$CRM_ATTR_FOREVER" as one argument and rely on
# it being split into command and options here.
exec_with_timeout() {
    local func_pid
    local count=$OCF_RESKEY_crm_attr_timeout

    if [ "$1" -ne 0 ]; then
        count=$1
    fi
    shift

    $* &
    func_pid=$!
    # give short-lived commands a chance to finish before the first poll
    sleep .1

    while kill -s 0 $func_pid >/dev/null 2>&1; do
        sleep 1
        count=$((count - 1))
        if [ $count -le 0 ]; then
            ocf_exit_reason "\"$*\" (pid=$func_pid) timed out."
            kill -s 9 $func_pid >/dev/null 2>&1
            return 1
        fi
        ocf_log info "Waiting($count). \"$*\" (pid=$func_pid)."
    done
    wait $func_pid
}

# retry command when command doesn't return 0
# arg1       : count >= 0 (if arg1 is 0, it retries command in infinitum(1day))
# arg2..argN : command and args
#
# On success the captured stdout of the command is written to stdout
# verbatim and 0 is returned. After the last failed attempt the exit status
# of that attempt is returned. As in exec_with_timeout, the command line is
# expanded unquoted on purpose.
exec_with_retry() {
    local count="86400"
    local output
    local rc

    if [ "$1" -ne 0 ]; then
        count=$1
    fi
    shift

    while [ $count -gt 0 ]; do
        output=`$*`
        rc=$?
        if [ $rc -ne 0 ]; then
            ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"."
            count=$((count - 1))
            sleep 1
        else
            # Never use command output as a printf format string: a '%' or
            # '\' in it would be interpreted and corrupt the result.
            printf '%s' "$output"
            return 0
        fi
    done

    ocf_exit_reason "giving up executing \"$*\""
    return $rc
}

# Succeeds when the (lower-cased) crm_mon output has a "node <arg1>" line
# that does not contain "offline".
# arg1: node name (lower case)
is_node_online() {
    print_crm_mon | tr '[A-Z]' '[a-z]' | grep -e "^node $1 " -e "^node $1:" | grep -q -v "offline"
}

# Succeeds when the (lower-cased) crm_mon output has a "node <arg1>" line.
# The name must be followed by a space, a colon or end of line so that
# "pg1" does not match a line for "pg10".
# arg1: node name (lower case)
node_exist() {
    print_crm_mon | tr '[A-Z]' '[a-z]' | grep -q -e "^node $1 " -e "^node $1:" -e "^node $1\$"
}

# Like have_binary, but records an exit reason when the command is missing.
# arg1: command name or path
# Returns 0 when found, 1 otherwise.
check_binary2() {
    if ! have_binary "$1"; then
        ocf_exit_reason "Setup problem: couldn't find command: $1"
        return 1
    fi
    return 0
}

# Check that the configuration file exists.
# arg1: path of the configuration file
# Returns 0 when it is a regular file, 1 when it is missing during a probe
# (logged at info level only), 2 when it is missing otherwise.
check_config() {
    local rc=0

    if [ ! -f "$1" ]; then
        if ocf_is_probe; then
            ocf_log info "Configuration file is $1 not readable during probe."
            rc=1
        else
            ocf_exit_reason "Configuration file $1 doesn't exist"
            rc=2
        fi
    fi

    return $rc
}

# Validate most critical parameters
#
# Besides validating, this function initialises the globals used by the
# replication code (REP_MODE_CONF, CRM_ATTR_*, CHECK_*_SQL, NODE_LIST, ...)
# and, in replication mode, adds or removes the rep_mode.conf include
# directive in the PostgreSQL configuration file and prepares the tmpdir.
# Returns $OCF_SUCCESS or one of the $OCF_ERR_* codes.
pgsql_validate_all() {
    local version
    local check_config_rc
    local rep_mode_string
    local socket_directories
    local rc

    version=$(cat "$OCF_RESKEY_pgdata/PG_VERSION")

    if ! check_binary2 "$OCF_RESKEY_pgctl" ||
       ! check_binary2 "$OCF_RESKEY_psql"; then
        return $OCF_ERR_INSTALLED
    fi

    check_config "$OCF_RESKEY_config"
    check_config_rc=$?
    [ $check_config_rc -eq 2 ] && return $OCF_ERR_INSTALLED
    if [ $check_config_rc -eq 0 ]; then
        ocf_version_cmp "$version" "9.3"
        if [ $? -eq 0 ]; then
            : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`}
        else
            # unix_socket_directories is used by PostgreSQL 9.3 or higher.
            socket_directories=`get_pgsql_param unix_socket_directories`
            if [ -n "$socket_directories" ]; then
                # unix_socket_directories may have multiple socket directories and the pgsql RA can not know which directory is used for psql command.
                # Therefore, the user must set OCF_RESKEY_socketdir explicitly.
                if [ -z "$OCF_RESKEY_socketdir" ]; then
                    ocf_exit_reason "In PostgreSQL 9.3 or higher, socketdir can't be empty if you define unix_socket_directories in the postgresql.conf."
                    return $OCF_ERR_CONFIGURED
                fi
            fi
        fi
    fi

    # Quoted on purpose: an empty user name must not turn this into
    # "getent passwd", which lists every account and succeeds.
    if ! getent passwd "$OCF_RESKEY_pgdba" >/dev/null 2>&1; then
        ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ocf_is_probe; then
        ocf_log info "Don't check $OCF_RESKEY_pgdata during probe"
    else
        if ! runasowner "test -w $OCF_RESKEY_pgdata"; then
            ocf_exit_reason "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba"
            return $OCF_ERR_PERM
        fi
    fi

    if [ -n "$OCF_RESKEY_monitor_user" ] && [ -z "$OCF_RESKEY_monitor_password" ]; then
        ocf_exit_reason "monitor password can't be empty"
        return $OCF_ERR_CONFIGURED
    fi

    if [ -z "$OCF_RESKEY_monitor_user" ] && [ -n "$OCF_RESKEY_monitor_password" ]; then
        ocf_exit_reason "monitor_user has to be set if monitor_password is set"
        return $OCF_ERR_CONFIGURED
    fi

    if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
        # Same idiom as the 9.3 check above: status 0 means "older than".
        # An unreadable version is not treated as too old here.
        ocf_version_cmp "$version" "9.1"
        if [ $? -eq 0 ]; then
            ocf_exit_reason "Replication mode needs PostgreSQL 9.1 or higher."
            return $OCF_ERR_INSTALLED
        fi
        if [ -z "$OCF_RESKEY_master_ip" ]; then
            ocf_exit_reason "master_ip can't be empty."
            return $OCF_ERR_CONFIGURED
        fi
    fi

    if is_replication; then
        REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf
        PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock
        XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note
        CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
        CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
        CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
        CRM_RESOURCE="${HA_SBIN_DIR}/crm_resource"

        CAN_NOT_PROMOTE="-INFINITY"
        CAN_PROMOTE="100"
        PROMOTE_ME="1000"

        CHECK_MS_SQL="select pg_is_in_recovery()"
        CHECK_SYNCHRONOUS_STANDBY_NAMES_SQL="show synchronous_standby_names"
        # The xlog functions were renamed to wal/lsn in PostgreSQL 10;
        # status 1 or 2 means the version is 10 or newer.
        ocf_version_cmp "$version" "10"
        rc=$?
        if [ $rc -eq 1 ] || [ $rc -eq 2 ]; then
            CHECK_XLOG_LOC_SQL="select pg_last_wal_replay_lsn(),pg_last_wal_receive_lsn()"
        else
            CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()"
        fi
        CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication"

        PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status"
        PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status"
        PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc"
        PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline"

        NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'`
        RE_CONTROL_SLAVE="false"

        if ! ocf_is_ms; then
            ocf_exit_reason "Replication(rep_mode=async or sync) requires Master/Slave configuration."
            return $OCF_ERR_CONFIGURED
        fi
        if [ "$OCF_RESKEY_rep_mode" != "sync" ] && [ "$OCF_RESKEY_rep_mode" != "async" ]; then
            ocf_exit_reason "Invalid rep_mode : $OCF_RESKEY_rep_mode"
            return $OCF_ERR_CONFIGURED
        fi
        if [ -z "$NODE_LIST" ]; then
            ocf_exit_reason "node_list can't be empty."
            return $OCF_ERR_CONFIGURED
        fi
        if [ $check_config_rc -eq 0 ]; then
            rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA"
            if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
                if ! grep -q "^[[:space:]]*$rep_mode_string" "$OCF_RESKEY_config"; then
                    ocf_log info "adding include directive into $OCF_RESKEY_config"
                    echo "$rep_mode_string" >> "$OCF_RESKEY_config"
                fi
            else
                if grep -q "$rep_mode_string" "$OCF_RESKEY_config"; then
                    ocf_log info "deleting include directive from $OCF_RESKEY_config"
                    # escape the slashes of the path for use in a sed address
                    rep_mode_string=`echo $rep_mode_string | sed -e 's|/|\\\\/|g'`
                    sed -i "/$rep_mode_string/d" "$OCF_RESKEY_config"
                fi
            fi
        fi

        if ! mkdir -p "$OCF_RESKEY_tmpdir" ||
           ! chown "$OCF_RESKEY_pgdba" "$OCF_RESKEY_tmpdir" ||
           ! chmod 700 "$OCF_RESKEY_tmpdir"; then
            ocf_exit_reason "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba"
            return $OCF_ERR_PERM
        fi
    fi

    if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
        if ocf_is_ms; then
            ocf_exit_reason "Replication(rep_mode=slave) does not support Master/Slave configuration."
            return $OCF_ERR_CONFIGURED
        fi
    fi

    if use_replication_slot; then
        # status 0: older than 9.4, status 3: version could not be compared
        ocf_version_cmp "$version" "9.4"
        rc=$?
        if [ $rc -eq 0 ] || [ $rc -eq 3 ]; then
            ocf_exit_reason "Replication slot needs PostgreSQL 9.4 or higher."
            return $OCF_ERR_CONFIGURED
        fi

        if echo "$OCF_RESKEY_replication_slot_name" | grep -q -e '[^a-z0-9_]'; then
            ocf_exit_reason "Invalid replication_slot_name($OCF_RESKEY_replication_slot_name). only use lower case letters, numbers, and the underscore character."
            return $OCF_ERR_CONFIGURED
        fi
    fi

    return $OCF_SUCCESS
}


#
# Check if we need to create a log file
#
# arg1: path of the log file
# Creates the file and hands it to $OCF_RESKEY_pgdba (with that user's
# primary group id from the passwd entry) when it does not exist yet.
# Returns 0 when $OCF_RESKEY_pgdba can write to the file, 1 otherwise.
check_log_file() {
    if [ ! -e "$1" ]; then
        touch "$1" > /dev/null 2>&1
        chown "$OCF_RESKEY_pgdba:`getent passwd "$OCF_RESKEY_pgdba" | cut -d ":" -f 4`" "$1"
    fi

    # Check if $OCF_RESKEY_pgdba can write to the log file
    if ! runasowner "test -w $1"; then
        return 1
    fi

    return 0
}

#
# Check if we need to create stats temp directory in tmpfs
#
# Reads stats_temp_directory through get_pgsql_param; a relative value is
# taken relative to $OCF_RESKEY_pgdata. A missing directory is created,
# owned by $OCF_RESKEY_pgdba, with mode 700. Exits with $OCF_ERR_PERM when
# any of these steps fails.
check_stat_temp_directory() {
    local stats_temp

    stats_temp=`get_pgsql_param stats_temp_directory`

    if [ -z "$stats_temp" ]; then
        return
    fi

    # no leading "/" stripped means the path is relative
    if [ "${stats_temp#/}" = "$stats_temp" ]; then
        stats_temp="$OCF_RESKEY_pgdata/$stats_temp"
    fi

    if [ -d "$stats_temp" ]; then
        return
    fi

    if ! mkdir -p "$stats_temp"; then
        ocf_exit_reason "Can't create directory $stats_temp"
        exit $OCF_ERR_PERM
    fi

    if ! chown "$OCF_RESKEY_pgdba": "$stats_temp"; then
        ocf_exit_reason "Can't change ownership for $stats_temp"
        exit $OCF_ERR_PERM
    fi

    if ! chmod 700 "$stats_temp"; then
        ocf_exit_reason "Can't change permissions for $stats_temp"
        exit $OCF_ERR_PERM
    fi
}

#
# Check socket directory
#
# A missing $OCF_RESKEY_socketdir is created, handed to $OCF_RESKEY_pgdba
# and set to mode 2775. An existing one is probed by letting
# $OCF_RESKEY_pgdba create (and then removing) a scratch file in it.
# Exits with $OCF_ERR_PERM on any failure.
check_socket_dir() {
    if [ ! -d "$OCF_RESKEY_socketdir" ]; then
        if ! mkdir "$OCF_RESKEY_socketdir"; then
            ocf_exit_reason "Can't create directory $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        fi

        if ! chown "$OCF_RESKEY_pgdba:`getent passwd "$OCF_RESKEY_pgdba" | cut -d ":" -f 4`" "$OCF_RESKEY_socketdir"
        then
            ocf_exit_reason "Can't change ownership for $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        fi

        if ! chmod 2775 "$OCF_RESKEY_socketdir"; then
            ocf_exit_reason "Can't change permissions for $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        fi
    else
        if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then
            ocf_exit_reason "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir"
            exit $OCF_ERR_PERM
        fi
        rm "$OCF_RESKEY_socketdir/test.$$"
    fi
}

# Print the output of "crm_mon -n1". The output is fetched once and cached
# in CRM_MON_OUTPUT; later calls in the same shell print the cached text.
print_crm_mon() {
    if [ -z "$CRM_MON_OUTPUT" ]; then
        CRM_MON_OUTPUT=`exec_with_retry 0 crm_mon -n1`
    fi
    # print the text verbatim, never as a printf format string
    printf '%s\n' "$CRM_MON_OUTPUT"
}

#
#   'main' starts here...
#

# Exactly one argument (the requested action) is expected.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_GENERIC
fi

# Globals derived from the resource parameters.
PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid
BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label
# resource name without the ":<instance>" suffix of a clone instance
RESOURCE_NAME=${OCF_RESOURCE_INSTANCE%%:*}
PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status"
RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf
NODENAME=$(ocf_local_nodename | tr '[A-Z]' '[a-z]')

# These two actions are answered before any validation takes place.
case "$1" in
    methods)
        pgsql_methods
        exit $?
        ;;
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
esac

pgsql_validate_all
rc=$?

[ "$1" = "validate-all" ] && exit $rc

# With an invalid configuration, stop still reports success and
# monitor/status report "not running"; every other action fails with the
# validation result.
if [ $rc -ne 0 ]; then
    case "$1" in
        stop)
            if is_replication; then
                change_pgsql_status "$NODENAME" "UNKNOWN"
            fi
            exit $OCF_SUCCESS
            ;;
        monitor)
            exit $OCF_NOT_RUNNING
            ;;
        status)
            exit $OCF_NOT_RUNNING
            ;;
        *)
            exit $rc
            ;;
    esac
fi

US=`id -u -n`

# Quoted so that an empty value gives a clean mismatch instead of a
# "[" syntax error.
if [ "$US" != root ] && [ "$US" != "$OCF_RESKEY_pgdba" ]; then
    ocf_exit_reason "$0 must be run as root or $OCF_RESKEY_pgdba"
    exit $OCF_ERR_GENERIC
fi

# make psql command options
if [ -n "$OCF_RESKEY_monitor_user" ]; then
    PGUSER=$OCF_RESKEY_monitor_user; export PGUSER
    PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD
    psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb"
else
    psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb"
fi

# An explicit host wins over the socket directory.
if [ -n "$OCF_RESKEY_pghost" ]; then
    psql_options="$psql_options -h $OCF_RESKEY_pghost"
elif [ -n "$OCF_RESKEY_socketdir" ]; then
    psql_options="$psql_options -h $OCF_RESKEY_socketdir"
fi

if [ -n "$OCF_RESKEY_pgport" ]; then
    PGPORT="$OCF_RESKEY_pgport"; export PGPORT
fi

# Append the PostgreSQL library directory to the loader path.
if [ -n "$OCF_RESKEY_pglibs" ]; then
    if [ -n "$LD_LIBRARY_PATH" ]; then
        LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$OCF_RESKEY_pglibs"
    else
        LD_LIBRARY_PATH="$OCF_RESKEY_pglibs"
    fi
    export LD_LIBRARY_PATH
fi

# What kind of method was invoked?
case "$1" in status) if pgsql_status then ocf_log info "PostgreSQL is up" exit $OCF_SUCCESS else ocf_log info "PostgreSQL is down" exit $OCF_NOT_RUNNING fi;; monitor) pgsql_monitor exit $?;; start) pgsql_start exit $?;; promote) pgsql_promote exit $?;; demote) pgsql_demote exit $?;; notify) pgsql_notify exit $?;; stop) pgsql_stop exit $?;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/pingd b/heartbeat/pingd index 7e4d8395b..57fa0dba3 100755 --- a/heartbeat/pingd +++ b/heartbeat/pingd @@ -1,279 +1,297 @@ #!/bin/sh # # # pingd OCF Resource Agent # Records (in the CIB) the current number of ping nodes a # cluster node can connect to. # # Copyright (c) 2006 Andrew Beekhof # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_pidfile_default="$HA_RSCTMP/pingd-${OCF_RESOURCE_INSTANCE}" +OCF_RESKEY_user_default="root" +OCF_RESKEY_dampen_default="1s" +OCF_RESKEY_set_default="" +OCF_RESKEY_name_default="pingd" +OCF_RESKEY_section_default="" +OCF_RESKEY_multiplier_default="" +OCF_RESKEY_host_list_default="" +OCF_RESKEY_ignore_deprecation_default="false" + +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_dampen=${OCF_RESKEY_dampen_default}} +: ${OCF_RESKEY_set=${OCF_RESKEY_set_default}} +: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +: ${OCF_RESKEY_section=${OCF_RESKEY_section_default}} +: ${OCF_RESKEY_multiplier=${OCF_RESKEY_multiplier_default}} +: ${OCF_RESKEY_host_list=${OCF_RESKEY_host_list_default}} +: ${OCF_RESKEY_ignore_deprecation=${OCF_RESKEY_ignore_deprecation_default}} + ####################################################################### meta_data() { cat < 1.0 Deprecation warning: This agent is deprecated and may be removed from a future release. See the ocf:pacemaker:pingd resource agent for a supported alternative. -- This is a pingd Resource Agent. It records (in the CIB) the current number of ping nodes a node can connect to. Monitors connectivity to specific hosts or IP addresses ("ping nodes") (deprecated) PID file PID file - + The user we want to run pingd as The user we want to run pingd as - + The time to wait (dampening) further changes occur Dampening interval - + The name of the instance_attributes set to place the value in. Rarely needs to be specified. Set name - + The name of the attributes to set. This is the name to be used in the constraints. Attribute name - + The section place the value in. Rarely needs to be specified. Section name - + The number by which to multiply the number of connected ping nodes by Value multiplier - + The list of ping nodes to count. Defaults to all configured ping nodes. 
Rarely needs to be specified. Host list - + If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning - + END } ####################################################################### pingd_usage() { cat </dev/null if [ $? -eq 0 ]; then : Yes, user exists. We can further check his permission on crm_mon if necessary else ocf_exit_reason "The user $OCF_RESKEY_user does not exist!" exit $OCF_ERR_ARGS fi fi # Pidfile better be an absolute path case $OCF_RESKEY_pidfile in /*) ;; *) ocf_log warn "You should have pidfile($OCF_RESKEY_pidfile) of absolute path!" ;; esac # Check the update interval if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then : else ocf_exit_reason "Invalid update interval $OCF_RESKEY_update. It should be positive integer!" exit $OCF_ERR_ARGS fi echo "Validate OK" return $OCF_SUCCESS } if [ $# -ne 1 ]; then pingd_usage exit $OCF_ERR_ARGS fi -: ${OCF_RESKEY_pidfile:="$HA_RSCTMP/pingd-${OCF_RESOURCE_INSTANCE}"} -: ${OCF_RESKEY_name:="pingd"} -: ${OCF_RESKEY_dampen:="1s"} - if [ "$__OCF_ACTION" = "meta-data" ]; then meta_data exit $OCF_SUCCESS fi # Be obnoxious, log deprecation warning on every invocation (unless # suppressed by resource configuration). ocf_deprecated case $__OCF_ACTION in start) pingd_start ;; stop) pingd_stop ;; monitor) pingd_monitor ;; validate-all) pingd_validate ;; usage|help) pingd_usage exit $OCF_SUCCESS ;; *) pingd_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/podman b/heartbeat/podman index 858023555..51f6ba883 100755 --- a/heartbeat/podman +++ b/heartbeat/podman @@ -1,472 +1,478 @@ #!/bin/sh # # The podman HA resource agent creates and launches a podman container # based off a supplied podman image. Containers managed by this agent # are both created and removed upon the agent's start and stop actions. # # Copyright (c) 2014 David Vossel # Michele Baldessari # All Rights Reserved. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_reuse_default="0" + +: ${OCF_RESKEY_reuse=${OCF_RESKEY_reuse_default}} + ####################################################################### meta_data() { cat < 1.0 The podman HA resource agent creates and launches a podman container based off a supplied podman image. Containers managed by this agent are both created and removed upon the agent's start and stop actions. Podman container resource agent. The podman image to base this container off of. podman image The name to give the created container. By default this will be that resource's instance name. podman container name Allow the image to be pulled from the configured podman registry when the image does not exist locally. 
NOTE, this can drastically increase the time required to start the container if the image repository is pulled over the network. Allow pulling non-local images Add options to be appended to the 'podman run' command which is used when creating the container during the start action. This option allows users to do things such as setting a custom entry point and injecting environment variables into the newly created container. Note the '-d' option is supplied regardless of this value to force containers to run in the background. NOTE: Do not explicitly specify the --name argument in the run_opts. This agent will set --name using either the resource's instance or the name provided in the 'name' argument of this agent. run options Specify a command to launch within the container once it has initialized. run command A comma separated list of directories that the container is expecting to use. The agent will ensure they exist by running 'mkdir -p' Required mount points Specify the full path of a command to launch within the container to check the health of the container. This command must return 0 to indicate that the container is healthy. A non-zero return code will indicate that the container has failed and should be recovered. If 'podman exec' is supported, it is used to execute the command. If not, nsenter is used. Note: Using this method for monitoring processes inside a container is not recommended, as containerd tries to track processes running inside the container and does not deal well with many short-lived processes being spawned. Ensure that your container monitors its own processes and terminates on fatal error rather than invoking a command from the outside. monitor command Kill a container immediately rather than waiting for it to gracefully shutdown force kill Allow the container to be reused once it is stopped. By default, containers get removed once they are stopped. Enable this option to have the particular one persist when this happens. 
reuse container - + END } ####################################################################### REQUIRE_IMAGE_PULL=0 podman_usage() { cat <&1) rc=$? if [ $rc -eq 127 ]; then ocf_log err "monitor cmd failed (rc=$rc), output: $out" ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container." # there is no recovering from this, exit immediately exit $OCF_ERR_ARGS elif [ $rc -ne 0 ]; then ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out" rc=$OCF_ERR_GENERIC else ocf_log debug "monitor cmd passed: exit code = $rc" fi return $rc } container_exists() { podman inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1 } remove_container() { if ocf_is_true "$OCF_RESKEY_reuse"; then # never remove the container if we have reuse enabled. return 0 fi container_exists if [ $? -ne 0 ]; then # don't attempt to remove a container that doesn't exist return 0 fi ocf_log notice "Cleaning up inactive container, ${CONTAINER}." ocf_run podman rm $CONTAINER } podman_simple_status() { local val # retrieve the 'Running' attribute for the container val=$(podman inspect --format {{.State.Running}} $CONTAINER 2>/dev/null) if [ $? -ne 0 ]; then #not running as a result of container not being found return $OCF_NOT_RUNNING fi if ocf_is_true "$val"; then # container exists and is running return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } podman_monitor() { if [ -z "$OCF_RESKEY_monitor_cmd" ]; then podman_simple_status return $? fi monitor_cmd_exec } podman_create_mounts() { oldIFS="$IFS" IFS="," for directory in $OCF_RESKEY_mount_points; do mkdir -p "$directory" done IFS="$oldIFS" } podman_start() { podman_create_mounts local run_opts="-d --name=${CONTAINER}" # check to see if the container has already started podman_simple_status if [ $? 
-eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi if [ -n "$OCF_RESKEY_run_opts" ]; then run_opts="$run_opts $OCF_RESKEY_run_opts" fi if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}" podman pull "${OCF_RESKEY_image}" if [ $? -ne 0 ]; then ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}" return $OCF_ERR_GENERIC fi fi if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then ocf_log info "starting existing container $CONTAINER." ocf_run podman start $CONTAINER else # make sure any previous container matching our container name is cleaned up first. # we already know at this point it wouldn't be running remove_container ocf_log info "running container $CONTAINER for the first time" ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd fi if [ $? -ne 0 ]; then ocf_exit_reason "podman failed to launch container" return $OCF_ERR_GENERIC fi # wait for monitor to pass before declaring that the container is started while true; do podman_simple_status if [ $? -ne $OCF_SUCCESS ]; then ocf_exit_reason "Newly created podman container exited after start" return $OCF_ERR_GENERIC fi monitor_cmd_exec if [ $? -eq $OCF_SUCCESS ]; then ocf_log notice "Container $CONTAINER started successfully" return $OCF_SUCCESS fi ocf_exit_reason "waiting on monitor_cmd to pass after start" sleep 1 done } podman_stop() { local timeout=60 podman_simple_status if [ $? -eq $OCF_NOT_RUNNING ]; then remove_container return $OCF_SUCCESS fi if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 )) if [ $timeout -lt 10 ]; then timeout=10 fi fi if ocf_is_true "$OCF_RESKEY_force_kill"; then ocf_run podman kill $CONTAINER else ocf_log debug "waiting $timeout second[s] before killing container" ocf_run podman stop -t=$timeout $CONTAINER fi if [ $? -ne 0 ]; then ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." 
return $OCF_ERR_GENERIC fi remove_container if [ $? -ne 0 ]; then ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } image_exists() { # if no tag was specified, use default "latest" local COLON_FOUND=0 local SLASH_FOUND=0 local SERVER_NAME="" local IMAGE_NAME="${OCF_RESKEY_image}" local IMAGE_TAG="latest" SLASH_FOUND="$(echo "${OCF_RESKEY_image}" | grep -o '/' | grep -c .)" if [ ${SLASH_FOUND} -ge 1 ]; then SERVER_NAME="$(echo ${IMAGE_NAME} | cut -d / -f 1-${SLASH_FOUND})" IMAGE_NAME="$(echo ${IMAGE_NAME} | awk -F'/' '{print $NF}')" fi COLON_FOUND="$(echo "${IMAGE_NAME}" | grep -o ':' | grep -c .)" if [ ${COLON_FOUND} -ge 1 ]; then IMAGE_TAG="$(echo ${IMAGE_NAME} | awk -F':' '{print $NF}')" IMAGE_NAME="$(echo ${IMAGE_NAME} | cut -d : -f 1-${COLON_FOUND})" fi # IMAGE_NAME might be following formats: # - image # - repository:port/image # - docker.io/image (some distro will display "docker.io/" as prefix) podman images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/|${SERVER_NAME}\/)?${IMAGE_NAME}:${IMAGE_TAG}\$" if [ $? -eq 0 ]; then # image found return 0 fi if ocf_is_true "$OCF_RESKEY_allow_pull"; then REQUIRE_IMAGE_PULL=1 ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start" return 0 fi # image not found. return 1 } podman_validate() { check_binary podman if [ -z "$OCF_RESKEY_image" ]; then ocf_exit_reason "'image' option is required" exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_monitor_cmd" ]; then podman exec --help >/dev/null 2>&1 if [ ! $? ]; then ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified" check_binary nsenter fi fi image_exists if [ $? -ne 0 ]; then ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found." 
exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } # TODO : # When a user starts plural clones in a node in globally-unique, a user cannot appoint plural name parameters. # When a user appoints reuse, the resource agent cannot connect plural clones with a container. if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then if [ -n "$OCF_RESKEY_name" ]; then if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] then ocf_exit_reason "Cannot make plural clones from the same name parameter." exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] then ocf_exit_reason "Cannot make plural master from the same name parameter." exit $OCF_ERR_CONFIGURED fi fi : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`} else : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}} fi CONTAINER=$OCF_RESKEY_name case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; start) podman_validate podman_start;; stop) podman_stop;; monitor) podman_monitor;; validate-all) podman_validate;; usage|help) podman_usage exit $OCF_SUCCESS ;; *) podman_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/portblock b/heartbeat/portblock index cde10b940..2e84a101f 100755 --- a/heartbeat/portblock +++ b/heartbeat/portblock @@ -1,572 +1,582 @@ #!/bin/sh # # portblock: iptables temporary portblocking control # # Author: Sun Jiang Dong (initial version) # Philipp Reisner (per-IP filtering) # # License: GNU General Public License (GPL) # # Copyright: (C) 2005 International Business Machines # # OCF parameters are as below: # OCF_RESKEY_protocol # OCF_RESKEY_portno # OCF_RESKEY_action # OCF_RESKEY_ip # OCF_RESKEY_tickle_dir # OCF_RESKEY_sync_script ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults +OCF_RESKEY_protocol_default="" +OCF_RESKEY_portno_default="" +OCF_RESKEY_action_default="" OCF_RESKEY_ip_default="0.0.0.0/0" OCF_RESKEY_reset_local_on_unblock_stop_default="false" +OCF_RESKEY_tickle_dir_default="" +OCF_RESKEY_sync_script_default="" +: ${OCF_RESKEY_protocol=${OCF_RESKEY_protocol_default}} +: ${OCF_RESKEY_portno=${OCF_RESKEY_portno_default}} +: ${OCF_RESKEY_action=${OCF_RESKEY_action_default}} : ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} : ${OCF_RESKEY_reset_local_on_unblock_stop=${OCF_RESKEY_reset_local_on_unblock_stop_default}} +: ${OCF_RESKEY_tickle_dir=${OCF_RESKEY_tickle_dir_default}} +: ${OCF_RESKEY_sync_script=${OCF_RESKEY_sync_script_default}} ####################################################################### CMD=`basename $0` TICKLETCP=$HA_BIN/tickle_tcp usage() { cat <&2 usage: $CMD {start|stop|status|monitor|meta-data|validate-all} $CMD is used to temporarily block ports using iptables. It can be used to blackhole a port before bringing up an IP address, and enable it after a service is started. To do that for samba, the following can be used: crm configure < 1.0 Resource script for portblock. It is used to temporarily block ports using iptables. In addition, it may allow for faster TCP reconnects for clients on failover. Use that if there are long lived TCP connections to an HA service. This feature is enabled by setting the tickle_dir parameter and only in concert with action set to unblock. Note that the tickle ACK function is new as of version 3.0.2 and hasn't yet seen widespread use. Block and unblocks access to TCP and UDP ports The protocol used to be blocked/unblocked. protocol - + The port number used to be blocked/unblocked. portno - + The action (block/unblock) to be done on the protocol::portno. 
action - + If for some reason the long lived server side TCP sessions won't be cleaned up by a reconfiguration/flush/stop of whatever services this portblock protects, they would linger in the connection table, even after the IP is gone and services have been switched over to another node. An example would be the default NFS kernel server. These "known" connections may seriously confuse and delay a later switchback. Enabling this option will cause this agent to try to get rid of these connections by injecting a temporary iptables rule to TCP-reset outgoing packets from the blocked ports, and additionally tickle them locally, just before it starts to DROP incoming packets on "unblock stop". (try to) reset server TCP sessions when unblock stops The IP address used to be blocked/unblocked. ip The shared or local directory (_must_ be absolute path) which stores the established TCP connections. Tickle directory - + If the tickle_dir is a local directory, then the TCP connection state file has to be replicated to other nodes in the cluster. It can be csync2 (default), some wrapper of rsync, or whatever. It takes the file name as a single argument. For csync2, set it to "csync2 -xv". Connection state file synchronization script - + END } # # Because this is the normal usage, we consider "block" # resources to be pseudo-resources -- that is, their status can't # be reliably determined through external means. # This is because we expect an "unblock" resource to come along # and disable us -- but we're still in some sense active... 
# #active_grep_pat {udp|tcp} portno,portno active_grep_pat() { w="[ ][ ]*" any="0\\.0\\.0\\.0/0" echo "^DROP${w}${1}${w}--${w}${any}${w}${3}${w}multiport${w}dports${w}${2}\>" } #chain_isactive {udp|tcp} portno,portno ip chain_isactive() { PAT=`active_grep_pat "$1" "$2" "$3"` $IPTABLES $wait -n -L INPUT | grep "$PAT" >/dev/null } save_tcp_connections() { [ -z "$OCF_RESKEY_tickle_dir" ] && return statefile=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip if [ -z "$OCF_RESKEY_sync_script" ]; then netstat -tn |awk -F '[:[:space:]]+' ' $8 == "ESTABLISHED" && $4 == "'$OCF_RESKEY_ip'" \ {printf "%s:%s\t%s:%s\n", $4,$5, $6,$7}' | dd of="$statefile".new conv=fsync status=none && mv "$statefile".new "$statefile" else netstat -tn |awk -F '[:[:space:]]+' ' $8 == "ESTABLISHED" && $4 == "'$OCF_RESKEY_ip'" \ {printf "%s:%s\t%s:%s\n", $4,$5, $6,$7}' \ > $statefile $OCF_RESKEY_sync_script $statefile > /dev/null 2>&1 & fi } tickle_remote() { [ -z "$OCF_RESKEY_tickle_dir" ] && return echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip [ -r $f ] || return $TICKLETCP -n 3 < $f } tickle_local() { [ -z "$OCF_RESKEY_tickle_dir" ] && return f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip [ -r $f ] || return checkcmd="netstat -tn" if ! have_binary "netstat"; then checkcmd="ss -Htn" fi # swap "local" and "remote" address, # so we tickle ourselves. # We set up a REJECT with tcp-reset before we do so, so we get rid of # the no longer wanted potentially long lived "ESTABLISHED" connection # entries on the IP we are going to delet in a sec. These would get in # the way if we switch-over and then switch-back in quick succession. 
local i awk '{ print $2, $1; }' $f | $TICKLETCP $checkcmd | grep -Fw $OCF_RESKEY_ip || return for i in 0.1 0.5 1 2 4 ; do sleep $i awk '{ print $2, $1; }' $f | $TICKLETCP $checkcmd | grep -Fw $OCF_RESKEY_ip || break done } SayActive() { echo "$CMD DROP rule for INPUT chain [$*] is running (OK)" } SayConsideredActive() { echo "$CMD DROP rule for INPUT chain [$*] considered to be running (OK)" } SayInactive() { echo "$CMD DROP rule for INPUT chain [$*] is inactive" } #IptablesStatus {udp|tcp} portno,portno ip {block|unblock} IptablesStatus() { local rc rc=$OCF_ERR_GENERIC activewords="$CMD $1 $2 is running (OK)" if chain_isactive "$1" "$2" "$3"; then case $4 in block) SayActive $* rc=$OCF_SUCCESS ;; *) SayInactive $* rc=$OCF_NOT_RUNNING ;; esac else case $4 in block) if ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" status; then SayConsideredActive $* rc=$OCF_SUCCESS else SayInactive $* rc=$OCF_NOT_RUNNING fi ;; *) if ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" status; then SayActive $* #This is only run on real monitor events. save_tcp_connections rc=$OCF_SUCCESS else SayInactive $* rc=$OCF_NOT_RUNNING fi ;; esac fi return $rc } #IptablesBLOCK {udp|tcp} portno,portno ip IptablesBLOCK() { local rc=0 local try_reset=false if [ "$1/$4/$__OCF_ACTION" = tcp/unblock/stop ] && ocf_is_true $reset_local_on_unblock_stop then try_reset=true fi if chain_isactive "$1" "$2" "$3" then : OK -- chain already active else if $try_reset ; then $IPTABLES $wait -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset tickle_local fi $IPTABLES $wait -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP rc=$? 
if $try_reset ; then $IPTABLES $wait -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset fi fi return $rc } #IptablesUNBLOCK {udp|tcp} portno,portno ip IptablesUNBLOCK() { if chain_isactive "$1" "$2" "$3" then $IPTABLES $wait -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP else : Chain Not active fi return $? } #IptablesStart {udp|tcp} portno,portno ip {block|unblock} IptablesStart() { ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" start case $4 in block) IptablesBLOCK "$@";; unblock) IptablesUNBLOCK "$@" rc=$? tickle_remote #ignore run_tickle_tcp exit code! return $rc ;; *) usage; return 1; esac return $? } #IptablesStop {udp|tcp} portno,portno ip {block|unblock} IptablesStop() { ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" stop case $4 in block) IptablesUNBLOCK "$@";; unblock) save_tcp_connections IptablesBLOCK "$@" ;; *) usage; return 1;; esac return $? } # # Check if the port is valid, this function code is not decent, but works # CheckPort() { # Examples of valid port: "1080", "1", "0080" # Examples of invalid port: "1080bad", "0", "0000", "" echo $1 |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*' } IptablesValidateAll() { check_binary $IPTABLES case $protocol in tcp|udp) ;; *) ocf_log err "Invalid protocol $protocol!" exit $OCF_ERR_CONFIGURED ;; esac if CheckPort "$portno"; then : else ocf_log err "Invalid port number $portno!" exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_tickle_dir" ]; then if [ x"$action" != x"unblock" ]; then ocf_log err "Tickles are only useful with action=unblock!" exit $OCF_ERR_CONFIGURED fi if [ ! -d "$OCF_RESKEY_tickle_dir" ]; then ocf_log err "The tickle dir doesn't exist!" exit $OCF_ERR_INSTALLED fi fi case $action in block|unblock) ;; *) ocf_log err "Invalid action $action!" 
exit $OCF_ERR_CONFIGURED ;; esac if ocf_is_true $reset_local_on_unblock_stop; then if [ $action != unblock ] ; then ocf_log err "reset_local_on_unblock_stop is only relevant with action=unblock" exit $OCF_ERR_CONFIGURED fi if [ -z $OCF_RESKEY_tickle_dir ] ; then ocf_log warn "reset_local_on_unblock_stop works best with tickle_dir enabled as well" fi fi return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac if [ -z "$OCF_RESKEY_protocol" ]; then ocf_log err "Please set OCF_RESKEY_protocol" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_portno" ]; then ocf_log err "Please set OCF_RESKEY_portno" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_action" ]; then ocf_log err "Please set OCF_RESKEY_action" exit $OCF_ERR_CONFIGURED fi # iptables v1.4.20+ is required to use -w (wait) version=$(iptables -V | awk -F ' v' '{print $NF}') ocf_version_cmp "$version" "1.4.19.1" if [ "$?" -eq "2" ]; then wait="-w" else wait="" fi protocol=$OCF_RESKEY_protocol portno=$OCF_RESKEY_portno action=$OCF_RESKEY_action ip=$OCF_RESKEY_ip reset_local_on_unblock_stop=$OCF_RESKEY_reset_local_on_unblock_stop case $1 in start) IptablesStart $protocol $portno $ip $action ;; stop) IptablesStop $protocol $portno $ip $action ;; status|monitor) IptablesStatus $protocol $portno $ip $action ;; validate-all) IptablesValidateAll ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/postfix b/heartbeat/postfix index 703aa3246..3ba522319 100755 --- a/heartbeat/postfix +++ b/heartbeat/postfix @@ -1,415 +1,422 @@ #!/bin/sh # # Resource script for Postfix # # Description: Manages Postfix as an OCF resource in # an high-availability setup. 
# # Author: Raoul Bhatia : Original Author # License: GNU General Public License (GPL) # Note: If you want to run multiple Postfix instances, please see # http://amd.co.at/adminwiki/Postfix#Adding_a_Second_Postfix_Instance_on_one_Server # http://www.postfix.org/postconf.5.html # # # usage: $0 {start|stop|reload|monitor|validate-all|meta-data} # # The "start" arg starts a Postfix instance # # The "stop" arg stops it. # # OCF parameters: # OCF_RESKEY_binary # OCF_RESKEY_config_dir # OCF_RESKEY_parameters # ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -: ${OCF_RESKEY_binary="/usr/sbin/postfix"} -: ${OCF_RESKEY_config_dir=""} -: ${OCF_RESKEY_parameters=""} +# Parameter defaults + +OCF_RESKEY_binary_default="/usr/sbin/postfix" +OCF_RESKEY_config_dir_default="" +OCF_RESKEY_parameters_default="" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config_dir=${OCF_RESKEY_config_dir_default}} +: ${OCF_RESKEY_parameters=${OCF_RESKEY_parameters_default}} + USAGE="Usage: $0 {start|stop|reload|monitor|validate-all|meta-data}"; ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 0.1 This script manages Postfix as an OCF resource in a high-availability setup. Manages a highly available Postfix mail server instance Full path to the Postfix binary. For example, "/usr/sbin/postfix". Full path to Postfix binary - + Full path to a Postfix configuration directory. For example, "/etc/postfix". Full path to configuration directory - + The Postfix daemon may be called with additional parameters. Specify any of them here. - + END } postfix_running() { local loglevel loglevel=${1:-err} # run Postfix status if available if ocf_is_true $status_support; then $binary $OPTION_CONFIG_DIR status 2>&1 ret=$? 
if [ $ret -ne 0 ]; then ocf_log $loglevel "Postfix status: " $ret fi return $ret fi # manually check Postfix's pid PIDFILE=${queue_dir}/pid/master.pid if [ -f $PIDFILE ]; then PID=`head -n 1 $PIDFILE` kill -s 0 $PID >/dev/null 2>&1 && [ `ps -p $PID | grep master | wc -l` -eq 1 ] return $? fi # Postfix is not running false } postfix_start() { # if Postfix is running return success if postfix_running info; then ocf_log info "Postfix already running." return $OCF_SUCCESS fi # start Postfix $binary $OPTIONS start >/dev/null 2>&1 ret=$? if [ $ret -ne 0 ]; then ocf_exit_reason "Postfix returned error: " $ret return $OCF_ERR_GENERIC fi # grant some time for startup/forking the sub processes # and loop initial monitoring until success or timeout while true; do sleep 1 # break if postfix is up and running; log failure otherwise postfix_running info && break ocf_log info "Postfix failed initial monitor action: " $ret done ocf_log info "Postfix started." return $OCF_SUCCESS } postfix_stop() { # if Postfix is not running return success if ! postfix_running info; then ocf_log info "Postfix already stopped." return $OCF_SUCCESS fi # stop Postfix $binary $OPTIONS stop >/dev/null 2>&1 ret=$? if [ $ret -ne 0 ]; then ocf_exit_reason "Postfix returned an error while stopping: " $ret return $OCF_ERR_GENERIC fi # grant some time for shutdown and recheck 5 times for i in 1 2 3 4 5; do if postfix_running info; then sleep 1 else break fi done # escalate to abort if we did not stop by now # @TODO shall we loop here too? if postfix_running info; then ocf_exit_reason "Postfix failed to stop. Escalating to 'abort'." $binary $OPTIONS abort >/dev/null 2>&1; ret=$? sleep 5 # postfix abort did not succeed if postfix_running; then ocf_exit_reason "Postfix failed to abort." return $OCF_ERR_GENERIC fi fi ocf_log info "Postfix stopped." return $OCF_SUCCESS } postfix_reload() { if postfix_running; then ocf_log info "Reloading Postfix." 
$binary $OPTIONS reload fi } postfix_monitor() { local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi if postfix_running $status_loglevel; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } postfix_validate_all() { # check that the Postfix binaries exist and can be executed check_binary "$binary" check_binary "postconf" # if true, run in-depth directory checks dir_check=true # check config_dir and alternate_config_directories parameter if [ "x$config_dir" != "x" ]; then if [ ! -d "$config_dir" ]; then if ocf_is_probe; then ocf_log info "Postfix configuration directory '$config_dir' not readable during probe." # skip in-depth directory checks if config file isn't readable during probe dir_check=false else ocf_exit_reason "Postfix configuration directory '$config_dir' does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi alternate_config_directories=`postconf -h alternate_config_directories 2>/dev/null | grep "$config_dir/\?"` if [ "x$alternate_config_directories" = "x" ]; then ocf_exit_reason "Postfix main configuration must contain correct 'alternate_config_directories' parameter." return $OCF_ERR_INSTALLED fi fi # check spool/queue and data directories (if applicable) # this is required because "postfix check" does not catch all errors if ocf_is_true $dir_check; then if [ ! -d "$queue_dir" ]; then if ocf_is_probe; then ocf_log info "Postfix queue directory '$queue_dir' not readable during probe." else ocf_exit_reason "Postfix queue directory '$queue_dir' does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi if ocf_is_true $status_support; then data_dir=`postconf $OPTION_CONFIG_DIR -h data_directory 2>/dev/null` data_dir_count=`echo "$data_dir" | tr ',' ' ' | wc -w` if [ $data_dir_count -gt 1 ]; then ocf_exit_reason "Postfix data directory '$orig_data_dir' cannot be set to multiple directories." return $OCF_ERR_INSTALLED fi if [ ! 
-d "$data_dir" ]; then if ocf_is_probe; then ocf_log info "Postfix data directory '$data_dir' not readable during probe." else ocf_exit_reason "Postfix data directory '$data_dir' does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi fi # check directory permissions if ocf_is_true $status_support; then user=`postconf $OPTION_CONFIG_DIR -h mail_owner 2>/dev/null` for dir in $data_dir; do if ! su -s /bin/sh - $user -c "test -w $dir"; then if ocf_is_probe; then ocf_log info "Directory '$dir' is not writable by user '$user' during probe." else ocf_exit_reason "Directory '$dir' is not writable by user '$user'." return $OCF_ERR_PERM; fi fi done fi fi # run Postfix internal check, if not probing if ! ocf_is_probe; then $binary $OPTIONS check >/dev/null 2>&1 ret=$? if [ $ret -ne 0 ]; then ocf_exit_reason "Postfix 'check' failed: " $ret return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } # # Main # if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi binary=$OCF_RESKEY_binary config_dir=$OCF_RESKEY_config_dir parameters=$OCF_RESKEY_parameters # handle parameters case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) usage exit $OCF_SUCCESS ;; esac # build Postfix options string *outside* to access from each method OPTIONS='' OPTION_CONFIG_DIR='' # check for Postfix's postconf binary check_binary "postconf" # check if the Postfix config_dir exist if [ "x$config_dir" != "x" ]; then # remove all trailing slashes to ease "postconf alternate_config_directories" match config_dir=`echo $config_dir | sed 's/\/*$//'` # reset config_dir if it equals Postfix's default config_directory postconf -h config_directory 2>/dev/null | grep -q "^$config_dir/\?$" if [ $? 
-eq 0 ]; then config_dir="" fi # set OPTIONS if config_dir is still set # save OPTION_CONFIG_DIR seperatly if [ "x$config_dir" != "x" ]; then OPTION_CONFIG_DIR="-c $config_dir" OPTIONS=$OPTION_CONFIG_DIR fi fi # add all additional parameters to options string if [ "x$parameters" != "x" ]; then OPTIONS="$OPTIONS $parameters" fi # important directories, used in different methods queue_dir=`postconf $OPTION_CONFIG_DIR -h queue_directory 2>/dev/null` # check Postfix version and status support status_support=false postfix_version=`postconf -h mail_version 2>/dev/null` ocf_version_cmp "$postfix_version" "2.5.0" ret=$? # we need Postfix 2.5.0 or greater for status/data_directory support if [ $ret -eq 1 -o $ret -eq 2 ]; then status_support=true fi postfix_validate_all ret=$? LSB_STATUS_STOPPED=3 if [ $ret -ne $OCF_SUCCESS ]; then case $1 in stop) exit $OCF_SUCCESS ;; *) exit $ret;; esac fi case $1 in monitor) postfix_monitor exit $? ;; start) postfix_start exit $? ;; stop) postfix_stop exit $? ;; reload) postfix_reload exit $? ;; validate-all) exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/pound b/heartbeat/pound index e77a00a09..7a0ad547b 100755 --- a/heartbeat/pound +++ b/heartbeat/pound @@ -1,339 +1,343 @@ #!/bin/sh # # # Pound # # Description: Manage pound instances as a HA resource # # Author: Taro Matsuzawa # # License: GNU General Public License (GPL) # # See usage() for more details # # OCF instance parameters: # OCF_RESKEY_pid # OCF_RESKEY_binary # OCF_RESKEY_ctl_binary # OCF_RESKEY_socket_path # OCF_RESKEY_config # OCF_RESKEY_name # OCF_RESKEY_maxfiles # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Set default paramenter values # Set these two first, as other defaults depend on it OCF_RESKEY_name_default=${OCF_RESOURCE_INSTANCE} : ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +OCF_RESKEY_config_default="" OCF_RESKEY_binary_default=pound OCF_RESKEY_ctl_binary_default=poundctl OCF_RESKEY_pid_default=/var/run/pound_${OCF_RESKEY_name}.pid OCF_RESKEY_socket_path_default=/var/lib/pound/pound.cfg +OCF_RESKEY_maxfiles_default="" +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_ctl_binary=${OCF_RESKEY_ctl_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_socket_path=${OCF_RESKEY_socket_path_default}} +: ${OCF_RESKEY_maxfiles=${OCF_RESKEY_maxfiles_default}} meta_data() { cat < 1.0 The Pound Resource Agent can manage Pound instances. Manage a Pound instance The Pound configuration file that Pound should manage, for example "/etc/pound.cfg". Pound configuration file - + Override the name of the instance that should be given to Pound (defaults to the resource identifier). Instance name Write the process's PID to the specified file. The default will include the specified name, i.e.: "/var/run/pound_production.pid". Unlike what this help message shows, it is most likely not necessary to change this parameter. Pidfile This is used to start Pound server. Normally use pound. This is used to watch Pound status via Unix socket. Normally use poundctl. Write the process's Unix socket. This parameter is same 'Control' parameter in configuration file, i.e.: Control "/var/lib/pound/pound.cfg". Determines how many files pound is allowed to open at a time. Helps to fix the 'Too many open files' error message. Allowed number of open files. 
- + END } ####################################################################### pound_usage() { cat < : Pure-FTPd script # Author: Achim Stumpf : Rewrite as Proftpd # License: GNU General Public License (GPL) # # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg starts Proftpd. # # The "stop" arg stops it. # # OCF parameters: # OCF_RESKEY_binary # OCF_RESKEY_conffile # OCF_RESKEY_pidfile # OCF_RESKEY_curl_binary # OCF_RESKEY_curl_url # OCF_RESKEY_test_user # OCF_RESKEY_test_pass # ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -: ${OCF_RESKEY_binary="/usr/sbin/proftpd"} -: ${OCF_RESKEY_conffile="/etc/proftpd.conf"} -: ${OCF_RESKEY_pidfile="/var/run/proftpd.pid"} -: ${OCF_RESKEY_curl_binary="/usr/bin/curl"} -: ${OCF_RESKEY_curl_url="ftp://localhost/"} -: ${OCF_RESKEY_test_user="test"} -: ${OCF_RESKEY_test_pass=""} +# Parameter defaults + +OCF_RESKEY_binary_default="/usr/sbin/proftpd" +OCF_RESKEY_conffile_default="/etc/proftpd.conf" +OCF_RESKEY_pidfile_default="/var/run/proftpd.pid" +OCF_RESKEY_curl_binary_default="/usr/bin/curl" +OCF_RESKEY_curl_url_default="ftp://localhost/" +OCF_RESKEY_test_user_default="test" +OCF_RESKEY_test_pass_default="" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_conffile=${OCF_RESKEY_conffile_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_curl_binary=${OCF_RESKEY_curl_binary_default}} +: ${OCF_RESKEY_curl_url=${OCF_RESKEY_curl_url_default}} +: ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}} +: ${OCF_RESKEY_test_pass=${OCF_RESKEY_test_pass_default}} USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 This script manages Proftpd in an Active-Passive setup OCF Resource 
Agent compliant FTP script. The Proftpd binary The Proftpd binary - + The Proftpd configuration file name with full path. For example, "/etc/proftpd.conf" Configuration file name with full path - + The Proftpd PID file. The location of the PID file is configured in the Proftpd configuration file. PID file - + The absolute path to the curl binary for monitoring with OCF_CHECK_LEVEL greater zero. The absolute path to the curl binary - + The URL which is checked by curl with OCF_CHECK_LEVEL greater zero. The URL which is checked by curl - + The name of the ftp user for monitoring with OCF_CHECK_LEVEL greater zero. The name of the ftp user - + The password of the ftp user for monitoring with OCF_CHECK_LEVEL greater zero. The password of the ftp user - + END exit $OCF_SUCCESS } isRunning() { kill -s 0 "$1" > /dev/null 2>&1 } proftpd_status() { if [ -f "$OCF_RESKEY_pidfile" ] then # Proftpd is probably running PID=`head -n 1 $OCF_RESKEY_pidfile` if [ ! -z "$PID" ] ; then isRunning "$PID" && `ps -p $PID | grep proftpd > /dev/null 2>&1` return $? fi fi # Proftpd is not running return $OCF_NOT_RUNNING; } proftpd_start() { # make a few checks and start Proftpd if ocf_is_root ; then : ; else ocf_log err "You must be root" exit $OCF_ERR_PERM fi # if Proftpd is running return success if proftpd_status ; then ocf_log info "Proftpd is running already" exit $OCF_SUCCESS fi # starting Proftpd ${OCF_RESKEY_binary} --config ${OCF_RESKEY_conffile} 2>/dev/null if [ "$?" -ne 0 ]; then ocf_log err "Proftpd returned error" $? exit $OCF_ERR_GENERIC fi exit $OCF_SUCCESS } proftpd_stop() { if proftpd_status ; then PID=`head -n 1 $OCF_RESKEY_pidfile` if [ ! -z "$PID" ]; then ocf_log info "Killing Proftpd PID $PID" kill $PID > /dev/null 2>&1 if [ "$?" -eq 0 ]; then TRIES=0 while isRunning "$PID" && [ "$TRIES" -lt 30 ] do sleep 1 ocf_log info "Proftpd PID $PID is still running" TRIES=`expr $TRIES + 1` done isRunning "$PID" RET=$? 
if [ "$RET" -eq 0 ]; then ocf_log info "Killing Proftpd PID $PID with SIGKILL" kill -s 9 $PID > /dev/null 2>&1 while isRunning "$PID" do sleep 1 ocf_log info "Proftpd PID $PID is still running" done fi else ocf_log err "Killing Proftpd PID $PID FAILED" exit $OCF_ERR_GENERIC fi fi fi exit $OCF_SUCCESS } proftpd_monitor() { proftpd_status RET=$? if [ "$RET" -ne 0 -o "$OCF_CHECK_LEVEL" = 0 ]; then if [ "$RET" -eq 0 ]; then PID=`head -n 1 $OCF_RESKEY_pidfile` ocf_log debug "Proftpd monitor on PID $PID succeeded" return $OCF_SUCCESS else ocf_log debug "Proftpd monitor on PID $PID failed" return $OCF_NOT_RUNNING fi else ${OCF_RESKEY_curl_binary} -sS -u "${OCF_RESKEY_test_user}:${OCF_RESKEY_test_pass}" ${OCF_RESKEY_curl_url} > /dev/null 2>&1 if [ "$?" -eq 0 ]; then ocf_log debug "Proftpd monitor with curl on URL $OCF_RESKEY_curl_url succeeded" return $OCF_SUCCESS else ocf_log err "Proftpd monitor with curl on URL $OCF_RESKEY_curl_url failed" return $OCF_NOT_RUNNING fi fi } proftpd_validate_all() { # check that the proftpd binary exists if [ ! -x "$OCF_RESKEY_binary" ]; then ocf_log err "Proftpd binary $OCF_RESKEY_binary does not exist" exit $OCF_ERR_INSTALLED fi # check that the Proftpd config file exists if [ ! -f "$OCF_RESKEY_conffile" ]; then ocf_log err "Proftpd config file $OCF_RESKEY_conffile does not exist" exit $OCF_ERR_CONFIGURED fi # check that the curl binary exists if [ ! -x "$OCF_RESKEY_curl_binary" ]; then ocf_log err "$OCF_RESKEY_curl_binary does not exist" exit $OCF_ERR_INSTALLED fi } # # Main # if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi case $1 in start) proftpd_validate_all proftpd_start ;; stop) proftpd_stop ;; status) if proftpd_status then ocf_log info "Proftpd is running" exit $OCF_SUCCESS else ocf_log info "Proftpd is stopped" exit $OCF_NOT_RUNNING fi ;; monitor) proftpd_monitor exit $? 
;; validate-all) proftpd_validate_all exit $OCF_SUCCESS ;; meta-data) meta_data ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster index 5e697fa36..cf8ca21a6 100755 --- a/heartbeat/rabbitmq-cluster +++ b/heartbeat/rabbitmq-cluster @@ -1,602 +1,608 @@ #!/bin/sh # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_set_policy_default="" + +: ${OCF_RESKEY_set_policy=${OCF_RESKEY_set_policy_default}} + ####################################################################### # This arbitrary value here is used by the rmq_start action to # signify that the resource agent must retry the start process # It might potentially conflict with OCF assigned error code # in the future. 
RMQ_TRY_RESTART_ERROR_CODE=126 RMQ_SERVER=/usr/sbin/rabbitmq-server RMQ_CTL=/usr/sbin/rabbitmqctl RMQ_EVAL="${RMQ_CTL} eval -q" RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia" RMQ_PID_DIR="/var/run/rabbitmq" RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid" RMQ_LOG_DIR="/var/log/rabbitmq" if [ "$__OCF_ACTION" != "meta-data" ]; then NODENAME=$(ocf_attribute_target) fi # this attr represents the current active local rmq node name. # when rmq stops or the node is fenced, this attr disappears RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}" # this attr represents the last known active local rmq node name # when rmp stops or the node is fenced, the attr stays forever so # we can continue to map an offline pcmk node to it's rmq node name # equivalent. RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}" meta_data() { cat < 1.0 Starts cloned rabbitmq cluster instance. NB: note that this RA cannot be spawned across a mix of pacemaker and pacemaker-remote nodes. Only on pacemaker *or* pacemaker-remote nodes exclusively. rabbitmq clustered Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance. rabbitmqctl set_policy args - + END } ####################################################################### rmq_usage() { cat < /dev/null 2>&1 } rmq_local_node() { local node_name=$($RMQ_CTL status 2>&1 | sed -n -e "s/^.*[S|s]tatus of node \(.*\)\s.*$/\1/p" | tr -d "'") if [ -z "$node_name" ]; then node_name=$(cat /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null | grep "\s*RABBITMQ_NODENAME=" | awk -F= '{print $2}') fi echo "$node_name" } rmq_join_list() { local join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") # If join_list is empty we want to check if there are any remote nodes # where rabbitmq is allowed to run (i.e. 
nodes without the crmd=online selector) if [ -z "$join_list" ]; then # Get all the nodes written in the ATTR_COOKIE no matter if # they are online or not. This will be one line per node like # rabbit@overcloud-rabbit-0 # rabbit@overcloud-rabbit-1 # ... local remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") # The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...' local filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}') # export the intersection which gives us only the nodes that # a) wrote their namein the cib attrd # b) run on nodes where pacemaker_remote is enabled join_list="$(echo $remote_join_list | grep $filter)" fi echo $join_list } rmq_write_nodename() { local node_name=$(rmq_local_node) if [ -z "$node_name" ]; then ocf_log err "Failed to determine rabbitmq node name, exiting" exit $OCF_ERR_GENERIC fi # store the pcmknode to rmq node mapping as a transient attribute. This allows # us to retrieve the join list with a simple xpath. ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name" # the pcmknode to rmq node mapping as a permanent attribute as well. this lets # us continue to map offline nodes to their equivalent rmq node name ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --name "$RMQ_CRM_ATTR_COOKIE_LAST_KNOWN" -v "$node_name" } rmq_delete_nodename() { # remove node-name ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -D } prepare_dir () { if [ ! -d ${1} ] ; then mkdir -p ${1} chown -R rabbitmq:rabbitmq ${1} chmod 755 ${1} fi } remove_pid () { rm -f ${RMQ_PID_FILE} > /dev/null 2>&1 } rmq_app_running() { if $RMQ_EVAL 'application:which_applications().' 
| grep -q '{rabbit,'; then
    ocf_log debug "RabbitMQ application is running"
    return $OCF_SUCCESS
else
    ocf_log debug "RabbitMQ application is stopped"
    rmq_delete_nodename
    return $OCF_NOT_RUNNING
fi
}

# Check that the node answers a trivial 'rabbitmqctl eval'.
# Returns OCF_SUCCESS when it does; otherwise calls rmq_delete_nodename
# and returns OCF_NOT_RUNNING.
rmq_node_alive() {
    if $RMQ_EVAL 'ok.'; then
        ocf_log debug "RabbitMQ node is alive"
        return $OCF_SUCCESS
    fi

    ocf_log debug "RabbitMQ node is down"
    rmq_delete_nodename
    return $OCF_NOT_RUNNING
}

# Monitor action.
# Queries the cluster status from mnesia, then compares the number of
# running db nodes reported by RabbitMQ with the size of rmq_join_list.
# Returns:
#   OCF_SUCCESS      status query answered '{ok...' and this node is not in
#                    a minority; the node name attributes are rewritten
#   OCF_ERR_GENERIC  twice the RabbitMQ node count is below the join list size
#   OCF_NOT_RUNNING  the status query did not answer '{ok...'
# The two failure paths also call rmq_delete_nodename.
rmq_monitor() {
    local rc

    status=$($RMQ_EVAL 'rabbit_mnesia:cluster_status_from_mnesia().' 2>&1)

    # No usable answer from mnesia: treat the server as stopped.
    if ! echo "${status}" | grep -q '^{ok'; then
        ocf_log info "RabbitMQ server could not get cluster status from mnesia"
        ocf_log debug "${status}"
        rmq_delete_nodename
        return $OCF_NOT_RUNNING
    fi

    pcs_running=$(rmq_join_list | wc -w)
    ocf_log debug "Pacemaker thinks ${pcs_running} RabbitMQ nodes are running"
    rmq_running=$($RMQ_EVAL 'length(mnesia:system_info(running_db_nodes)).')
    ocf_log debug "RabbitMQ thinks ${rmq_running} RabbitMQ nodes are running"

    # Fail the monitor when this node only sees a minority of the nodes.
    if [ $(( $rmq_running * 2 )) -lt $pcs_running ]; then
        ocf_log info "RabbitMQ is a minority partition, failing monitor"
        rmq_delete_nodename
        return $OCF_ERR_GENERIC
    fi

    ocf_log debug "RabbitMQ server is running normally"
    rmq_write_nodename
    return $OCF_SUCCESS
}

# Launch rabbitmq-server in the background and block in 'rabbitmqctl wait'
# on the pid file. Returns OCF_ERR_GENERIC when the wait fails, otherwise
# the result of rmq_app_running.
rmq_init_and_wait() {
    local rc

    prepare_dir $RMQ_PID_DIR
    prepare_dir $RMQ_LOG_DIR
    remove_pid

    # the server startup script reads the pid file location from here
    export RABBITMQ_PID_FILE="$RMQ_PID_FILE"
    setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" &

    ocf_log info "Waiting for server to start"
    $RMQ_CTL wait $RMQ_PID_FILE
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        rmq_app_running
        return $?
    fi

    remove_pid
    ocf_log info "rabbitmq-server start failed: $rc"
    return $OCF_ERR_GENERIC
}

# Pass all arguments to 'rabbitmqctl set_policy', discarding its output.
rmq_set_policy() {
    $RMQ_CTL set_policy "$@" >/dev/null 2>&1
}

rmq_start_first() {
    local rc

    ocf_log info "Bootstrapping rabbitmq cluster"
    rmq_wipe_data
    rmq_init_and_wait
    rc=$?
if [ $rc -eq 0 ]; then rc=$OCF_SUCCESS ocf_log info "cluster bootstrapped" rmq_write_nodename if [ -n "$OCF_RESKEY_set_policy" ]; then # do not quote set_policy, we are passing in arguments rmq_set_policy $OCF_RESKEY_set_policy if [ $? -ne 0 ]; then ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy" rc=$OCF_ERR_GENERIC else ocf_log info "Policy set: $OCF_RESKEY_set_policy" fi fi else ocf_log info "failed to bootstrap cluster. Check SELINUX policy" rc=$OCF_ERR_GENERIC fi return $rc } rmq_is_clustered() { $RMQ_EVAL 'rabbit_mnesia:is_clustered().' | grep -q true } rmq_join_existing() { local join_list="$1" local rc=$OCF_ERR_GENERIC ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes." rmq_init_and_wait if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi if rmq_is_clustered; then ocf_log info "Successfully re-joined existing rabbitmq cluster automatically" return $OCF_SUCCESS fi # unconditionally join the cluster $RMQ_CTL stop_app > /dev/null 2>&1 for node in $(echo "$join_list"); do ocf_log info "Attempting to join cluster with target node $node" $RMQ_CTL join_cluster $node if [ $? -eq 0 ]; then ocf_log info "Joined cluster by connecting to node $node, starting app" $RMQ_CTL start_app rc=$? if [ $rc -ne 0 ]; then ocf_log err "'$RMQ_CTL start_app' failed" fi break; fi done if [ "$rc" -ne 0 ]; then ocf_log info "Join process incomplete, shutting down." return $OCF_ERR_GENERIC fi ocf_log info "Successfully joined existing rabbitmq cluster" return $OCF_SUCCESS } rmq_forget_cluster_node_remotely() { local running_cluster_nodes="$1" local node_to_forget="$2" ocf_log info "Forgetting $node_to_forget via nodes [ $(echo $running_cluster_nodes | tr '\n' ' ') ]." for running_cluster_node in $running_cluster_nodes; do $RMQ_CTL -n $running_cluster_node forget_cluster_node $node_to_forget if [ $? = 0 ]; then ocf_log info "Succeeded forgetting $node_to_forget via $running_cluster_node." 
return else ocf_log err "Failed to forget node $node_to_forget via $running_cluster_node." fi done } rmq_notify() { node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}" mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" # When notifications are on, this agent is going to "forget" nodes once they # leave the cluster. This is thought to resolve some issues where rabbitmq # blocks trying to sync with an offline node after a fencing action occurs. if ! [ "${mode}" = "post-stop" ]; then return $OCF_SUCCESS fi rmq_monitor if [ $? -ne $OCF_SUCCESS ]; then # only run forget when we are for sure active return $OCF_SUCCESS fi # forget each stopped rmq instance in the provided pcmk node in the list. for node in $(echo "$node_list"); do local rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $(ocf_attribute_target $node) -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)" if [ -z "$rmq_node" ]; then ocf_log warn "Unable to map pcmk node $node to a known rmq node." continue fi ocf_log notice "Forgetting stopped node $rmq_node" $RMQ_CTL forget_cluster_node $rmq_node if [ $? -ne 0 ]; then ocf_log warn "Unable to forget offline node $rmq_node." fi done return $OCF_SUCCESS } rmq_try_start() { local join_list="" local rc rmq_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi join_list=$(rmq_join_list) # No join list means no active instances are up. This instance # is the first, so it needs to bootstrap the rest if [ -z "$join_list" ]; then rmq_start_first rc=$? return $rc fi # Try to join existing cluster ocf_log info "wiping data directory before joining" local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)" rmq_stop rmq_wipe_data rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node" rmq_join_existing "$join_list" rc=$? 
if [ $rc -ne 0 ]; then # we could not join the rabbitmq cluster from any of the running nodes # this might be due to a unexpected reset of those nodes. Give ourself # a chance to start by retrying the entire start sequence. ocf_log warn "Failed to join the RabbitMQ cluster from nodes ${join_list}. Stopping local unclustered rabbitmq" rmq_stop ocf_log warn "Re-detect available rabbitmq nodes and try to start again" # return an unused OCF value to signify a "retry" condition return $RMQ_TRY_RESTART_ERROR_CODE fi # Restore users, user permissions, and policies (if any) BaseDataDir=`dirname $RMQ_DATA_DIR` $RMQ_EVAL " %% Run only if Mnesia is ready. lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso begin Restore = fun(Table, PostprocessFun, Filename) -> case file:consult(Filename) of {error, _} -> ok; {ok, [Result]} -> lists:foreach(fun(X) -> mnesia:dirty_write(Table, PostprocessFun(X)) end, Result), file:delete(Filename) end end, %% Restore users Upgrade = fun ({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5}; ({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D} end, Downgrade = fun ({internal_user, A, B, C}) -> {internal_user, A, B, C}; ({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C}; %% Incompatible scheme, so we will loose user's password ('B' value) during conversion. 
%% Unfortunately, this case will require manual intervention - user have to run: %% rabbitmqctl change_password ({internal_user, A, B, C, _}) -> {internal_user, A, B, C} end, %% Check db scheme first [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), case WildPattern of %% Version < 3.6.0 {internal_user,'_','_','_'} -> Restore(rabbit_user, Downgrade, \"$BaseDataDir/users.erl\"); %% Version >= 3.6.0 {internal_user,'_','_','_','_'} -> Restore(rabbit_user, Upgrade, \"$BaseDataDir/users.erl\") end, NoOp = fun(X) -> X end, %% Restore user permissions Restore(rabbit_user_permission, NoOp, \"$BaseDataDir/users_perms.erl\"), %% Restore policies Restore(rabbit_runtime_parameters, NoOp, \"$BaseDataDir/policies.erl\") end. " return $OCF_SUCCESS } rmq_start() { local rc=$RMQ_TRY_RESTART_ERROR_CODE while [ $rc -eq $RMQ_TRY_RESTART_ERROR_CODE ]; do rmq_try_start rc=$? done return $rc } rmq_stop() { # Backup users, user permissions, and policies BaseDataDir=`dirname $RMQ_DATA_DIR` $RMQ_EVAL " %% Run only if Mnesia is still available. 
lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso begin Backup = fun(Table, SelectPattern, Filter, Filename) -> Result = case catch mnesia:dirty_select(Table, [{SelectPattern, [Filter], ['\\\$_']}]) of {'EXIT', _} -> []; Any -> Any end, Result /= [] andalso file:write_file(Filename, io_lib:fwrite(\"~p.~n\", [Result])) end, %% Backup users %% Check db scheme first [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), UsersSelectPattern = case WildPattern of %% Version < 3.6.0 {internal_user,'_','_','_'} -> {internal_user, '\\\$1', '_', '_'}; %% Version >= 3.6.0 {internal_user,'_','_','_','_'} -> {internal_user, '\\\$1', '_', '_', '_'} end, Backup(rabbit_user, UsersSelectPattern, {'/=', '\\\$1', <<\"guest\">>}, \"$BaseDataDir/users.erl\"), %% Backup user permissions Backup(rabbit_user_permission, {'\\\$1', {'\\\$2', '\\\$3','\\\$4'}, '\\\$5'}, {'/=', '\\\$3', <<\"guest\">>}, \"$BaseDataDir/users_perms.erl\"), %% Backup policies Backup(rabbit_runtime_parameters, {runtime_parameters, {'_', '\\\$1', '_'}, '_'}, {'==', '\\\$1', <<\"policy\">>}, \"$BaseDataDir/policies.erl\") end. " rmq_node_alive if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi $RMQ_CTL stop rc=$? if [ $rc -ne 0 ]; then ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc" return $rc fi #TODO add kill logic stop_wait=1 while [ $stop_wait = 1 ]; do rmq_app_running rc=$? if [ "$rc" -eq $OCF_NOT_RUNNING ]; then stop_wait=0 break elif [ "$rc" -ne $OCF_SUCCESS ]; then ocf_log info "rabbitmq-server stop failed: $rc" exit $OCF_ERR_GENERIC fi sleep 1 done rmq_delete_nodename remove_pid return $OCF_SUCCESS } rmq_validate() { check_binary $RMQ_SERVER check_binary $RMQ_CTL # This resource only makes sense as a clone right now. at some point # we may want to verify the following. 
#TODO verify cloned #TODO verify ordered=true # Given that this resource does the cluster join explicitly, # having a cluster_nodes list in the static config file will # likely conflict with this agent. #TODO verify no cluster list in rabbitmq conf #cat /etc/rabbitmq/rabbitmq.config | grep "cluster_nodes" return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) rmq_start;; stop) rmq_stop;; monitor) rmq_monitor;; validate-all) rmq_validate;; notify) rmq_notify;; usage|help) rmq_usage exit $OCF_SUCCESS ;; *) rmq_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/redis.in b/heartbeat/redis.in index e257bcc5e..ec7186d8b 100644 --- a/heartbeat/redis.in +++ b/heartbeat/redis.in @@ -1,713 +1,726 @@ #!@BASH_SHELL@ # # Resource agent script for redis server. # # Copyright (c) 2013 Patrick Hemmer # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -: ${OCF_RESKEY_bin:=/usr/bin/redis-server} -: ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli} -: ${OCF_RESKEY_user:=redis} -: ${OCF_RESKEY_rundir:=/var/run/redis} -: ${OCF_RESKEY_pidfile_name:=redis-server.pid} -: ${OCF_RESKEY_socket_name:=redis.sock} -: ${OCF_RESKEY_port:=6379} -: ${OCF_RESKEY_tunnel_host:=127.0.0.1} - -if [ -z "$OCF_RESKEY_config" ]; then - if [ -f "/etc/redis.conf" ]; then - OCF_RESKEY_config="/etc/redis.conf" - else - OCF_RESKEY_config="/etc/redis/redis.conf" - fi +# Parameter defaults + +OCF_RESKEY_bin_default="/usr/bin/redis-server" +OCF_RESKEY_client_bin_default="/usr/bin/redis-cli" +if [ -f "/etc/redis.conf" ]; then + OCF_RESKEY_config_default="/etc/redis.conf" +else + OCF_RESKEY_config_default="/etc/redis/redis.conf" fi +OCF_RESKEY_user_default="redis" +OCF_RESKEY_rundir_default="/var/run/redis" +OCF_RESKEY_pidfile_name_default="redis-server.pid" +OCF_RESKEY_socket_name_default="redis.sock" +OCF_RESKEY_port_default="6379" +OCF_RESKEY_tunnel_host_default="127.0.0.1" +OCF_RESKEY_tunnel_port_map_default="" +OCF_RESKEY_wait_last_known_master_default="false" + +: ${OCF_RESKEY_bin=${OCF_RESKEY_bin_default}} +: ${OCF_RESKEY_client_bin=${OCF_RESKEY_client_bin_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_rundir=${OCF_RESKEY_rundir_default}} +: ${OCF_RESKEY_pidfile_name=${OCF_RESKEY_pidfile_name_default}} +: ${OCF_RESKEY_socket_name=${OCF_RESKEY_socket_name_default}} +: ${OCF_RESKEY_port=${OCF_RESKEY_port_default}} +: ${OCF_RESKEY_tunnel_host=${OCF_RESKEY_tunnel_host_default}} +: 
${OCF_RESKEY_tunnel_port_map=${OCF_RESKEY_tunnel_port_map_default}} +: ${OCF_RESKEY_wait_last_known_master=${OCF_RESKEY_wait_last_known_master_default}} CHECK_SLAVE_STATE=0 REDIS_CHECK_DUMP="/usr/bin/redis-check-dump" REDIS_SERVER="$OCF_RESKEY_bin" REDIS_CLIENT="$OCF_RESKEY_client_bin" REDIS_CONFIG="$OCF_RESKEY_config" REDIS_USER="$OCF_RESKEY_user" REDIS_RUNDIR="$OCF_RESKEY_rundir" REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name" REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name" REDIS_REPLICATION_PORT="$OCF_RESKEY_port" if ! [ -f $REDIS_CHECK_DUMP ]; then REDIS_CHECK_DUMP="$(which redis-check-dump 2>/dev/null)" fi if [ -z "$REDIS_CHECK_DUMP" ]; then REDIS_CHECK_DUMP="$(which redis-check-rdb 2>/dev/null)" fi if [ -r "$REDIS_CONFIG" ]; then REDIS_DUMP_DIR="$(grep "^\s*dir\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)" REDIS_DUMP_FILE="$(grep "^\s*dbfilename\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)" fi : ${REDIS_DUMP_DIR:=/var/lib/redis/} : ${REDIS_DUMP_FILE:=dump.rdb} redis_meta_data() { cat < 1.0 Resource agent script for redis server. This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config. When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000. Redis server Path to \`redis-server\` Path to \`redis-server\` - + Path to \`redis-cli\` Path to \`redis-cli\` - + Path to 'redis.conf' Path to 'redis.conf' - + User to run redis as Redis user - + Directory to store socket and pid file in Redis var/run dir - + The filename to use for the pidfile. Will be created in the rundir. Should only be a basename, not a full path. 
Redis pidfile name - + The filename to use for the socket. Will be crated in the rundir. Should only be a basename, not a full path. Redis socket name - + Port for replication client to connect to on remote server Replication port - + When replication traffic is tunnelled, this is the host to target to forward outgoing traffic to the redis master. The resource agent configures the redis slave to target the master via tunnel_host:tunnel_port. Note that in order to enable replication traffic tunneling, parameter {tunnel_port_map} must be populated. Tunnel host for replication traffic - + A mapping of pacemaker node names to redis port number. To be used when redis servers need to tunnel replication traffic. On every node where the redis resource is running, the redis server listens to a different port. Each redis server can access its peers for replication traffic via a tunnel accessible at {tunnel_host}:port. The mapping the form of: pcmk1-name:port-for-redis1;pcmk2-name:port-for-redis2;pcmk3-name:port-for-redis3 where the redis resource started on node pcmk1-name would listen on port port-for-redis1 Mapping of Redis server name to redis port - + During redis cluster bootstrap, wait for the last known master to be promoted before allowing any other instances in the cluster to be promoted. This lessens the risk of data loss when persistent data is in use. Wait for last known master - + EOI } INSTANCE_ATTR_NAME=$(echo "${OCF_RESOURCE_INSTANCE}" | awk -F : '{print $1}') CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication" MASTER_HOST="" MASTER_ACTIVE_CACHED="" MASTER_ACTIVE="" master_is_active() { if [ -z "$MASTER_ACTIVE_CACHED" ]; then # determine if a master instance is already up and is healthy crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1 MASTER_ACTIVE=$? 
MASTER_ACTIVE_CACHED="true" fi return $MASTER_ACTIVE } set_master() { MASTER_HOST="$1" ${CRM_ATTR_REPL_INFO} -v "$1" -q } last_known_master() { if [ -z "$MASTER_HOST" ]; then MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)" fi echo "$MASTER_HOST" } crm_master_reboot() { local node node=$(ocf_attribute_target) "${HA_SBIN_DIR}/crm_master" -N "$node" -l reboot "$@" } calculate_score() { perf_score="$1" connected_clients="$2" if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then # only set perferred score by slave_priority if # we are not waiting for the last known master. Otherwise # we want the agent to have complete control over the scoring. perf_score="" connected_clients="0" fi if [[ -z "$perf_score" ]]; then if [[ "$(last_known_master)" == "$NODENAME" ]]; then perf_score=1000 else perf_score=1 fi fi perf_score=$(( perf_score + connected_clients )) echo "$perf_score" } set_score() { local score local last_master score="$1" if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! master_is_active; then last_master="$(last_known_master)" if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted" return fi fi ocf_log debug "monitor: Setting master score to '$score'" crm_master_reboot -v "$score" } redis_client() { ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*" if [ -n "$clientpasswd" ]; then # Starting with 4.0.10 there is a warning on stderr when using a pass # Once we stop supporting versions < 5.0.0 we can add --no-auth-warning here ("$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" 2>&1 >&3 3>&- | grep -v "Using a password" >&2 3>&-) 3>&1 | sed 's/\r//' else "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//' fi } simple_status() { local pid if ! 
[ -f "$REDIS_PIDFILE" ]; then return $OCF_NOT_RUNNING fi pid="$(<"$REDIS_PIDFILE")" pidof $(basename "$REDIS_SERVER") | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING ocf_log debug "monitor: redis-server running under pid $pid" return $OCF_SUCCESS } redis_monitor() { local res local master_name local last_known_master_port simple_status res=$? if (( res != OCF_SUCCESS )); then return $res fi typeset -A info while read line; do [[ "$line" == "#"* ]] && continue [[ "$line" != *":"* ]] && continue IFS=':' read -r key value <<< "$line" info[$key]="$value" done < <(redis_client info) if [[ -z "${info[role]}" ]]; then ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`" return $OCF_ERR_GENERIC fi if ocf_is_ms; then # Here we see if a score has already been set. # If score isn't set we the redis setting 'slave_priority'. # If that isn't set, we default to 1000 for a master, and 1 for slave. # We then add 1 for each connected client score="$(crm_master_reboot -G --quiet 2>/dev/null)" if [[ -z "$score" ]]; then score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}") set_score "$score" fi if [[ "${info[role]}" == "master" ]]; then if ocf_is_probe; then set_master "$NODENAME" fi return $OCF_RUNNING_MASTER fi if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then if [[ "${info[master_link_status]}" != "up" ]]; then ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})" return $OCF_ERR_GENERIC fi if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then if [ -n "${OCF_RESKEY_tunnel_port_map}" ]; then master_name=$(port_to_redis_node ${info[master_port]}) last_known_master_port=$(redis_node_to_port $(last_known_master)) if [[ "${info[master_host]}" != "${OCF_RESKEY_tunnel_host}" ]] || [[ "${info[master_port]}" != "${last_known_master_port}" ]]; then ocf_log err "monitor: Slave mode current tunnelled connection to redis server does not match running master. 
tunnelled='${info[master_host]}:${info[master_port]} (${master_name})', running='$(last_known_master)'" return $OCF_ERR_GENERIC fi else ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)" return $OCF_ERR_GENERIC fi fi fi fi return $OCF_SUCCESS } redis_node_to_port() { local node=$1 echo "$OCF_RESKEY_tunnel_port_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$node"'" {print $2;exit}' } port_to_redis_node() { local port=$1 echo "$OCF_RESKEY_tunnel_port_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$2=="'"$port"'" {print $1;exit}' } get_tunnel_port_from_master() { local master_name=$1 crm_attribute --node "$master_name" -l forever --name ${INSTANCE_ATTR_NAME}-tunnel-port --query -q 2>/dev/null } get_master_from_tunnel_port() { local master_name=$1 crm_attribute --node "$master_name" -l forever --name ${INSTANCE_ATTR_NAME}-tunnel-port --query -q 2>/dev/null } check_dump_file() { if ! have_binary "$REDIS_CHECK_DUMP"; then return 0 fi $REDIS_CHECK_DUMP ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE} 2>&1 } redis_start() { local size redis_monitor status=$? if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then ocf_log info "start: redis is already running" return $OCF_SUCCESS fi [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR" chown -R "$REDIS_USER" "$REDIS_RUNDIR" if have_binary "restorecon"; then restorecon -Rv "$REDIS_RUNDIR" fi # check for 0 byte database dump file. This is an unrecoverable start # condition that we can avoid by deleting the 0 byte database file. if [ -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" ]; then size="$(stat --format "%s" ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE})" if [ "$?" -eq "0" ] && [ "$size" -eq "0" ]; then ocf_log notice "Detected 0 byte ${REDIS_DUMP_FILE}, deleting zero length file to avoid start failure." 
rm -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" fi fi ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)" while true; do # wait for redis to start typeset -A info while read line; do [[ "$line" == "#"* ]] && continue [[ "$line" != *":"* ]] && continue IFS=':' read -r key value <<< "$line" info[$key]="$value" done < <(redis_client info) if (( info[loading] == 0 )); then break elif (( info[loading] == 1 )); then sleep "${info[loading_eta_seconds]}" elif pidof $(basename "$REDIS_SERVER") >/dev/null; then # unknown error, but the process still exists. # This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail # See https://github.com/antirez/redis/issues/2368 # It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out sleep 1 else check_output="$(check_dump_file)" ocf_log err "start: Unknown error waiting for redis to start. redis-check-dump output=${check_output//$'\n'/; }" return $OCF_ERR_GENERIC fi done while ! [ -s "$REDIS_PIDFILE" ]; do ocf_log debug "start: Waiting for pid file '$REDIS_PIDFILE' to appear" sleep 1 done ocf_is_ms && redis_demote # pacemaker expects resources to start in slave mode redis_monitor status=$? if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then return $OCF_SUCCESS fi check_output="$(check_dump_file)" ocf_log err "start: Unknown error starting redis. redis-server output=${output//$'\n'/; } redis-check-dump output=${check_output//$'\n'/; }" return $status } redis_stop() { redis_monitor status=$? 
if (( status == OCF_NOT_RUNNING )); then ocf_log info "stop: redis is already stopped" crm_master_reboot -D return $OCF_SUCCESS fi pid="$(<"$REDIS_PIDFILE")" kill -TERM "$pid" while true; do simple_status status=$? if (( status == OCF_NOT_RUNNING )); then crm_master_reboot -D return $OCF_SUCCESS fi sleep 1 done }

# redis_promote: turn the local instance into the master.
#
# Returns $OCF_SUCCESS when redis_monitor reports the instance as master
# (either already, or after "slaveof no one"), $OCF_ERR_GENERIC when the
# instance is not currently running as a slave or the promotion did not
# take effect. On success the node is recorded via set_master.
redis_promote() {
    redis_monitor
    status=$?

    if (( status == OCF_RUNNING_MASTER )); then
        ocf_log info "promote: Already running as master"
        set_master "$NODENAME"
        return $OCF_SUCCESS
    elif (( status != OCF_SUCCESS )); then
        ocf_log err "promote: Node is not running as a slave"
        return $OCF_ERR_GENERIC
    fi

    redis_client slaveof no one

    # Re-check the role: only a confirmed master counts as promoted.
    redis_monitor
    status=$?
    if (( status == OCF_RUNNING_MASTER )); then
        set_master "$NODENAME"
        return $OCF_SUCCESS
    fi

    ocf_log err "promote: Unknown error while promoting to master (status=$status)"
    return $OCF_ERR_GENERIC
}

# redis_demote: put the local instance into slave mode, replicating from
# the last known master (or from a placeholder host while no master is
# available), and block until redis_monitor reports success.
#
# Returns $OCF_SUCCESS once the instance is a healthy slave,
# $OCF_NOT_RUNNING when redis is not running at all. Sets the global
# CHECK_SLAVE_STATE (1 = a real master is expected, 0 = placeholder master).
redis_demote() {
    local master_host
    local master_port
    local tunnel_port

    # client kill is only supported in Redis 2.8.12 or greater
    # (this code treats an ocf_version_cmp result of 2 as "newer than 2.8.11")
    version=$(redis_client -v | awk '{print $NF}')
    ocf_version_cmp "$version" "2.8.11"
    client_kill=$?

    CHECK_SLAVE_STATE=1

    redis_monitor
    status=$?
    if (( status == OCF_SUCCESS )); then
        ocf_log info "demote: Already running as slave"
        return $OCF_SUCCESS
    elif (( status == OCF_NOT_RUNNING )); then
        ocf_log err "demote: Failed to demote, redis not running."
        return $OCF_NOT_RUNNING
    fi

    master_host="$(last_known_master)"
    master_port="${REDIS_REPLICATION_PORT}"

    # The elected master has to remain a slave during startup.
    # During this period a placeholder master host is assigned.
    if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
        CHECK_SLAVE_STATE=0
        master_host="no-such-master"
    elif ! master_is_active; then
        # no master has been promoted yet. we'll be notified when the
        # master starts.
        CHECK_SLAVE_STATE=0
        master_host="no-such-master"
    fi

    if [ -n "${OCF_RESKEY_tunnel_port_map}" ]; then
        # master_host can be the special marker "no-such-master"
        # while a master is being selected. In this case, no
        # tunnel port is returned, but this is not fatal.
        tunnel_port=$(redis_node_to_port "$master_host")
        if [ -n "$tunnel_port" ]; then
            ocf_log info "demote: Setting master to '$master_host' via local tunnel '${OCF_RESKEY_tunnel_host}' on port '$tunnel_port'"
            master_host="${OCF_RESKEY_tunnel_host}"
            master_port="$tunnel_port"
        fi
    else
        ocf_log info "demote: Setting master to '$master_host'"
    fi

    redis_client slaveof "$master_host" "$master_port"

    # Wait forever for the slave to connect to the master and finish the
    # sync. Timeout is controlled by Pacemaker "op start timeout=XX".
    #
    # hint: redis master_link_status will only come "up" when
    #       the SYNC with the master has completed.
    #       This can take an arbitrary time (data) and should
    #       only be parametrized by the start operation timeout
    #       by the administrator, not by this resource agent code
    while true; do
        # Wait infinite if replication is syncing
        # Then start/demote operation timeout determines timeout
        if [ "$client_kill" -eq 2 ]; then
            redis_client CLIENT PAUSE 2000
        fi
        redis_monitor
        status=$?
        if (( status == OCF_SUCCESS )); then
            if [ "$client_kill" -eq 2 ]; then
                redis_client CLIENT KILL type normal
            fi
            return $OCF_SUCCESS
        fi

        sleep 1
    done

    # Not reached: the loop above only ends through its "return".
    ocf_log err "demote: Unexpected error setting slave mode (status=$status)"
    return $OCF_ERR_GENERIC
}

# redis_notify: react to cluster notifications. After a demote or promote
# elsewhere, a local slave re-runs redis_demote so that it follows the
# newly appointed master. Always returns $OCF_SUCCESS.
redis_notify() {
    mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
    case "$mode" in
        post-demote|post-promote) # change the master
            redis_monitor
            status=$?
            if (( status == OCF_SUCCESS )); then # were a slave
                # calling demote updates the slave's connection
                # to the newly appointed Master instance.
                redis_demote
            fi
            ;;
    esac
    return $OCF_SUCCESS
}

# redis_validate: check that the configured binaries, config file and
# run-as user are usable.
#
# Returns $OCF_ERR_INSTALLED when the server or client binary is missing or
# not executable, $OCF_ERR_CONFIGURED when the config file is missing or the
# user is unknown, $OCF_SUCCESS otherwise.
redis_validate() {
    # Each test must be negated: the error applies when the file is NOT usable.
    if [[ ! -x "$REDIS_SERVER" ]]; then
        ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
        return $OCF_ERR_INSTALLED
    fi
    if [[ ! -x "$REDIS_CLIENT" ]]; then
        ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
        return $OCF_ERR_INSTALLED
    fi
    if [[ ! -f "$REDIS_CONFIG" ]]; then
        ocf_log err "validate: $REDIS_CONFIG does not exist"
        return $OCF_ERR_CONFIGURED
    fi
    if ! getent passwd "$REDIS_USER" &>/dev/null; then
        ocf_log err "validate: $REDIS_USER is not a valid user"
        return $OCF_ERR_CONFIGURED
    fi
    return $OCF_SUCCESS
}

# The node name is not needed (and may not be resolvable) for meta-data.
if [ "$__OCF_ACTION" != "meta-data" ]; then
    NODENAME=$(ocf_attribute_target)
fi

# Pick up the last "requirepass" directive of the config file, if readable.
# The redirect target is quoted so a path containing spaces still works.
if [ -r "$REDIS_CONFIG" ]; then
    clientpasswd="$(sed -n -e 's/^\s*requirepass\s*\(.*\)\s*$/\1/p' < "$REDIS_CONFIG" | tail -n 1)"
fi

ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"

# Dispatch on the first argument, falling back to $__OCF_ACTION.
case "${1:-$__OCF_ACTION}" in
    status|monitor)
        redis_monitor
        ;;
    start)
        redis_start
        ;;
    stop)
        redis_stop
        ;;
    restart)
        redis_stop && redis_start
        ;;
    promote)
        redis_promote
        ;;
    demote)
        redis_demote
        ;;
    notify)
        redis_notify
        ;;
    meta-data)
        redis_meta_data
        ;;
    validate-all)
        redis_validate
        ;;
    *)
        echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
status=$?
ocf_log debug "exit_status=$status" exit $status diff --git a/heartbeat/rsyncd b/heartbeat/rsyncd index 86c771e32..cd4ab91c5 100755 --- a/heartbeat/rsyncd +++ b/heartbeat/rsyncd @@ -1,270 +1,280 @@ #!/bin/sh # # Resource script for rsync daemon # # Description: Manages rsync daemon as an OCF resource in # an High Availability setup. # # Author: Dhairesh Oza # License: GNU General Public License (GPL) # # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg starts rsyncd. # # The "stop" arg stops it. # # OCF parameters: # OCF_RESKEY_binpath # OCF_RESKEY_conffile # OCF_RESKEY_bwlimit # # Note:This RA requires that the rsyncd config files has a "pid file" # entry so that it is able to act on the correct process ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_binpath_default="rsync" +OCF_RESKEY_conffile_default="/etc/rsyncd.conf" +OCF_RESKEY_bwlimit_default="" + +: ${OCF_RESKEY_binpath=${OCF_RESKEY_binpath_default}} +: ${OCF_RESKEY_conffile=${OCF_RESKEY_conffile_default}} +: ${OCF_RESKEY_bwlimit=${OCF_RESKEY_bwlimit_default}} + USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 This script manages rsync daemon Manages an rsync daemon The rsync binary path. For example, "/usr/bin/rsync" Full path to the rsync binary - + The rsync daemon configuration file name with full path. For example, "/etc/rsyncd.conf" Configuration file name with full path - + This option allows you to specify a maximum transfer rate in kilobytes per second. This option is most effective when using rsync with large files (several megabytes and up). 
Due to the nature of rsync transfers, blocks of data are sent, then if rsync determines the transfer was too fast, it will wait before sending the next data block. The result is an average transfer rate equaling the specified limit. A value of zero specifies no limit. limit I/O bandwidth, KBytes per second - + END exit $OCF_SUCCESS }

# get_pid_and_conf_file: set CONF_FILE from OCF_RESKEY_conffile (falling
# back to /etc/rsyncd.conf) and, when the config has a non-commented
# "pid file" entry, set PIDFILE to the text after its "=".
get_pid_and_conf_file()
{
    if [ -n "$OCF_RESKEY_conffile" ]; then
        CONF_FILE=$OCF_RESKEY_conffile
    else
        CONF_FILE="/etc/rsyncd.conf"
    fi

    grep -v "^#" "$CONF_FILE" | grep "pid file" > /dev/null
    if [ $? -eq 0 ]; then
        PIDFILE=`grep -v "^#" "$CONF_FILE" | grep "pid file" | awk -F "=" '{ print $2 }'`
    fi
}

# rsyncd_status: report whether the daemon named in PIDFILE is alive.
#
# Returns $OCF_SUCCESS when the recorded PID belongs to an rsync process,
# $OCF_ERR_GENERIC when the pid file exists but is empty or stale, and
# $OCF_NOT_RUNNING when there is no pid file.
rsyncd_status()
{
    if [ -n "$PIDFILE" -a -f $PIDFILE ]; then
        # rsync is probably running
        PID=`cat $PIDFILE`
        if [ -n "$PID" ]; then
            if ps -p $PID | grep rsync >/dev/null ; then
                ocf_log info "rsync daemon running"
                return $OCF_SUCCESS
            else
                ocf_log info "rsync daemon is not running but pid file exists"
                return $OCF_ERR_GENERIC
            fi
        else
            ocf_exit_reason "PID file empty!"
            return $OCF_ERR_GENERIC
        fi
    fi

    # rsyncd is not running
    ocf_log info "rsync daemon is not running"
    return $OCF_NOT_RUNNING
}

# rsyncd_start: launch "rsync --daemon" with the configured binary, config
# file and bandwidth limit.
#
# Exits the script with $OCF_SUCCESS when the daemon is already running or
# was started, and with $OCF_ERR_GENERIC on an unknown status or a failed
# launch. Returns $OCF_ERR_GENERIC when the config lacks a "pid file" entry.
rsyncd_start()
{
    # if rsyncd is running return success
    rsyncd_status
    retVal=$?
    if [ $retVal -eq $OCF_SUCCESS ]; then
        exit $OCF_SUCCESS
    elif [ $retVal -ne $OCF_NOT_RUNNING ]; then
        ocf_exit_reason "Error. Unknown status."
        exit $OCF_ERR_GENERIC
    fi

    if [ -n "$OCF_RESKEY_binpath" ]; then
        COMMAND="$OCF_RESKEY_binpath --daemon"
    else
        COMMAND="rsync --daemon"
    fi
    if [ -n "$OCF_RESKEY_conffile" ]; then
        COMMAND="$COMMAND --config $OCF_RESKEY_conffile"
    fi
    if [ -n "$OCF_RESKEY_bwlimit" ]; then
        COMMAND="$COMMAND --bwlimit $OCF_RESKEY_bwlimit"
    fi

    if grep -v "^#" "$CONF_FILE" | grep "pid file" > /dev/null ; then
        $COMMAND
        # Save the daemon's exit status right away: the "[" test below
        # overwrites $?, so reading $? inside the message would print 0.
        retVal=$?
        if [ $retVal -ne 0 ]; then
            ocf_exit_reason "Error. rsync daemon returned error $retVal."
            exit $OCF_ERR_GENERIC
        fi
    else
        ocf_exit_reason "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA."
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "Started rsync daemon."
    exit $OCF_SUCCESS
}

# rsyncd_stop: terminate the daemon recorded in PIDFILE, escalating from
# the default signal to KILL, then remove the pid file.
#
# Exits with $OCF_SUCCESS when the daemon is stopped (or was not running);
# returns $OCF_ERR_GENERIC when even KILL could not be delivered.
rsyncd_stop()
{
    if rsyncd_status ; then
        PID=`cat $PIDFILE`
        if [ -n "$PID" ] ; then
            kill $PID
            if [ $? -ne 0 ]; then
                kill -s KILL $PID
                if [ $? -ne 0 ]; then
                    ocf_exit_reason "Error. Could not stop rsync daemon."
                    return $OCF_ERR_GENERIC
                fi
            fi
            rm $PIDFILE 2>/dev/null
        fi
    fi
    ocf_log info "Stopped rsync daemon."
    exit $OCF_SUCCESS
}

# rsyncd_monitor: monitoring is the same check as status.
rsyncd_monitor()
{
    rsyncd_status
}

# rsyncd_validate_all: check the binary, the config file and the mandatory
# "pid file" entry.
#
# Exits with $OCF_ERR_ARGS for an unusable binary or missing config file,
# returns $OCF_ERR_GENERIC without a "pid file" entry, $OCF_SUCCESS otherwise.
rsyncd_validate_all()
{
    # binpath may be a bare command name resolved through PATH (rsyncd_start
    # runs it that way), so look it up with "command -v" instead of testing
    # a file relative to the current directory.
    if [ -n "$OCF_RESKEY_binpath" ] && ! command -v "$OCF_RESKEY_binpath" >/dev/null 2>&1; then
        ocf_exit_reason "Binary path $OCF_RESKEY_binpath does not exist."
        exit $OCF_ERR_ARGS
    fi
    if [ -n "$OCF_RESKEY_conffile" -a ! -f "$OCF_RESKEY_conffile" ]; then
        ocf_exit_reason "Config file $OCF_RESKEY_conffile does not exist."
        exit $OCF_ERR_ARGS
    fi

    if grep -v "^#" "$CONF_FILE" | grep "pid file" > /dev/null ; then
        :
    else
        ocf_exit_reason "Error. \"pid file\" entry required in the rsyncd config file by rsyncd OCF RA."
        return $OCF_ERR_GENERIC
    fi

    #Not checking "$OCF_RESKEY_bwlimit"

    return $OCF_SUCCESS
}

#
# Main
#

if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

case $1 in
    start)
        get_pid_and_conf_file
        rsyncd_start
        ;;
    stop)
        get_pid_and_conf_file
        rsyncd_stop
        ;;
    status)
        get_pid_and_conf_file
        rsyncd_status
        ;;
    monitor)
        get_pid_and_conf_file
        rsyncd_monitor
        ;;
    validate-all)
        get_pid_and_conf_file
        rsyncd_validate_all
        ;;
    meta-data)
        meta_data
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
diff --git a/heartbeat/rsyslog.in b/heartbeat/rsyslog.in index b029a09c5..9cb9a0ad6 100644 --- a/heartbeat/rsyslog.in +++ b/heartbeat/rsyslog.in @@ -1,254 +1,264 @@ #!@BASH_SHELL@ # # Description: Manages a rsyslog instance, provided by NTT OSSC as an # OCF High-Availability resource under Heartbeat/LinuxHA control # # Copyright (c) 2011 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free
Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ############################################################################## # OCF parameters: # OCF_RESKEY_rsyslog_binary : Path to rsyslog binary. # Default is "/sbin/rsyslogd" # OCF_RESKEY_configfile : Configuration file # OCF_RESKEY_start_opts : Startup options # # Only OCF_RESKEY_configfile must be specified. Each of the rests # has its default value or refers OCF_RESKEY_configfile to make # its value when no explicit value is given. # # Further infomation for setup: # There are sample configurations at the end of this file. # ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_configfile_default="" +OCF_RESKEY_rsyslog_binary_default="/sbin/rsyslogd" +OCF_RESKEY_start_opts_default="" + +: ${OCF_RESKEY_configfile=${OCF_RESKEY_configfile_default}} +: ${OCF_RESKEY_rsyslog_binary=${OCF_RESKEY_rsyslog_binary_default}} +: ${OCF_RESKEY_start_opts=${OCF_RESKEY_start_opts_default}} + usage() { cat <<-! usage: $0 action action: start : start a new rsyslog instance stop : stop the running rsyslog instance status : return the status of rsyslog, run or down monitor : return TRUE if the rsyslog appears to be working. meta-data : show meta data message validate-all: validate the instance parameters ! 
return $OCF_ERR_UNIMPLEMENTED } metadata_rsyslog() { cat < 1.0 This script manages a rsyslog instance as an HA resource. rsyslog resource agent This parameter specifies a configuration file for a rsyslog instance managed by this RA. Configuration file - + This parameter specifies rsyslog's executable file. rsyslog executable - + This parameter specifies startup options for a rsyslog instance managed by this RA. When no value is given, no startup options is used. Don't use option '-F'. It causes a stuck of a start action. Start options - + END return $OCF_SUCCESS }

# monitor_rsyslog: count the processes matching $PROCESS_PATTERN.
# Returns $OCF_NOT_RUNNING when there is none, $OCF_SUCCESS otherwise
# (with a warning when more than one process matches).
monitor_rsyslog()
{
    # Load the matching PIDs into the positional parameters so $# counts them.
    set -- $(pgrep -f "$PROCESS_PATTERN" 2>/dev/null)

    if [ $# -eq 0 ]; then
        ocf_log debug "No rsyslog process for $CONFIGFILE"
        return $OCF_NOT_RUNNING
    fi

    if [ $# -ne 1 ]; then
        ocf_log warn "Multiple rsyslog process for $CONFIGFILE"
    fi
    return $OCF_SUCCESS
}

# start_rsyslog: start rsyslog for $CONFIGFILE unless it is already up,
# then poll once per second until monitor_rsyslog reports success.
# Returns $OCF_ERR_GENERIC when the launch command itself fails.
start_rsyslog()
{
    local launch_rc

    monitor_rsyslog
    [ $? = "$OCF_SUCCESS" ] && return $OCF_SUCCESS

    $RSYSLOG_EXE -f $CONFIGFILE $START_OPTS 2>&1
    launch_rc=$?
    [ "$launch_rc" != "$OCF_SUCCESS" ] && return $OCF_ERR_GENERIC

    # No internal limit: the loop ends only when the monitor succeeds.
    until monitor_rsyslog; [ $? = "$OCF_SUCCESS" ]; do
        sleep 1
    done
    return $OCF_SUCCESS
}

# stop_rsyslog: send SIGTERM, wait for the process to go away, then send
# SIGKILL repeatedly until nothing matches $PROCESS_PATTERN any more.
# Always returns $OCF_SUCCESS.
# NOTE(review): lapse_sec counts seconds but is compared directly with
# OCF_RESKEY_CRM_meta_timeout; confirm the unit of that value.
stop_rsyslog()
{
    pkill -TERM -f "$PROCESS_PATTERN"

    typeset lapse_sec=0
    while pgrep -f "$PROCESS_PATTERN" > /dev/null
    do
        sleep 1
        lapse_sec=$(( lapse_sec + 1 ))
        ocf_log debug "stop_rsyslog[${OCF_RESOURCE_INSTANCE}]: stop NORM $lapse_sec/$OCF_RESKEY_CRM_meta_timeout"
        [ $lapse_sec -ge $OCF_RESKEY_CRM_meta_timeout ] && break
    done

    lapse_sec=0
    while pgrep -f "$PROCESS_PATTERN" > /dev/null
    do
        pkill -KILL -f "$PROCESS_PATTERN"
        sleep 1
        lapse_sec=$(( lapse_sec + 1 ))
        ocf_log debug "stop_rsyslog[${OCF_RESOURCE_INSTANCE}]: suspend rsyslog by SIGKILL ($lapse_sec/@@@)"
    done

    return $OCF_SUCCESS
}

status_rsyslog() { monitor_rsyslog rc=$? if [ $rc = $OCF_SUCCESS ]; then echo "rsyslog service is running." elif [ $rc = $OCF_NOT_RUNNING ]; then echo "rsyslog service is stopped."
fi return $rc } validate_all_rsyslog() { ocf_log info "validate_all_rsyslog[${OCF_RESOURCE_INSTANCE}]" return $OCF_SUCCESS } if [[ "$1" = "meta-data" ]]; then metadata_rsyslog exit $? fi CONFIGFILE="${OCF_RESKEY_configfile}" if [[ -z "$CONFIGFILE" ]]; then ocf_log err "undefined parameter:configfile" exit $OCF_ERR_CONFIGURED fi if [[ ! -f "$CONFIGFILE" ]]; then ocf_log err "Config file $CONFIGFILE does not exist." exit $OCF_ERR_CONFIGURED fi -RSYSLOG_EXE="${OCF_RESKEY_rsyslog_binary-/sbin/rsyslogd}" +RSYSLOG_EXE="${OCF_RESKEY_rsyslog_binary}" if [[ ! -x "$RSYSLOG_EXE" ]]; then ocf_log err "Invalid value:rsyslog_binary:$RSYSLOG_EXE" exit $OCF_ERR_CONFIGURED fi START_OPTS=${OCF_RESKEY_start_opts} PROCESS_PATTERN="$RSYSLOG_EXE -f $CONFIGFILE" COMMAND=$1 case "$COMMAND" in start) ocf_log debug "[${OCF_RESOURCE_INSTANCE}] Enter rsyslog start" start_rsyslog func_status=$? ocf_log debug "[${OCF_RESOURCE_INSTANCE}] Leave rsyslog start $func_status" exit $func_status ;; stop) ocf_log debug "[${OCF_RESOURCE_INSTANCE}] Enter rsyslog stop" stop_rsyslog func_status=$? ocf_log debug "[${OCF_RESOURCE_INSTANCE}] Leave rsyslog stop $func_status" exit $func_status ;; status) status_rsyslog exit $? ;; monitor) monitor_rsyslog func_status=$? exit $func_status ;; validate-all) validate_all_rsyslog exit $? ;; *) usage ;; esac diff --git a/heartbeat/scsi2reservation b/heartbeat/scsi2reservation index b1b1dc245..3e4ff9584 100755 --- a/heartbeat/scsi2reservation +++ b/heartbeat/scsi2reservation @@ -1,170 +1,176 @@ #!/bin/sh # by hxinwei@gmail.com # License: GNU General Public License 2 (GPL2) if [ -n "$OCF_DEBUG_LIBRARY" ]; then . $OCF_DEBUG_LIBRARY else : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi -: ${OCF_RESKEY_scsi_reserve="/usr/sbin/scsi_reserve"} -: ${OCF_RESKEY_sharedisk="/dev/sdb"} -: ${OCF_RESKEY_start_loop=10} +# Parameter defaults + +OCF_RESKEY_scsi_reserve_default="/usr/sbin/scsi_reserve" +OCF_RESKEY_sharedisk_default="/dev/sdb" +OCF_RESKEY_start_loop_default="10" + +: ${OCF_RESKEY_scsi_reserve=${OCF_RESKEY_scsi_reserve_default}} +: ${OCF_RESKEY_sharedisk=${OCF_RESKEY_sharedisk_default}} +: ${OCF_RESKEY_start_loop=${OCF_RESKEY_start_loop_default}} scsi2reserve_meta_data() { cat < 1.0 The scsi-2-reserve resource agent is a place holder for SCSI-2 reservation. A healthy instance of scsi-2-reserve resource, indicates the own of the specified SCSI device. This resource agent depends on the scsi_reserve from scsires package, which is Linux specific. scsi-2 reservation The scsi_reserve is a command from scsires package. It helps one to issue SCSI-2 reservation on SCSI devices. Manages exclusive access to shared storage media thrugh SCSI-2 reservations - + The shared disk that can be reserved. Shared disk. - + We are going to try several times before giving up. Start_loop indicates how many times we are going to re-try. Times to re-try before giving up. - + END return $OCF_SUCCESS } scsi2reserve_usage() { cat <] # # Example: # # /usr/sbin/sfex_init -n 10 /dev/sdb1 # # if further information is necessary, See README. # ####################################################################### # Initialization: # switching ocf-shellfuncs path : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_device_default="" +OCF_RESKEY_index_default="1" +OCF_RESKEY_collision_timeout_default="1" +OCF_RESKEY_monitor_interval_default="10" +OCF_RESKEY_lock_timeout_default="100" + +: ${OCF_RESKEY_device=${OCF_RESKEY_device_default}} +: ${OCF_RESKEY_index=${OCF_RESKEY_index_default}} +: ${OCF_RESKEY_collision_timeout=${OCF_RESKEY_collision_timeout_default}} +: ${OCF_RESKEY_monitor_interval=${OCF_RESKEY_monitor_interval_default}} +: ${OCF_RESKEY_lock_timeout=${OCF_RESKEY_lock_timeout_default}} + ####################################################################### SFEX_DAEMON=${HA_BIN}/sfex_daemon usage() { cat < 1.3 Resource script for SF-EX. It manages a shared storage medium exclusively . Manages exclusive access to shared storage using Shared Disk File EXclusiveness (SF-EX) Block device path that stores exclusive control data. block device - + Location in block device where exclusive control data is stored. 1 or more is specified. Default is 1. index - + Waiting time when a collision of lock acquisition is detected. Default is 1 second. waiting time for lock acquisition - + -Monitor interval(sec). Default is 10 seconds +Monitor interval(sec). Default is ${OCF_RESKEY_monitor_interval_default} seconds monitor interval - + -Valid term of lock(sec). Default is 100 seconds. +Valid term of lock(sec). Default is ${OCF_RESKEY_lock_timeout_default} seconds. The lock_timeout is calculated by the following formula. lock_timeout = monitor_interval + "The expiration time of the lock" We suggest 90 seconds as a default value of the "The expiration time of the lock", but you should change it in consideration of access delay to the shared disk and the switch time of the multipath driver. The lock timeout have an impact on start action timeout because start action timeout value is calculated by the following formula. 
start timeout = collision_timeout + lock_timeout + "safety margin" The "safety margin" is decided within the range of about 10-20 seconds(It depends on your system requirement). Valid term of lock - + END }

#
# START: Exclusive control starts.
#
# It loops permanently until the lock can be acquired when locked with
# the other node. In this case, the reception of the stop signal by the
# timeout time passage set to CIB becomes the only stop opportunity.
#
sfex_start() {
    ocf_log info "sfex_daemon: starting..."

    # Nothing to do when a daemon for this resource is already present.
    sfex_monitor
    if [ $? -eq $OCF_SUCCESS ]; then
        ocf_log info "sfex_daemon already started."
        return $OCF_SUCCESS
    fi

    $SFEX_DAEMON -i $INDEX -c $COLLISION_TIMEOUT -t $LOCK_TIMEOUT -m $MONITOR_INTERVAL -r ${OCF_RESOURCE_INSTANCE} $DEVICE
    rc=$?
    [ $rc -eq 0 ] || {
        ocf_log err "sfex_daemon failed to start."
        return $OCF_ERR_GENERIC
    }

    # Poll once per second until the daemon process shows up.
    while true
    do
        sfex_monitor
        if [ $? -eq $OCF_SUCCESS ]; then
            ocf_log info "sfex_daemon: started."
            return $OCF_SUCCESS
        fi
        ocf_log debug "Waiting for the start-up of the sfex_daemon..."
        sleep 1
    done

    # Only reachable if the loop above ever ends without returning.
    ocf_log err "Can't find a sfex_daemon process. Starting a sfex_daemon failed."
    return $OCF_ERR_GENERIC
}

#
# STOP: stop exclusive control
#
# Sends SIGTERM to the daemon, waits up to (operation timeout / 1000) - 5
# polls for it to exit, then sends SIGKILL and waits until it is gone.
# Returns $OCF_SUCCESS once the daemon is gone, or the exit status of
# kill when SIGTERM could not be delivered.
#
sfex_stop() {
    ocf_log info "sfex_daemon: stopping..."

    # Check the sfex daemon has already stopped.
    sfex_monitor
    if [ $? -eq $OCF_NOT_RUNNING ]; then
        ocf_log info "sfex_daemon already stopped."
        return $OCF_SUCCESS
    fi

    # Stop sfex daemon by sending SIGTERM signal.
    pid=$(/usr/bin/pgrep -f "$SFEX_DAEMON .* ${OCF_RESOURCE_INSTANCE} ")
    /bin/kill $pid
    rc=$?
    [ $rc -eq 0 ] || {
        ocf_log err "sfex_daemon failed to stop"
        return $rc
    }

    #sfex could be in state D if the device is gone, and then not terminate.
    #Wait and check again if the daemon is already properly shutdown.

    shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
    count=0
    while [ $count -lt $shutdown_timeout ]
    do
        sfex_monitor
        if [ $? -eq $OCF_NOT_RUNNING ]; then
            ocf_log info "sfex_daemon: stopped."
            return $OCF_SUCCESS
        fi
        count=$((count + 1))
        ocf_log debug "waiting for sfex_daemon to exit ($count/$shutdown_timeout)"
        sleep 1
    done

    # Grace period used up: escalate unless the daemon went away meanwhile.
    sfex_monitor
    [ $? -eq $OCF_NOT_RUNNING ] || {
        ocf_log warn "regular shutdown of sfex_daemon timed out, using SIGKILL"
        /bin/kill -s KILL $pid
    }

    while true
    do
        sfex_monitor
        [ $? -eq $OCF_NOT_RUNNING ] && break
        ocf_log debug "waiting for sfex_daemon to exit after SIGKILL"
        sleep 1
    done

    ocf_log info "sfex_daemon: stopped."
    return $OCF_SUCCESS
}

# MONITOR: look for a sfex_daemon process whose command line carries this
# resource instance name. Returns $OCF_SUCCESS when one is found and
# $OCF_NOT_RUNNING otherwise.
sfex_monitor() {
    ocf_log debug "sfex_monitor: started..."

    # Find a sfex_daemon process using daemon name and resource name.
    if ! /usr/bin/pgrep -f "$SFEX_DAEMON .* ${OCF_RESOURCE_INSTANCE} " > /dev/null 2>&1; then
        ocf_log debug "sfex_monitor: complete. sfex_daemon is not running."
        return $OCF_NOT_RUNNING
    fi

    ocf_log debug "sfex_monitor: complete. sfex_daemon is running."
    return $OCF_SUCCESS
}

# # main process # # check arguments if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi OP=$1 # These operations do not require instance parameters case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # check parameters DEVICE=$OCF_RESKEY_device -INDEX=${OCF_RESKEY_index:-1} -COLLISION_TIMEOUT=${OCF_RESKEY_collision_timeout:-1} -LOCK_TIMEOUT=${OCF_RESKEY_lock_timeout:-100} -MONITOR_INTERVAL=${OCF_RESKEY_monitor_interval:-10} +INDEX=${OCF_RESKEY_index} +COLLISION_TIMEOUT=${OCF_RESKEY_collision_timeout} +LOCK_TIMEOUT=${OCF_RESKEY_lock_timeout} +MONITOR_INTERVAL=${OCF_RESKEY_monitor_interval} sfex_validate () { if [ -z "$DEVICE" ]; then ocf_log err "Please set OCF_RESKEY_device to device for sfex meta-data" exit $OCF_ERR_ARGS fi if [ ! -w "$DEVICE" ]; then ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" exit $OCF_ERR_ARGS fi } if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then ocf_log err "THIS RA DO NOT SUPPORT CLONE MODE!"
exit $OCF_ERR_CONFIGURED fi case $OP in start) sfex_start ;; stop) sfex_stop ;; monitor) sfex_monitor ;; validate-all) sfex_validate ;; *) exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/sg_persist.in b/heartbeat/sg_persist.in index 177ec0279..7af38034e 100644 --- a/heartbeat/sg_persist.in +++ b/heartbeat/sg_persist.in @@ -1,686 +1,695 @@ #!@BASH_SHELL@ # # # OCF Resource Agent compliant PERSISTENT SCSI RESERVATION resource script. # # # Copyright (c) 2011 Evgeny Nifontov and lwang@suse.com All Rights Reserved. # # "Heartbeat drbd OCF Resource Agent: 2007, Lars Marowsky-Bree" was used # as example of multistate OCF Resource Agent. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# # # OCF instance parameters # OCF_RESKEY_binary # OCF_RESKEY_devs # OCF_RESKEY_required_devs_nof # OCF_RESKEY_reservation_type # OCF_RESKEY_master_score_base # OCF_RESKEY_master_score_dev_factor # OCF_RESKEY_master_score_delay # # TODO # # 1) PROBLEM: devices which were not accessible during 'start' action, will be never registered/reserved # TODO: 'Master' and 'Salve' registers new devs in 'monitor' action # TODO: 'Master' reserves new devs in 'monitor' action ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -# set default values - : ${sg_persist_binary="sg_persist"} # binary name for the resource - : ${devs=""} # device list - : ${required_devs_nof=1} # number of required devices - : ${reservation_type=1} # reservation type - : ${master_score_base=0} # master score base - : ${master_score_dev_factor=100} # device factor for master score - : ${master_score_delay=30} # delay for master score +# Parameter defaults + +OCF_RESKEY_binary_default="sg_persist" # binary name for the resource +OCF_RESKEY_devs_default="" # device list +OCF_RESKEY_required_devs_nof_default="1" # number of required devices +OCF_RESKEY_reservation_type_default="1" # reservation type +OCF_RESKEY_master_score_base_default="0" # master score base +OCF_RESKEY_master_score_dev_factor_default="100" # device factor for master score +OCF_RESKEY_master_score_delay_default="30" # delay for master score + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_devs=${OCF_RESKEY_devs_default}} +: ${OCF_RESKEY_required_devs_nof=${OCF_RESKEY_required_devs_nof_default}} +: ${OCF_RESKEY_reservation_type=${OCF_RESKEY_reservation_type_default}} +: ${OCF_RESKEY_master_score_base=${OCF_RESKEY_master_score_base_default}} +: ${OCF_RESKEY_master_score_dev_factor=${OCF_RESKEY_master_score_dev_factor_default}} +: 
${OCF_RESKEY_master_score_delay=${OCF_RESKEY_master_score_delay_default}} ####################################################################### meta_data() { cat < 1.1 This resource agent manages SCSI PERSISTENT RESERVATIONS. "sg_persist" from sg3_utils is used, please see its documentation. Should be used as multistate (Master/Slave) resource Slave registers its node id ("crm_node -i") as reservation key ( --param-rk ) on each device in the "devs" list. Master reserves all devices from "devs" list with reservation "--prout-type" value from "reservation_type" parameter. Manages SCSI PERSISTENT RESERVATIONS The name of the binary that manages the resource. the binary name of the resource - + Device list. Multiple devices can be listed with blank space as separator. Shell wildcards are allowed. device list Minimum number of "working" devices from device list 1) existing 2) "sg_persist --read-keys \$device" works (Return code 0) resource actions "start","monitor","promote" and "validate-all" return "\$OCF_ERR_INSTALLED" if the actual number of "working" devices is less then "required_devs_nof". resource actions "stop" and "demote" tries to remove reservations and registration keys from all working devices, but always return "\$OCF_SUCCESS" minimum number of working devices - + reservation type reservation type - + master_score_base value "master_score_base" value is used in "master_score" calculation: master_score = \$master_score_base + \$master_score_dev_factor * \$working_devs if set to bigger value in sg_persist resource configuration on some node, this node will be "preferred" for master role. base master_score value - + Working device factor in master_score calculation each "working" device provides additional value to "master_score", so the node that sees more devices will be preferred for the "Master"-role Setting it to 0 will disable this behavior. 
working device factor in master_score calculation - + master/slave decreases/increases its master_score after delay of \$master_score_delay seconds so if some device gets inaccessible, the slave decreases its master_score first and the resource will no be watched and after this device reappears again the master increases its master_score first this can work only if the master_score_delay is bigger then monitor interval on both master and slave Setting it to 0 will disable this behavior. master_score decrease/increase delay time - + END exit $OCF_SUCCESS } sg_persist_init() { if ! ocf_is_root ; then ocf_log err "You must be root to perform this operation." exit $OCF_ERR_PERM fi - SG_PERSIST=${OCF_RESKEY_binary:-"$sg_persist_binary"} + SG_PERSIST=${OCF_RESKEY_binary} check_binary $SG_PERSIST ROLE=$OCF_RESKEY_CRM_meta_role NOW=$(date +%s) RESOURCE="${OCF_RESOURCE_INSTANCE}" MASTER_SCORE_VAR_NAME="master-${OCF_RESOURCE_INSTANCE//:/-}" PENDING_VAR_NAME="pending-$MASTER_SCORE_VAR_NAME" #only works with corocync CRM_NODE="${HA_SBIN_DIR}/crm_node" NODE_ID_DEC=$($CRM_NODE -i) NODE=$($CRM_NODE -l | $GREP -w ^$NODE_ID_DEC) NODE=${NODE#$NODE_ID_DEC } NODE=${NODE% *} MASTER_SCORE_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$MASTER_SCORE_VAR_NAME --node=$NODE" CRM_MASTER="${HA_SBIN_DIR}/crm_master --lifetime=reboot" PENDING_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$PENDING_VAR_NAME --node=$NODE" NODE_ID_HEX=$(printf '0x%x' $NODE_ID_DEC) if [ -z "$NODE_ID_HEX" ]; then ocf_log err "Couldn't get node id with \"$CRM_NODE\"" exit $OCF_ERR_INSTALLED fi ocf_log debug "$RESOURCE: NODE:$NODE, ROLE:$ROLE, NODE_ID DEC:$NODE_ID_DEC HEX:$NODE_ID_HEX" - DEVS=${OCF_RESKEY_devs:=$devs} - REQUIRED_DEVS_NOF=${OCF_RESKEY_required_devs_nof:=$required_devs_nof} - RESERVATION_TYPE=${OCF_RESKEY_reservation_type:=$reservation_type} - MASTER_SCORE_BASE=${OCF_RESKEY_master_score_base:=$master_score_base} - 
MASTER_SCORE_DEV_FACTOR=${OCF_RESKEY_master_score_dev_factor:=$master_score_dev_factor} - MASTER_SCORE_DELAY=${OCF_RESKEY_master_score_delay:=$master_score_delay} + DEVS=${OCF_RESKEY_devs} + REQUIRED_DEVS_NOF=${OCF_RESKEY_required_devs_nof} + RESERVATION_TYPE=${OCF_RESKEY_reservation_type} + MASTER_SCORE_BASE=${OCF_RESKEY_master_score_base} + MASTER_SCORE_DEV_FACTOR=${OCF_RESKEY_master_score_dev_factor} + MASTER_SCORE_DELAY=${OCF_RESKEY_master_score_delay} ocf_log debug "$RESOURCE: DEVS=$DEVS" ocf_log debug "$RESOURCE: REQUIRED_DEVS_NOF=$REQUIRED_DEVS_NOF" ocf_log debug "$RESOURCE: RESERVATION_TYPE=$RESERVATION_TYPE" ocf_log debug "$RESOURCE: MASTER_SCORE_BASE=$MASTER_SCORE_BASE" ocf_log debug "$RESOURCE: MASTER_SCORE_DEV_FACTOR=$MASTER_SCORE_DEV_FACTOR" ocf_log debug "$RESOURCE: MASTER_SCORE_DELAY=$MASTER_SCORE_DELAY" #expand path wildcards DEVS=$(echo $DEVS) if [ -z "$DEVS" ]; then ocf_log err "\"devs\" not defined" exit $OCF_ERR_INSTALLED fi sg_persist_check_devs sg_persist_get_status } sg_persist_action_usage() { cat <&1` [ $? -eq 0 ] || continue WORKING_DEVS+=($dev) echo "$READ_KEYS" | $GREP -qw $NODE_ID_HEX\$ [ $? -eq 0 ] || continue REGISTERED_DEVS+=($dev) READ_RESERVATION=`$SG_PERSIST --in --read-reservation $dev 2>&1` [ $? -eq 0 ] || continue echo "$READ_RESERVATION" | $GREP -qw $NODE_ID_HEX\$ if [ $? 
-eq 0 ]; then RESERVED_DEVS+=($dev) fi reservation_key=`echo $READ_RESERVATION | $GREP -o 'Key=0x[0-9a-f]*' | $GREP -o '0x[0-9a-f]*'` if [ -n "$reservation_key" ]; then DEVS_WITH_RESERVATION+=($dev) RESERVATION_KEYS+=($reservation_key) fi done WORKING_DEVS_NOF=${#WORKING_DEVS[*]} ocf_log debug "$RESOURCE: working devices: `sg_persist_echo_array ${WORKING_DEVS[*]}`" ocf_log debug "$RESOURCE: number of working devices: $WORKING_DEVS_NOF" ocf_log debug "$RESOURCE: registered devices: `sg_persist_echo_array ${REGISTERED_DEVS[*]}`" ocf_log debug "$RESOURCE: reserved devices: `sg_persist_echo_array ${RESERVED_DEVS[*]}`" ocf_log debug "$RESOURCE: devices with reservation: `sg_persist_echo_array ${DEVS_WITH_RESERVATION[*]}`" ocf_log debug "$RESOURCE: reservation keys: `sg_persist_echo_array ${RESERVATION_KEYS[*]}`" MASTER_SCORE=$(($MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NOF)) ocf_log debug "$RESOURCE: master_score: $MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NOF = $MASTER_SCORE" }

# Collect the entries of $DEVS that exist into EXISTING_DEVS and exit with
# $OCF_ERR_INSTALLED when fewer than $REQUIRED_DEVS_NOF of them are present.
sg_persist_check_devs() {
    for dev in $DEVS; do
        [ -e "$dev" ] && EXISTING_DEVS+=($dev)
    done

    EXISTING_DEVS_NOF=${#EXISTING_DEVS[*]}
    if [ $EXISTING_DEVS_NOF -lt $REQUIRED_DEVS_NOF ]; then
        ocf_log err "Number of existing devices=$EXISTING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF"
        exit $OCF_ERR_INSTALLED
    fi
}

# Return 0 when device $1 is listed in REGISTERED_DEVS, 1 otherwise.
sg_persist_is_registered() {
    for registered_dev in ${REGISTERED_DEVS[*]}; do
        [ "$registered_dev" == "$1" ] && return 0
    done
    return 1
}

# Print the reservation key recorded for device $1, or an empty line when
# the device is not listed in DEVS_WITH_RESERVATION.
sg_persist_get_reservation_key() {
    for array_index in ${!DEVS_WITH_RESERVATION[*]}; do
        [ "${DEVS_WITH_RESERVATION[$array_index]}" == "$1" ] || continue
        echo ${RESERVATION_KEYS[$array_index]}
        return 0
    done
    echo ""
}

# Print the arguments as "[0]:first [1]:second ..." for log messages.
sg_persist_echo_array() {
    arr_str=""
    str_count=0
    for str in "$@"; do
        arr_str+="[$str_count]:$str "
        str_count=$((str_count + 1))
    done
    echo $arr_str
}

# Split $ACT_PENDING ("<timestamp>_<score>") into ACT_PENDING_TS and
# ACT_PENDING_SCORE; both are 0 when nothing is pending.
sg_persist_parse_act_pending() {
    ACT_PENDING_TS=0
    ACT_PENDING_SCORE=0
    [ -n "$ACT_PENDING" ] || return 0

    ACT_PENDING_TS=${ACT_PENDING%%_*}
    ACT_PENDING_SCORE=${ACT_PENDING##*_}
}

# Schedule an update that empties the pending attribute, but only when one
# is currently set.
sg_persist_clear_pending() {
    [ -z "$ACT_PENDING" ] && return 0

    NEW_PENDING=""
    DO_PENDING_UPDATE="YES"
}

# Schedule an update of the master score to $1.
sg_persist_new_master_score() {
    NEW_MASTER_SCORE=$1
    DO_MASTER_SCORE_UPDATE="YES"
}

# Schedule an update of the pending attribute to $1.
sg_persist_new_pending() {
    NEW_PENDING=$1
    DO_PENDING_UPDATE="YES"
}

# Functions invoked by resource manager actions

# start: publish the master score, clear the pending attribute and register
# this node's key on every working device that does not carry it yet.
# Exits with $OCF_ERR_GENERIC when too few devices work; returns
# $OCF_ERR_GENERIC when a registration fails, $OCF_SUCCESS otherwise.
sg_persist_action_start() {
    ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE
    ocf_run $PENDING_ATTRIBUTE --update=""

    if [ $WORKING_DEVS_NOF -lt $REQUIRED_DEVS_NOF ]; then
        ocf_log err "$RESOURCE: Number of working devices=$WORKING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF"
        exit $OCF_ERR_GENERIC
    fi

    for dev in ${WORKING_DEVS[*]}; do
        # Devices that already carry our key need no further action.
        sg_persist_is_registered $dev && continue

        ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=0 --param-sark=$NODE_ID_HEX $dev
        [ $? -eq $OCF_SUCCESS ] || return $OCF_ERR_GENERIC
    done

    return $OCF_SUCCESS
}

# stop: drop the master-score and pending attributes and remove this node's
# key from every registered device. Always returns $OCF_SUCCESS.
sg_persist_action_stop() {
    if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then
        ocf_log debug "$RESOURCE stop: already no registrations"
        return $OCF_SUCCESS
    fi

    # Clear preference for becoming master
    ocf_run $MASTER_SCORE_ATTRIBUTE --delete
    ocf_run $PENDING_ATTRIBUTE --delete

    for dev in ${REGISTERED_DEVS[*]}; do
        ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev
    done

    return $OCF_SUCCESS
}

sg_persist_action_monitor() { ACT_MASTER_SCORE=`$MASTER_SCORE_ATTRIBUTE --query --quiet 2>/dev/null` ocf_log debug "$RESOURCE monitor: ACT_MASTER_SCORE=$ACT_MASTER_SCORE" ACT_PENDING=`$PENDING_ATTRIBUTE --query --quiet 2>/dev/null` ocf_log debug "$RESOURCE monitor: ACT_PENDING=$ACT_PENDING" sg_persist_parse_act_pending ocf_log debug "$RESOURCE monitor: ACT_PENDING_TS=$ACT_PENDING_TS" ocf_log debug "$RESOURCE monitor: ACT_PENDING_VAL=$ACT_PENDING_SCORE" ocf_log debug "$MASTER_SCORE, $ACT_MASTER_SCORE, $ROLE" DO_MASTER_SCORE_UPDATE="NO" DO_PENDING_UPDATE="NO" if [ -n "$ACT_MASTER_SCORE" ] then if [ $ACT_MASTER_SCORE -eq $MASTER_SCORE ]; then
sg_persist_clear_pending else case $ROLE in Master) if [ $MASTER_SCORE -lt $ACT_MASTER_SCORE ]; then if [ -n "$ACT_PENDING" ] then if [ $(($NOW-$ACT_PENDING_TS-$MASTER_SCORE_DELAY)) -ge 0 ]; then sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi else if [ $MASTER_SCORE_DELAY -eq 0 ]; then sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending else sg_persist_new_pending "${NOW}_${MASTER_SCORE}" fi fi else sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi ;; Slave) if [ $MASTER_SCORE -gt $ACT_MASTER_SCORE ]; then if [ -n "$ACT_PENDING" ]; then if [ $(($NOW-$ACT_PENDING_TS-$MASTER_SCORE_DELAY)) -ge 0 ]; then sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi else if [ $MASTER_SCORE_DELAY -eq 0 ]; then sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending else sg_persist_new_pending "${NOW}_${MASTER_SCORE}" fi fi else sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi ;; *) ;; esac fi fi if [ $DO_MASTER_SCORE_UPDATE == "YES" ]; then ocf_run $MASTER_SCORE_ATTRIBUTE --update=$NEW_MASTER_SCORE fi if [ $DO_PENDING_UPDATE == "YES" ]; then ocf_run $PENDING_ATTRIBUTE --update=$NEW_PENDING fi if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then ocf_log debug "$RESOURCE monitor: no registrations" if [ -n "$ACT_MASTER_SCORE" ]; then ocf_run $MASTER_SCORE_ATTRIBUTE --delete ocf_run $PENDING_ATTRIBUTE --delete fi return $OCF_NOT_RUNNING fi if [ ${#RESERVED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then if [ -z "$ACT_MASTER_SCORE" ]; then ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE ocf_run $PENDING_ATTRIBUTE --update="" fi return $OCF_RUNNING_MASTER fi if [ ${#REGISTERED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then if [ -z "$ACT_MASTER_SCORE" ]; then ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE ocf_run $PENDING_ATTRIBUTE --update="" fi if [ $RESERVATION_TYPE -eq 7 ] || [ $RESERVATION_TYPE -eq 8 ]; then if [ ${#DEVS_WITH_RESERVATION[*]} -gt 0 ]; then return $OCF_RUNNING_MASTER 
else return $OCF_SUCCESS fi else return $OCF_SUCCESS fi fi ocf_log err "$RESOURCE monitor: unexpected state" return $OCF_ERR_GENERIC } sg_persist_action_promote() { if [ ${#RESERVED_DEVS[*]} -gt 0 ]; then ocf_log info "$RESOURCE promote: already master" return $OCF_SUCCESS fi for dev in ${WORKING_DEVS[*]} do reservation_key=`sg_persist_get_reservation_key $dev` case $RESERVATION_TYPE in 1|3|5|6) if [ -z "$reservation_key" ]; then ocf_run $SG_PERSIST --out --no-inquiry --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi else ocf_run $SG_PERSIST --out --no-inquiry --preempt --param-sark=$reservation_key --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi fi ;; 7|8) if [ -z "$reservation_key" ]; then ocf_run $SG_PERSIST --out --no-inquiry --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ] then return $OCF_ERR_GENERIC fi else ocf_log info "$RESOURCE promote: there already exist an reservation holder, all registrants become reservation holders" return $OCF_SUCCESS fi ;; *) return $OCF_ERR_ARGS ;; esac done return $OCF_SUCCESS } sg_persist_action_demote() { case $RESERVATION_TYPE in 1|3|5|6) if [ ${#RESERVED_DEVS[*]} -eq 0 ]; then ocf_log info "$RESOURCE demote: already slave" return $OCF_SUCCESS fi for dev in ${RESERVED_DEVS[*]} do ocf_run $SG_PERSIST --out --no-inquiry --release --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi done ;; 7|8) #in case of 7/8, --release won't release the reservation unless unregister the key. if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then ocf_log info "$RESOURCE demote: already slave" return $OCF_SUCCESS fi for dev in ${REGISTERED_DEVS[*]} do ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev if [ $? 
-ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi done ;; *) return $OCF_ERR_ARGS ;; esac return $OCF_SUCCESS } sg_persist_action_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" set -- $OCF_RESKEY_CRM_meta_notify_active_resource local n_active="$#" set -- $OCF_RESKEY_CRM_meta_notify_stop_resource local n_stop="$#" set -- $OCF_RESKEY_CRM_meta_notify_start_resource local n_start="$#" ocf_log debug "$RESOURCE notify: $n_type for $n_op - counts: active $n_active - starting $n_start - stopping $n_stop" return $OCF_SUCCESS } sg_persist_action_validate_all () { if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then ocf_log err "Master options misconfigured." exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then echo "Incorrect parameter count." sg_persist_action_usage exit $OCF_ERR_ARGS fi ACTION=$1 case $ACTION in meta-data) meta_data ;; validate-all) sg_persist_init sg_persist_action_validate_all ;; start|promote|monitor|stop|demote) ocf_log debug "$RESOURCE: starting action \"$ACTION\"" sg_persist_init sg_persist_action_$ACTION exit $? ;; notify) sg_persist_action_notify exit $? ;; usage|help) sg_persist_action_usage exit $OCF_SUCCESS ;; *) sg_persist_action_usage exit $OCF_ERR_ARGS ;; esac diff --git a/heartbeat/slapd.in b/heartbeat/slapd.in index 5181e8644..bd3995bdc 100644 --- a/heartbeat/slapd.in +++ b/heartbeat/slapd.in @@ -1,577 +1,594 @@ #!@BASH_SHELL@ # # Stand-alone LDAP Daemon (slapd) # # Description: Manages Stand-alone LDAP Daemon (slapd) as an OCF resource in # an high-availability setup. # # Authors: Jeroen Koekkoek # nozawat@gmail.com # John Keith Hohm # # License: GNU General Public License (GPL) # Copyright: (C) 2011 Pagelink B.V. # # The OCF code was inspired by the Postfix resource script written by # Raoul Bhatia . 
# # The code for managing the slapd instance is based on the the slapd init # script found in Debian GNU/Linux 6.0. # # OCF parameters: # OCF_RESKEY_slapd # OCF_RESKEY_ldapsearch # OCF_RESKEY_config # OCF_RESKEY_pidfile # OCF_RESKEY_user # OCF_RESKEY_group # OCF_RESKEY_services # OCF_RESKEY_watch_suffix # OCF_RESKEY_ignore_suffix # OCF_RESKEY_bind_dn # OCF_RESKEY_password # OCF_RESKEY_parameters # OCF_RESKEY_stop_escalate # OCF_RESKEY_maxfiles # ################################################################################ # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -: ${OCF_RESKEY_slapd="/usr/sbin/slapd"} -: ${OCF_RESKEY_ldapsearch="ldapsearch"} -: ${OCF_RESKEY_config=""} -: ${OCF_RESKEY_pidfile=""} -: ${OCF_RESKEY_user=""} -: ${OCF_RESKEY_group=""} -: ${OCF_RESKEY_services="ldap:///"} -: ${OCF_RESKEY_watch_suffix=""} -: ${OCF_RESKEY_ignore_suffix=""} -: ${OCF_RESKEY_bind_dn=""} -: ${OCF_RESKEY_password=""} -: ${OCF_RESKEY_parameters=""} -: ${OCF_RESKEY_stop_escalate=15} -: ${OCF_RESKEY_maxfiles=""} +# Parameter defaults + +OCF_RESKEY_slapd_default="/usr/sbin/slapd" +OCF_RESKEY_ldapsearch_default="ldapsearch" +OCF_RESKEY_config_default="" +OCF_RESKEY_pidfile_default="" +OCF_RESKEY_user_default="" +OCF_RESKEY_group_default="" +OCF_RESKEY_services_default="ldap:///" +OCF_RESKEY_watch_suffix_default="" +OCF_RESKEY_ignore_suffix_default="" +OCF_RESKEY_bind_dn_default="" +OCF_RESKEY_password_default="" +OCF_RESKEY_parameters_default="" +OCF_RESKEY_stop_escalate_default="15" +OCF_RESKEY_maxfiles_default="" + +: ${OCF_RESKEY_slapd=${OCF_RESKEY_slapd_default}} +: ${OCF_RESKEY_ldapsearch=${OCF_RESKEY_ldapsearch_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} +: ${OCF_RESKEY_services=${OCF_RESKEY_services_default}} +: 
${OCF_RESKEY_watch_suffix=${OCF_RESKEY_watch_suffix_default}} +: ${OCF_RESKEY_ignore_suffix=${OCF_RESKEY_ignore_suffix_default}} +: ${OCF_RESKEY_bind_dn=${OCF_RESKEY_bind_dn_default}} +: ${OCF_RESKEY_password=${OCF_RESKEY_password_default}} +: ${OCF_RESKEY_parameters=${OCF_RESKEY_parameters_default}} +: ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}} +: ${OCF_RESKEY_maxfiles=${OCF_RESKEY_maxfiles_default}} USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}" ORIG_IFS=$IFS NEWLINE=' ' ################################################################################ usage() { echo $USAGE >&2 } meta_data() { cat < 0.1 Resource script for Stand-alone LDAP Daemon (slapd). It manages a slapd instance as an OCF resource. Manages a Stand-alone LDAP Daemon (slapd) instance Full path to the slapd binary. For example, "/usr/sbin/slapd". Full path to slapd binary - + Full path to the ldapsearch binary. For example, "/usr/bin/ldapsearch". Full path to ldapsearch binary - + Full path to a slapd configuration directory or a slapd configuration file. For example, "/etc/ldap/slapd.d" or "/etc/ldap/slapd.conf". Full path to configuration directory or file - + File to read the PID from; read from olcPidFile/pidfile in config if not set. File to read PID from - + User name or id slapd will run with. The group id is also changed to this user's gid, unless the group parameter is used to override. User name or id slapd will run with - + Group name or id slapd will run with. Group name or id slapd will run with - + LDAP (and other scheme) URLs slapd will serve. For example, "ldap://127.0.0.1:389 ldaps:/// ldapi:///" LDAP (and other scheme) URLs to serve - + Suffix (database backend) that will be monitored for availability. Multiple suffixes can be specified by providing a space separated list. By providing one or more suffixes here, the ignore_suffix parameter is discarded. All suffixes will be monitored if left blank. 
Suffix that will be monitored for availability. - + Suffix (database backend) that will not be monitored for availability. Multiple suffixes can be specified by providing a space separated list. No suffix will be excluded if left blank. Suffix that will not be monitored for availability. - + Distinguished Name used to bind to the LDAP directory for testing. Leave blank to bind to the LDAP directory anonymously. Distinguished Name used to bind to the LDAP directory for testing. - + Password used to bind to the LDAP directory for testing. Password used to bind to the LDAP directory for testing. - + slapd may be called with additional parameters. Specify any of them here. Any additional parameters to slapd. - + Number of seconds to wait for shutdown (using SIGTERM) before resorting to SIGKILL Seconds before stop escalation to KILL - + Maximum number of open files (for ulimit -n) Max open files - + END } watch_suffix() { local rc if [ -n "$OCF_RESKEY_watch_suffix" ]; then if echo "'$OCF_RESKEY_watch_suffix'" | grep "'$1'" >/dev/null 2>&1; then rc=0 else rc=1 fi else if echo "'$OCF_RESKEY_ignore_suffix'" | grep "'$1'" >/dev/null 2>&1; then rc=1 else rc=0 fi fi return $rc } slapd_pid() { local pid if [ -f "$pid_file" ]; then pid=`head -n 1 "$pid_file" 2>/dev/null` if [ "X$pid" != "X" ]; then echo "$pid" return $OCF_SUCCESS fi ocf_exit_reason "slapd pid file '$pid_file' empty." return $OCF_ERR_GENERIC fi ocf_log info "slapd pid file '$pid_file' does not exist." return $OCF_NOT_RUNNING } slapd_status() { local pid=$1 if ! kill -0 $pid >/dev/null 2>&1; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi } slapd_start() { local options local reason local rc local state slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_SUCCESS ]; then ocf_log info "slapd already running." 
return $state elif [ $state -eq $OCF_ERR_GENERIC ]; then return $state fi options="-u $user -g $group" if [ -d "$config" ]; then options="$options -F $config" elif [ -f "$config" ]; then options="$options -f $config" else ocf_exit_reason "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi if [ -n "$parameters" ]; then options="$options $parameters" fi if [ -n "$OCF_RESKEY_maxfiles" ]; then ulimit -n $OCF_RESKEY_maxfiles u_rc=$? if [ "$u_rc" -ne 0 ]; then ocf_log warn "Could not set ulimit for open files for slapd to '$OCF_RESKEY_maxfiles'" fi fi if [ -n "$services" ]; then $slapd -h "$services" $options 2>&1; rc=$? else $slapd $options 2>&1; rc=$? fi if [ $rc -ne 0 ]; then ocf_exit_reason "slapd returned error." return $OCF_ERR_GENERIC fi while true; do slapd_monitor start if [ $? = "$OCF_SUCCESS" ]; then break fi sleep 1 done ocf_log info "slapd started." return $OCF_SUCCESS } slapd_stop() { local pid local rc local state pid=`slapd_pid`; slapd_status $pid; state=$? if [ $state -eq $OCF_NOT_RUNNING ]; then ocf_log info "slapd already stopped." return $OCF_SUCCESS elif [ $state -eq $OCF_ERR_GENERIC ]; then return $state fi ocf_stop_processes TERM $OCF_RESKEY_stop_escalate $pid; rc=$? if [ $rc -eq 1 ]; then ocf_log err "cannot stop slapd." return $OCF_ERR_GENERIC fi if [ -f "$pid_file" ]; then rm -f "$pid_file" >/dev/null 2>&1 fi ocf_log info "slapd stopped." return $OCF_SUCCESS } slapd_monitor() { local options local rc local state local suffix local suffixes local err_option="-info" slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_NOT_RUNNING ]; then if [ -z "$1" ];then if ! ocf_is_probe; then ocf_exit_reason "slapd process not found." fi fi return $state elif [ $state -ne $OCF_SUCCESS ]; then ocf_exit_reason "slapd returned error." 
return $state fi if [ -d "$config" ]; then for suffix in `find "$config"/'cn=config' -type f -name olcDatabase* -exec \ sed -ne 's/^[[:space:]]*olcSuffix:[[:space:]]\+\(.\+\)/\1/p' {} \;` do suffix=${suffix#\"*} suffix=${suffix%\"*} if watch_suffix $suffix; then suffixes="$suffixes $suffix" fi done elif [ -f "$config" ]; then for suffix in `sed -ne 's/^[[:space:]]*suffix[[:space:]]\+\(.\+\)/\1/p' "$config"` do suffix=${suffix#\"*} suffix=${suffix%\"*} if watch_suffix $suffix; then suffixes="$suffixes $suffix" fi done else if ocf_is_probe; then ocf_log info "slapd configuration '$config' does not exist during probe." else ocf_exit_reason "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi fi options="-LLL -s base -x" if [ -n "$bind_dn" ]; then options="$options -D $bind_dn -w $password" fi [ -z "$1" ] && err_option="" for suffix in $suffixes; do ocf_run -q $err_option "$ldapsearch" -H "$services" -b "$suffix" $options >/dev/null 2>&1; rc=$? case "$rc" in "0") ocf_log debug "slapd database with suffix '$suffix' reachable" ;; "49") ocf_exit_reason "slapd database with suffix '$suffix' unreachable. Invalid credentials." return $OCF_ERR_CONFIGURED ;; *) if [ -z "$1" ] || [ -n "$1" -a $rc -ne 1 ]; then ocf_exit_reason "slapd database with suffix '$suffix' unreachable. exit code ($rc)" fi state=$OCF_ERR_GENERIC ;; esac done return $state } slapd_validate_all() { check_binary "$slapd" check_binary "$ldapsearch" if [ -z "$pid_file" ]; then if [ -d "$config" ]; then pid_file=`sed -ne \ 's/^olcPidFile:[[:space:]]\+\(.\+\)[[:space:]]*/\1/p' \ "$config"/'cn=config.ldif' 2>/dev/null` elif [ -f "$config" ]; then pid_file=`sed -ne \ 's/^pidfile[[:space:]]\+\(.\+\)/\1/p' \ "$config" 2>/dev/null` else if ocf_is_probe; then ocf_log info "slapd configuration '$config' does not exist during probe." else ocf_exit_reason "slapd configuration '$config' does not exist." return $OCF_ERR_INSTALLED fi fi fi if [ -z "$user" ]; then user=`id -nu 2>/dev/null` elif ! 
id "$user" >/dev/null 2>&1; then ocf_exit_reason "slapd user '$user' does not exist" return $OCF_ERR_INSTALLED fi if [ -z "$group" ]; then group=`id -ng 2>/dev/null` elif ! grep "^$group:" /etc/group >/dev/null 2>&1; then ocf_exit_reason "slapd group '$group' does not exist" return $OCF_ERR_INSTALLED fi pid_dir=`dirname "$pid_file"` if [ ! -d "$pid_dir" ]; then mkdir -p "$pid_dir" chown -R "$user" "$pid_dir" chgrp -R "$group" "$pid_dir" fi return $OCF_SUCCESS } # # Main # slapd=$OCF_RESKEY_slapd ldapsearch=$OCF_RESKEY_ldapsearch config=$OCF_RESKEY_config user=$OCF_RESKEY_user group=$OCF_RESKEY_group services=$OCF_RESKEY_services bind_dn=$OCF_RESKEY_bind_dn password=$OCF_RESKEY_password parameters=$OCF_RESKEY_parameters pid_file=$OCF_RESKEY_pidfile if [ -z "$config" ]; then config_dirname="/etc/ldap" if [ -e "/etc/openldap" ]; then config_dirname="/etc/openldap" fi config="$config_dirname/slapd.conf" if [ -e "$config_dirname/slapd.d" ]; then config="$config_dirname/slapd.d" fi fi if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) usage exit $OCF_SUCCESS ;; esac slapd_validate_all rc=$? [ $rc -eq $OCF_SUCCESS ] || exit $rc case $1 in status) slapd_status `slapd_pid`; state=$? if [ $state -eq $OCF_SUCCESS ]; then ocf_log debug "slapd is running." elif [ $state -eq $OCF_NOT_RUNNING ]; then ocf_log debug "slapd is stopped." fi exit $state ;; start) slapd_start exit $? ;; stop) slapd_stop exit $? ;; monitor) slapd_monitor; state=$? 
exit $state ;; validate-all) exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/syslog-ng.in b/heartbeat/syslog-ng.in index 77e3b9c1a..47a23f188 100644 --- a/heartbeat/syslog-ng.in +++ b/heartbeat/syslog-ng.in @@ -1,445 +1,467 @@ #!@BASH_SHELL@ # # Description: Manages a syslog-ng instance, provided by NTT OSSC as an # OCF High-Availability resource under Heartbeat/LinuxHA control # # Copyright (c) 2009 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ############################################################################## # OCF parameters: # OCF_RESKEY_syslog_ng_binary : Path to syslog-ng binary. # Default is "/sbin/syslog-ng" # OCF_RESKEY_configfile : Configuration file # OCF_RESKEY_start_opts : Startup options # OCF_RESKEY_kill_term_timeout: Number of seconds to await to confirm a # normal stop method # # Only OCF_RESKEY_configfile must be specified. Each of the rests # has its default value or refers OCF_RESKEY_configfile to make # its value when no explicit value is given. # # Further infomation for setup: # There are sample configurations at the end of this file. # ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_configfile_default="" +OCF_RESKEY_syslog_ng_binary_default="/sbin/syslog-ng" +OCF_RESKEY_syslog_ng_ctl_default="/sbin/syslog-ng-ctl" +OCF_RESKEY_qdisk_dir_default="" +OCF_RESKEY_control_file_default="" +OCF_RESKEY_persist_file_default="" +OCF_RESKEY_pidfile_default="" +OCF_RESKEY_start_opts_default="" +OCF_RESKEY_kill_term_timeout_default="10" + +: ${OCF_RESKEY_configfile=${OCF_RESKEY_configfile_default}} +: ${OCF_RESKEY_syslog_ng_binary=${OCF_RESKEY_syslog_ng_binary_default}} +: ${OCF_RESKEY_syslog_ng_ctl=${OCF_RESKEY_syslog_ng_ctl_default}} +: ${OCF_RESKEY_qdisk_dir=${OCF_RESKEY_qdisk_dir_default}} +: ${OCF_RESKEY_control_file=${OCF_RESKEY_control_file_default}} +: ${OCF_RESKEY_persist_file=${OCF_RESKEY_persist_file_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_start_opts=${OCF_RESKEY_start_opts_default}} +: ${OCF_RESKEY_kill_term_timeout=${OCF_RESKEY_kill_term_timeout_default}} + usage() { cat <<-! usage: $0 action action: start : start a new syslog-ng instance stop : stop the running syslog-ng instance status : return the status of syslog-ng, run or down monitor : return TRUE if the syslog-ng appears to be working. meta-data : show meta data message validate-all: validate the instance parameters ! return $OCF_ERR_UNIMPLEMENTED } metadata_syslog_ng() { cat < 1.0 This script manages a syslog-ng instance as an HA resource. 
For Premium Edition you should set the following parameters (based on default path being "/opt/syslog-ng"): syslog_ng_binary="/opt/syslog-ng/sbin/syslog-ng" syslog_ng_ctl="/opt/syslog-ng/sbin/syslog-ng-ctl" control_file="/opt/syslog-ng/var/run/syslog-ng.ctl" persist_file="/opt/syslog-ng/var/syslog-ng.persist" pidfile="/opt/syslog-ng/var/run/syslog-ng.pid" Additional parameter for Premium Edition 6 only: qdisk_dir="/opt/syslog-ng/var/" Syslog-ng resource agent This parameter specifies a configuration file for a syslog-ng instance managed by this RA. Configuration file - + This parameter specifies syslog-ng's executable file. syslog-ng executable - + This parameter specifies the path of the syslog-ng-ctl executable file. syslog-ng-ctl executable - + This parameter specifies the directory used for holding disk buffers of syslog-ng (only supported in Premium Edition 6). disk buffer directory (PE6 only) - + This parameter specifies the path, where syslog-ng would place its control socket, through which it can be controlled. process control socket - + This parameter specifies the path for syslog-ng's persist file, which holds persistent information about the mapping of destinations and disk buffers, the internal state of sources, etc. persist file path - + This parameter specifies the path where the pid file of syslog-ng resides. pidfile path - + This parameter specifies startup options for a syslog-ng instance managed by this RA. When no value is given, no startup options is used. Don't use option '-F'. It causes a stuck of a start action. Start options - + On a stop action, a normal stop method(pkill -TERM) is firstly used. And then the confirmation of its completion is waited for the specified seconds by this parameter. The default value is 10. 
Number of seconds to await to confirm a normal stop method - + END return $OCF_SUCCESS } monitor_syslog_ng() { set -- $(pgrep -f "$PROCESS_PATTERN" 2>/dev/null) case $# in 0) ocf_log debug "No syslog-ng process for $CONFIGFILE" return $OCF_NOT_RUNNING;; 1) return $OCF_SUCCESS;; esac ocf_log warn "Multiple syslog-ng process for $CONFIGFILE" return $OCF_SUCCESS } start_syslog_ng() { monitor_syslog_ng if [[ $? = "$OCF_SUCCESS" ]]; then return $OCF_SUCCESS fi # set -- $SYSLOG_NG_OPTS # ocf_run "$SYSLOG_NG_EXE" -f "$SYSLOG_NG_CONF" "$@" # reduce to this? ocf_run "$SYSLOG_NG_EXE" -f "$CONFIGFILE" $START_OPTS ocf_status=$? if [[ "$ocf_status" != "$OCF_SUCCESS" ]]; then return $OCF_ERR_GENERIC fi while true; do monitor_syslog_ng if [[ $? = "$OCF_SUCCESS" ]]; then return $OCF_SUCCESS fi sleep 1 done } stop_syslog_ng() { if [ -x "$SYSLOG_NG_CTL" ]; then if [ -n "${OCF_RESKEY_control_file}" ] && [ -S "${OCF_RESKEY_control_file}" ]; then "$SYSLOG_NG_CTL" stop "$CONTROL_FILE" CTL_STATUS=$? [ $CTL_STATUS -ne 0 ] && pkill -TERM -f "$PROCESS_PATTERN" else pkill -TERM -f "$PROCESS_PATTERN" fi else pkill -TERM -f "$PROCESS_PATTERN" fi typeset lapse_sec=0 while pgrep -f "$PROCESS_PATTERN" > /dev/null; do sleep 1 lapse_sec=$(( lapse_sec + 1 )) ocf_log debug "stop_syslog_ng[$SYSLOG_NG_NAME]: stop NORM $lapse_sec/$KILL_TERM_TIMEOUT" if [ $lapse_sec -ge $KILL_TERM_TIMEOUT ]; then break fi done # if the process can't be removed, then the following part is # not going to be executed (the RA will be killed by lrmd on # timeout) and the pidfile will remain; don't know if that # has any consequences # 2009/09/18 Nakahira # If the syslog-ng process hangs, syslog-ng RA waits # $KILL_TERM_TIMEOUT seconds. # The stop timeout of RA should be longer than $KILL_TERM_TIMEOUT. 
lapse_sec=0 while pgrep -f "$PROCESS_PATTERN" > /dev/null; do pkill -KILL -f "$PROCESS_PATTERN" sleep 1 lapse_sec=$(( lapse_sec + 1 )) ocf_log debug "stop_syslog_ng[$SYSLOG_NG_NAME]: suspend syslog_ng by SIGKILL ($lapse_sec/@@@)" done return $OCF_SUCCESS } status_syslog_ng() { # ???? why not monitor and then print running or stopped monitor_syslog_ng rc=$? if [ $rc = $OCF_SUCCESS ]; then echo "Syslog-ng service is running." elif [ $rc = $OCF_NOT_RUNNING ]; then echo "Syslog-ng service is stopped." else echo "Mutiple syslog-ng process for $CONFIGFILE." fi return $rc } validate_all_syslog_ng() { ocf_log info "validate_all_syslog_ng[$SYSLOG_NG_NAME]" return $OCF_SUCCESS } if [[ "$1" = "meta-data" ]]; then metadata_syslog_ng exit $? fi CONFIGFILE="${OCF_RESKEY_configfile}" if [[ -z "$CONFIGFILE" ]]; then ocf_log err "undefined parameter:configfile" exit $OCF_ERR_CONFIGURED fi SYSLOG_NG_NAME=${CONFIGFILE##*/} SYSLOG_NG_NAME=${SYSLOG_NG_NAME%.*} -SYSLOG_NG_EXE="${OCF_RESKEY_syslog_ng_binary:-/sbin/syslog-ng}" +SYSLOG_NG_EXE="${OCF_RESKEY_syslog_ng_binary}" if [[ ! -x "$SYSLOG_NG_EXE" ]]; then ocf_log err "Invalid value:syslog_ng_binary:$SYSLOG_NG_EXE" exit $OCF_ERR_CONFIGURED fi -SYSLOG_NG_CTL="${OCF_RESKEY_syslog_ng_ctl:-/sbin/syslog-ng-ctl}" +SYSLOG_NG_CTL="${OCF_RESKEY_syslog_ng_ctl}" # actually, the pidfile has no function; the status is checked by # testing for a running process only -KILL_TERM_TIMEOUT="${OCF_RESKEY_kill_term_timeout-10}" +KILL_TERM_TIMEOUT="${OCF_RESKEY_kill_term_timeout}" if ! 
ocf_is_decimal "$KILL_TERM_TIMEOUT"; then ocf_log err "Invalid value:kill_term_timeout:$KILL_TERM_TIMEOUT" exit $OCF_ERR_CONFIGURED fi QDISK_DIR="${OCF_RESKEY_qdisk_dir}" CONTROL_FILE="${OCF_RESKEY_control_file}" PERSIST_FILE="${OCF_RESKEY_persist_file}" PID_FILE="${OCF_RESKEY_pidfile}" EXECUTABLE=$(basename "$SYSLOG_NG_EXE") PROCESS_PATTERN="$EXECUTABLE -f $CONFIGFILE" COMMAND=$1 [ -n "$QDISK_DIR" ] && QDISK_DIR="--qdisk-dir $QDISK_DIR" [ -n "$PERSIST_FILE" ] && PERSIST_FILE="--persist-file $PERSIST_FILE" [ -n "$CONTROL_FILE" ] && CONTROL_FILE="--control $CONTROL_FILE" [ -n "$PID_FILE" ] && PID_FILE="--pidfile $PID_FILE" START_OPTS="${OCF_RESKEY_start_opts} $QDISK_DIR $CONTROL_FILE $PERSIST_FILE $PID_FILE" case "$COMMAND" in start) ocf_log debug "[$SYSLOG_NG_NAME] Enter syslog_ng start" start_syslog_ng func_status=$? ocf_log debug "[$SYSLOG_NG_NAME] Leave syslog_ng start $func_status" exit $func_status ;; stop) ocf_log debug "[$SYSLOG_NG_NAME] Enter syslog_ng stop" stop_syslog_ng func_status=$? ocf_log debug "[$SYSLOG_NG_NAME] Leave syslog_ng stop $func_status" exit $func_status ;; status) status_syslog_ng exit $? ;; monitor) #ocf_log debug "[$SYSLOG_NG_NAME] Enter syslog_ng monitor" monitor_syslog_ng func_status=$? #ocf_log debug "[$SYSLOG_NG_NAME] Leave syslog_ng monitor $func_status" exit $func_status ;; validate-all) validate_all_syslog_ng exit $? ;; *) usage ;; esac # vim: set sw=4 ts=4 : ### A sample snippet of cib.xml for a syslog-ng resource ## # # # # # # # # # # # # ### A sample syslog-ng configuration file for a log collecting host ### ### This sample is for a log collecting host by syslog-ng. ### A syslog-ng process configurated by this sample accepts all messages ### from a certain network. Any message from the network is preserved into ### a file for security infomation. Restricting messages to "authpriv" from ### the network is done on log sending hosts. 
(See the sample below) ### Any internal message of the syslog-ng process is preserved into its ### dedicated file. And any "authpriv" internal message of the syslog-ng ### process is also preserved into the security infomation file. ### ### Change "f_incoming" to suit your enviroment. ### If you use it as a configuration file for the sample cib.xml above, ### save it into "/etc/syslog-ng/syslog-ng-ext.conf". ## #options { # sync (0); # time_reopen (10); # log_fifo_size (1000); # long_hostnames (off); # use_dns (yes); # use_fqdn (no); # create_dirs (no); # keep_hostname (yes); }; # #source s_internal { internal(); }; #source s_incoming { udp(port(514)); }; #filter f_internal { facility(authpriv); }; #filter f_incoming { netmask("172.20.0.0/255.255.192.0"); }; # #destination d_internal { file("/var/log/syslog-ng-ext.log" perm(0640));}; #destination d_incoming { # file("/var/log/secure-ext.log" create_dirs(yes) perm(0640)); }; # #log { source(s_internal); destination(d_internal); }; #log { source(s_internal); filter(f_internal); destination(d_incoming); }; #log { source(s_incoming); filter(f_incoming); destination(d_incoming); }; ### A sample snippet of syslog-ng configuration file for a log sending host ### ### This sample is for a log sending host that uses syslog-ng. ### ### Replace "syslog-ng-ext" to the IP address or the hostname of your ### log collecting host and append it to "syslog-ng.conf" of each log sending ### host. See the install default syslog-ng.conf to know what "s_sys" and ### "f_auth" are. ## #destination d_outgoing { udp("syslog-ng-ext" port(514)); }; #log { source(s_sys); filter(f_auth); destination(d_outgoing); }; ### A sample snippet of syslog configuration file for a log sending host ### ### This sample is for a log sending host that uses syslog. ### ### Replace "syslog-ng-ext" to the IP address or the hostname of your ### log collecting host and append it to "syslog.conf" of each log sending ### host. 
## # authpriv.* @syslog-ng-ext diff --git a/heartbeat/tomcat b/heartbeat/tomcat index 833870038..bc580fbb1 100755 --- a/heartbeat/tomcat +++ b/heartbeat/tomcat @@ -1,765 +1,816 @@ #!/bin/sh # # Description: Manages a Tomcat Server as an OCF High-Availability # resource under Heartbeat/LinuxHA control # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # Copyright (c) 2007 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # ####################################################################### # OCF parameters: # OCF_RESKEY_tomcat_name - The name of the resource. Default is tomcat # OCF_RESKEY_script_log - A destination of the log of this script. Default /var/log/OCF_RESKEY_tomcat_name.log # OCF_RESKEY_tomcat_stop_timeout - Time-out at the time of the stop. Default is 5. DEPRECATED # OCF_RESKEY_tomcat_suspend_trialcount - The re-try number of times awaiting a stop. Default is 10. DEPRECATED # OCF_RESKEY_tomcat_user - A user name to start a resource. # OCF_RESKEY_statusurl - URL for state confirmation. Default is http://127.0.0.1:8080 # OCF_RESKEY_max_stop_time - The max time it should take for proper shutdown. Restrictions, only Tomcat6. # OCF_RESKEY_java_home - Home directory of Java. Default is none # OCF_RESKEY_java_opts - Options to pass to Java JVM for start and stop. 
Default is none # OCF_RESKEY_catalina_home - Home directory of Tomcat. Default is none # OCF_RESKEY_catalina_base - Base directory of Tomcat. Default is OCF_RESKEY_catalina_home # OCF_RESKEY_catalina_out - Log file name of Tomcat. Default is OCF_RESKEY_catalina_base/logs/catalina.out # OCF_RESKEY_catalina_pid - A PID file name of Tomcat. Default is OCF_RESKEY_catalina_base/logs/catalina.pid # OCF_RESKEY_tomcat_start_opts - Start options of Tomcat. Default is none. # OCF_RESKEY_catalina_opts - CATALINA_OPTS environment variable. Default is none. # OCF_RESKEY_catalina_tmpdir - CATALINA_TMPDIR environment variable. Default is none. # OCF_RESKEY_catalina_rotate_log - Control catalina.out logrotation flag. Default is NO. # OCF_RESKEY_catalina_rotatetime - catalina.out logrotation time span(seconds). Default is 86400. # OCF_RESKEY_java_endorsed_dirs - JAVA_ENDORSED_DIRS environment variable. Default is none. # OCF_RESKEY_logging_config - LOGGING_CONFIG environment variable. Default is none. # OCF_RESKEY_logging_manager - LOGGING_MANAGER environment variable. Default is none. ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Use runuser if available for SELinux. if [ -x /sbin/runuser ]; then SU=runuser else SU=su fi usage() { cat <<-EOF usage: $0 action action: start start Tomcat stop stop Tomcat status return the status of Tomcat, up or down monitor return TRUE if Tomcat appears to be working. You have to have installed $WGETNAME for this to work. meta-data show meta data message validate-all validate the instance parameters EOF } isrunning_tomcat() { $WGET --tries=20 -O /dev/null $RESOURCE_STATUSURL >/dev/null 2>&1 } isalive_tomcat() { if ocf_is_true $SYSTEMD; then systemctl is-active tomcat@${TOMCAT_NAME} > /dev/null 2>&1 return $? fi # As the server stops, the PID file disappears. 
To avoid race conditions, # we will have remembered the PID of a running instance on script entry. local pid=$rememberedPID # If there is a PID file, attempt to use that if [ -f $CATALINA_PID ]; then local tmp ocf_log debug "Reading pid from $CATALINA_PID" tmp=`head -n 1 $CATALINA_PID` if [ $? -eq 0 ]; then pid=$tmp fi fi if [ -n "$pid" ] && [ "$pid" -gt 0 ]; then # Retry message for restraint ocf_log debug "Sending noop signal to $pid" kill -s 0 $pid >/dev/null 2>&1 return $? fi # No PID file false } # Check rotatelogs process and restart if it is stopped monitor_rotatelogs() { pgrep -f "$ROTATELOGS.*$CATALINA_BASE/logs/catalina_%F.log" > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log warn "A rotatelogs command for $CATALINA_BASE/logs/catalina_%F.log is not running. Restarting it." start_rotatelogs if [ $? -eq 0 ]; then ocf_log info "Restart rotatelogs process succeeded." else ocf_log warn "Restart rotatelogs process failed." fi fi } monitor_tomcat() { isalive_tomcat || return $OCF_NOT_RUNNING isrunning_tomcat || return $OCF_ERR_GENERIC if ocf_is_true ${CATALINA_ROTATE_LOG}; then # Monitor rotatelogs process and restart it if it is stopped. # And never consider rotatelogs process failure to be a monitor failure # as long as Tomcat process works fine. monitor_rotatelogs fi return $OCF_SUCCESS } start_rotatelogs() { # -s is required because tomcat5.5's login shell is /bin/false $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \ -c "$ROTATELOGS -l \"$CATALINA_BASE/logs/catalina_%F.log\" $CATALINA_ROTATETIME" \ < "$CATALINA_OUT" > /dev/null 2>&1 & } # Execute catalina.out log rotation rotate_catalina_out() { # Check catalina_%F.log is writable or not. CURRENT_ROTATELOG_SUFFIX=`date +"%F"` $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \ -c "touch \"$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log\"" > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_exit_reason "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable." 
return $OCF_ERR_GENERIC fi # Clean up and set permissions on required files rm -rf "$CATALINA_BASE"/temp/* if [ -p "$CATALINA_OUT" ]; then rm -f "$CATALINA_OUT" elif [ -e "$CATALINA_OUT" ]; then DATE=`date +"%F-%H%M%S"` ocf_log warn "$CATALINA_OUT already exists. It is saved as $CATALINA_OUT-$DATE" mv "$CATALINA_OUT" "$CATALINA_OUT-$DATE" fi mkfifo -m700 "$CATALINA_OUT" chown --dereference "$RESOURCE_TOMCAT_USER" "$CATALINA_OUT" || true start_rotatelogs } create_systemd_config() { cat<<-EOF > /etc/sysconfig/tomcat@${TOMCAT_NAME} JAVA_HOME=${JAVA_HOME} JAVA_OPTS="${JAVA_OPTS}" CATALINA_HOME=${CATALINA_HOME} CATALINA_BASE=${CATALINA_BASE} CATALINA_OUT=${CATALINA_OUT} CATALINA_OPTS="${CATALINA_OPTS}" CATALINA_TMPDIR="${CATALINA_TMPDIR}" JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}" LOGGING_CONFIG="${LOGGING_CONFIG}" LOGGING_MANAGER="${LOGGING_MANAGER}" TOMCAT_CFG=${TOMCAT_CFG} EOF } # shellcheck disable=SC2068 tomcatCommand() { if ocf_is_true $SYSTEMD; then systemctl $@ tomcat@${TOMCAT_NAME} else cat<<-END_TOMCAT_COMMAND export JAVA_HOME=${JAVA_HOME} export JAVA_OPTS="${JAVA_OPTS}" export CATALINA_HOME=${CATALINA_HOME} export CATALINA_BASE=${CATALINA_BASE} export CATALINA_OUT=${CATALINA_OUT} export CATALINA_PID=${CATALINA_PID} export CATALINA_OPTS="${CATALINA_OPTS}" export CATALINA_TMPDIR="${CATALINA_TMPDIR}" export JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}" export LOGGING_CONFIG="${LOGGING_CONFIG}" export LOGGING_MANAGER="${LOGGING_MANAGER}" export TOMCAT_CFG=${TOMCAT_CFG} $TOMCAT_START_SCRIPT $@ END_TOMCAT_COMMAND fi } # shellcheck disable=SC2068 attemptTomcatCommand() { if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then TOMCAT_CFG=$(mktemp "${HA_RSCTMP}/tomcat-tmp-XXXXX.cfg") export TOMCAT_CFG fi if ocf_is_true $SYSTEMD; then tomcatCommand $@ elif [ "$RESOURCE_TOMCAT_USER" = root ]; then "$TOMCAT_START_SCRIPT" $@ >> "$TOMCAT_CONSOLE" 2>&1 else tomcatCommand $@ | $SU - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 fi if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then rm 
-f "$TOMCAT_CFG" fi } start_tomcat() { if ocf_is_true $SYSTEMD; then create_systemd_config fi cd "$CATALINA_HOME/bin" || return $OCF_ERR_GENERIC validate_all_tomcat || exit $? monitor_tomcat if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi # Remove $CATALINA_PID if it exists rm -f $CATALINA_PID #ocf_log debug "catalina.out rotation FLG = ${CATALINA_ROTATE_LOG}" if ocf_is_true ${CATALINA_ROTATE_LOG}; then rotate_catalina_out if [ $? -eq 0 ]; then ocf_log debug "Rotate catalina.out succeeded." else ocf_exit_reason "Rotate catalina.out failed. Avoid starting tomcat without catalina.out rotation." return $OCF_ERR_GENERIC fi fi echo "`date "+%Y/%m/%d %T"`: start ===========================" >> "$TOMCAT_CONSOLE" ocf_log debug "CATALINA_OPTS value = ${CATALINA_OPTS}" attemptTomcatCommand start ${TOMCAT_START_OPTS} & while true; do monitor_tomcat if [ $? -eq $OCF_SUCCESS ]; then break fi ocf_log debug "start_tomcat[$TOMCAT_NAME]: retry monitor_tomcat" sleep 3 done return $OCF_SUCCESS } stop_tomcat() { local stop_time local RA_TIMEOUT=20 local TOMCAT_STOP_OPTS="" if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then RA_TIMEOUT=$((OCF_RESKEY_CRM_meta_timeout/1000)) fi STOP_TIMEOUT=$((RA_TIMEOUT-5)) if [ -n "$MAX_STOP_TIME" ]; then if [ $MAX_STOP_TIME -gt $RA_TIMEOUT ]; then ocf_log warn "max_stop_timeout must be shorter than the timeout of stop operation." 
fi if [ $MAX_STOP_TIME -eq 0 ]; then STOP_TIMEOUT=$RA_TIMEOUT else STOP_TIMEOUT=$MAX_STOP_TIME fi fi cd "$CATALINA_HOME/bin" memorize_pid # This lets monitoring continue to work reliably echo "`date "+%Y/%m/%d %T"`: stop ###########################" >> "$TOMCAT_CONSOLE" if [ "$TOMCAT_START_SCRIPT" = "$CATALINA_HOME/bin/catalina.sh" ]; then TOMCAT_STOP_OPTS="$STOP_TIMEOUT -force" fi stop_time=$(date +%s) attemptTomcatCommand stop $TOMCAT_STOP_OPTS lapse_sec=0 while isalive_tomcat; do sleep 1 lapse_sec=`expr $(date +%s) - $stop_time` if [ $lapse_sec -ge $STOP_TIMEOUT ]; then ocf_log debug "stop_tomcat[$TOMCAT_NAME]: stop failed, killing with SIGKILL ($lapse_sec)" kill -s KILL $rememberedPID > /dev/null 2>&1 fi done if ocf_is_true ${CATALINA_ROTATE_LOG}; then rm -f "$CATALINA_PID" "${CATALINA_OUT}" else rm -f "$CATALINA_PID" fi return $OCF_SUCCESS } metadata_tomcat() { cat < 1.0 Resource script for Tomcat. It manages a Tomcat instance as a cluster resource. Manages a Tomcat servlet environment instance The name of the resource, added as a Java parameter in JAVA_OPTS: -Dname=<tomcat_name> to Tomcat process on start. Used to ensure process is still running and must be unique. The name of the resource - + Log file, used during start and stop operations. Log file - + Time-out for stop operation. DEPRECATED Time-out for the stop operation. DEPRECATED - + Maximum number of times to retry stop operation before suspending and killing Tomcat. DEPRECATED. Does not retry. Max retry count for stop operation. DEPRECATED - + The user who starts Tomcat. The user who starts Tomcat - + URL for state confirmation. URL for state confirmation - + Number of seconds to wait during a stop before drastic measures (force kill) are used on the tomcat process. This number MUST be less than your cluster stop timeout for the resource. The default value is five seconds before the timeout value of stop operation. When it is over this value, it stops a process in kill commands. 
This parameter is only effective on Tomcat 6 or later. The max time it should take for proper shutdown. - + Home directory of Java. Home directory of Java - + Java JVM options used on start and stop. Java options parsed to JVM, used on start and stop. - + Home directory of Tomcat. Home directory of Tomcat - + Instance directory of Tomcat Instance directory of Tomcat, defaults to catalina_home - + Log file name of Tomcat Log file name of Tomcat, defaults to catalina_base/logs/catalina.out - + A PID file name for Tomcat. A PID file name for Tomcat - + Force use of systemd when available. Force use of systemd when available - + Absolute path to the custom tomcat start script to use. Tomcat start script location - + Tomcat start options. Tomcat start options - + Catalina options, for the start operation only. Catalina options - + Temporary directory of Tomcat Temporary directory of Tomcat, defaults to none - + Rotate catalina.out flag. Rotate catalina.out flag - + catalina.out rotation interval (seconds). catalina.out rotation interval (seconds) - + Java_endorsed_dirs of tomcat Java_endorsed_dirs of Tomcat, defaults to none - + Logging_config of tomcat Logging_config of Tomcat, defaults to none - + Logging_manager of tomcat Logging_manager of Tomcat, defaults to none. - + END return $OCF_SUCCESS } validate_all_tomcat() { local port local rc=$OCF_SUCCESS ocf_log info "validate_all_tomcat[$TOMCAT_NAME]" check_binary $WGET if ! ocf_is_true $OCF_RESKEY_force_systemd && [ -z "${TOMCAT_START_SCRIPT}" ]; then ocf_exit_reason "No default tomcat start script detected. Please specify start script location using the 'tomcat_start_script' option" rc=$OCF_ERR_CONFIGURED fi if [ -n "$MAX_STOP_TIME" ] && [ "$MAX_STOP_TIME" -lt 0 ]; then ocf_exit_reason "max_stop_time must be set to a value greater than 0." 
rc=$OCF_ERR_CONFIGURED fi if echo "$RESOURCE_STATUSURL" | grep -q ":[0-9][0-9]*" ; then port=${RESOURCE_STATUSURL##*:} port=${port%%/*} ocf_log debug "Tomcat port is $port" ocf_log debug "grep port=\"$port\" $CATALINA_BASE/conf/server.xml" grep "port=\"$port\"" $CATALINA_BASE/conf/server.xml > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_exit_reason "Your configured status URL specifies a port ($port), but the server does not have a connector listening to that port in $CATALINA_BASE/conf/server.xml" rc=$OCF_ERR_INSTALLED fi fi if ocf_is_true ${CATALINA_ROTATE_LOG}; then if [ ! -x "$ROTATELOGS" ]; then ocf_exit_reason "rotatelogs command does not exist." rc=$OCF_ERR_INSTALLED fi fi return $rc } # As we stop tomcat, it removes it's own pid file...we still want to know what it was memorize_pid() { if [ -f $CATALINA_PID ]; then rememberedPID=$(cat $CATALINA_PID) fi } # ### tomcat RA environment variables # +# Parameter defaults + +OCF_RESKEY_tomcat_name_default="tomcat" +OCF_RESKEY_catalina_home_default="" +OCF_RESKEY_catalina_base_default="" + +: ${OCF_RESKEY_tomcat_name=${OCF_RESKEY_tomcat_name_default}} +: ${OCF_RESKEY_catalina_home=${OCF_RESKEY_catalina_home_default}} +: ${OCF_RESKEY_catalina_base=${OCF_RESKEY_catalina_base_default}} + +# Only default to true for RedHat systems without catalina.sh +if [ -e "${OCF_RESKEY_catalina_home}/bin/catalina.sh" ] || ! 
is_redhat_based; then + OCF_RESKEY_force_systemd_default=0 +else + OCF_RESKEY_force_systemd_default=1 +fi + +: ${OCF_RESKEY_force_systemd=${OCF_RESKEY_force_systemd_default}} + +if [ -z "${OCF_RESKEY_tomcat_start_script}" ]; then + if ocf_is_true $OCF_RESKEY_force_systemd && \ + systemd_is_running; then + SYSTEMD=1 + elif [ -e "${OCF_RESKEY_catalina_home}/bin/catalina.sh" ]; then + TOMCAT_START_SCRIPT="${OCF_RESKEY_catalina_home}/bin/catalina.sh" + elif [ -e "/usr/sbin/tomcat" ]; then + REDIRECT_DEFAULT_CONFIG=1 + TOMCAT_START_SCRIPT="/usr/sbin/tomcat" + elif [ -e "/usr/sbin/tomcat6" ]; then + REDIRECT_DEFAULT_CONFIG=1 + TOMCAT_START_SCRIPT="/usr/sbin/tomcat6" + fi +fi + +OCF_RESKEY_script_log_default="/var/log/${OCF_RESKEY_tomcat_name}.log" +OCF_RESKEY_tomcat_stop_timeout_default="" +OCF_RESKEY_tomcat_suspend_trialcount_default="" +OCF_RESKEY_tomcat_user_default="root" +OCF_RESKEY_statusurl_default="http://127.0.0.1:8080" +OCF_RESKEY_max_stop_time_default="" +OCF_RESKEY_java_home_default="" +OCF_RESKEY_java_opts_default="" +OCF_RESKEY_catalina_out_default="${OCF_RESKEY_catalina_base-${OCF_RESKEY_catalina_home}}/logs/catalina.out" +OCF_RESKEY_catalina_pid_default="" +OCF_RESKEY_tomcat_start_script_default="${TOMCAT_START_SCRIPT}" +OCF_RESKEY_tomcat_start_opts_default="" +OCF_RESKEY_catalina_opts_default="" +OCF_RESKEY_catalina_tmpdir_default="" +OCF_RESKEY_catalina_rotate_log_default="NO" +OCF_RESKEY_catalina_rotatetime_default="86400" +OCF_RESKEY_java_endorsed_dirs_default="" +OCF_RESKEY_logging_config_default="" +OCF_RESKEY_logging_manager_default="" + +: ${OCF_RESKEY_script_log=${OCF_RESKEY_script_log_default}} +: ${OCF_RESKEY_tomcat_stop_timeout=${OCF_RESKEY_tomcat_stop_timeout_default}} +: ${OCF_RESKEY_tomcat_suspend_trialcount=${OCF_RESKEY_tomcat_suspend_trialcount_default}} +: ${OCF_RESKEY_tomcat_user=${OCF_RESKEY_tomcat_user_default}} +: ${OCF_RESKEY_statusurl=${OCF_RESKEY_statusurl_default}} +: 
${OCF_RESKEY_max_stop_time=${OCF_RESKEY_max_stop_time_default}} +: ${OCF_RESKEY_java_home=${OCF_RESKEY_java_home_default}} +: ${OCF_RESKEY_java_opts=${OCF_RESKEY_java_opts_default}} +: ${OCF_RESKEY_catalina_out=${OCF_RESKEY_catalina_out_default}} +: ${OCF_RESKEY_catalina_pid=${OCF_RESKEY_catalina_pid_default}} +: ${OCF_RESKEY_tomcat_start_script=${OCF_RESKEY_tomcat_start_script_default}} +: ${OCF_RESKEY_tomcat_start_opts=${OCF_RESKEY_tomcat_start_opts_default}} +: ${OCF_RESKEY_catalina_opts=${OCF_RESKEY_catalina_opts_default}} +: ${OCF_RESKEY_catalina_tmpdir=${OCF_RESKEY_catalina_tmpdir_default}} +: ${OCF_RESKEY_catalina_rotate_log=${OCF_RESKEY_catalina_rotate_log_default}} +: ${OCF_RESKEY_catalina_rotatetime=${OCF_RESKEY_catalina_rotatetime_default}} +: ${OCF_RESKEY_java_endorsed_dirs=${OCF_RESKEY_java_endorsed_dirs_default}} +: ${OCF_RESKEY_logging_config=${OCF_RESKEY_logging_config_default}} +: ${OCF_RESKEY_logging_manager=${OCF_RESKEY_logging_manager_default}} + COMMAND=$1 -TOMCAT_NAME="${OCF_RESKEY_tomcat_name-tomcat}" -TOMCAT_CONSOLE="${OCF_RESKEY_script_log-/var/log/$TOMCAT_NAME.log}" -RESOURCE_TOMCAT_USER="${OCF_RESKEY_tomcat_user-root}" -RESOURCE_STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}" +TOMCAT_NAME="${OCF_RESKEY_tomcat_name}" +TOMCAT_CONSOLE="${OCF_RESKEY_script_log}" +RESOURCE_TOMCAT_USER="${OCF_RESKEY_tomcat_user}" +RESOURCE_STATUSURL="${OCF_RESKEY_statusurl}" JAVA_HOME="${OCF_RESKEY_java_home}" JAVA_OPTS="${OCF_RESKEY_java_opts}" CATALINA_HOME="${OCF_RESKEY_catalina_home}" CATALINA_BASE="${OCF_RESKEY_catalina_base-${OCF_RESKEY_catalina_home}}" -CATALINA_OUT="${OCF_RESKEY_catalina_out-$CATALINA_BASE/logs/catalina.out}" +CATALINA_OUT="${OCF_RESKEY_catalina_out}" CATALINA_PID=$OCF_RESKEY_catalina_pid if [ -z "$CATALINA_PID" ]; then mkdir -p "${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/" if [ "${RESOURCE_TOMCAT_USER}" != "root" ]; then chown ${RESOURCE_TOMCAT_USER} "${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/" fi 
CATALINA_PID="${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/catalina.pid" fi -# Only default to true for RedHat systems without catalina.sh -if [ -e "$CATALINA_HOME/bin/catalina.sh" ] || ! is_redhat_based; then - OCF_RESKEY_force_systemd_default=0 -else - OCF_RESKEY_force_systemd_default=1 -fi - MAX_STOP_TIME="${OCF_RESKEY_max_stop_time}" -: ${OCF_RESKEY_force_systemd=${OCF_RESKEY_force_systemd_default}} TOMCAT_START_OPTS="${OCF_RESKEY_tomcat_start_opts}" TOMCAT_START_SCRIPT="${OCF_RESKEY_tomcat_start_script}" CATALINA_OPTS="-Dname=$TOMCAT_NAME ${OCF_RESKEY_catalina_opts}" CATALINA_TMPDIR="${OCF_RESKEY_catalina_tmpdir}" -CATALINA_ROTATE_LOG="${OCF_RESKEY_catalina_rotate_log-NO}" -CATALINA_ROTATETIME="${OCF_RESKEY_catalina_rotatetime-86400}" +CATALINA_ROTATE_LOG="${OCF_RESKEY_catalina_rotate_log}" +CATALINA_ROTATETIME="${OCF_RESKEY_catalina_rotatetime}" JAVA_ENDORSED_DIRS="${OCF_RESKEY_java_endorsed_dirs}" LOGGING_CONFIG="${OCF_RESKEY_logging_config}" LOGGING_MANAGER="${OCF_RESKEY_logging_manager}" -if [ -z "${TOMCAT_START_SCRIPT}" ]; then - if ocf_is_true $OCF_RESKEY_force_systemd && \ - systemd_is_running; then - SYSTEMD=1 - elif [ -e "$CATALINA_HOME/bin/catalina.sh" ]; then - TOMCAT_START_SCRIPT="$CATALINA_HOME/bin/catalina.sh" - elif [ -e "/usr/sbin/tomcat" ]; then - REDIRECT_DEFAULT_CONFIG=1 - TOMCAT_START_SCRIPT="/usr/sbin/tomcat" - elif [ -e "/usr/sbin/tomcat6" ]; then - REDIRECT_DEFAULT_CONFIG=1 - TOMCAT_START_SCRIPT="/usr/sbin/tomcat6" - fi -fi - LSB_STATUS_STOPPED=3 if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case "$COMMAND" in meta-data) metadata_tomcat; exit $OCF_SUCCESS;; help|usage) usage; exit $OCF_SUCCESS;; esac if [ ! -d "$JAVA_HOME" -o ! -d "$CATALINA_HOME" -o ! -d "$CATALINA_BASE" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac ocf_exit_reason "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist." 
exit $OCF_ERR_INSTALLED fi export JAVA_HOME JAVA_OPTS CATALINA_HOME CATALINA_BASE CATALINA_OUT CATALINA_PID CATALINA_OPTS CATALINA_TMPDIR JAVA_ENDORSED_DIRS LOGGING_CONFIG LOGGING_MANAGER JAVA=${JAVA_HOME}/bin/java if [ ! -x "$JAVA" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac ocf_exit_reason "java command does not exist." exit $OCF_ERR_INSTALLED fi ROTATELOGS="" if ocf_is_true ${CATALINA_ROTATE_LOG}; then # Look for rotatelogs/rotatelogs2 if [ -x /usr/sbin/rotatelogs ]; then ROTATELOGS=/usr/sbin/rotatelogs elif [ -x /usr/sbin/rotatelogs2 ]; then ROTATELOGS=/usr/sbin/rotatelogs2 fi fi # # ------------------ # the main script # ------------------ # case "$COMMAND" in start) ocf_log debug "[$TOMCAT_NAME] Enter tomcat start" start_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat start $func_status" exit $func_status ;; stop) ocf_log debug "[$TOMCAT_NAME] Enter tomcat stop" stop_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat stop $func_status" exit $func_status ;; status) if monitor_tomcat; then echo tomcat instance $TOMCAT_NAME is running exit $OCF_SUCCESS else echo tomcat instance $TOMCAT_NAME is stopped exit $OCF_NOT_RUNNING fi exit $? ;; monitor) #ocf_log debug "[$TOMCAT_NAME] Enter tomcat monitor" monitor_tomcat func_status=$? #ocf_log debug "[$TOMCAT_NAME] Leave tomcat monitor $func_status" exit $func_status ;; meta-data) metadata_tomcat exit $? ;; validate-all) validate_all_tomcat exit $? 
;; usage|help) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/varnish b/heartbeat/varnish index 452b38108..07a4f6449 100755 --- a/heartbeat/varnish +++ b/heartbeat/varnish @@ -1,502 +1,504 @@ #!/bin/sh # # # Varnish # # Description: Manage varnish instances as a HA resource # # Author: Léon Keijser # # License: GNU General Public License (GPL) # # See usage() for more details # # OCF instance parameters: # OCF_RESKEY_pid # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_config # OCF_RESKEY_name # OCF_RESKEY_listen_address # OCF_RESKEY_mgmt_address # OCF_RESKEY_ttl # OCF_RESKEY_varnish_user # OCF_RESKEY_varnish_group # OCF_RESKEY_backend_type # OCF_RESKEY_backend_size # OCF_RESKEY_backend_file # OCF_RESKEY_thread_pools # OCF_RESKEY_thread_pool_min # OCF_RESKEY_thread_pool_max # OCF_RESKEY_thread_pool_timeout # OCF_RESKEY_secret # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Set default paramenter values # Set these two first, as other defaults depend on it OCF_RESKEY_name_default=${OCF_RESOURCE_INSTANCE} : ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +OCF_RESKEY_config_default="" OCF_RESKEY_binary_default=varnishd OCF_RESKEY_client_binary_default=varnishadm OCF_RESKEY_pid_default=/var/run/varnishd_${OCF_RESKEY_name}.pid OCF_RESKEY_listen_address_default=0.0.0.0:80 OCF_RESKEY_ttl_default=600 OCF_RESKEY_varnish_user_default=varnish OCF_RESKEY_varnish_group_default=varnish OCF_RESKEY_backend_type_default=malloc OCF_RESKEY_backend_size_default=1G OCF_RESKEY_backend_file_default=/var/lib/varnish/${OCF_RESKEY_name}.bin OCF_RESKEY_thread_pools_default=2 OCF_RESKEY_thread_pool_min_default=100 OCF_RESKEY_thread_pool_max_default=3000 OCF_RESKEY_thread_pool_timeout_default=120 OCF_RESKEY_maxfiles_default=131072 OCF_RESKEY_max_locked_memory_default=82000 OCF_RESKEY_secret_default=/etc/varnish/secret +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_listen_address=${OCF_RESKEY_listen_address_default}} : ${OCF_RESKEY_ttl=${OCF_RESKEY_ttl_default}} : ${OCF_RESKEY_varnish_user=${OCF_RESKEY_varnish_user_default}} : ${OCF_RESKEY_varnish_group=${OCF_RESKEY_varnish_group_default}} : ${OCF_RESKEY_backend_type=${OCF_RESKEY_backend_type_default}} : ${OCF_RESKEY_backend_size=${OCF_RESKEY_backend_size_default}} : ${OCF_RESKEY_backend_file=${OCF_RESKEY_backend_file_default}} : ${OCF_RESKEY_thread_pools=${OCF_RESKEY_thread_pools_default}} : ${OCF_RESKEY_thread_pool_min=${OCF_RESKEY_thread_pool_min_default}} : ${OCF_RESKEY_thread_pool_max=${OCF_RESKEY_thread_pool_max_default}} : ${OCF_RESKEY_thread_pool_timeout=${OCF_RESKEY_thread_pool_timeout_default}} : 
${OCF_RESKEY_maxfiles=${OCF_RESKEY_maxfiles_default}} : ${OCF_RESKEY_max_locked_memory=${OCF_RESKEY_max_locked_memory_default}} : ${OCF_RESKEY_secret=${OCF_RESKEY_secret_default}} meta_data() { cat < 1.0 The Varnish Resource Agent can manage several varnishd instances throughout the cluster. It does so by creating a unique PID file and requires a unique listen address and name for each instance. Manage a Varnish instance The VCL configuration file that Varnish should manage, for example "/etc/varnish/default.vcl". VCL file - + Override the name of the instance that should be given to Varnish (defaults to the resource identifier). Instance name Write the process's PID to the specified file. The default will include the specified name, i.e.: "/var/run/varnish_production.pid". Unlike what this help message shows, it is most likely not necessary to change this parameter. Listen address Listen on this address:port, for example "192.168.1.1:80" Listen address Provide a management interface, for example "127.0.0.1:2222" Management interface Specify a hard minimum time to live for cached documents. TTL Specify the name of an unprivileged user to which the child process should switch before it starts accepting connections. Unprivileged user Specify the name of an unprivileged group to which the child process should switch before it starts accepting connections. Unprivileged group Use the specified storage backend. Valid options are 'malloc' for memory and 'file' for a file backend. Backend type Specify the size of the backend. For example "1G". Backend size Specify the backend filename if you use backend_type file. For example /var/lib/varnish/mybackend.bin Backend file Number of worker thread pools. Each pool has the minimum, maximum and timeout values configured in the thread_pool_min, thread_pool_max and thread_pool_timeout parameters Worker thread pools Start at least min but no more than max worker threads with the specified idle timeout in each pool. 
Minimum worker threads Start at least min but no more than max worker threads with the specified idle timeout in each pool. Maximum worker threads Start at least min but no more than max worker threads with the specified idle timeout in each pool. Worker threads timeout This is used to control Varnish via a CLI. It's currently only used to check the status of the running child process. Varnish admin utility Maximum number of open files (for ulimit -n) Max open files Locked shared memory limit (for ulimit -l) Max locked memory Path to a file containing a secret used for authorizing access to the management port. Path of the secret file - + END } ####################################################################### varnish_usage() { cat < : vsftpd script # License: GNU General Public License (GPLv2) # # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg starts vsftpd. # # The "stop" arg stops it. # # OCF parameters: # OCF_RESKEY_binpath # OCF_RESKEY_conffile # OCF_RESKEY_pidfile # ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -: ${OCF_RESKEY_binpath="/usr/sbin/vsftpd"} -: ${OCF_RESKEY_conffile="/etc/vsftpd/vsftpd.conf"} -: ${OCF_RESKEY_pidfile="/var/run/vsftpd.pid"} +# Parameter defaults + +OCF_RESKEY_binpath_default="/usr/sbin/vsftpd" +OCF_RESKEY_conffile_default="/etc/vsftpd/vsftpd.conf" +OCF_RESKEY_pidfile_default="/var/run/vsftpd.pid" + +: ${OCF_RESKEY_binpath=${OCF_RESKEY_binpath_default}} +: ${OCF_RESKEY_conffile=${OCF_RESKEY_conffile_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 This script manages vsftpd Manages an vsftpd The vsftpd binary path. 
For example, "/usr/sbin/vsftpd" Full path to the vsftpd binary - + The vsftpd configuration file name with full path. For example, "/etc/vsftpd/vsftpd.conf" Configuration file name with full path - + The vsftpd pidfile with full path. For example, "/var/run/vsftpd.pid" PID file with full path - + END exit $OCF_SUCCESS } get_pidfile() { PIDFILE=$OCF_RESKEY_pidfile } vsftpd_status() { if [ -n "$PIDFILE" -a -f $PIDFILE ]; then # vsftpd is probably running PID=`cat $PIDFILE` if [ -n "$PID" ]; then if ps -p $PID | grep vsftpd >/dev/null ; then ocf_log info "vsftpd daemon running" return $OCF_SUCCESS else ocf_log info "vsftpd daemon is not running but pid file exists" return $OCF_ERR_GENERIC fi else ocf_log err "PID file empty!" return $OCF_ERR_GENERIC fi fi # vsftpd is not running ocf_log info "vsftpd daemon is not running" return $OCF_NOT_RUNNING } vsftpd_start() { # if vsftpd is running return success vsftpd_status retVal=$? if [ $retVal -eq $OCF_SUCCESS ]; then exit $OCF_SUCCESS elif [ $retVal -ne $OCF_NOT_RUNNING ]; then ocf_log err "Error. Unknown status." exit $OCF_ERR_GENERIC fi if [ -n "$OCF_RESKEY_binpath" ]; then COMMAND="$OCF_RESKEY_binpath" fi if [ -n "$OCF_RESKEY_conffile" ]; then COMMAND="$COMMAND $OCF_RESKEY_conffile" fi $COMMAND; if [ $? -ne 0 ]; then ocf_log err "Error. vsftpd returned error $?." exit $OCF_ERR_GENERIC fi PID=$( pgrep $OCF_RESKEY_binpath ) case $? in 0) ocf_log info "PID file (pid:${PID} at $PIDFILE) created for vsftpd." ocf_log info "Started vsftpd." echo $PID > $PIDFILE exit $OCF_SUCCESS ;; 1) rm -f "$PIDFILE" > /dev/null 2>&1 ocf_log info "$Error getting pid." exit $OCF_ERR_GENERIC ;; *) rm -f "$PIDFILE" > /dev/null 2>&1 ocf_exit_reason "Error encountered detecting pid of vsftpd." exit $OCF_ERR_GENERIC ;; esac } vsftpd_stop() { if vsftpd_status ; then PID=`cat $PIDFILE` if [ -n "$PID" ] ; then kill $PID if [ $? -ne 0 ]; then kill -s KILL $PID if [ $? -ne 0 ]; then ocf_log err "Error. Could not stop vsftpd daemon." 
return $OCF_ERR_GENERIC fi fi rm $PIDFILE 2>/dev/null fi fi ocf_log info "Stopped vsftpd daemon." exit $OCF_SUCCESS } vsftpd_monitor() { vsftpd_status } vsftpd_validate_all() { check_binary $OCF_RESKEY_binpath if [ -n "$OCF_RESKEY_conffile" -a ! -f "$OCF_RESKEY_conffile" ]; then ocf_log err "Config file $OCF_RESKEY_conffile does not exist." exit $OCF_ERR_ARGS fi return $OCF_SUCCESS } # # Main # if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in start) get_pidfile vsftpd_start ;; stop) get_pidfile vsftpd_stop ;; status) get_pidfile vsftpd_status ;; monitor)get_pidfile vsftpd_monitor ;; validate-all) vsftpd_validate_all ;; meta-data) meta_data ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac