#!/usr/bin/env bash
#
# CI entry point: builds the tree, runs "make check", then runs shellcheck
# over every executable and *.sh file below heartbeat/.
#
# Exit status: 1 if "make check" failed or any shellcheck run reported an
# error, 0 otherwise.
# Environment: set DEBUG to any non-empty value to trace execution.

set -o pipefail

[[ "${DEBUG:-}" ]] && set -x

# Number of failed steps (make check + scripts with shellcheck errors).
declare -i failed
failed=0

# SC2046: Quote this to prevent word splitting.
# SC1090: Can't follow non-constant source. Use a directive to specify location.
# SC2039: In POSIX sh, 'local' is undefined.
# SC2086: Double quote to prevent globbing and word splitting.
# SC2154: var is referenced but not assigned.
ignored_errors="SC1090,SC2039,SC2154"

# Print a green OK status line for the script named in $1.
success() {
	printf "\r\033[2K [ \033[00;32mOK\033[0m ] Checking %s...\n" "$1"
}

# Print a yellow WARNING status line for the script named in $1.
warn() {
	printf "\r\033[2K [\033[0;33mWARNING\033[0m] Checking %s...\n" "$1"
}

# Print a red FAIL status line for the script named in $1 and count it.
fail() {
	printf "\r\033[2K [\033[0;31mFAIL\033[0m] Checking %s...\n" "$1"
	failed=$((failed + 1))
}

# Run shellcheck (POSIX sh dialect, gcc-style output) on one script.
# Arguments: $1 - path of the script to lint
# Outputs:   one status line followed by shellcheck's raw output
# A non-zero shellcheck status only counts as a failure when the output
# mentions "error"; anything else is reported as a warning.
check() {
	local script="$1"
	local out rc

	out="$(shellcheck -s sh -f gcc -x -e "$ignored_errors" "$script" 2>&1)"
	rc=$?
	if [ "$rc" -eq 0 ]; then
		success "$script"
	elif grep -qi 'error' <<<"$out"; then
		fail "$script"
	else
		warn "$script"
	fi
	printf '%s\n' "$out"
}

# Emit the find(1) predicates that exclude .git and every submodule path
# listed in .gitmodules (if that file exists in the current directory).
find_prunes() {
	local prunes="! -path './.git/*'"
	local module

	if [ -f .gitmodules ]; then
		while read -r module; do
			prunes="$prunes ! -path './$module/*'"
		done < <(awk '/path/ {print $3}' .gitmodules)
	fi
	echo "$prunes"
}

# Emit the complete find command line as a string; the caller evals it.
find_cmd() {
	echo "find heartbeat -type f -and \( -perm /111 -or -name '*.sh' \) $(find_prunes)"
}

# Lint every candidate file, skipping binaries, non-sh interpreters and
# C/.orig leftovers, then exit the whole script: 1 if anything failed, else 0.
check_all_executables() {
	local script first_line

	echo "Checking executables and .sh files..."
	while IFS= read -r script; do
		# Keep the plain redirect instead of "grep -q": under pipefail an
		# early-exiting grep could make the pipeline fail via SIGPIPE.
		file --mime "$script" | grep 'charset=binary' >/dev/null 2>&1 && continue
		first_line=$(head -n1 "$script")
		[[ "$first_line" =~ .*ruby.* ]] && continue
		[[ "$first_line" =~ .*zsh.* ]] && continue
		[[ "$first_line" =~ ^#compdef.* ]] && continue
		# Anchored at the end: the previous unanchored patterns skipped any
		# path that merely contained ".c" or ".orig" somewhere.
		[[ "$script" =~ \.c$ ]] && continue
		[[ "$script" =~ \.orig$ ]] && continue
		# NOTE(review): find emits paths starting with "heartbeat", so this
		# pattern looks like it can never match here - confirm and drop.
		[[ "$script" =~ ^ldirectord.in ]] && continue
		check "$script"
	done < <(eval "$(find_cmd)")
	if [ "$failed" -gt 0 ]; then
		echo "$failed failures detected."
		exit 1
	fi
	exit 0
}

./autogen.sh
./configure
# "[ $? ]" is true for every exit status (it only tests for a non-empty
# string), so test the command itself to actually count a failing run.
make check || failed=$((failed + 1))
check_all_executables
#######################################
# Stop the exported AoE target.
# If AoEtarget_monitor reports the daemon as running, send SIGTERM to the
# pid recorded in the pid file and poll once per second until the monitor
# no longer reports success (there is no internal timeout; the caller's
# operation timeout is what bounds the wait). The pid file is removed in
# every case.
# Globals:  OCF_RESKEY_pid (read), OCF_RESKEY_device/nic/shelf/slot (logged)
# Returns:  always $OCF_SUCCESS
#######################################
AoEtarget_stop() {
	local pid

	if AoEtarget_monitor; then
		ocf_log info "Unexporting device ${OCF_RESKEY_device} on ${OCF_RESKEY_nic} as shelf ${OCF_RESKEY_shelf}, slot ${OCF_RESKEY_slot}"
		# Quoted so a pid file path containing whitespace still works
		pid=$(cat "${OCF_RESKEY_pid}")
		# An empty pid file would otherwise produce a bare "kill -TERM"
		if [ -n "$pid" ]; then
			kill -TERM "$pid"
		fi
		# loop until we're really stopped, wait for the LRM to time us
		# out if not
		while AoEtarget_monitor; do
			sleep 1
		done
	fi

	# Clean up pid file
	rm -f "${OCF_RESKEY_pid}"
	return $OCF_SUCCESS
}
#######################################
# Validate the resource configuration.
# Checks, in order: the vblade binary is executable, all required
# parameters are non-empty, the pid file directory is writable (probed by
# creating and removing a file named after this shell's pid), and the
# device to export exists.
# Globals:  OCF_RESKEY_binary, OCF_RESKEY_device, OCF_RESKEY_nic,
#           OCF_RESKEY_shelf, OCF_RESKEY_slot, OCF_RESKEY_pid (read)
# Returns:  $OCF_SUCCESS, $OCF_ERR_INSTALLED or $OCF_ERR_CONFIGURED
#######################################
AoEtarget_validate() {
	local var param pid_dir

	# Is our binary executable?
	# Quoted: an unquoted empty or whitespace-containing value makes "["
	# mis-evaluate and the check silently pass.
	if [ ! -x "${OCF_RESKEY_binary}" ]; then
		ocf_log error "${OCF_RESKEY_binary} not found or not executable"
		return $OCF_ERR_INSTALLED
	fi

	# Do we have all required variables?
	for var in device nic shelf slot pid; do
		param="OCF_RESKEY_${var}"
		if [ -z "${!param}" ]; then
			ocf_log error "Missing resource parameter \"$var\"!"
			return $OCF_ERR_CONFIGURED
		fi
	done

	# Is the pid file directory writable?
	pid_dir=$(dirname "$OCF_RESKEY_pid")
	if ! touch "$pid_dir/$$"; then
		ocf_log error "Cannot create pid file in $pid_dir -- check directory permissions"
		return $OCF_ERR_INSTALLED
	fi
	rm -f "$pid_dir/$$"

	# Does the device we are trying to export exist?
	if [ ! -e "${OCF_RESKEY_device}" ]; then
		ocf_log error "${OCF_RESKEY_device} does not exist"
		return $OCF_ERR_INSTALLED
	fi
	return $OCF_SUCCESS
}
# # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # OVERVIEW # # When run by itself, CTDB can handle IP failover and includes scripts # to manage various services (Samba, Winbind, HTTP, etc.). When run as # a resource in a Pacemaker cluster, this additional functionality # should not be used; instead one should define separate resources for # CTDB, Samba, Winbind, IP addresses, etc. # # As of 2010-11-17, there is no separate OCF Samba or Winbind RA, so # it is still possible to configure CTDB so that it manages these # resources itself. In future, once Samba and Winbind RAs are # available, this ability will be deprecated and ultimately removed. # # This RA intentionally provides no ability to configure CTDB such that # it manages IP failover, HTTP, NFS, etc. 
# Default parameter values:

# Some distro's ctdb package stores the persistent db in /var/lib/ctdb,
# others store in /var/ctdb. This attempts to detect the correct default
# directory.
var_prefix="/var/lib/ctdb"
if [ ! -d "$var_prefix" ] && [ -d "/var/ctdb" ]; then
	var_prefix="/var/ctdb"
fi

# Runtime directory: prefer /run, fall back to /var/run when /run is absent.
# This check must test and assign run_prefix; using var_prefix here would
# redirect the database directory default to /var/run and leave the
# runtime directory default untouched.
run_prefix="/run"
if [ ! -d "$run_prefix" ] && [ -d "/var/run" ]; then
	run_prefix="/var/run"
fi

# Each default only applies when the parameter is unset or empty.
: "${OCF_RESKEY_ctdb_manages_samba:=no}"
: "${OCF_RESKEY_ctdb_manages_winbind:=no}"
: "${OCF_RESKEY_ctdb_service_smb:=}"
: "${OCF_RESKEY_ctdb_service_nmb:=}"
: "${OCF_RESKEY_ctdb_service_winbind:=}"
: "${OCF_RESKEY_ctdb_samba_skip_share_check:=yes}"
: "${OCF_RESKEY_ctdb_monitor_free_memory:=100}"
: "${OCF_RESKEY_ctdb_start_as_disabled:=no}"
: "${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb}"
: "${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb}"
: "${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd}"
: "${OCF_RESKEY_ctdb_dbdir:=${var_prefix}}"
: "${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb}"
: "${OCF_RESKEY_ctdb_rundir:=${run_prefix}/ctdb}"
: "${OCF_RESKEY_ctdb_socket:=${OCF_RESKEY_ctdb_rundir}/ctdbd.socket}"
: "${OCF_RESKEY_ctdb_debuglevel:=2}"
: "${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf}"
: "${OCF_RESKEY_smb_passdb_backend:=tdbsam}"
: "${OCF_RESKEY_smb_idmap_backend:=tdb2}"
Should CTDB manage starting/stopping the Winbind service for you? This will be deprecated in future, in favor of configuring a separate Winbind resource. Should CTDB manage Winbind? Name of smb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of smb init script Name of nmb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of nmb init script Name of winbind init script. Only necessary if CTDB is managing Winbind directly. Will usually be auto-detected. Name of winbind init script If there are very many shares it may not be feasible to check that all of them are available during each monitoring interval. In that case this check can be disabled. Skip share check during monitor? If the amount of free memory drops below this value the node will become unhealthy and ctdb and all managed services will be shutdown. Once this occurs, the administrator needs to find the reason for the OOM situation, rectify it and restart ctdb with "service ctdb start". Minimum amount of free memory (MB) When set to yes, the CTDB node will start in DISABLED mode and not host any public ip addresses. Start CTDB disabled? The directory containing various CTDB configuration files. The "nodes" and "notify.sh" scripts are expected to be in this directory, as is the "events.d" subdirectory. CTDB config file directory Full path to the CTDB binary. CTDB binary path Full path to the CTDB cluster daemon binary. CTDB Daemon binary path Full path to the domain socket that ctdbd will create, used for local clients to attach and communicate with the ctdb daemon. CTDB socket location The directory to put the local CTDB database files in. Persistent database files will be put in ctdb_dbdir/persistent. CTDB database directory Full path to log file. To log to syslog instead, use the value "syslog". CTDB log file location Full path to ctdb runtime directory, used for storage of socket lock state. 
# Run the ctdb command-line tool against this resource's daemon socket.
# Arguments: "$@" - ctdb subcommand and its arguments, passed through as-is
# Globals:   OCF_RESKEY_ctdb_binary, OCF_RESKEY_ctdb_socket (read),
#            OCF_RESKEY_CRM_meta_timeout (read; milliseconds, optional)
# Returns:   exit status of the ctdb tool
invoke_ctdb() {
	# CTDB's defaults are:
	local timeout
	local timelimit
	timeout=3
	timelimit=120
	# ...but we override with the timeout for the current op:
	if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
		# operation timeout is in milliseconds, -t/-T are given whole seconds
		timeout=$((OCF_RESKEY_CRM_meta_timeout/1000))
		timelimit=$((OCF_RESKEY_CRM_meta_timeout/1000))
	fi
	# Binary path is quoted so a path containing whitespace is still
	# executed as a single command word.
	"$OCF_RESKEY_ctdb_binary" --socket="$OCF_RESKEY_ctdb_socket" \
		-t "$timeout" -T "$timelimit" \
		"$@"
}
# Push every shell variable named CTDB_SET_<name> to the running daemon
# via "ctdb setvar <name> <value>".
# Returns: $OCF_SUCCESS if every setvar call succeeded (or there was
#          nothing to set), $OCF_ERR_GENERIC if any call failed.
# The loop is fed from a here-document rather than a pipeline so that it
# runs in the current shell and a failure recorded in rv is not lost.
# NOTE(review): values are taken from "set" output as printed; values the
# shell prints quoted would be passed with their quotes - confirm whether
# such values can occur.
set_ctdb_variables() {
	local rv
	local settings
	local v
	local varname
	local value

	rv=$OCF_SUCCESS
	# Strip the leading "CTDB_SET_" so each line reads "<name>=<value>"
	settings=$(set | grep '^CTDB_SET_' | cut -d_ -f3-)

	while IFS= read -r v; do
		[ -n "$v" ] || continue
		varname=${v%%=*}
		# everything after the first "=", so values may contain "="
		value=${v#*=}
		# stdin detached so the tool cannot consume the remaining lines
		invoke_ctdb setvar "$varname" "$value" </dev/null || rv=$OCF_ERR_GENERIC
	done <<EOF
$settings
EOF

	return $rv
}
# Get rid of that section we added
# Removes the "# CTDB-RA: Begin" .. "# CTDB-RA: End" block from smb.conf.
# Does nothing (returns 0) unless CTDB is configured to manage Samba.
# Globals: OCF_RESKEY_ctdb_manages_samba, OCF_RESKEY_smb_conf (read)
# Returns: 0 on success, 1 if the file could not be rewritten; in that
#          case smb.conf is left exactly as it was.
cleanup_smb_conf() {
	local tmp_conf

	ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0

	tmp_conf="$OCF_RESKEY_smb_conf.$$"

	# cp -a first so the temp file carries smb.conf's attributes, then
	# overwrite its contents. Only move it into place when every step
	# worked; otherwise an empty or partial file would replace smb.conf.
	if cp -a "$OCF_RESKEY_smb_conf" "$tmp_conf" &&
		sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' "$OCF_RESKEY_smb_conf" > "$tmp_conf" &&
		mv "$tmp_conf" "$OCF_RESKEY_smb_conf"; then
		return 0
	fi

	ocf_log err "Failed to remove CTDB-RA section from $OCF_RESKEY_smb_conf"
	rm -f "$tmp_conf"
	return 1
}
-d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then # ensure the logfile's directory exists, otherwise ctdb will fail to start mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile) fi # ensure ctdb's rundir exists, otherwise it will fail to start mkdir -p $OCF_RESKEY_ctdb_rundir 2>/dev/null # public addresses file (should not be present, but need to set for correctness if it is) local pub_addr_option pub_addr_option="" [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \ pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses" # start as disabled local start_as_disabled start_as_disabled="--start-as-disabled" ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled="" # Start her up "$OCF_RESKEY_ctdbd_binary" \ --reclock="$OCF_RESKEY_ctdb_recovery_lock" \ --nlist="$OCF_RESKEY_ctdb_config_dir/nodes" \ --socket="$OCF_RESKEY_ctdb_socket" \ --dbdir="$OCF_RESKEY_ctdb_dbdir" \ --dbdir-persistent="$OCF_RESKEY_ctdb_dbdir/persistent" \ --event-script-dir="$OCF_RESKEY_ctdb_config_dir/events.d" \ --notification-script="$OCF_RESKEY_ctdb_config_dir/notify.sh" \ --transport=tcp \ $start_as_disabled $log_option $pub_addr_option \ -d "$OCF_RESKEY_ctdb_debuglevel" if [ $? -ne 0 ]; then # cleanup smb.conf cleanup_smb_conf ocf_exit_reason "Failed to execute $OCF_RESKEY_ctdbd_binary." return $OCF_ERR_GENERIC else # Wait a bit for CTDB to stabilize # (until start times out if necessary) while true; do # Initial sleep is intentional (ctdb init script # has sleep after ctdbd start, but before invoking # ctdb to talk to it) sleep 1 status=$(invoke_ctdb status 2>/dev/null) if [ $? -ne 0 ]; then # CTDB will be running, kill it before returning ctdb_stop ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status" return $OCF_ERR_GENERIC fi if ! echo "$status" | grep -qs 'UNHEALTHY (THIS'; then # Status does not say this node is unhealthy, # so we're good to go. Do a bit of final # setup and (hopefully) return success. 
# Stop ctdbd: ask for a graceful shutdown, then poll once per second until
# no process matches the ctdbd binary path; after more than ten polls each
# further poll also sends SIGKILL to ctdbd and to any event scripts.
# Globals: OCF_RESKEY_ctdbd_binary, OCF_RESKEY_ctdb_config_dir (read);
#          rv, count (written)
# Returns: $OCF_SUCCESS when already stopped, when the shutdown request
#          succeeded, or when no ctdbd is left; $OCF_ERR_GENERIC otherwise.
ctdb_stop() {
	# Already stopped? Then there is nothing to do.
	if ! pkill -0 -f "$OCF_RESKEY_ctdbd_binary"; then
		return $OCF_SUCCESS
	fi

	# Polite request first; remember whether it was accepted.
	invoke_ctdb shutdown >/dev/null 2>&1
	rv=$?

	# Wait for the daemon to disappear, escalating to SIGKILL after
	# more than ten one-second polls.
	count=0
	while pkill -0 -f "$OCF_RESKEY_ctdbd_binary"; do
		sleep 1
		count=$((count + 1))
		if [ $count -gt 10 ]; then
			ocf_log info "killing ctdbd "
			pkill -9 -f "$OCF_RESKEY_ctdbd_binary"
			pkill -9 -f "${OCF_RESKEY_ctdb_config_dir}/events.d/"
		fi
	done

	# Undo our smb.conf changes
	cleanup_smb_conf

	# The shutdown request itself succeeded: clean stop.
	if [ $rv -eq $OCF_SUCCESS ]; then
		return $OCF_SUCCESS
	fi

	# Unclean stop: still a success as long as no ctdbd survived, and a
	# failure only if one is (again) present despite the forced kills.
	if pkill -0 -f "$OCF_RESKEY_ctdbd_binary"; then
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
# Validate the environment and configuration for this resource.
# Checks, in order: required binaries, presence of a CTDB sysconfig path,
# smb.conf (only when CTDB manages Samba), the nodes file, the recovery
# lock parameter, and writability of the lock file's directory (probed by
# creating and removing a file named after this shell's pid). An existing
# public_addresses file only produces a warning.
# Globals: CTDB_SYSCONFIG and OCF_RESKEY_* parameters (read);
#          binary, lock_dir (written)
# Returns: $OCF_SUCCESS, $OCF_ERR_INSTALLED, $OCF_ERR_ARGS or
#          $OCF_ERR_CONFIGURED
ctdb_validate() {
	# Required binaries (full path to tdbdump is intentional, as that's
	# what's used in ctdb_start, which was lifted from the init script)
	for binary in pkill /usr/bin/tdbdump; do
		check_binary "$binary"
	done

	[ -n "$CTDB_SYSCONFIG" ] || {
		ocf_exit_reason "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)"
		return $OCF_ERR_INSTALLED
	}

	# smb.conf only matters when CTDB is managing Samba itself
	if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba"; then
		[ -f "$OCF_RESKEY_smb_conf" ] || {
			ocf_exit_reason "Samba config file '$OCF_RESKEY_smb_conf' does not exist."
			return $OCF_ERR_INSTALLED
		}
	fi

	# Not fatal, but worth telling the administrator about
	[ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] &&
		ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!"

	[ -f "$OCF_RESKEY_ctdb_config_dir/nodes" ] || {
		ocf_exit_reason "$OCF_RESKEY_ctdb_config_dir/nodes does not exist."
		return $OCF_ERR_ARGS
	}

	[ -n "$OCF_RESKEY_ctdb_recovery_lock" ] || {
		ocf_exit_reason "ctdb_recovery_lock not specified."
		return $OCF_ERR_CONFIGURED
	}

	# Probe the lock directory by creating a scratch file in it
	lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock")
	if ! touch "$lock_dir/$$" 2>/dev/null; then
		ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
		return $OCF_ERR_ARGS
	fi
	rm "$lock_dir/$$"

	return $OCF_SUCCESS
}
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < - + 1.0 This is a Dummy Resource Agent. It does absolutely nothing except keep track of whether its running or not. Its purpose in life is for testing and to serve as a template for RA writers. NB: Please pay attention to the timeouts specified in the actions section below. They should be meaningful for the kind of resource the agent manages. They should be the minimum advised timeouts, but they shouldn't/cannot cover _all_ possible resource instances. So, try to be neither overly generous nor too stingy, but moderate. The minimum timeouts should never be below 10 seconds. Example stateless resource agent Location to store the resource state in. State file Fake attribute that can be changed to cause a reload Fake attribute that can be changed to cause a reload END } ####################################################################### dummy_usage() { cat < - + 1.0 Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. -- This is a Evmsd Resource Agent. Controls clustered EVMS volume management (deprecated) If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning END } ####################################################################### evmsd_usage() { cat < - + 1.0 Enables and disables network routes. Supports host and net routes, routes via a gateway address, and routes using specific source addresses. This resource agent is useful if a node's routing table needs to be manipulated based on node role assignment. Consider the following example use case: - One cluster node serves as an IPsec tunnel endpoint. - All other nodes use the IPsec tunnel to reach hosts in a specific remote network. 
# Build the route specification used with "ip route (add|del|show)".
# Starts from "to <destination>" and appends "dev", "via", "src" and
# "table" clauses, in that order, for each parameter that is non-empty.
# Globals: OCF_RESKEY_destination, OCF_RESKEY_device, OCF_RESKEY_gateway,
#          OCF_RESKEY_source, OCF_RESKEY_table (read); route_spec (written)
# Outputs: the specification on stdout
create_route_spec() {
	route_spec="to ${OCF_RESKEY_destination}"
	# ${var:+...} contributes its clause only when var is set and non-empty
	route_spec="${route_spec}${OCF_RESKEY_device:+ dev ${OCF_RESKEY_device}}"
	route_spec="${route_spec}${OCF_RESKEY_gateway:+ via ${OCF_RESKEY_gateway}}"
	route_spec="${route_spec}${OCF_RESKEY_source:+ src ${OCF_RESKEY_source}}"
	route_spec="${route_spec}${OCF_RESKEY_table:+ table ${OCF_RESKEY_table}}"
	echo "$route_spec"
}
return $OCF_ERR_INSTALLED fi fi # The following tests must return $OCF_ERR_INSTALLED, but only if # the resource is actually running (i.e., not during probes) if ! ocf_is_probe; then # If a source address has been configured, is it available on # this system? if [ -n "${OCF_RESKEY_source}" ]; then if ! ip address show | grep -w ${OCF_RESKEY_source} >/dev/null 2>&1; then ocf_exit_reason "Source address ${OCF_RESKEY_source} appears not to be available on this system." # same reason as with _device: return $OCF_ERR_INSTALLED fi fi # If a gateway address has been configured, is it reachable? if [ -n "${OCF_RESKEY_gateway}" ]; then if ! ip route get ${OCF_RESKEY_gateway} >/dev/null 2>&1; then ocf_exit_reason "Gateway address ${OCF_RESKEY_gateway} is unreachable." # same reason as with _device: return $OCF_ERR_INSTALLED fi fi fi return $OCF_SUCCESS } # These two actions must always succeed case $__OCF_ACTION in meta-data) meta_data # OCF variables are not set when querying meta-data exit 0 ;; usage|help) route_usage exit $OCF_SUCCESS ;; esac # Don't do anything if the necessary utilities aren't present for binary in ip grep; do check_binary $binary done route_validate || exit $? case $OCF_RESKEY_destination in *:*) addr_family="-6" ;; *) addr_family="-4" ;; esac case $__OCF_ACTION in start) route_start;; stop) route_stop;; status|monitor) route_status;; reload) ocf_log info "Reloading..." route_start ;; validate-all) ;; *) route_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" exit $rc diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase index d0206a2df..d803df9d5 100755 --- a/heartbeat/SAPDatabase +++ b/heartbeat/SAPDatabase @@ -1,341 +1,341 @@ #!/bin/sh # # SAPDatabase # # Description: Manages any type of SAP supported database instance # as a High-Availability OCF compliant resource. 
# # Author: Alexander Krauth, October 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006, 2007, 2010, 2012 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_SID # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DBTYPE (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB) # OCF_RESKEY_DBINSTANCE (optional, Database instance name, if not equal to SID) # OCF_RESKEY_DBOSUSER (optional, the Linux user that owns the database processes on operating system level) # OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false) # OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor all database services) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # Deprecated parameters: # OCF_RESKEY_NETSERVICENAME # OCF_RESKEY_DBJ2EE_ONLY # OCF_RESKEY_JAVA_HOME # OCF_RESKEY_DIR_BOOTSTRAP # OCF_RESKEY_DIR_SECSTORE # OCF_RESKEY_DB_JARS # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### SH=/bin/sh usage() { methods=`sapdatabase_methods` methods=`echo $methods | tr ' ' '|'` cat <<-EOF usage: $0 ($methods) $0 manages a SAP database of any type as an HA resource. Currently Oracle, MaxDB, DB/2 UDB, Sybase ASE and SAP HANA Database are supported. ABAP databases as well as JAVA only databases are supported. The 'start' operation starts the instance. The 'stop' operation stops the instance. The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!) The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports EOF } meta_data() { cat < 2.14 -Manages a SAP database instance as an HA resource. Resource script for SAP databases. It manages a SAP database of any type as an HA resource. The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB. The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure. The resource agent supports the following databases: - Oracle 10.2, 11.2 and 12 - DB/2 UDB for Windows and Unix 9.x - SAP-DB / MaxDB 7.x - Sybase ASE 15.7 - SAP HANA Database since 1.00 - with SAP note 1625203 (http://sdn.sap.com) In fact this resource agent does not run any database commands directly. It uses the SAP standard process SAPHostAgent to control the database. The SAPHostAgent must be installed on each cluster node locally. It will not work, if you try to run the SAPHostAgent also as a HA resource. 
Please follow SAP note 1031096 for the installation of SAPHostAgent. The required minimum version of SAPHostAgent is: Release: 7.20 Patch Number: 90 or compile time after: Dec 17 2011 +Manages a SAP database instance as an HA resource. The unique database system identifier. e.g. P01 Database system ID The full qualified path where to find saphostexec and saphostctrl. Usually you can leave this empty. Then the default: /usr/sap/hostctrl/exe is used. path of saphostexec and saphostctrl The name of the database vendor you use. Set either: ADA, DB6, ORA, SYB, HDB database vendor Must be used for special database implementations, when database instance name is not equal to the SID (e.g. Oracle DataGuard) Database instance name, if not equal to SID The parameter can be set, if the database processes on operating system level are not executed with the default user of the used database type. Defaults: ADA=taken from /etc/opt/sdb, DB6=db2SID, ORA=oraSID and oracle, SYB=sybSID, HDB=SIDadm the Linux user that owns the database processes on operating system level Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore This controls how the resource agent monitors the database. If set to true, it will use 'saphostctrl -function GetDatabaseStatus' to test the database state. If set to false, only operating system processes are monitored. Activates application level monitoring If you set this to true, 'saphostctrl -function StartDatabase' will always be called with the '-force' option. Enable or disable automatic startup recovery Defines which services are monitored by the SAPDatabase resource agent, if STRICT_MONITORING is set to true. 
Service names must correspond with the output of the 'saphostctrl -function GetDatabaseStatus' command. Database services to monitor Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore The full qualified path where to find a script or program which should be executed before this resource gets started. path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got started. path to a post-start script The full qualified path where to find a script or program which should be executed before this resource gets stopped. path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got stopped. path to a post-start script END } # # methods: What methods/operations do we support? # sapdatabase_methods() { cat <<-EOF start stop status monitor recover validate-all methods meta-data usage EOF } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. # Someone should write a resource agent for each additional process you need, if it # is required to monitor that process within the cluster manager. To enable # you to extent this resource agent without developing a new one, this user exit # was introduced. # sapuserexit() { NAME="$1" VALUE="$2" if [ -n "$VALUE" ] then if have_binary "$VALUE" then ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" "$VALUE" >/dev/null 2>&1 ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" 
#
# saphostctrl_installed : Work out where the SAP Host Agent executables live
#                         and report whether the agent can be used.
#
# Globals read:    OCF_RESKEY_DIR_EXECUTABLE (optional)
# Globals written: OCF_RESKEY_DIR_EXECUTABLE_default, OCF_RESKEY_DIR_EXECUTABLE,
#                  SAPHOSTCTRL, SAPHOSTEXEC, SAPHOSTSRV, SAPHOSTOSCOL
# Returns:         0 if have_binary accepts both saphostctrl and saphostexec,
#                  non-zero otherwise
#
saphostctrl_installed() {
  OCF_RESKEY_DIR_EXECUTABLE_default="/usr/sap/hostctrl/exe"
  # '=' (not ':=') on purpose: the default is applied only when the
  # parameter is unset, exactly as before. Quoted so the expansion is not
  # word-split or globbed when handed to ':'.
  : "${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}}"

  SAPHOSTCTRL="${OCF_RESKEY_DIR_EXECUTABLE}/saphostctrl"
  SAPHOSTEXEC="${OCF_RESKEY_DIR_EXECUTABLE}/saphostexec"
  SAPHOSTSRV="${OCF_RESKEY_DIR_EXECUTABLE}/sapstartsrv"
  SAPHOSTOSCOL="${OCF_RESKEY_DIR_EXECUTABLE}/saposcol"

  # Quote the paths: a directory containing whitespace must reach
  # have_binary as a single argument.
  have_binary "$SAPHOSTCTRL" && have_binary "$SAPHOSTEXEC"
}
case "$ACTION" in start|stop|status|recover) sapdatabase_$ACTION exit $?;; monitor) sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING exit $?;; validate-all) sapdatabase_validate exit $?;; *) sapdatabase_methods exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance index ea946dbed..49e60aa30 100755 --- a/heartbeat/SAPInstance +++ b/heartbeat/SAPInstance @@ -1,943 +1,943 @@ #!/bin/sh # # SAPInstance # # Description: Manages a single SAP Instance as a High-Availability # resource. One SAP Instance is defined by one # SAP Instance-Profile. start/stop handles all services # of the START-Profile, status and monitor care only # about essential services. # # Author: Alexander Krauth, June 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006-2008 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_InstanceName # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_WAITTIME (optional, to solve timing problems during J2EE-Addin start) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false) # OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor critical services only) # OCF_RESKEY_SHUTDOWN_METHOD (optional, defaults to NORMAL, KILL: terminate the SAP instance with OS commands - faster, at your own risk) # OCF_RESKEY_ERS_InstanceName (optional, InstanceName of the ERS instance in a Master/Slave configuration) # OCF_RESKEY_ERS_START_PROFILE (optional, START_PROFILE of the ERS instance in a Master/Slave configuration) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # 
OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # # TODO: - Option to shutdown sapstartsrv for non-active instances -> that means: do probes only with OS tools (sapinstance_status) # - Option for better standalone enqueue server monitoring, using ensmon (test enque-deque) # - Option for cleanup abandoned enqueue replication tables # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### SH=/bin/sh sapinstance_usage() { methods=`sapinstance_methods` methods=`echo $methods | tr ' ' '|'` cat <<-EOF usage: $0 ($methods) $0 manages a SAP Instance as an HA resource. The 'start' operation starts the instance or the ERS instance in a Master/Slave configuration The 'stop' operation stops the instance The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'promote' operation starts the primary instance in a Master/Slave configuration The 'demote' operation stops the primary instance and starts the ERS instance The 'notify' operation always returns SUCCESS The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports EOF } sapinstance_meta_data() { cat < 2.14 -Manages a SAP instance as an HA resource. Usually a SAP system consists of one database and at least one or more SAP instances (sometimes called application servers). One SAP Instance is defined by having exactly one instance profile. 
The instance profiles can usually be found in the directory /sapmnt/SID/profile. Each instance must be configured as it's own resource in the cluster configuration. The resource agent supports the following SAP versions: - SAP WebAS ABAP Release 6.20 - 7.40 - SAP WebAS Java Release 6.40 - 7.40 - SAP WebAS ABAP + Java Add-In Release 6.20 - 7.40 (Java is not monitored by the cluster in that case) When using a SAP Kernel 6.40 please check and implement the actions from the section "Manual postprocessing" from SAP note 995116 (http://sdn.sap.com). Other versions may also work with this agent, but have not been verified. All operations of the SAPInstance resource agent are done by using the startup framework called SAP Management Console or sapstartsrv that was introduced with SAP kernel release 6.40. Find more information about the SAP Management Console in SAP note 1014480. Using this framework defines a clear interface for the Heartbeat cluster, how it sees the SAP system. The options for monitoring the SAP system are also much better than other methods like just watching the ps command for running processes or doing some pings to the application. sapstartsrv uses SOAP messages to request the status of running SAP processes. Therefore it can actually ask a process itself what it's status is, independent from other problems that might exist at the same time. sapstartsrv knows 4 status colours: - GREEN = everything is fine - YELLOW = something is wrong, but the service is still working - RED = the service does not work - GRAY = the service has not been started The SAPInstance resource agent will interpret GREEN and YELLOW as OK. That means that minor problems will not be reported to the Heartbeat cluster. This prevents the cluster from doing an unwanted failover. The statuses RED and GRAY are reported as NOT_RUNNING to the cluster. Depending on the status the cluster expects from the resource, it will do a restart, failover or just nothing. 
+Manages a SAP instance as an HA resource. The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci. Usually this is the name of the SAP instance profile. Instance name: SID_INSTANCE_VIR-HOSTNAME The full qualified path where to find sapstartsrv and sapcontrol. Specify this parameter, if you have changed the SAP kernel directory location after the default SAP installation. Path of sapstartsrv and sapcontrol The full qualified path where to find the SAP START profile. Specify this parameter, if you have changed the SAP profile directory location after the default SAP installation. Path of start profile The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. Start profile name After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start ishandled as SUCCESS. This is useful to resolve timing problems with e.g. the J2EE-Addin instance.Usually the resource agent waits until all services are started and the SAP Management Console reports a GREEN status. A double stack installation (ABAP + Java AddIn) consists of an ABAP dispatcher and a JAVA instance. Normally the start of the JAVA instance takes much longer than the start of the ABAP instance. For a JAVA Instance you may need to configure a much higher timeout for the start operation of the resource in Heartbeat. The disadvantage here is, that the discovery of a failed start by the cluster takes longer. Somebody might say: For me it is important, that the ABAP instance is up and running. A failure of the JAVA instance shall not cause a failover of the SAP instance. Actually the SAP MC reports a YELLOW status, if the JAVA instance of a double stack system fails. From the resource agent point of view YELLOW means:everything is OK. 
Setting START_WAITTIME to a lower value determines the resource agent to check the status of the instance during a start operation after that time. As it would wait normally for a GREEN status, now it reports SUCCESS to the cluster in case of a YELLOW status already after the specified time. That is only useful for double stack systems. Check the successful start after that time (do not wait for J2EE-Addin) The SAPInstance resource agent tries to recover a failed start attempt automaticaly one time. This is done by killing runing instance processes, removing the kill.sap file and executing cleanipc. Sometimes a crashed SAP instance leaves some processes and/or shared memory segments behind. Setting this option to true will try to remove those leftovers during a start operation. That is to reduce manual work for the administrator. Enable or disable automatic startup recovery Within a SAP instance there can be several services. Usually you will find the defined services in the START profile of the related instance (Attention: with SAP Release 7.10 the START profile content was moved to the instance profile). Not all of those services are worth to monitor by the cluster. For example you properly do not like to failover your SAP instance, if the central syslog collector daemon fails. Those services are monitored within the SAPInstance resource agent: - disp+work - msg_server - enserver - enrepserver - jcontrol - jstart That names match the strings used in the output of the command 'sapcontrol -nr [Instance-Nr] -function GetProcessList'. The default should fit most cases where you want to manage a SAP Instance from the cluster. You may change this with this parameter, if you like to monitor more/less or other services that sapstartsrv supports. You may specify multiple services separated by a | (pipe) sign in this parameter: disp+work|msg_server|enserver Services to monitor Usual a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. 
SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the gracefull stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !! Shutdown graceful or kill a SAP instance by terminating the processes. (normal|KILL) Only used in a Master/Slave resource configuration: The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile. The enqueue replication instance must be installed, before you want to configure a master-slave cluster recource. The master-slave configuration in the cluster must use this properties: clone_max = 2 clone_node_max = 1 master_node_max = 1 master_max = 1 Enqueue replication instance name: SID_INSTANCE_VIR-HOSTNAME Only used in a Master/Slave resource configuration: The parameter ERS_InstanceName must also be set in this configuration. The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. Enqueue replication start profile name The full qualified path where to find a script or program which should be executed before this resource gets started. Path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got started. Path to a post-start script The full qualified path where to find a script or program which should be executed before this resource gets stopped. Path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got stopped. 
#
# is_clone : find out if we are configured to run in a Master/Slave configuration
#
# NOTE: the return value is inverted with respect to the name:
#   returns 0 when the resource is NOT a clone,
#   returns 1 when it is a correctly configured Master/Slave clone.
# Exits with OCF_ERR_CONFIGURED on wrong clone meta attributes and with
# OCF_ERR_ARGS when ERS_InstanceName is missing in a clone setup.
#
is_clone() {
  # No clone_max, or a clone_max that is not a positive number: plain resource.
  [ -n "$OCF_RESKEY_CRM_meta_clone_max" ] || return 0
  [ "$OCF_RESKEY_CRM_meta_clone_max" -gt 0 ] || return 0

  # From here on we are cloned; only one exact layout is supported.
  if [ "$OCF_RESKEY_CRM_meta_clone_max" -ne 2 ] ||
     [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] ||
     [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] ||
     [ "$OCF_RESKEY_CRM_meta_master_max" -ne 1 ]; then
    ocf_log err "Clone options misconfigured. (expect: clone_max=2,clone_node_max=1,master_node_max=1,master_max=1)"
    exit $OCF_ERR_CONFIGURED
  fi

  if [ -z "$OCF_RESKEY_ERS_InstanceName" ]; then
    ocf_log err "In a Master/Slave configuration the ERS_InstanceName parameter is mandatory."
    exit $OCF_ERR_ARGS
  fi

  return 1
}
#
# sapinstance_init : Define global variables with default values, if optional parameters are not set
#
# Arguments:
#   $1 - full instance name in the form SID_INSTANCE_VIR-HOSTNAME
#        (e.g. P01_DVEBMGS00_sapp01ci)
# Globals written:
#   SID, InstanceName, InstanceNr, SAPVIRHOST, sidadm,
#   DIR_EXECUTABLE, SAPSTARTSRV, SAPCONTROL (only when the binaries are found),
#   DIR_PROFILE, currentSTART_PROFILE, SAPSTARTPROFILE,
#   OCF_RESKEY_START_WAITTIME, OCF_RESKEY_MONITOR_SERVICES (defaults, exported),
#   LD_LIBRARY_PATH (exported, kernel directory prepended when missing)
# Returns:
#   OCF_SUCCESS; calls abnormal_end when no sapstartsrv/sapcontrol was located.
#
sapinstance_init() {
  local myInstanceName="$1"

  SID=$(echo "$myInstanceName" | cut -d_ -f1)
  InstanceName=$(echo "$myInstanceName" | cut -d_ -f2)
  # the instance number is the trailing two digits of the instance name
  InstanceNr=$(echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/')
  SAPVIRHOST=$(echo "$myInstanceName" | cut -d_ -f3)

  # optional OCF parameters, we try to guess which directories are correct
  if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ]
  then
    if have_binary "/usr/sap/$SID/$InstanceName/exe/sapstartsrv" && have_binary "/usr/sap/$SID/$InstanceName/exe/sapcontrol"
    then
      DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe"
      SAPSTARTSRV="/usr/sap/$SID/$InstanceName/exe/sapstartsrv"
      SAPCONTROL="/usr/sap/$SID/$InstanceName/exe/sapcontrol"
    elif have_binary "/usr/sap/$SID/SYS/exe/run/sapstartsrv" && have_binary "/usr/sap/$SID/SYS/exe/run/sapcontrol"
    then
      DIR_EXECUTABLE="/usr/sap/$SID/SYS/exe/run"
      SAPSTARTSRV="/usr/sap/$SID/SYS/exe/run/sapstartsrv"
      SAPCONTROL="/usr/sap/$SID/SYS/exe/run/sapcontrol"
    fi
  else
    if have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" && have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol"
    then
      DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE"
      SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv"
      SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol"
    fi
  fi

  sidadm="$(echo "$SID" | tr '[:upper:]' '[:lower:]')adm"

  [ -z "$DIR_EXECUTABLE" ] && abnormal_end "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!"

  if [ -z "$OCF_RESKEY_DIR_PROFILE" ]
  then
    DIR_PROFILE="/usr/sap/$SID/SYS/profile"
  else
    DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE"
  fi

  # an instance name different from the configured one means we were asked
  # to initialise the ERS instance, which has its own start profile
  if [ "$myInstanceName" != "$OCF_RESKEY_InstanceName" ]
  then
    currentSTART_PROFILE=$OCF_RESKEY_ERS_START_PROFILE
  else
    currentSTART_PROFILE=$OCF_RESKEY_START_PROFILE
  fi

  if [ -z "$currentSTART_PROFILE" ]
  then
    SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}"
  else
    SAPSTARTPROFILE="$currentSTART_PROFILE"
  fi

  if [ -z "$OCF_RESKEY_START_WAITTIME" ]
  then
    export OCF_RESKEY_START_WAITTIME=3600
  fi

  if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ]
  then
    export OCF_RESKEY_MONITOR_SERVICES="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart"
  fi

  # as root user we need the library path to the SAP kernel to be able to call sapcontrol
  # Compare the leading path element literally: a regex/word-boundary match
  # would wrongly accept e.g. "<dir>-old" as "<dir>" and skip the prepend.
  case "$LD_LIBRARY_PATH" in
    "$DIR_EXECUTABLE"|"$DIR_EXECUTABLE":*)
      ;;
    *)
      LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
      export LD_LIBRARY_PATH
      ;;
  esac

  return $OCF_SUCCESS
}
-ne 0 ]; then ocf_log warn "FAILED : sapcontrol -nr $InstanceNr -function AccessCheck Start (`ls -ld1 /tmp/.sapstream5${InstanceNr}13`)" ocf_log warn "sapstartsrv will be restarted to try to solve this situation, otherwise please check sapstsartsrv setup (SAP Note 927637)" restart=1 fi fi else ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName, it will be started now" restart=1 fi fi if [ -z "$runninginst" ]; then runninginst=$InstanceName; fi if [ $restart -eq 1 ] then if [ -d /usr/sap/$SID/SYS/profile/ ] then DIR_PROFILE="/usr/sap/$SID/SYS/profile" else abnormal_end "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!" fi [ ! -r $SAPSTARTPROFILE ] && abnormal_end "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!" pkill -9 -f "sapstartsrv.*$runninginst" # removing the unix domain socket files as they might have wrong permissions # or ownership - they will be recreated by sapstartsrv during next start rm -f /tmp/.sapstream5${InstanceNr}13 rm -f /tmp/.sapstream5${InstanceNr}14 $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm # now make sure the daemon has been started and is able to respond local srvrc=1 while [ $srvrc -eq 1 -a `pgrep -f "sapstartsrv.*$runninginst" | wc -l` -gt 0 ] do sleep 1 $SAPCONTROL -nr $InstanceNr -function GetProcessList > /dev/null 2>&1 srvrc=$? done if [ $srvrc -ne 1 ] then ocf_log info "sapstartsrv for instance $SID-$InstanceName was restarted !" chkrc=$OCF_SUCCESS else ocf_log error "sapstartsrv for instance $SID-$InstanceName could not be started!" chkrc=$OCF_ERR_GENERIC ocf_is_probe && chkrc=$OCF_NOT_RUNNING fi fi return $chkrc } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. 
#
# sapuserexit : Hook that lets an installation run its own script around
#               start/stop without having to write a separate resource agent.
#
# Arguments:
#   $1 - name of the user exit (used in log messages only)
#   $2 - path of the customer script; nothing happens when it is empty
# Returns:
#   always 0 - the result of the customer script is logged, never propagated
#
sapuserexit() {
  local exit_name="$1"
  local exit_script="$2"

  # user exit not configured
  [ -n "$exit_script" ] || return 0

  if ! have_binary "$exit_script"; then
    ocf_log warn "Attribute ${exit_name} is set to ${exit_script}, but this file is not executable"
    return 0
  fi

  ocf_log info "Calling userexit ${exit_name} with customer script file ${exit_script}"
  "$exit_script" >/dev/null 2>&1
  # $? below still holds the exit status of the customer script
  ocf_log info "Exiting userexit ${exit_name} with customer script file ${exit_script}, returncode: $?"

  return 0
}
#
# sapinstance_recover: Clean up the leftovers of a failed instance, then
#                      start it again.
#
# Returns: the exit status of sapinstance_start (the result of the cleanup
#          is not taken into account).
#
sapinstance_recover() {
  local start_rc

  cleanup_instance

  sapinstance_start
  start_rc=$?

  return $start_rc
}
-eq 0 ] then ocf_log info "SAP Instance $SID-$InstanceName stopped: $output" rc=$OCF_SUCCESS else ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output" rc=$OCF_ERR_GENERIC fi else ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output" rc=$OCF_ERR_GENERIC fi sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT" return $rc } # # sapinstance_monitor: Can the given SAP instance do anything useful? # sapinstance_monitor() { local MONLOG=$1 local rc check_sapstartsrv rc=$? if [ $rc -eq $OCF_SUCCESS ] then local count=0 local SERVNO local output output=`$SAPCONTROL -nr $InstanceNr -function GetProcessList -format script` # we have to parse the output, because the returncode doesn't tell anything about the instance status for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u` do local COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3` local SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3` local STATE=0 local SEARCH case $COLOR in GREEN|YELLOW) STATE=$OCF_SUCCESS;; *) STATE=$OCF_NOT_RUNNING;; esac SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'` if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ] then if [ $STATE -eq $OCF_NOT_RUNNING ] then [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" rc=$STATE fi count=1 fi done if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ] then if ocf_is_probe then rc=$OCF_NOT_RUNNING else [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!" rc=$OCF_ERR_GENERIC fi fi fi return $rc } # # sapinstance_status: Lightweight check of SAP instance only with OS tools # sapinstance_status() { local pid local pids [ ! 
-f "/usr/sap/$SID/$InstanceName/work/kill.sap" ] && return $OCF_NOT_RUNNING pids=`grep '^kill -[0-9]' /usr/sap/$SID/$InstanceName/work/kill.sap | awk '{print $3}'` for pid in $pids do [ `pgrep -f -U $sidadm $InstanceName | grep -c $pid` -gt 0 ] && return $OCF_SUCCESS done return $OCF_NOT_RUNNING } # # sapinstance_validate: Check the semantics of the input parameters # sapinstance_validate() { local rc=$OCF_SUCCESS if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$SID' is not a valid system ID!" rc=$OCF_ERR_ARGS fi if [ `echo "$InstanceName" | grep -c '^[A-Z].*[0-9][0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$InstanceName' is not a valid instance name!" rc=$OCF_ERR_ARGS fi if [ `echo "$InstanceNr" | grep -c '^[0-9][0-9]$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$InstanceNr' is not a valid instance number!" rc=$OCF_ERR_ARGS fi if [ `echo "$SAPVIRHOST" | grep -c '^[A-Za-z][A-Za-z0-9_-]*$'` -ne 1 ] then ocf_log err "Parsing instance profile name: '$SAPVIRHOST' is not a valid hostname!" rc=$OCF_ERR_ARGS fi return $rc } # # sapinstance_start_clone # sapinstance_start_clone() { sapinstance_init $OCF_RESKEY_ERS_InstanceName ${HA_SBIN_DIR}/crm_master -v 50 -l reboot sapinstance_start return $? } # # sapinstance_stop_clone # sapinstance_stop_clone() { sapinstance_init $OCF_RESKEY_ERS_InstanceName ${HA_SBIN_DIR}/crm_master -v 0 -l reboot sapinstance_stop return $? 
} # # sapinstance_monitor_clone # sapinstance_monitor_clone() { # first check with the status function (OS tools) if there could be something like a SAP instance running # as we do not know here, if we are in master or slave state we do not want to start our monitoring # agents (sapstartsrv) on the wrong host local rc sapinstance_init $OCF_RESKEY_InstanceName if sapinstance_status; then if sapinstance_monitor; then ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot return $OCF_RUNNING_MASTER fi # by nature of the SAP enqueue server we have to make sure # that we do a failover to the slave (enqueue replication server) # in case the enqueue process has failed. We signal this to the # cluster by setting our master preference to a lower value than the slave. ${HA_SBIN_DIR}/crm_master -v 10 -l reboot return $OCF_FAILED_MASTER fi sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_status && sapinstance_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot fi return $rc } # # sapinstance_promote_clone: In a Master/Slave configuration get Master by starting the SCS instance and stopping the ERS instance # The order is important here to behave correct from the application levels view # sapinstance_promote_clone() { local rc sapinstance_init $OCF_RESKEY_InstanceName ocf_log info "Promoting $SID-$InstanceName to running Master." sapinstance_start rc=$? if [ $rc -eq $OCF_SUCCESS ]; then sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_stop rc=$? fi return $rc } # # sapinstance_demote_clone: In a Master/Slave configuration get Slave by stopping the SCS instance and starting the ERS instance # sapinstance_demote_clone() { local rc sapinstance_init $OCF_RESKEY_InstanceName ocf_log info "Demoting $SID-$InstanceName to a slave." sapinstance_stop rc=$? if [ $rc -eq $OCF_SUCCESS ]; then sapinstance_init $OCF_RESKEY_ERS_InstanceName sapinstance_start rc=$? 
fi return $rc } # # sapinstance_notify: Handle master scoring - to make sure a slave gets the next master # sapinstance_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" if [ "${n_type}_${n_op}" = "post_promote" ]; then # After promotion of one master in the cluster, we make sure that all clones reset their master # value back to 100. This is because a failed monitor on a master might have degree one clone # instance to score 10. ${HA_SBIN_DIR}/crm_master -v 100 -l reboot elif [ "${n_type}_${n_op}" = "pre_demote" ]; then # if we are a slave and a demote event is announced, make sure we are highest on the list to become master # that is, when a slave resource was started after the promote event of an already running master (e.g. node of slave was down) # We also have to make sure to overrule the globally set resource_stickiness or any fail-count factors => INFINITY local n_uname="$OCF_RESKEY_CRM_meta_notify_demote_uname" if [ ${n_uname} != ${NODENAME} ]; then ${HA_SBIN_DIR}/crm_master -v INFINITY -l reboot fi fi } # # 'main' starts here... # ## GLOBALS SID="" sidadm="" InstanceName="" InstanceNr="" SAPVIRHOST="" DIR_EXECUTABLE="" SAPSTARTSRV="" SAPCONTROL="" DIR_PROFILE="" SAPSTARTPROFILE="" CLONE=0 NODENAME=$(ocf_local_nodename) if ( [ $# -ne 1 ] ) then sapinstance_usage exit $OCF_ERR_ARGS fi ACTION=$1 if [ "$ACTION" = "status" ]; then ACTION=monitor fi # These operations don't require OCF instance parameters to be set case "$ACTION" in usage|methods) sapinstance_$ACTION exit $OCF_SUCCESS;; meta-data) sapinstance_meta_data exit $OCF_SUCCESS;; notify) sapinstance_notify exit $OCF_SUCCESS;; *);; esac if ! ocf_is_root then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi # parameter check if [ -z "$OCF_RESKEY_InstanceName" ] then ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!" exit $OCF_ERR_ARGS fi is_clone; CLONE=$? 
if [ ${CLONE} -eq 1 ] then CLACT=_clone else if [ "$ACTION" = "promote" -o "$ACTION" = "demote" ] then ocf_log err "$ACTION called in a non master/slave environment" exit $OCF_ERR_ARGS fi sapinstance_init $OCF_RESKEY_InstanceName fi # What kind of method was invoked? case "$ACTION" in start|stop|monitor|promote|demote) sapinstance_$ACTION$CLACT exit $?;; validate-all) sapinstance_validate exit $?;; *) sapinstance_methods exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/SendArp b/heartbeat/SendArp index 9d0b48726..dbcc7dc15 100755 --- a/heartbeat/SendArp +++ b/heartbeat/SendArp @@ -1,267 +1,267 @@ #!/bin/sh # # # Copyright (c) 2006, Huang Zhen # Converting original heartbeat RA to OCF RA. # # Copyright (C) 2004 Horms # # Based on IPaddr2: Copyright (C) 2003 Tuomo Soini # # License: GNU General Public License (GPL) # Support: users@clusterlabs.org # # This script send out gratuitous Arp for an IP address # # It can be used _instead_ of the IPaddr2 or IPaddr resource # to send gratuitous arp for an IP address on a given interface, # without adding the address to that interface. I.e. if for # some reason you want to send gratuitous arp for addresses # managed by IPaddr2 or IPaddr on an additional interface. # # OCF parameters are as below: # OCF_RESKEY_ip # OCF_RESKEY_nic # # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. 
Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs SENDARP=$HA_BIN/send_arp SENDARPPIDDIR=${HA_RSCTMP} BASEIP="$OCF_RESKEY_ip" INTERFACE="$OCF_RESKEY_nic" RESIDUAL="" SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP" BACKGROUND=${OCF_RESKEY_background:-"yes"} # Set default values : ${ARP_INTERVAL_MS=200} # milliseconds between ARPs : ${ARP_REPEAT=5} # repeat count : ${ARP_BACKGROUND=$BACKGROUND} # no to run in foreground : ${ARP_NETMASK=ffffffffffff} # netmask for ARP ####################################################################### sendarp_meta_data() { cat < - + 1.0 This RA can be used _instead_ of the IPaddr2 or IPaddr RA to send gratuitous ARP for an IP address on a given interface, without adding the address to that interface. For example, if for some resaon you wanted to send gratuitous ARP for addresses managed by IPaddr2 or IPaddr on an additional interface. Broadcasts unsolicited ARP announcements The IP address for sending ARP packet. IP address The NIC for sending ARP packet. NIC Send ARPs in background. Set to false if you want to test if sending ARPs succeeded. Send ARPs in background END } ####################################################################### sendarp_usage() { cat < - + 1.0 This is a searchd Resource Agent. It manages the Sphinx Search Daemon. Manages the Sphinx search daemon. searchd configuration file Configuration file searchd binary searchd binary Search binary for functional testing in the monitor action. 
search binary Test query for functional testing in the monitor action. The query does not need to match any documents in the index. The purpose is merely to test whether the search daemon is is able to query its indices and respond properly. test query END } ####################################################################### searchd_usage() { cat < /dev/null && [ `ps -p "$1" | grep searchd | wc -l` -eq 1 ] } searchd_status() { pidfile=`grep -v "^#" "$OCF_RESKEY_config" | grep -w pid_file | awk -F "[ \t]*=[ \t]*" '{ print $2 }'` if [ -f "$pidfile" ] ; then PID=`head -n 1 $pidfile` if [ ! -z "$PID" ] ; then isRunning "$PID" if [ $? = 0 ] ; then return 0 fi fi fi false } searchd_check() { $OCF_RESKEY_search --config $OCF_RESKEY_config --noinfo "$OCF_RESKEY_testQuery" > /dev/null } searchd_monitor() { if ! searchd_validate ; then return $OCF_NOT_RUNNING fi if searchd_status ; then if searchd_check ; then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi else return $OCF_NOT_RUNNING fi } searchd_validate() { if [ ! -x "$OCF_RESKEY_search" ]; then ocf_log err "search binary '$OCF_RESKEY_search' does not exist or cannot be executed" return $OCF_ERR_ARGS fi if [ ! -x "$OCF_RESKEY_searchd" ]; then ocf_log err "searchd binary '$OCF_RESKEY_searchd' does not exist or cannot be executed" return $OCF_ERR_ARGS fi if [ ! -f "$OCF_RESKEY_config" ]; then ocf_log err "config file '$OCF_RESKEY_config' does not exist" return $OCF_ERR_ARGS fi return $OCF_SUCCESS } : ${OCF_RESKEY_config=/etc/sphinx/sphinx.conf} : ${OCF_RESKEY_search=/usr/local/bin/search} : ${OCF_RESKEY_searchd=/usr/local/bin/searchd} : ${OCF_RESKEY_testQuery=Heartbeat_Monitor_Query_Match_string} case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) searchd_start;; stop) searchd_stop;; monitor) searchd_monitor;; validate-all) searchd_validate;; usage|help) searchd_usage exit $OCF_SUCCESS ;; *) searchd_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? 
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/Squid b/heartbeat/Squid index 70c7c3d67..1f93f0ae7 100755 --- a/heartbeat/Squid +++ b/heartbeat/Squid @@ -1,446 +1,446 @@ #!/bin/bash # # Description: Manages a Squid Server provided by NTT OSSC as an # OCF High-Availability resource under Heartbeat/LinuxHA control # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # Copyright (c) 2008 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # ####################################################################### # OCF parameters: # OCF_RESKEY_squid_exe : Executable file # OCF_RESKEY_squid_conf : Configuration file # OCF_RESKEY_squid_pidfile: Process id file # OCF_RESKEY_squid_port : Port number # OCF_RESKEY_debug_mode : Debug mode # OCF_RESKEY_debug_log : Debug log file # OCF_RESKEY_squid_stop_timeout: # Number of seconds to await to confirm a # normal stop method # # OCF_RESKEY_squid_exe, OCF_RESKEY_squid_conf, OCF_RESKEY_squid_pidfile # and OCF_RESKEY_squid_port must be specified. Each of the rests # has its default value or refers OCF_RESKEY_squid_conf to make # its value when no explicit value is given. ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs usage() { cat <<-! 
usage: $0 action action: start : start a new squid instance stop : stop the running squid instance status : return the status of squid, run or down monitor : return TRUE if the squid appears to be working. meta-data : show meta data message validate-all: validate the instance parameters ! return $OCF_ERR_ARGS } metadata_squid() { cat < 1.0 The resource agent of Squid. This manages a Squid instance as an HA resource. Manages a Squid proxy server instance This is a required parameter. This parameter specifies squid's executable file. -Executable file +Executable file This is a required parameter. This parameter specifies a configuration file for a squid instance managed by this RA. -Configuration file +Configuration file This is a required parameter. This parameter specifies a process id file for a squid instance managed by this RA. -Pidfile +Pidfile This is a required parameter. This parameter specifies a port number for a squid instance managed by this RA. If plural ports are used, you must specifiy the only one of them. -Port number +Port number On stop, a squid shutdown is invoked first. If the resource doesn't stop within this timeout, we resort to stopping processes by sending signals and finally KILLing them. -how long to wait for squid shutdown to stop the +how long to wait for squid shutdown to stop the instance before resorting to kill This is an optional parameter. This RA runs in debug mode when this parameter includes 'x' or 'v'. If 'x' is included, both of STDOUT and STDERR redirect to the logfile specified by "debug_log", and then the builtin shell option 'x' is turned on. It is similar about 'v'. -Debug mode +Debug mode This is an optional and omittable parameter. This parameter specifies a destination file for debug logs and works only if this RA run in debug mode. Refer to "debug_mode" about debug mode. 
If no value is given but it's requied, it's made by the following rules: "/var/log/" as a directory part, the basename of the configuration file given by "syslog_ng_conf" as a basename part, ".log" as a suffix. -A destination of the debug log +A destination of the debug log END return $OCF_SUCCESS } get_pids() { SQUID_PIDS=( ) # Seek by pattern SQUID_PIDS[0]=$(pgrep -f "$PROCESS_PATTERN") # Seek by pidfile SQUID_PIDS[1]=$(awk '1{print $1}' $SQUID_PIDFILE 2>/dev/null) if [[ -n "${SQUID_PIDS[1]}" ]]; then typeset exe exe=$(ls -l "/proc/${SQUID_PIDS[1]}/exe") if [[ $? = 0 ]]; then exe=${exe##*-> } if ! [[ "$exe" = $SQUID_EXE ]]; then SQUID_PIDS[1]="" fi else SQUID_PIDS[1]="" fi fi # Seek by port SQUID_PIDS[2]=$( netstat -apn | awk '/tcp.*:'$SQUID_PORT' .*LISTEN/ && $7~/^[1-9]/ { sub("\\/.*", "", $7); print $7; exit}') } are_all_pids_found() { if [[ -n "${SQUID_PIDS[0]}" ]] && [[ -n "${SQUID_PIDS[1]}" ]] && [[ -n "${SQUID_PIDS[2]}" ]] then return 0 else return 1 fi } are_pids_sane() { if [[ "${SQUID_PIDS[1]}" = "${SQUID_PIDS[2]}" ]]; then return $OCF_SUCCESS else ocf_exit_reason "$SQUID_NAME:Pid unmatch" return $OCF_ERR_GENERIC fi } is_squid_dead() { if [[ -z "${SQUID_PIDS[0]}" ]] && [[ -z "${SQUID_PIDS[2]}" ]] then return 0 else return 1 fi } monitor_squid() { typeset trialcount=0 while true; do get_pids if are_all_pids_found; then are_pids_sane return $OCF_SUCCESS fi if is_squid_dead; then return $OCF_NOT_RUNNING fi ocf_log info "$SQUID_NAME:Inconsistent processes:" \ "${SQUID_PIDS[0]},${SQUID_PIDS[1]},${SQUID_PIDS[2]}" (( trialcount = trialcount + 1 )) if (( trialcount > SQUID_CONFIRM_TRIALCOUNT )); then ocf_exit_reason "$SQUID_NAME:Inconsistency of processes remains unsolved" return $OCF_ERR_GENERIC fi sleep 1 done } start_squid() { typeset status monitor_squid status=$? if [[ $status != $OCF_NOT_RUNNING ]]; then return $status fi set -- "$SQUID_OPTS" ocf_run $SQUID_EXE -f "$SQUID_CONF" "$@" status=$? 
if [[ $status != $OCF_SUCCESS ]]; then return $OCF_ERR_GENERIC fi while true; do get_pids if are_all_pids_found && are_pids_sane; then return $OCF_SUCCESS fi ocf_log info "$SQUID_NAME:Waiting for squid to be invoked" sleep 1 done return $OCF_ERR_GENERIC } stop_squid() { typeset lapse_sec if ocf_run $SQUID_EXE -f $SQUID_CONF -k shutdown; then lapse_sec=0 while true; do get_pids if is_squid_dead; then rm -f $SQUID_PIDFILE return $OCF_SUCCESS fi (( lapse_sec = lapse_sec + 1 )) if (( lapse_sec > SQUID_STOP_TIMEOUT )); then break fi sleep 1 ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ "stop NORM $lapse_sec/$SQUID_STOP_TIMEOUT" done fi while true; do get_pids ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ "try to stop by SIGKILL:${SQUID_PIDS[0]} ${SQUID_PIDS[2]}" kill -KILL ${SQUID_PIDS[0]} ${SQUID_PIDS[2]} sleep 1 if is_squid_dead; then rm -f $SQUID_PIDFILE return $OCF_SUCCESS fi done return $OCF_ERR_GENERIC } status_squid() { return $OCF_SUCCESS } validate_all_squid() { ocf_log info "validate_all_squid[$SQUID_NAME]" return $OCF_SUCCESS } : === Debug ${0##*/} $1 === if [[ "$1" = "meta-data" ]]; then metadata_squid exit $? fi SQUID_CONF="${OCF_RESKEY_squid_conf}" if [[ -z "$SQUID_CONF" ]]; then ocf_exit_reason "SQUID_CONF is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_NAME="${SQUID_CONF##*/}" SQUID_NAME="${SQUID_NAME%.*}" DEBUG_LOG="${OCF_RESKEY_debug_log-/var/log/squid_${SQUID_NAME}_debug}.log" DEBUG_MODE="" case $OCF_RESKEY_debug_mode in *x*) DEBUG_MODE="${DEBUG_MODE}x";; esac case $OCF_RESKEY_debug_mode in *v*) DEBUG_MODE="${DEBUG_MODE}v";; esac if [ -n "$DEBUG_MODE" ]; then PS4='\d \t \h '"${1-unknown} " export PS4 exec 1>>$DEBUG_LOG 2>&1 set -$DEBUG_MODE fi SQUID_EXE="${OCF_RESKEY_squid_exe}" if [[ -z "$SQUID_EXE" ]]; then ocf_exit_reason "SQUID_EXE is not defined" exit $OCF_ERR_CONFIGURED fi if [[ ! 
-x "$SQUID_EXE" ]]; then ocf_exit_reason "$SQUID_EXE is not found" exit $OCF_ERR_CONFIGURED fi SQUID_PIDFILE="${OCF_RESKEY_squid_pidfile}" if [[ -z "$SQUID_PIDFILE" ]]; then ocf_exit_reason "SQUID_PIDFILE is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_PORT="${OCF_RESKEY_squid_port}" if [[ -z "$SQUID_PORT" ]]; then ocf_exit_reason "SQUID_PORT is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_OPTS="${OCF_RESKEY_squid_opts}" SQUID_PIDS=( ) SQUID_CONFIRM_TRIALCOUNT="${OCF_RESKEY_squid_confirm_trialcount-3}" SQUID_STOP_TIMEOUT="${OCF_RESKEY_squid_stop_timeout-10}" SQUID_SUSPEND_TRIALCOUNT="${OCF_RESKEY_squid_suspend_trialcount-10}" PROCESS_PATTERN="$SQUID_EXE -f $SQUID_CONF" COMMAND=$1 case "$COMMAND" in start) ocf_log debug "[$SQUID_NAME] Enter squid start" start_squid func_status=$? ocf_log debug "[$SQUID_NAME] Leave squid start $func_status" exit $func_status ;; stop) ocf_log debug "[$SQUID_NAME] Enter squid stop" stop_squid func_status=$? ocf_log debug "[$SQUID_NAME] Leave squid stop $func_status" exit $func_status ;; status) status_squid exit $? ;; monitor) #ocf_log debug "[$SQUID_NAME] Enter squid monitor" monitor_squid func_status=$? #ocf_log debug "[$SQUID_NAME] Leave squid monitor $func_status" exit $func_status ;; validate-all) validate_all_squid exit $? ;; *) usage ;; esac # vim: set sw=4 ts=4 : diff --git a/heartbeat/Stateful b/heartbeat/Stateful index 1a90e2706..2e2608ff4 100755 --- a/heartbeat/Stateful +++ b/heartbeat/Stateful @@ -1,189 +1,189 @@ #!/bin/sh # # # Example of a stateful OCF Resource Agent. # # Copyright (c) 2006 Andrew Beekhof # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot" ####################################################################### meta_data() { cat < - + 1.0 This is an example resource agent that impliments two states Example stateful resource agent Location to store the resource state in State file END exit $OCF_SUCCESS } ####################################################################### stateful_usage() { cat < ${OCF_RESKEY_state} } stateful_check_state() { target=$1 if [ -f ${OCF_RESKEY_state} ]; then state=`cat ${OCF_RESKEY_state}` if [ "x$target" = "x$state" ]; then return $OCF_SUCCESS fi else if [ "x$target" = "x" ]; then return $OCF_SUCCESS fi fi return $OCF_ERR_GENERIC } stateful_start() { stateful_check_state master if [ $? = 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_MASTER fi stateful_update slave $CRM_MASTER -v 5 return $OCF_SUCCESS } stateful_demote() { stateful_check_state if [ $? = 0 ]; then # CRM Error - Should never happen return $OCF_NOT_RUNNING fi stateful_update slave $CRM_MASTER -v 5 return $OCF_SUCCESS } stateful_promote() { stateful_check_state if [ $? 
= 0 ]; then return $OCF_NOT_RUNNING fi stateful_update master $CRM_MASTER -v 10 return $OCF_SUCCESS } stateful_stop() { $CRM_MASTER -D stateful_check_state master if [ $? = 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_MASTER fi if [ -f ${OCF_RESKEY_state} ]; then rm ${OCF_RESKEY_state} fi return $OCF_SUCCESS } stateful_monitor() { stateful_check_state "master" if [ $? = 0 ]; then return $OCF_RUNNING_MASTER fi stateful_check_state "slave" if [ $? = 0 ]; then return $OCF_SUCCESS fi if [ -f ${OCF_RESKEY_state} ]; then echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents" cat ${OCF_RESKEY_state} return $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } stateful_validate() { exit $OCF_SUCCESS } : ${OCF_RESKEY_state=${HA_RSCTMP}/Stateful-${OCF_RESOURCE_INSTANCE}.state} case $__OCF_ACTION in meta-data) meta_data;; start) stateful_start;; promote) stateful_promote;; demote) stateful_demote;; stop) stateful_stop;; monitor) stateful_monitor;; validate-all) stateful_validate;; usage|help) stateful_usage $OCF_SUCCESS;; *) stateful_usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? diff --git a/heartbeat/awseip b/heartbeat/awseip index a1bee44f1..471954e09 100755 --- a/heartbeat/awseip +++ b/heartbeat/awseip @@ -1,247 +1,250 @@ #!/bin/sh # # # Manage Elastic IP with Pacemaker # # # Copyright 2016 guessi # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # # Prerequisites: # # - preconfigured AWS CLI running environment (AccessKey, SecretAccessKey, etc.) 
# - a reserved secondary private IP address for EC2 instances high availablity # - IAM user role with the following permissions: # * DescribeInstances # * AssociateAddress # * DisassociateAddress # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # # Defaults # OCF_RESKEY_awscli_default="/usr/bin/awscli" OCF_RESKEY_api_delay_default="1" : ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}} : ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}} meta_data() { cat < - + 1.0 description command line tools for aws services aws cli tools reserved elastic ip for ec2 instance reserved elastic ip for ec2 instance reserved allocation id for ec2 instance reserved allocation id for ec2 instance predefined private ip address for ec2 instance predefined private ip address for ec2 instance a short delay between API calls, to avoid sending API too quick a short delay between API calls END } ####################################################################### awseip_usage() { cat < # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # # Prerequisites: # # - preconfigured AWS CLI running environment (AccessKey, SecretAccessKey, etc.) 
# - a reserved secondary private IP address for EC2 instances high availablity # - IAM user role with the following permissions: # * DescribeInstances # * AssignPrivateIpAddresses # * UnassignPrivateIpAddresses # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # # Defaults # OCF_RESKEY_awscli_default="/usr/bin/awscli" OCF_RESKEY_api_delay_default="1" : ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}} : ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}} meta_data() { cat < - + 1.0 description command line tools for aws services aws cli tools reserved secondary private ip for ec2 instance reserved secondary private ip for ec2 instance a short delay between API calls, to avoid sending API too quick a short delay between API calls END } ####################################################################### awsvip_usage() { cat < # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/ocf-directories ####################################################################### meta_data() { cat < - + 1.0 This agent manages the clvmd daemon. clvmd Start with cmirrord (cluster mirror log daemon). activate cmirrord Options to clvmd. Refer to clvmd.8 for detailed descriptions. Daemon Options Whether or not to activate all cluster volume groups after starting the clvmd or not. Note that clustered volume groups will always be deactivated before the clvmd stops regardless of what this option is set to. Activate volume groups END } ####################################################################### : ${OCF_RESKEY_daemon_options:="-d0"} : ${OCF_RESKEY_activate_vgs:="true"} sbindir=$HA_SBIN_DIR if [ -z $sbindir ]; then sbindir=/usr/sbin fi DAEMON="clvmd" CMIRROR="cmirrord" DAEMON_PATH="${sbindir}/clvmd" CMIRROR_PATH="${sbindir}/cmirrord" LVMCONF="${sbindir}/lvmconf" LOCK_FILE="/var/lock/subsys/$DAEMON" # attempt to detect where the vg tools are located # for some reason this isn't consistent with sbindir # in some distros. vgtoolsdir=$(dirname $(which vgchange 2> /dev/null) 2> /dev/null) if [ -z "$vgtoolsdir" ]; then vgtoolsdir="$sbindir" fi LVM_VGCHANGE=${vgtoolsdir}/vgchange LVM_VGDISPLAY=${vgtoolsdir}/vgdisplay LVM_VGSCAN=${vgtoolsdir}/vgscan # Leaving this in for legacy. We do not want to advertize # the abilty to set options in the systconfig exists, we want # to expand the OCF style options as necessary instead. [ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster [ -f /etc/sysconfig/$DAEMON ] && . 
/etc/sysconfig/$DAEMON CLVMD_TIMEOUT="90" if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then CLVMD_TIMEOUT=$(($OCF_RESKEY_CRM_meta_timeout/1000)) fi clvmd_usage() { cat </dev/null | grep -a "${binary}" > /dev/null 2>&1 if [ $? -eq 0 ];then # shortcut without requiring pgrep to search through all procs return $OCF_SUCCESS fi fi pid=$(pgrep ${binary}) case $? in 0) ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}." echo "$pid" > $pidfile return $OCF_SUCCESS;; 1) rm -f "$pidfile" > /dev/null 2>&1 ocf_log info "$binary is not running" return $OCF_NOT_RUNNING;; *) rm -f "$pidfile" > /dev/null 2>&1 ocf_exit_reason "Error encountered detecting pid status of $binary" return $OCF_ERR_GENERIC;; esac } clvmd_status() { local rc local mirror_rc clvmd_validate if [ $? -ne $OCF_SUCCESS ]; then ocf_exit_reason "Unable to monitor, Environment validation failed." return $? fi check_process $DAEMON rc=$? mirror_rc=$rc if ocf_is_true $OCF_RESKEY_with_cmirrord; then check_process $CMIRROR mirror_rc=$? fi # If these ever don't match, return error to force recovery if [ $mirror_rc -ne $rc ]; then return $OCF_ERR_GENERIC fi return $rc } # NOTE: replace this with vgs, once display filter per attr is implemented. clustered_vgs() { ${LVM_VGDISPLAY} 2>/dev/null | awk 'BEGIN {RS="VG Name"} {if (/Clustered/) print $1;}' } wait_for_process() { local binary=$1 local timeout=$2 local count=0 ocf_log info "Waiting for $binary to exit" while [ $count -le $timeout ]; do check_process $binary if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log info "$binary terminated" return $OCF_SUCCESS fi sleep 1 count=$((count+1)) done return $OCF_ERR_GENERIC } time_left() { local end=$1 local default=$2 local now=$SECONDS local result=0 result=$(( $end - $now )) if [ $result -lt $default ]; then return $default fi return $result } clvmd_stop() { local LVM_VGS local rc=$OCF_SUCCESS local end=$(( $SECONDS + $CLVMD_TIMEOUT )) clvmd_status if [ $? 
-eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi check_process $DAEMON if [ $? -ne $OCF_NOT_RUNNING ]; then LVM_VGS="$(clustered_vgs)" if [ -n "$LVM_VGS" ]; then ocf_log info "Deactivating clustered VG(s):" ocf_run ${LVM_VGCHANGE} -anl $LVM_VGS if [ $? -ne 0 ]; then ocf_exit_reason "Failed to deactivate volume groups, cluster vglist = $LVM_VGS" return $OCF_ERR_GENERIC fi fi ocf_log info "Signaling $DAEMON to exit" killall -TERM $DAEMON if [ $? != 0 ]; then ocf_exit_reason "Failed to signal -TERM to $DAEMON" return $OCF_ERR_GENERIC fi wait_for_process $DAEMON $CLVMD_TIMEOUT rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "$DAEMON failed to exit" return $rc fi rm -f $LOCK_FILE fi check_process $CMIRROR if [ $? -ne $OCF_NOT_RUNNING ] && ocf_is_true $OCF_RESKEY_with_cmirrord; then local timeout ocf_log info "Signaling $CMIRROR to exit" killall -INT $CMIRROR time_left $end 10; timeout=$? wait_for_process $CMIRROR $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then killall -KILL $CMIRROR time_left $end 10; timeout=$? wait_for_process $CMIRROR $(time_left $end 10) rc=$? fi fi return $rc } start_process() { local binary_path=$1 local opts=$2 check_process "$(basename $binary_path)" if [ $? -ne $OCF_SUCCESS ]; then ocf_log info "Starting $binary_path: " ocf_run $binary_path $opts rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to launch $binary_path, exit code $rc" exit $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } clvmd_activate_all() { if ! ocf_is_true "$OCF_RESKEY_activate_vgs"; then ocf_log info "skipping vg activation, activate_vgs is set to $OCF_RESKEY_activate_vgs" return $OCF_SUCCESS fi # Activate all volume groups by leaving the # "volume group name" parameter empty ocf_run ${LVM_VGCHANGE} -aay if [ $? -ne 0 ]; then ocf_log info "Failed to activate VG(s):" clvmd_stop return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } clvmd_start() { local rc=0 local CLVMDOPTS="-T${CLVMD_TIMEOUT} $OCF_RESKEY_daemon_options" clvmd_validate if [ $? 
-ne $OCF_SUCCESS ]; then ocf_exit_reason "Unable to start, Environment validation failed." return $? fi clvmd_status if [ $? -eq $OCF_SUCCESS ]; then ocf_log debug "$DAEMON already started" clvmd_activate_all return $?; fi # autoset locking type to clusted when lvmconf tool is available if [ -x "$LVMCONF" ]; then $LVMCONF --enable-cluster > /dev/null 2>&1 fi # if either of these fail, script will exit OCF_ERR_GENERIC if ocf_is_true $OCF_RESKEY_with_cmirrord; then start_process $CMIRROR_PATH fi start_process $DAEMON_PATH "$CLVMDOPTS" # Refresh local cache. # # It's possible that new PVs were added to this, or other VGs # while this node was down. So we run vgscan here to avoid # any potential "Missing UUID" messages with subsequent # LVM commands. # The following step would be better and more informative to the user: # 'action "Refreshing VG(s) local cache:" ${LVM_VGSCAN}' # but it could show warnings such as: # 'clvmd not running on node x-y-z Unable to obtain global lock.' # and the action would be shown as FAILED when in reality it didn't. # Ideally vgscan should have a startup mode that would not print # unnecessary warnings. ${LVM_VGSCAN} > /dev/null 2>&1 touch $LOCK_FILE clvmd_activate_all clvmd_status return $? } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; start) clvmd_start;; stop) clvmd_stop;; monitor) clvmd_status;; validate-all) clvmd_validate;; usage|help) clvmd_usage;; *) clvmd_usage exit $OCF_ERR_UNIMPLEMENTED;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/dhcpd b/heartbeat/dhcpd index 996a316f7..1596ddd7a 100755 --- a/heartbeat/dhcpd +++ b/heartbeat/dhcpd @@ -1,558 +1,558 @@ #!/bin/sh # # Resource Agent for managing dhcpd resources. # # License: GNU General Public License (GPL) # (c) 2011-2012 Chris Bowlby, # # A fair amount of this script has been pulled from the official 0dhcpd # init script. 
# Those portions have been integrated into this script to
# ensure consistent behavior between the resource agent and the
# original script. The copyrights and original authors are credited
# as follows:
#
# Copyright (c) 1996, 1997, 1998 S.u.S.E. GmbH
# Copyright (c) 1998, 1999, 2000, 2001 SuSE GmbH
# Copyright (c) 2002, 2003 SuSE Linux AG
# Copyright (c) 2004-2008 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# Author(s) : Rolf Haberrecker , 1997-1999
#             Peter Poeml , 2000-2006
#             Marius Tomaschewski , 2006-2010
#
# and Linux-HA contributors

# Initialization:
# OCF_FUNCTIONS_DIR is only defaulted when it is unset ("=" rather than
# ":="), so a value supplied by the caller -- even an empty one -- wins.
# The expansions are quoted so the resulting path is never subject to
# word splitting or globbing.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# Defaults
OCF_RESKEY_binary_default="dhcpd"
OCF_RESKEY_pid_default="/var/run/dhcpd.pid"
OCF_RESKEY_user_default=dhcpd
OCF_RESKEY_group_default=nogroup
OCF_RESKEY_config_default=""
OCF_RESKEY_chrooted_default="true"
OCF_RESKEY_chrooted_path_default="/var/lib/dhcp"
OCF_RESKEY_leases_default="/db/dhcpd.leases"
OCF_RESKEY_interface_default=""
OCF_RESKEY_includes_default=""

# On some systems, the chrooted default is slightly different.
# Lets do our best to support both by default: fall back to
# /var/lib/dhcpd only when the primary default directory is missing
# and the alternative one exists.
if [ ! -d "$OCF_RESKEY_chrooted_path_default" ] && [ -d "/var/lib/dhcpd" ]; then
	OCF_RESKEY_chrooted_path_default="/var/lib/dhcpd"
fi

# Apply the defaults to every parameter that is unset. The ":" builtin
# is a no-op; only the side effect of the "${var=default}" expansion is
# wanted. Quoting keeps the expanded value from being glob-expanded.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_group=${OCF_RESKEY_group_default}}"
: "${OCF_RESKEY_chrooted=${OCF_RESKEY_chrooted_default}}"
: "${OCF_RESKEY_chrooted_path=${OCF_RESKEY_chrooted_path_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_leases=${OCF_RESKEY_leases_default}}"
: "${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}}"
: "${OCF_RESKEY_includes=${OCF_RESKEY_includes_default}}"

# To enable support for different versions of dhcp, we need
# to know what version we are being run against.
DHCP_VERSION_MAJOR=`$OCF_RESKEY_binary --version 2>&1 | awk -F- '{print $3}' | awk -F. '{print $1}' | sed s/^[a-zA-Z]//g` # These files are always copied by default to ensure the chroot environment works. DEFAULT_FILE_LIST="/etc/gai.conf /etc/nsswitch.conf /etc/resolv.conf /etc/host.conf /etc/hosts /etc/localtime /dev/urandom" usage() { cat < - + 0.1 Manage an ISC DHCP server service in a chroot environment. Chrooted ISC DHCP server resource agent. The absolute path to the DHCP server configuration file. Configuration file Configure the dhcpd service to run in a chrooted or non-chrooted mode. Enable chroot mode The absolute path of the chrooted DHCP environment. The chrooted path The binary for the DHCP server process. An absolute path definition is not required, but can be used to override environment path. dhcpd binary The system user the DHCP server process will run as when it is chrooted. dhcpd owner The system group the DHCP server process will run as when it is chrooted. dhcpd group owner The network interface(s) the DHCP server process will bind to. A blank value will bind the process to all interfaces. Network Interface This parameter provides a means to copy include files into the chrooted environment. If a dhcpd.conf file contains a line similar to this: include "/etc/named.keys"; Then an admin also has to tell the dhcpd RA that this file should be pulled into the chrooted environment. This is a space delimited list. Include files The leases database file, relative to chrooted_path. Leases file The path and filename of the PID file. It is relative to chrooted_path. PID file EOF } # Validate most critical parameters dhcpd_validate_all() { check_binary $OCF_RESKEY_binary if ! ocf_is_probe; then # Test for the appropriate configuration files depending on if # chroot mode is enabled. if ocf_is_true $OCF_RESKEY_chrooted ; then if ! test -e "$OCF_RESKEY_chrooted_path"; then ocf_exit_reason "Path $OCF_RESKEY_chrooted_path does not exist." 
return $OCF_ERR_INSTALLED fi if test -n "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config"; then ocf_exit_reason "Configuration file $OCF_RESKEY_chrooted_path/$OCF_RESKEY_config doesn't exist" return $OCF_ERR_INSTALLED fi else if test -n "$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_config"; then ocf_exit_reason "Configuration file $OCF_RESKEY_config doesn't exist" return $OCF_ERR_INSTALLED fi fi fi if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then ocf_exit_reason "User $OCF_RESKEY_user doesn't exist" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } # dhcpd_monitor. Send a request to dhcpd and check response. dhcpd_monitor() { # Assume chrooted mode is being used, but if not update the PIDF # variable to point to the non-chrooted PID file. PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ! ocf_is_true $OCF_RESKEY_chrooted ; then PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` fi ocf_pidfile_status $PIDF >/dev/null 2>&1 || return $OCF_NOT_RUNNING return $OCF_SUCCESS } # Initialize Chroot dhcpd_initialize_chroot() { # If we are running the initialization for the first time, we need to make # the new chrooted folder, in case we are not using the same default. if ! [ -d $OCF_RESKEY_chrooted_path ] ; then ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use." fi # Make sure all sub-paths are created if something went wrong during # a partial run. for i in db dev etc lib64 var/run; do mkdir -p $OCF_RESKEY_chrooted_path/$i done # If we are running version 4 of the dhcp server, we need to mount a proc partition. if [ $DHCP_VERSION_MAJOR -ge 4 ] ; then mkdir -p $OCF_RESKEY_chrooted_path/proc if ! [ -e $OCF_RESKEY_chrooted_path/proc/net/dev ] ; then mount -t proc -o ro proc $OCF_RESKEY_chrooted_path/proc > /dev/null 2>&1 fi fi # If the folder to store the PID file does not exist, make it. if ! 
[ -d "$OCF_RESKEY_chrooted_path`dirname $OCF_RESKEY_pid`" ] ; then mkdir -p "$OCF_RESKEY_chrooted_path`dirname $OCF_RESKEY_pid`" fi # Ensure all permissions are in place if the folder was re-created. chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_chrooted_path/`dirname $OCF_RESKEY_leases` chown -R $OCF_RESKEY_user:$OCF_RESKEY_group "$OCF_RESKEY_chrooted_path/`dirname $OCF_RESKEY_pid`" ## If there is no conf file, we can't initialize the chrooted ## environment, return with "program not configured" if ! [ -f $OCF_RESKEY_config ] ; then ocf_exit_reason "dhcpd has not been configured." return $OCF_ERR_CONFIGURED fi # If the leases file does not exist, create it, as this is a fresh install. if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases fi # Remove the random device. test -e "$OCF_RESKEY_chrooted_path/dev/urandom" && rm -f $OCF_RESKEY_chrooted_path/dev/urandom # Test for the existance of the defined include files, and append # them to the list of files to be copied. for i in $OCF_RESKEY_includes ; do if [ -e $i ] ; then DEFAULT_FILE_LIST="$DEFAULT_FILE_LIST $i" else ocf_exit_reason "include file $i does not exist" return $OCF_ERR_INSTALLED fi done # Ensure all "modified" non-chrooted configuration files are copied into the chrooted environment. for i in $OCF_RESKEY_config $DEFAULT_FILE_LIST; do # First, lets make sure the directory exists within the chrooted environment. if test -d "$i" ; then mkdir -p $OCF_RESKEY_chrooted_path/$i elif test -e "$i" ; then mkdir -p "`dirname $OCF_RESKEY_chrooted_path/$i`" fi # Next, we copy the configuration file into place. cp -aL "$i" "$OCF_RESKEY_chrooted_path/${i%/*}/" > /dev/null 2>&1 || { ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } done libdir=$(basename $(echo ${OCF_RESKEY_chrooted_path}/lib*)) if test -x /usr/bin/ldd ; then get_ldd_deps() { ldd_wl="/$libdir/lib" ldd_bl="/$libdir/libc\." 
/usr/bin/ldd "$1" | while read a b c d ; do [ -n "$c" ] || continue echo "$c" | grep -q "$ldd_wl" || continue echo "$c" | grep -q "$ldd_bl" && continue echo $c done } else get_ldd_deps() { :; } fi cplibs=`for i in /$libdir/libresolv.so.* /$libdir/libnss_*.so.* /$libdir/libpthread.so.0 /$libdir/libdl.so.2 do if [ -s "$i" ] ; then echo "$i" get_ldd_deps "$i" fi done | sort -u` for i in $cplibs ; do if [ -s "$i" ]; then cp -pL "$i" "${OCF_RESKEY_chrooted_path}/$libdir/" || { ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } fi done return $OCF_SUCCESS } # Initialize a non-chroot environment dhcpd_initialize() { ## If there is no conf file, we can't start a dhcp service. if ! [ -f $OCF_RESKEY_config ] ; then ocf_exit_reason "dhcpd has not been configured." return $OCF_ERR_CONFIGURED fi # As with the standard DHCP init script, we can still use the # chrooted default path for storing the leases file. This behavior # is consistent with the existing /etc/init.d/dhcpd script. if ! [ -d $OCF_RESKEY_chrooted_path ] ; then ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use." fi # If the leases file does not exist, create it, as this is a fresh install. if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases fi # if the PID storage path does not exist, make it, and setup the permissions. # NOTE: This part of the script has a potential security flaw, in that if someone # puts in /var/run as the path, it will change ownership to the dhcpd user # and group. However, all that would do is allow that user to view the contents # of the files, which they can do now anyway. If this becomes an issue, I can work # in some changes. # We need to append "dhcpd" to the path for the PID file storage folder, because # if /var/run is used, that folders permissions can not be changed, otherwise it affects # more then just one application. if ! 
[ -d `dirname $OCF_RESKEY_pid`/dhcpd ] ; then mkdir -p `dirname $OCF_RESKEY_pid`/dhcpd if [ -n "$OCF_RESKEY_user" -a "x$OCF_RESKEY_user" != "xroot" ] ; then chown $OCF_RESKEY_user `dirname $OCF_RESKEY_pid`/dhcpd fi if [ -n "$OCF_RESKEY_group" -a "x$OCF_RESKEY_group" != "xwheel" ] ; then chgrp $OCF_RESKEY_group `dirname $OCF_RESKEY_pid`/dhcpd fi fi return $OCF_SUCCESS } # Start dhcpd_start() { # Lets make sure we are not already running. if dhcpd_monitor; then ocf_log info "dhcpd already running" return $OCF_SUCCESS fi # Only initialize the chrooted path(s) if chroot mode is enabled. if ocf_is_true $OCF_RESKEY_chrooted ; then dhcpd_initialize_chroot || { ocf_exit_reason "Could not fully initialize the chroot environment." ; return $OCF_ERR_INSTALLED; } else dhcpd_initialize || { ocf_exit_reason "Could not fully initialize the runtime environment." ; return $OCF_ERR_INSTALLED; } fi dhcpd_validate_all || exit # Define an empty string variable, to ensure it exists when needed. DHCPD_ARGS="" # To ensure consistent behavior with the standard DHCPD init script, # use the chrooted default path for storing a leases file, when not in # a chrooted enviroment. if ocf_is_true $OCF_RESKEY_chrooted ; then DHCPD_ARGS="$DHCPD_ARGS -chroot $OCF_RESKEY_chrooted_path -lf $OCF_RESKEY_leases" else DHCPD_ARGS="$DHCPD_ARGS -lf $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases" fi if [ -n "$OCF_RESKEY_user" ]; then DHCPD_ARGS="$DHCPD_ARGS -user $OCF_RESKEY_user" fi if [ -n "$OCF_RESKEY_group" ]; then DHCPD_ARGS="$DHCPD_ARGS -group $OCF_RESKEY_group" fi # If there is a pid file containing a pid, the machine might have crashed. pid files in # /var/run are always cleaned up at boot time, but this is not the case for the pid file in # the chroot jail. Therefore, an old pid file may exist. This is only a problem if it # incidentally contains the pid of a running process. If this process is not a 'dhcpd', # we remove the pid. (dhcpd itself only checks whether the pid is alive or not.) 
PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ocf_is_true $OCF_RESKEY_chrooted ; then ocf_log info "Starting dhcpd [chroot] service." DHCPD_ARGS="$DHCPD_ARGS -pf $OCF_RESKEY_pid" else ocf_log info "Starting dhcpd [non-chroot] service." PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` DHCPD_ARGS="$DHCPD_ARGS -pf $PIDF" fi test -e "$PIDF" && rm -f $PIDF ocf_run $OCF_RESKEY_binary -cf $OCF_RESKEY_config $DHCPD_ARGS $OCF_RESKEY_interface || return $OCF_ERR_INSTALLED while ! dhcpd_monitor; do sleep .1 ocf_log info "waiting for dhcpd to start" return $OCF_SUCCESS done if ocf_is_true $OCF_RESKEY_chrooted ; then ocf_log info "dhcpd [chrooted] has started." else ocf_log info "dhcpd [non-chrooted] has started." fi return $OCF_SUCCESS } # Stop dhcpd_stop () { local timeout local timewait local rc dhcpd_monitor rc=$? case "$rc" in "$OCF_SUCCESS") # Currently running, and is expected behaviour. ;; "$OCF_NOT_RUNNING") # Currently not running, therefore nothing to do. ocf_log info "dhcpd already stopped" return $OCF_SUCCESS ;; esac PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ! ocf_is_true $OCF_RESKEY_chrooted ; then PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` fi kill `cat $PIDF` # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timewait=$((OCF_RESKEY_CRM_meta_timeout/1500)) sleep 0.1; timeout=0 # Sleep here for .1 sec to let dhcpd finish. while dhcpd_monitor ; do if [ $timeout -ge $timewait ]; then break else sleep 1 timeout=`expr $timeout + 1` fi done #If still up if dhcpd_monitor 2>&1; then ocf_log notice "dhcpd is still up! Trying kill -s KILL" kill -s SIGKILL `cat $PIDF` fi # If we are running a dhcp server v4 or higher, unmount the proc partition. if [ $DHCP_VERSION_MAJOR -ge 4 ] ; then # We only want to unmount proc in a chrooted environment, else we could # cause other issues. 
if ocf_is_true $OCF_RESKEY_chrooted ; then umount $OCF_RESKEY_chrooted_path/proc > /dev/null 2>&1 fi fi rm -f $PIDF ocf_log info "dhcpd stopped" return $OCF_SUCCESS } # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) dhcpd_meta_data exit $OCF_SUCCESS ;; validate-all) dhcpd_validate_all exit $OCF_SUCCESS ;; usage|help) dhcpd_usage exit $OCF_SUCCESS ;; esac # Translate each action into the appropriate function call case $__OCF_ACTION in start) dhcpd_start;; stop) dhcpd_stop;; restart) dhcpd_stop dhcpd_start ;; monitor) dhcpd_monitor;; *) dhcpd_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/docker b/heartbeat/docker index 876d616d0..47f099e33 100755 --- a/heartbeat/docker +++ b/heartbeat/docker @@ -1,470 +1,470 @@ #!/bin/sh # # The docker HA resource agent creates and launches a docker container # based off a supplied docker image. Containers managed by this agent # are both created and removed upon the agent's start and stop actions. # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < - + 1.0 The docker HA resource agent creates and launches a docker container based off a supplied docker image. Containers managed by this agent are both created and removed upon the agent's start and stop actions. Docker container resource agent. The docker image to base this container off of. docker image The name to give the created container. By default this will be that resource's instance name. docker container name Allow the image to be pulled from the configured docker registry when the image does not exist locally. NOTE, this can drastically increase the time required to start the container if the image repository is pulled over the network. Allow pulling non-local images Add options to be appended to the 'docker run' command which is used when creating the container during the start action. This option allows users to do things such as setting a custom entry point and injecting environment variables into the newly created container. Note the '-d' option is supplied regardless of this value to force containers to run in the background. NOTE: Do not explicitly specify the --name argument in the run_opts. This agent will set --name using either the resource's instance or the name provided in the 'name' argument of this agent. run options Specifiy a command to launch within the container once it has initialized. run command Specifiy the full path of a command to launch within the container to check the health of the container. 
This command must return 0 to indicate that the container is healthy. A non-zero return code will indicate that the container has failed and should be recovered. If 'docker exec' is supported, it is used to execute the command. If not, nsenter is used. Note: Using this method for monitoring processes inside a container is not recommended, as containerd tries to track processes running inside the container and does not deal well with many short-lived processes being spawned. Ensure that your container monitors its own processes and terminates on fatal error rather than invoking a command from the outside. monitor command Kill a container immediately rather than waiting for it to gracefully shutdown force kill Allow the container to be reused after stopping the container. By default containers are removed after stop. With the reuse option containers will persist after the container stops. reuse container END } ####################################################################### REQUIRE_IMAGE_PULL=0 docker_usage() { cat </dev/null 2>&1; then out=$(docker exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1) rc=$? else out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1) rc=$? fi if [ $rc -eq 127 ]; then ocf_log err "monitor cmd failed (rc=$rc), output: $out" ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container." # there is no recovering from this, exit immediately exit $OCF_ERR_ARGS elif [ $rc -ne 0 ]; then ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out" rc=$OCF_ERR_GENERIC else ocf_log debug "monitor cmd passed: exit code = $rc" fi return $rc } container_exists() { docker inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1 } remove_container() { if ocf_is_true "$OCF_RESKEY_reuse"; then # never remove the container if we have reuse enabled. return 0 fi container_exists if [ $? 
-ne 0 ]; then # don't attempt to remove a container that doesn't exist return 0 fi ocf_log notice "Cleaning up inactive container, ${CONTAINER}." ocf_run docker rm $CONTAINER } docker_simple_status() { local val container_exists if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING fi # retrieve the 'Running' attribute for the container val=$(docker inspect --format {{.State.Running}} $CONTAINER 2>/dev/null) if [ $? -ne 0 ]; then #not running as a result of container not being found return $OCF_NOT_RUNNING fi if ocf_is_true "$val"; then # container exists and is running return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } docker_monitor() { local rc=0 docker_simple_status rc=$? if [ $rc -ne 0 ]; then return $rc fi monitor_cmd_exec } docker_start() { local run_opts="-d --name=${CONTAINER}" # check to see if the container has already started docker_simple_status if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi if [ -n "$OCF_RESKEY_run_opts" ]; then run_opts="$run_opts $OCF_RESKEY_run_opts" fi if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}" docker pull "${OCF_RESKEY_image}" if [ $? -ne 0 ]; then ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}" return $OCF_ERR_GENERIC fi fi if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then ocf_log info "starting existing container $CONTAINER." ocf_run docker start $CONTAINER else # make sure any previous container matching our container name is cleaned up first. # we already know at this point it wouldn't be running remove_container ocf_log info "running container $CONTAINER for the first time" ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd fi if [ $? -ne 0 ]; then ocf_exit_reason "docker failed to launch container" return $OCF_ERR_GENERIC fi # wait for monitor to pass before declaring that the container is started while true; do docker_simple_status if [ $? 
-ne $OCF_SUCCESS ]; then ocf_exit_reason "Newly created docker container exited after start" return $OCF_ERR_GENERIC fi monitor_cmd_exec if [ $? -eq $OCF_SUCCESS ]; then ocf_log notice "Container $CONTAINER started successfully" return $OCF_SUCCESS fi ocf_exit_reason "waiting on monitor_cmd to pass after start" sleep 1 done } docker_stop() { local timeout=60 docker_simple_status if [ $? -eq $OCF_NOT_RUNNING ]; then remove_container return $OCF_SUCCESS fi if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 )) if [ $timeout -lt 10 ]; then timeout=10 fi fi if ocf_is_true "$OCF_RESKEY_force_kill"; then ocf_run docker kill $CONTAINER else ocf_log debug "waiting $timeout second[s] before killing container" ocf_run docker stop -t=$timeout $CONTAINER fi if [ $? -ne 0 ]; then ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." return $OCF_ERR_GENERIC fi remove_container if [ $? -ne 0 ]; then ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } image_exists() { # assume that OCF_RESKEY_name have been validated local IMAGE_NAME="$(echo ${OCF_RESKEY_image} | awk -F':' '{print $1}')" # if no tag was specified, use default "latest" local COLON_FOUND=0 local IMAGE_TAG="latest" COLON_FOUND="$(echo "${OCF_RESKEY_image}" | grep -o ':' | grep -c .)" if [ ${COLON_FOUND} -ne 0 ]; then IMAGE_TAG="$(echo ${OCF_RESKEY_image} | awk -F':' '{print $NF}')" fi # IMAGE_NAME might be following formats: # - image # - repository/image # - docker.io/image (some distro will display "docker.io/" as prefix) docker images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/)?${IMAGE_NAME}:${IMAGE_TAG}\$" if [ $? 
-eq 0 ]; then # image found return 0 fi if ocf_is_true "$OCF_RESKEY_allow_pull"; then REQUIRE_IMAGE_PULL=1 ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start" return 0 fi # image not found. return 1 } docker_validate() { check_binary docker if [ -z "$OCF_RESKEY_image" ]; then ocf_exit_reason "'image' option is required" exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_monitor_cmd" ]; then docker exec --help >/dev/null 2>&1 if [ ! $? ]; then ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified" check_binary nsenter fi fi image_exists if [ $? -ne 0 ]; then ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found." exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } # TODO : # When a user starts plural clones in a node in globally-unique, a user cannot appoint plural name parameters. # When a user appoints reuse, the resource agent cannot connect plural clones with a container. if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then if [ -n "$OCF_RESKEY_name" ]; then if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] then ocf_exit_reason "Cannot make plural clones from the same name parameter." exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] then ocf_exit_reason "Cannot make plural master from the same name parameter." exit $OCF_ERR_CONFIGURED fi fi : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`} else : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}} fi if [ -n "$OCF_RESKEY_container" ]; then # we'll keep the container attribute around for a bit in order not to break # any existing deployments. The 'name' attribute is prefered now though. 
CONTAINER=$OCF_RESKEY_container ocf_log warn "The 'container' attribute is depreciated" else CONTAINER=$OCF_RESKEY_name fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; start) docker_validate docker_start;; stop) docker_stop;; monitor) docker_monitor;; validate-all) docker_validate;; usage|help) docker_usage exit $OCF_SUCCESS ;; *) docker_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/eDir88 b/heartbeat/eDir88 index b4c7952ff..edb759210 100755 --- a/heartbeat/eDir88 +++ b/heartbeat/eDir88 @@ -1,460 +1,460 @@ #!/bin/bash # # eDirectory Resource Agent (RA) for Heartbeat. # This script is only compatible with eDirectory 8.8 and later # # Copyright (c) 2007 Novell Inc, Yan Fitterer # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# # # OCF parameters: # OCF_RESKEY_eDir_config_file - full filename to instance configuration file # OCF_RESKEY_eDir_monitor_ldap - Should we monitor LDAP (0/1 - 1 is true) # OCF_RESKEY_eDir_monitor_idm - Should we monitor IDM (0/1 - 1 is true) # OCF_RESKEY_eDir_jvm_initial_heap - Value of the DHOST_INITIAL_HEAP java env var # OCF_RESKEY_eDir_jvm_max_heap - Value of the DHOST_MAX_HEAP java env var # OCF_RESKEY_eDir_jvm_options - Value of the DHOST_OPTIONS java env var ############################################################################### ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs test -f /opt/novell/eDirectory/bin/ndspath && . /opt/novell/eDirectory/bin/ndspath 2>/dev/null >/dev/null ####################################################################### usage() { ME=$(basename "$0") cat <<-EOFA usage: $ME start|stop|status|monitor|validate-all $ME manages an eDirectory instance as an HA resource. The 'start' operation starts the instance. The 'stop' operation stops the instance. The 'status' operation reports if the instance is running. The 'monitor' operation reports if the instance is running, and runs additional checks. The 'validate-all' operation checks the validity of the arguments (environment variables). EOFA } eDir_meta_data() { cat <<-EOFB - + 1.0 Resource script for managing an eDirectory instance. Manages a single instance of eDirectory as an HA resource. The "multiple instances" feature or eDirectory has been added in version 8.8. This script will not work for any version of eDirectory prior to 8.8. This RA can be used to load multiple eDirectory instances on the same host. It is very strongly recommended to put eDir configuration files (as per the eDir_config_file parameter) on local storage on each node. 
This is necessary for this RA to be able to handle situations where the shared storage has become unavailable. If the eDir configuration file is not available, this RA will fail, and heartbeat will be unable to manage the resource. Side effects include STONITH actions, unmanageable resources, etc... Setting a high action timeout value is _very_ _strongly_ recommended. eDir with IDM can take in excess of 10 minutes to start. If heartbeat times out before eDir has had a chance to start properly, mayhem _WILL ENSUE_. The LDAP module seems to be one of the very last to start. So this script will take even longer to start on installations with IDM and LDAP if the monitoring of IDM and/or LDAP is enabled, as the start command will wait for IDM and LDAP to be available. Manages a Novell eDirectory directory server Path to configuration file for eDirectory instance. eDir config file Should we monitor if LDAP is running for the eDirectory instance? eDir monitor ldap Should we monitor if IDM is running for the eDirectory instance? eDir monitor IDM Value for the DHOST_INITIAL_HEAP java environment variable. If unset, java defaults will be used. DHOST_INITIAL_HEAP value Value for the DHOST_MAX_HEAP java environment variable. If unset, java defaults will be used. DHOST_MAX_HEAP value Value for the DHOST_OPTIONS java environment variable. If unset, original values will be used. DHOST_OPTIONS value EOFB return $OCF_SUCCESS } # # eDir_start: Start eDirectory instance # eDir_start() { if eDir_status ; then ocf_log info "eDirectory is already running ($NDSCONF)." 
return $OCF_SUCCESS fi # Start eDirectory instance if [ -n "$OCF_RESKEY_eDir_jvm_initial_heap" ]; then DHOST_JVM_INITIAL_HEAP=$OCF_RESKEY_eDir_jvm_initial_heap export DHOST_JVM_INITIAL_HEAP fi if [ -n "$OCF_RESKEY_eDir_jvm_max_heap" ]; then DHOST_JVM_MAX_HEAP=$OCF_RESKEY_eDir_jvm_max_heap export DHOST_JVM_MAX_HEAP fi if [ -n "$OCF_RESKEY_eDir_jvm_options" ]; then DHOST_JVM_OPTIONS=$OCF_RESKEY_eDir_jvm_options export DHOST_JVM_OPTIONS fi $NDSMANAGE start --config-file "$NDSCONF" > /dev/null 2>&1 if [ $? -eq 0 ]; then ocf_log info "eDir start command sent for $NDSCONF." else echo "ERROR: Can't start eDirectory for $NDSCONF." return $OCF_ERR_GENERIC fi CNT=0 while ! eDir_monitor ; do # Apparently, LDAP will only start after all other services # Startup time can be in excess of 10 minutes. # Leave a very long heartbeat timeout on the start action # We're relying on heartbeat to bail us out... let CNT=$CNT+1 ocf_log info "eDirectory start waiting for ${CNT}th retry for $NDSCONF." sleep 10 done ocf_log info "eDirectory start verified for $NDSCONF." return $OCF_SUCCESS } # # eDir_stop: Stop eDirectory instance # This action is written in such a way that even when run # on a node were things are broken (no binaries, no config # etc...) it will try to stop any running ndsd processes # and report success if none are running. # eDir_stop() { if ! eDir_status ; then return $OCF_SUCCESS fi $NDSMANAGE stop --config-file "$NDSCONF" >/dev/null 2>&1 if eDir_status ; then # eDir failed to stop. ocf_log err "eDirectory instance failed to stop for $NDSCONF" return $OCF_ERR_GENERIC else ocf_log info "eDirectory stop verified for $NDSCONF." return $OCF_SUCCESS fi } # # eDir_status: is eDirectory instance up ? # eDir_status() { if [ ! -r "$NDSCONF" ] ; then ocf_log err "Config file missing ($NDSCONF)." 
exit $OCF_ERR_GENERIC fi # Find how many ndsd processes have open listening sockets # with the IP of this eDir instance IFACE=$(grep -i "n4u.server.interfaces" $NDSCONF | cut -f2 -d= | tr '@' ':') if [ -z "$IFACE" ] ; then ocf_log err "Cannot retrieve interfaces from $NDSCONF. eDirectory may not be correctly configured." exit $OCF_ERR_GENERIC fi # In case of multiple IP's split into an array # and check all of them IFS=', ' read -a IFACE2 <<< "$IFACE" ocf_log debug "Found ${#IFACE2[@]} interfaces from $NDSCONF." counter=${#IFACE2[@]} for IFACE in "${IFACE2[@]}" do ocf_log debug "Checking ndsd instance for $IFACE" NDSD_SOCKS=$(netstat -ntlp | grep -ce "$IFACE.*ndsd") if [ "$NDSD_SOCKS" -eq 1 ] ; then let counter=counter-1 ocf_log debug "Found ndsd instance for $IFACE" elif [ "$NDSD_SOCKS" -gt 1 ] ; then ocf_log err "More than 1 ndsd listening socket matched. Likely misconfiguration of eDirectory." exit $OCF_ERR_GENERIC fi done if [ $counter -eq 0 ] ; then # Correct ndsd instance is definitely running ocf_log debug "All ndsd instances found." return 0; elif [ $counter -lt ${#IFACE2[@]} ]; then ocf_log err "Only some ndsd listening sockets matched, something is very wrong." exit $OCF_ERR_GENERIC fi # No listening socket. Make sure we don't have the process running... PIDDIR=$(grep -i "n4u.server.vardir" "$NDSCONF" | cut -f2 -d=) if [ -z "$PIDDIR" ] ; then ocf_log err "Cannot get vardir from nds config ($NDSCONF). Probable eDir configuration error." exit $OCF_ERR_GENERIC fi NDSD_PID=$(cat $PIDDIR/ndsd.pid 2>/dev/null) if [ -z "$NDSD_PID" ] ; then # PID file unavailable or empty. # This will happen if the PIDDIR is not available # on this node at this time. return 1 fi RC=$(ps -p "$NDSD_PID" | grep -c ndsd) if [ "$RC" -gt 0 ] ; then # process found but no listening socket. ndsd likely not operational ocf_log err "ndsd process found, but no listening socket. 
Something's gone wrong ($NDSCONF)" exit $OCF_ERR_GENERIC fi ocf_log debug "ndsd instance is not running, but no other error detected." return 1 } # # eDir_monitor: Do more in-depth checks to ensure that eDirectory is fully functional # LDAP and IDM checks are only done if reqested. # # eDir_monitor() { if ! eDir_status ; then ocf_log info "eDirectory instance is down ($NDSCONF)" return $OCF_NOT_RUNNING fi # We know the right ndsd is running locally, check health $NDSSTAT --config-file "$NDSCONF" >/dev/null 2>&1 if [ $? -ne 0 ] ; then return 1 fi # Monitor IDM first, as it will start before LDAP if [ $MONITOR_IDM -eq 1 ]; then RET=$($NDSTRACE --config-file "$NDSCONF" -c modules | egrep -i '^vrdim.*Running' | awk '{print $1}') if [ "$RET" != "vrdim" ]; then ocf_log err "eDirectory IDM engine isn't running ($NDSCONF)." return $OCF_ERR_GENERIC fi fi if [ $MONITOR_LDAP -eq 1 ] ; then $NDSNLDAP -c --config-file "$NDSCONF" >/dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log err "eDirectory LDAP server isn't running ($NDSCONF)." return $OCF_ERR_GENERIC fi fi ocf_log debug "eDirectory monitor success ($NDSCONF)" return $OCF_SUCCESS } # # eDir_validate: Validate environment # eDir_validate() { declare rc=$OCF_SUCCESS # Script must be run as root if ! ocf_is_root ; then ocf_log err "$0 must be run as root" rc=$OCF_ERR_GENERIC fi # ndsmanage must be available and runnable check_binary $NDSMANAGE # ndsstat must be available and runnable check_binary $NDSSTAT # Config file must be readable if [ ! 
-r "$NDSCONF" ] ; then ocf_log err "eDirectory configuration file [$NDSCONF] is not readable" rc=$OCF_ERR_ARGS fi # monitor_ldap must be unambiguously resolvable to a truth value MONITOR_LDAP=$(echo "$MONITOR_LDAP" | tr [A-Z] [a-z]) case "$MONITOR_LDAP" in yes|true|1) MONITOR_LDAP=1;; no|false|0) MONITOR_LDAP=0;; *) ocf_log err "Configuration parameter eDir_monitor_ldap has invalid value [$MONITOR_LDAP]" rc=$OCF_ERR_ARGS;; esac # monitor_idm must be unambiguously resolvable to a truth value MONITOR_IDM=$(echo "$MONITOR_IDM" | tr [A-Z] [a-z]) case "$MONITOR_IDM" in yes|true|1) MONITOR_IDM=1;; no|false|0) MONITOR_IDM=0;; *) ocf_log err "Configuration parameter eDir_monitor_idm has invalid value [$MONITOR_IDM]" rc=$OCF_ERR_ARGS;; esac # eDir_jvm_initial_heap must be blank or numeric if [ -n "$OCF_RESKEY_eDir_jvm_initial_heap" ] ; then if ! ocf_is_decimal "$OCF_RESKEY_eDir_jvm_initial_heap" ; then ocf_log err "Configuration parameter eDir_jvm_initial_heap has invalid" \ "value [$OCF_RESKEY_eDir_jvm_initial_heap]" rc=$OCF_ERR_ARGS fi fi # eDir_jvm_max_heap must be blank or numeric if [ -n "$OCF_RESKEY_eDir_jvm_max_heap" ] ; then if ! ocf_is_decimal "$OCF_RESKEY_eDir_jvm_max_heap" ; then ocf_log err "Configuration parameter eDir_jvm_max_heap has invalid" \ "value [$OCF_RESKEY_eDir_jvm_max_heap]" rc=$OCF_ERR_ARGS fi fi if [ $rc -ne $OCF_SUCCESS ] ; then ocf_log err "Invalid environment" fi return $rc } # # Start of main logic # ocf_log debug "$0 started with arguments \"$*\"" NDSBASE=/opt/novell/eDirectory NDSNLDAP=$NDSBASE/sbin/nldap NDSMANAGE=$NDSBASE/bin/ndsmanage NDSSTAT=$NDSBASE/bin/ndsstat NDSTRACE=$NDSBASE/bin/ndstrace NDSCONF=${OCF_RESKEY_eDir_config_file:-/etc/opt/novell/eDirectory/conf/nds.conf} MONITOR_LDAP=${OCF_RESKEY_eDir_monitor_ldap:-0} MONITOR_IDM=${OCF_RESKEY_eDir_monitor_idm:-0} # What kind of method was invoked? 
# First pass: serve every action that can be answered without a validated
# environment and exit right away. start, stop and monitor fall through.
case "$1" in
	validate-all)
		eDir_validate
		exit $?
		;;
	meta-data)
		eDir_meta_data
		exit $OCF_SUCCESS
		;;
	status)
		if ! eDir_status; then
			ocf_log info "eDirectory instance is down ($NDSCONF)"
			exit $OCF_NOT_RUNNING
		fi
		ocf_log info "eDirectory instance is up ($NDSCONF)"
		exit $OCF_SUCCESS
		;;
	start|stop|monitor)
		# handled below, once the environment has been validated
		;;
	usage)
		usage
		exit $OCF_SUCCESS
		;;
	*)
		ocf_log err "Invalid argument [$1]"
		usage
		exit $OCF_ERR_ARGS
		;;
esac

# Everything past this point requires a valid environment.
# (stop is listed among the pass-through actions above because, ideally,
# it should be able to clean up after a start that failed on bad args.)
eDir_validate
validate_rc=$?
[ $validate_rc -ne $OCF_SUCCESS ] && exit $validate_rc

# Second pass: run the requested action and exit with its status.
action_rc=0
case "$1" in
	start)
		eDir_start
		action_rc=$?
		;;
	stop)
		eDir_stop
		action_rc=$?
		;;
	monitor)
		eDir_monitor
		action_rc=$?
		;;
esac

exit $action_rc
diff --git a/heartbeat/fio b/heartbeat/fio
index 6d2371f4f..8b1122980 100755
--- a/heartbeat/fio
+++ b/heartbeat/fio
@@ -1,172 +1,172 @@
#!/bin/bash
#
# fio RA
#
# Copyright (c) 2010 SUSE Linux Products GmbH, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < - + 1.0 fio is a generic I/O load generator. This RA allows start/stop of fio instances to simulate load on a cluster without configuring complex services. fio IO load generator Arguments to the fio client. Minimally, this should be a (list of) job descriptions to run. fio arguments END } ####################################################################### fio_usage() { cat </dev/null 2>&1 ${fio_state_file} ocf_log info "fio started as pid=$fio_pid" exit $OCF_SUCCESS } fio_stop() { for sig in SIGINT SIGTERM SIGKILL ; do fio_monitor ; rc=$? case $rc in $OCF_NOT_RUNNING) ocf_log info "fio already stopped." exit $OCF_SUCCESS ;; $OCF_ERR_GENERIC) rm $fio_state_file ocf_log info "fio stopped and cleaned up." exit $OCF_SUCCESS ;; $OCF_SUCCESS) if [ -n "$fio_pid" ]; then ocf_log info "Sending $sig to fio (pid=$fio_pid)" kill -$sig $fio_pid sleep 3 continue fi ocf_log err "Internal logic failure in fio RA." ;; *) ocf_log err "Internal logic failure in fio RA." ;; esac done ocf_log err "fio did not stop! Perhaps hung on IO?" exit $OCF_ERR_GENERIC } fio_monitor() { fio_state_file="${HA_RSCTMP}/fio-${OCF_RESOURCE_INSTANCE}.state" if [ ! -e $fio_state_file ]; then return $OCF_NOT_RUNNING fi fio_pid=`cat $fio_state_file` if [ -z "$fio_pid" ]; then ocf_log err "State file found, but empty. Assuming stopped." 
return $OCF_NOT_RUNNING fi ps=`ps h -o comm $fio_pid 2>&1` if [ "$ps" != "fio" ]; then fio_pid="" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } fio_validate() { return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; validate-all) fio_validate;; usage|help) fio_usage exit $OCF_SUCCESS ;; esac ocf_is_probe || check_binary fio case $__OCF_ACTION in start) fio_start;; stop) fio_stop;; monitor) fio_monitor;; *) fio_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit index c1bf11dbb..0a07c5faa 100755 --- a/heartbeat/iSCSILogicalUnit +++ b/heartbeat/iSCSILogicalUnit @@ -1,690 +1,690 @@ #!/bin/bash # # # iSCSILogicalUnit OCF RA. Exports and manages iSCSI Logical Units. # # (c) 2013 LINBIT, Lars Ellenberg # (c) 2009-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
#

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# Defaults
# Set a default implementation based on software installed.
# The first management utility found wins: ietadm, tgtadm, lio_node,
# then targetcli. If none is found the default stays unset.
if have_binary ietadm; then
	OCF_RESKEY_implementation_default="iet"
elif have_binary tgtadm; then
	OCF_RESKEY_implementation_default="tgt"
elif have_binary lio_node; then
	OCF_RESKEY_implementation_default="lio"
elif have_binary targetcli; then
	OCF_RESKEY_implementation_default="lio-t"
fi
: ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}}

# Use a default SCSI ID and SCSI SN that is unique across the cluster,
# and persistent in the event of resource migration.
# SCSI IDs are limited to 24 bytes, but only 16 bytes are known to be
# supported by all iSCSI implementations this RA cares about. Thus,
# for a default, use the first 16 characters of
# $OCF_RESOURCE_INSTANCE.
OCF_RESKEY_scsi_id_default="${OCF_RESOURCE_INSTANCE:0:16}"
: ${OCF_RESKEY_scsi_id=${OCF_RESKEY_scsi_id_default}}
# To have a reasonably unique default SCSI SN, use the first 8 bytes
# of an MD5 hash of $OCF_RESOURCE_INSTANCE.
# "openssl md5" prefixes the digest differently depending on its version
# ("(stdin)= <hash>" or "MD5(stdin)= <hash>"); strip everything up to and
# including "= " so that only hex digits of the digest end up in the
# serial number. Output without any prefix is passed through unchanged.
# NOTE: on hosts whose openssl prints the "MD5(stdin)= " form, the default
# previously started with the literal text "MD5"; set scsi_sn explicitly
# if the old value must be kept.
sn=$(printf '%s' "${OCF_RESOURCE_INSTANCE}" | openssl md5 | sed -e 's/^.*= *//')
OCF_RESKEY_scsi_sn_default=${sn:0:8}
: ${OCF_RESKEY_scsi_sn=${OCF_RESKEY_scsi_sn_default}}
# set 0 as a default value for lio iblock device number
OCF_RESKEY_lio_iblock_default=0
OCF_RESKEY_lio_iblock=${OCF_RESKEY_lio_iblock:-$OCF_RESKEY_lio_iblock_default}

## tgt specifics
# tgt has "backing store type" and "backing store open flags",
# as well as device-type.
#
# suggestions how to make this generic across all supported implementations?
# how should they be named, how should they be mapped to implementation specifics?
# # OCF_RESKEY_tgt_bstype # OCF_RESKEY_tgt_bsoflags # OCF_RESKEY_tgt_bsopts # OCF_RESKEY_tgt_device_type ####################################################################### meta_data() { cat < - + 0.9 Manages iSCSI Logical Unit. An iSCSI Logical unit is a subdivision of an SCSI Target, exported via a daemon that speaks the iSCSI protocol. Manages iSCSI Logical Units (LUs) The iSCSI target daemon implementation. Must be one of "iet", "tgt", "lio", or "lio-t". If unspecified, an implementation is selected based on the availability of management utilities, with "iet" being tried first, then "tgt", then "lio", then "lio-t". iSCSI target daemon implementation The iSCSI Qualified Name (IQN) that this Logical Unit belongs to. iSCSI target IQN The Logical Unit number (LUN) exposed to initiators. Logical Unit number (LUN) The path to the block device exposed. Some implementations allow this to be a regular file, too. Block device (or file) path The SCSI ID to be configured for this Logical Unit. The default is the resource name, truncated to 24 bytes. SCSI ID The SCSI serial number to be configured for this Logical Unit. The default is a hash of the resource name, truncated to 8 bytes. SCSI serial number The SCSI vendor ID to be configured for this Logical Unit. SCSI vendor ID The SCSI product ID to be configured for this Logical Unit. SCSI product ID TGT specific backing store type. If you want to use aio, make sure your tgtadm is built against libaio. See tgtadm(8). TGT backing store type TGT specific backing store open flags (direct|sync). See tgtadm(8). TGT backing store open flags TGT specific backing store options. See tgtadm(8). TGT backing store options TGT specific device type. See tgtadm(8). TGT device type Additional LU parameters. A space-separated list of "name=value" pairs which will be passed through to the iSCSI daemon's management interface. The supported parameters are implementation dependent. Neither the name nor the value may contain whitespace. 
List of iSCSI LU parameters Allowed initiators. A space-separated list of initiators allowed to connect to this lun. Initiators may be listed in any syntax the target implementation allows. If this parameter is empty or not set, access to this lun will not be allowed from any initiator, if target is not in demo mode. This parameter is only necessary when using LIO. List of iSCSI initiators allowed to connect to this lun. LIO iblock device name, a number starting from 0. Using distinct values here avoids a warning in LIO "LEGACY: SHARED HBA"; and it is necessary when using multiple LUNs started at the same time (eg. on node failover) to prevent a race condition in tcm_core on mkdir() in /sys/kernel/config/target/core/. LIO iblock device number END } ####################################################################### iSCSILogicalUnit_usage() { cat < /sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/wwn/vpd_unit_serial fi ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns create /backstores/block/${OCF_RESOURCE_INSTANCE} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then for initiator in ${OCF_RESKEY_allowed_initiators}; do ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls create ${initiator} add_mapped_luns=False || exit $OCF_ERR_GENERIC ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} create ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC done fi ;; esac # Force the monitor operation to pass before start is considered a success. iSCSILogicalUnit_monitor } iSCSILogicalUnit_stop() { iSCSILogicalUnit_monitor if [ $? 
-eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi case $OCF_RESKEY_implementation in iet) # IET allows us to remove LUs while they are in use ocf_run ietadm --op delete \ --tid=${TID} \ --lun=${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC ;; tgt) # tgt will fail to remove an LU while it is in use, # but at the same time does not allow us to # selectively shut down a connection that is using a # specific LU. Thus, we need to loop here until tgtd # decides that the LU is no longer in use, or we get # timed out by the LRM. while ! ocf_run -warn tgtadm --lld iscsi --op delete --mode logicalunit \ --tid ${TID} \ --lun=${OCF_RESKEY_lun}; do sleep 1 done ;; lio) acls_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/acls" for initiatorpath in ${acls_configfs_path}/*; do initiator=$(basename "${initiatorpath}") if [ -e "${initiatorpath}/lun_${OCF_RESKEY_lun}" ]; then ocf_log info "deleting acl at ${initiatorpath}/lun_${OCF_RESKEY_lun}" ocf_run lio_node --dellunacl=${OCF_RESKEY_target_iqn} 1 \ ${initiator} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC fi done lun_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/" if [ -e "${lun_configfs_path}" ]; then ocf_run lio_node --dellun=${OCF_RESKEY_target_iqn} 1 ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC fi block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path" if [ -e "${block_configfs_path}" ]; then ocf_run tcm_node --freedev=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC fi ;; lio-t) # "targetcli delete" will fail if the LUN is already # gone. Log a warning and still push ahead. 
ocf_run -warn targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns delete ${OCF_RESKEY_lun} if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then for initiator in ${OCF_RESKEY_allowed_initiators}; do if targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} status | grep "Mapped LUNs: 0" >/dev/null ; then ocf_run -warn targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/ delete ${initiator} fi done fi # If we've proceeded down to here and we're unable to # delete the backstore, then something is seriously # wrong and we need to fail the stop operation # (potentially causing fencing) ocf_run targetcli /backstores/block delete ${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC ;; esac return $OCF_SUCCESS } iSCSILogicalUnit_monitor() { if [ x"${OCF_RESKEY_tgt_bstype}" != x"rbd" ]; then # If our backing device (or file) doesn't even exist, we're not running [ -e ${OCF_RESKEY_path} ] || return $OCF_NOT_RUNNING fi case $OCF_RESKEY_implementation in iet) # Figure out and set the target ID TID=`sed -ne "s/tid:\([[:digit:]]\+\) name:${OCF_RESKEY_target_iqn}$/\1/p" < /proc/net/iet/volume` if [ -z "${TID}" ]; then # Our target is not configured, thus we're not # running. return $OCF_NOT_RUNNING fi # FIXME: this looks for a matching LUN and path, but does # not actually test for the correct target ID. grep -E -q "[[:space:]]+lun:${OCF_RESKEY_lun}.*path:${OCF_RESKEY_path}$" /proc/net/iet/volume && return $OCF_SUCCESS ;; tgt) # Figure out and set the target ID TID=`tgtadm --lld iscsi --op show --mode target \ | sed -ne "s/^Target \([[:digit:]]\+\): ${OCF_RESKEY_target_iqn}$/\1/p"` if [ -z "$TID" ]; then # Our target is not configured, thus we're not # running. return $OCF_NOT_RUNNING fi # This only looks for the backing store, but does not test # for the correct target ID and LUN. 
tgtadm --lld iscsi --op show --mode target \ | grep -E -q "[[:space:]]+Backing store.*: ${OCF_RESKEY_path}$" && return $OCF_SUCCESS ;; lio) configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/${OCF_RESOURCE_INSTANCE}/udev_path" [ -e ${configfs_path} ] && [ `cat ${configfs_path}` = "${OCF_RESKEY_path}" ] && return $OCF_SUCCESS # if we aren't activated, is a block device still left over? block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path" [ -e ${block_configfs_path} ] && ocf_log warn "existing block without an active lun: ${block_configfs_path}" [ -e ${block_configfs_path} ] && return $OCF_ERR_GENERIC ;; lio-t) configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/*/udev_path" [ -e ${configfs_path} ] && [ `cat ${configfs_path}` = "${OCF_RESKEY_path}" ] && return $OCF_SUCCESS # if we aren't activated, is a block device still left over? block_configfs_path="/sys/kernel/config/target/core/iblock_*/${OCF_RESOURCE_INSTANCE}/udev_path" [ -e ${block_configfs_path} ] && ocf_log warn "existing block without an active lun: ${block_configfs_path}" [ -e ${block_configfs_path} ] && return $OCF_ERR_GENERIC ;; esac return $OCF_NOT_RUNNING } iSCSILogicalUnit_validate() { # Do we have all required variables? for var in target_iqn lun path; do param="OCF_RESKEY_${var}" if [ -z "${!param}" ]; then ocf_exit_reason "Missing resource parameter \"$var\"!" exit $OCF_ERR_CONFIGURED fi done # Is the configured implementation supported? 
case "$OCF_RESKEY_implementation" in "iet"|"tgt"|"lio"|"lio-t") ;; "") # The user didn't specify an implementation, and we were # unable to determine one from installed binaries (in # other words: no binaries for any supported # implementation could be found) ocf_exit_reason "Undefined iSCSI target implementation" exit $OCF_ERR_INSTALLED ;; *) ocf_exit_reason "Unsupported iSCSI target implementation \"$OCF_RESKEY_implementation\"!" exit $OCF_ERR_CONFIGURED ;; esac # Do we have a valid LUN? case $OCF_RESKEY_implementation in iet) # IET allows LUN 0 and up [ $OCF_RESKEY_lun -ge 0 ] case $? in 0) # OK ;; 1) ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be a non-negative integer)." exit $OCF_ERR_CONFIGURED ;; *) ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be an integer)." exit $OCF_ERR_CONFIGURED ;; esac ;; tgt) # tgt reserves LUN 0 for its own purposes [ $OCF_RESKEY_lun -ge 1 ] case $? in 0) # OK ;; 1) ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be greater than 0)." exit $OCF_ERR_CONFIGURED ;; *) ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be an integer)." exit $OCF_ERR_CONFIGURED ;; esac ;; esac # Do we have any configuration parameters that the current # implementation does not support? 
local unsupported_params local var local envar case $OCF_RESKEY_implementation in iet) # IET does not support setting the vendor and product ID # (it always uses "IET" and "VIRTUAL-DISK") unsupported_params="vendor_id product_id allowed_initiators lio_iblock tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type" ;; tgt) unsupported_params="allowed_initiators lio_iblock" ;; lio) unsupported_params="scsi_id vendor_id product_id tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type" ;; lio-t) unsupported_params="scsi_id vendor_id product_id tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type lio_iblock" ;; esac for var in ${unsupported_params}; do envar=OCF_RESKEY_${var} defvar=OCF_RESKEY_${var}_default if [ -n "${!envar}" ]; then if [[ "${!envar}" != "${!defvar}" ]];then case "$__OCF_ACTION" in start|validate-all) ocf_log warn "Configuration parameter \"${var}\"" \ "is not supported by the iSCSI implementation" \ "and will be ignored." ;; esac fi fi done if ! ocf_is_probe; then # Do we have all required binaries? case $OCF_RESKEY_implementation in iet) check_binary ietadm ;; tgt) check_binary tgtadm ;; lio) check_binary tcm_node check_binary lio_node ;; lio-t) check_binary targetcli ;; esac # Is the required kernel functionality available? case $OCF_RESKEY_implementation in iet) [ -d /proc/net/iet ] if [ $? -ne 0 ]; then ocf_log err "/proc/net/iet does not exist or is not a directory -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi ;; tgt) # tgt is userland only ;; esac fi return $OCF_SUCCESS } case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) iSCSILogicalUnit_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test iSCSILogicalUnit_validate case $__OCF_ACTION in start) iSCSILogicalUnit_start;; stop) iSCSILogicalUnit_stop;; monitor|status) iSCSILogicalUnit_monitor;; reload) ocf_log err "Reloading..." 
iSCSILogicalUnit_start ;; validate-all) ;; *) iSCSILogicalUnit_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/iSCSITarget b/heartbeat/iSCSITarget index 08832cd64..08a765625 100755 --- a/heartbeat/iSCSITarget +++ b/heartbeat/iSCSITarget @@ -1,683 +1,683 @@ #!/bin/bash # # # iSCSITarget OCF RA. Exports and manages iSCSI targets. # # (c) 2009-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults # Set a default implementation based on software installed if have_binary ietadm; then OCF_RESKEY_implementation_default="iet" elif have_binary tgtadm; then OCF_RESKEY_implementation_default="tgt" elif have_binary lio_node; then OCF_RESKEY_implementation_default="lio" elif have_binary targetcli; then OCF_RESKEY_implementation_default="lio-t" fi : ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}} # Listen on 0.0.0.0:3260 by default OCF_RESKEY_portals_default="0.0.0.0:3260" : ${OCF_RESKEY_portals=${OCF_RESKEY_portals_default}} # Lockfile, used for selecting a target ID LOCKFILE=${HA_RSCTMP}/iSCSITarget-${OCF_RESKEY_implementation}.lock ####################################################################### meta_data() { cat < - + 0.9 Manages iSCSI targets. An iSCSI target is a collection of SCSI Logical Units (LUs) exported via a daemon that speaks the iSCSI protocol. iSCSI target export agent The iSCSI target daemon implementation. Must be one of "iet", "tgt", "lio", or "lio-t". If unspecified, an implementation is selected based on the availability of management utilities, with "iet" being tried first, then "tgt", then "lio", then "lio-t". Specifies the iSCSI target implementation ("iet", "tgt", "lio", or "lio-t"). The target iSCSI Qualified Name (IQN). Should follow the conventional "iqn.yyyy-mm.<reversed domain name>[:identifier]" syntax. iSCSI target IQN The iSCSI target ID. Required for tgt. iSCSI target ID iSCSI network portal addresses. Not supported by all implementations. If unset, the default is to create one portal that listens on ${OCF_RESKEY_portal_default}. iSCSI portal addresses iSCSI iSER network portal addresses. Not supported by all implementations. iSCSI iSER enabled portal addresses Allowed initiators. A space-separated list of initiators allowed to connect to this target. Initiators may be listed in any syntax the target implementation allows. 
If this parameter is empty or not set, access to this target will be allowed from any initiator. List of iSCSI initiators allowed to connect to this target A username used for incoming initiator authentication. If unspecified, allowed initiators will be able to log in without authentication. This is a unique parameter, as it not allowed to re-use a single username across multiple target instances. Incoming account username A password used for incoming initiator authentication. Incoming account password Additional target parameters. A space-separated list of "name=value" pairs which will be passed through to the iSCSI daemon's management interface. The supported parameters are implementation dependent. Neither the name nor the value may contain whitespace. List of iSCSI target parameters END } ####################################################################### iSCSITarget_usage() { cat <> /etc/initiators.deny echo "${OCF_RESKEY_iqn} ${OCF_RESKEY_allowed_initiators// /,}" >> /etc/initiators.allow else echo "${OCF_RESKEY_iqn} ALL" >> /etc/initiators.allow fi # In iet, adding a new user and assigning it to a target # is one operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run ietadm --op new --user \ --tid=${tid} \ --params=IncomingUser=${OCF_RESKEY_incoming_username},Password=${OCF_RESKEY_incoming_password} \ || exit $OCF_ERR_GENERIC fi ;; tgt) local tid tid="${OCF_RESKEY_tid}" # Create the target. ocf_run tgtadm --lld iscsi --op new --mode target \ --tid=${tid} \ --targetname ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC # Set parameters. for param in ${OCF_RESKEY_additional_parameters}; do name=${param%=*} value=${param#*=} ocf_run tgtadm --lld iscsi --op update --mode target \ --tid=${tid} \ --name=${name} --value=${value} || exit $OCF_ERR_GENERIC done # For tgt, we always have to add access per initiator; # access to targets is denied by default. If # "allowed_initiators" is unset, we must use the special # keyword ALL. 
for initiator in ${OCF_RESKEY_allowed_initiators=ALL}; do ocf_run tgtadm --lld iscsi --op bind --mode target \ --tid=${tid} \ --initiator-address=${initiator} || exit $OCF_ERR_GENERIC done # In tgt, we must first create a user account, then assign # it to a target using the "bind" operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run tgtadm --lld iscsi --mode account --op new \ --user=${OCF_RESKEY_incoming_username} \ --password=${OCF_RESKEY_incoming_password} || exit $OCF_ERR_GENERIC ocf_run tgtadm --lld iscsi --mode account --op bind \ --tid=${tid} \ --user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC fi ;; lio) # lio distinguishes between targets and target portal # groups (TPGs). We will always create one TPG, with the # number 1. In lio, creating a network portal # automatically creates the corresponding target if it # doesn't already exist. for portal in ${OCF_RESKEY_portals}; do ocf_run lio_node --addnp ${OCF_RESKEY_iqn} 1 \ ${portal} || exit $OCF_ERR_GENERIC done # in lio, we can set target parameters by manipulating # the appropriate configfs entries for param in ${OCF_RESKEY_additional_parameters}; do name=${param%=*} value=${param#*=} configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/param/${name}" if [ -e ${configfs_path} ]; then echo ${value} > ${configfs_path} || exit $OCF_ERR_GENERIC else ocf_log warn "Unsupported iSCSI target parameter ${name}: will be ignored." fi done # lio does per-initiator filtering by default. To disable # this, we need to switch the target to "permissive mode". if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then for initiator in ${OCF_RESKEY_allowed_initiators}; do ocf_run lio_node --addnodeacl ${OCF_RESKEY_iqn} 1 \ ${initiator} || exit $OCF_ERR_GENERIC done else ocf_run lio_node --permissive ${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC # permissive mode enables read-only access by default, # so we need to change that to RW to be in line with # the other implementations. 
echo 0 > "/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/attrib/demo_mode_write_protect" if [ `cat /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/attrib/demo_mode_write_protect` -ne 0 ]; then ocf_log err "Failed to disable write protection for target ${OCF_RESKEY_iqn}." exit $OCF_ERR_GENERIC fi fi # TODO: add CHAP authentication support when it gets added # back into LIO ocf_run lio_node --disableauth ${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC # Finally, we need to enable the target to allow # initiators to connect ocf_run lio_node --enabletpg=${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC ;; lio-t) # lio distinguishes between targets and target portal # groups (TPGs). We will always create one TPG, with the # number 1. In lio, creating a network portal # automatically creates the corresponding target if it # doesn't already exist. ocf_run targetcli /iscsi set global auto_add_default_portal=false || exit $OCF_ERR_GENERIC ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC for portal in ${OCF_RESKEY_portals}; do if [ $portal != ${OCF_RESKEY_portals_default} ] ; then IFS=':' read -a sep_portal <<< "$portal" ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/portals create "${sep_portal[0]}" "${sep_portal[1]}" || exit $OCF_ERR_GENERIC else ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC fi done # in lio, we can set target parameters by manipulating # the appropriate configfs entries for param in ${OCF_RESKEY_additional_parameters}; do name=${param%=*} value=${param#*=} configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/param/${name}" if [ -e ${configfs_path} ]; then echo ${value} > ${configfs_path} || exit $OCF_ERR_GENERIC else ocf_log warn "Unsupported iSCSI target parameter ${name}: will be ignored." 
fi done # allow iSER enabled portal for iser_portal in ${OCF_RESKEY_iser_portals}; do configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/np/${iser_portal}\:*/iser" if [ -f ${configfs_path} ]; then echo "1" > ${configfs_path} || exit $OCF_ERR_GENERIC else ocf_log warn "Unable to set iSER on: $iser_portal" fi done # lio does per-initiator filtering by default. To disable # this, we need to switch the target to "permissive mode". if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then for initiator in ${OCF_RESKEY_allowed_initiators}; do ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/acls create ${initiator} || exit $OCF_ERR_GENERIC done else ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/ set attribute authentication=0 demo_mode_write_protect=0 generate_node_acls=1 cache_dynamic_acls=1 || exit $OCF_ERR_GENERIC fi # TODO: add CHAP authentication support when it gets added # back into LIO ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/ set attribute authentication=0 || exit $OCF_ERR_GENERIC # ocf_run targetcli /iscsi ;; esac iSCSITarget_monitor } iSCSITarget_stop() { iSCSITarget_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi local tid case $OCF_RESKEY_implementation in iet) # Figure out the target ID tid=`sed -ne "s/tid:\([[:digit:]]\+\) name:${OCF_RESKEY_iqn}/\1/p" < /proc/net/iet/volume` if [ -z "${tid}" ]; then ocf_log err "Failed to retrieve target ID for IQN ${OCF_RESKEY_iqn}" exit $OCF_ERR_GENERIC fi # Close existing connections. There is no other way to # do this in IET than to parse the contents of # /proc/net/iet/session. 
set -- $(sed -ne '/^tid:'${tid}' /,/^tid/ { /^[[:space:]]*sid:\([0-9]\+\)/ { s/^[[:space:]]*sid:\([0-9]*\).*/--sid=\1/; h; }; /^[[:space:]]*cid:\([0-9]\+\)/ { s/^[[:space:]]*cid:\([0-9]*\).*/--cid=\1/; G; p; }; }' < /proc/net/iet/session) while [[ -n $2 ]]; do # $2 $1 looks like "--sid=X --cid=Y" ocf_run ietadm --op delete \ --tid=${tid} $2 $1 shift 2 done # In iet, unassigning a user from a target and # deleting the user account is one operation. if [ -n "${OCF_RESKEY_incoming_username}" ]; then ocf_run ietadm --op delete --user \ --tid=${tid} \ --params=IncomingUser=${OCF_RESKEY_incoming_username} \ || exit $OCF_ERR_GENERIC fi # Loop on delete. Keep trying until we time out, if # necessary. while true; do if ietadm --op delete --tid=${tid}; then ocf_log debug "Removed target ${OCF_RESKEY_iqn}." break else ocf_log warn "Failed to remove target ${OCF_RESKEY_iqn}, retrying." sleep 1 fi done # Avoid stale /etc/initiators.{allow,deny} entries # for this target if [ -e /etc/initiators.deny ]; then ocf_run sed -e "/^${OCF_RESKEY_iqn}[[:space:]]/d" \ -i /etc/initiators.deny fi if [ -e /etc/initiators.allow ]; then ocf_run sed -e "/^${OCF_RESKEY_iqn}[[:space:]]/d" \ -i /etc/initiators.allow fi ;; tgt) tid="${OCF_RESKEY_tid}" # Close existing connections. There is no other way to # do this in tgt than to parse the output of "tgtadm --op # show". set -- $(tgtadm --lld iscsi --op show --mode target \ | sed -ne '/^Target '${tid}':/,/^Target/ { /^[[:space:]]*I_T nexus: \([0-9]\+\)/ { s/^.*: \([0-9]*\).*/--sid=\1/; h; }; /^[[:space:]]*Connection: \([0-9]\+\)/ { s/^.*: \([0-9]*\).*/--cid=\1/; G; p; }; /^[[:space:]]*LUN information:/ q; }') while [[ -n $2 ]]; do # $2 $1 looks like "--sid=X --cid=Y" ocf_run tgtadm --lld iscsi --op delete --mode connection \ --tid=${tid} $2 $1 shift 2 done # In tgt, we must first unbind the user account from # the target, then remove the account itself. 
#######################################
# Report whether the configured iSCSI target is currently exported.
# Globals:   OCF_RESKEY_implementation, OCF_RESKEY_iqn (read)
# Returns:   $OCF_SUCCESS when the target is found (and, for lio/lio-t,
#            when tpgt_1/enable reads 1); $OCF_NOT_RUNNING otherwise,
#            including for an unrecognised implementation.
#######################################
iSCSITarget_monitor() {
    local target_dir tpg_enabled

    case $OCF_RESKEY_implementation in
        iet)
            # NOTE(review): the IQN is interpolated into an unanchored
            # regular expression, so its dots act as wildcards and a
            # longer IQN sharing this prefix also matches -- confirm
            # whether that is acceptable.
            grep -Eq "tid:[0-9]+ name:${OCF_RESKEY_iqn}" /proc/net/iet/volume \
                && return $OCF_SUCCESS
            ;;
        tgt)
            tgtadm --lld iscsi --op show --mode target \
                | grep -Eq "Target [0-9]+: ${OCF_RESKEY_iqn}" \
                && return $OCF_SUCCESS
            ;;
        lio | lio-t)
            target_dir="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}"

            # if we have no configfs entry for the target, it's
            # definitely stopped
            [ -d "${target_dir}" ] || return $OCF_NOT_RUNNING

            # if the target is there, but its TPG is not enabled, then
            # we also consider it stopped.  An unreadable or empty
            # "enable" file is treated as 0 so that the numeric test
            # below never runs with a missing operand.
            tpg_enabled=$(cat "${target_dir}/tpgt_1/enable")
            [ "${tpg_enabled:-0}" -eq 1 ] || return $OCF_NOT_RUNNING
            return $OCF_SUCCESS
            ;;
    esac

    return $OCF_NOT_RUNNING
}
local required_vars case $OCF_RESKEY_implementation in iet) required_vars="iqn" ;; tgt) required_vars="iqn tid" ;; esac for var in ${required_vars}; do param="OCF_RESKEY_${var}" if [ -z "${!param}" ]; then ocf_exit_reason "Missing resource parameter \"$var\"!" exit $OCF_ERR_CONFIGURED fi done # Is the configured implementation supported? case "$OCF_RESKEY_implementation" in "iet"|"tgt"|"lio"|"lio-t") ;; "") # The user didn't specify an implementation, and we were # unable to determine one from installed binaries (in # other words: no binaries for any supported # implementation could be found) ocf_exit_reason "Undefined iSCSI target implementation" exit $OCF_ERR_INSTALLED ;; *) ocf_exit_reason "Unsupported iSCSI target implementation \"$OCF_RESKEY_implementation\"!" exit $OCF_ERR_CONFIGURED ;; esac # Do we have any configuration parameters that the current # implementation does not support? local unsupported_params local var local envar case $OCF_RESKEY_implementation in iet|tgt) # IET and tgt do not support binding a target portal to a # specific IP address. unsupported_params="portals" ;; lio|lio-t) # TODO: Remove incoming_username and incoming_password # from this check when LIO 3.0 gets CHAP authentication unsupported_params="tid incoming_username incoming_password" ;; esac for var in ${unsupported_params}; do envar=OCF_RESKEY_${var} defvar=OCF_RESKEY_${var}_default if [ -n "${!envar}" ]; then if [[ "${!envar}" != "${!defvar}" ]];then case "$__OCF_ACTION" in start|validate-all) ocf_log warn "Configuration parameter \"${var}\"" \ "is not supported by the iSCSI implementation" \ "and will be ignored." ;; esac fi fi done if ! ocf_is_probe; then # Do we have all required binaries? case $OCF_RESKEY_implementation in iet) check_binary ietadm ;; tgt) check_binary tgtadm ;; lio) check_binary tcm_node check_binary lio_node ;; lio-t) check_binary targetcli ;; esac # Is the required kernel functionality available? 
case $OCF_RESKEY_implementation in iet) [ -d /proc/net/iet ] if [ $? -ne 0 ]; then ocf_log err "/proc/net/iet does not exist or is not a directory -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi ;; tgt) # tgt is userland only ;; lio) # lio needs configfs to be mounted if ! grep -Eq "^.*/sys/kernel/config[[:space:]]+configfs" /proc/mounts; then ocf_log err "configfs not mounted at /sys/kernel/config -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi # check for configfs entries created by target_core_mod if [ ! -d /sys/kernel/config/target ]; then ocf_log err "/sys/kernel/config/target does not exist or is not a directory -- check if required modules are loaded." exit $OCF_ERR_INSTALLED fi ;; lio-t) #targetcli loads the needed kernel modules ;; esac fi return $OCF_SUCCESS } case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) iSCSITarget_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test iSCSITarget_validate case $__OCF_ACTION in start) iSCSITarget_start;; stop) iSCSITarget_stop;; monitor|status) iSCSITarget_monitor;; reload) ocf_log err "Reloading..." iSCSITarget_start ;; validate-all) ;; *) iSCSITarget_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/jboss b/heartbeat/jboss index 48a51ca53..ca8e20b8b 100755 --- a/heartbeat/jboss +++ b/heartbeat/jboss @@ -1,658 +1,658 @@ #!/bin/sh # # Description: Manages a Jboss Server as an OCF High-Availability # resource under Heartbeat/LinuxHA control # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # Copyright (c) 2009 Bauer Systems KG / Stefan Schluppeck # ####################################################################### # OCF parameters: # OCF_RESKEY_resource_name - The name of the resource. Default is ${OCF_RESOURCE_INSTANCE} # OCF_RESKEY_jboss_version - The version of JBoss. Default is 5. # why not let the RA log through lrmd? # 2009/09/09 Nakahira: # jboss_console is used to record output of the "run.sh". # The log of "Run.sh" should not be output to ha-log because it is so annoying. # OCF_RESKEY_console - A destination of the log of jboss run and shutdown script. Default is /var/log/${OCF_RESKEY_resource_name}.log # OCF_RESKEY_shutdown_timeout - Time-out at the time of the stop. Default is 5 # OCF_RESKEY_kill_timeout - The re-try number of times awaiting a stop. Default is 10 # OCF_RESKEY_user - A user name to start a JBoss. Default is root # OCF_RESKEY_statusurl - URL for state confirmation. Default is ${OCF_RESKEY_statusurl_default} # OCF_RESKEY_java_home - Home directory of the Java. Default is ${JAVA_HOME} # OCF_RESKEY_java_opts - Options for Java. # OCF_RESKEY_jboss_home - Home directory of Jboss. Default is None # is it possible to devise this string from options? I'm afraid # that allowing users to set this could be error prone. # 2009/09/09 Nakahira: # It is difficult to set it automatically because jboss_pstring # greatly depends on the environment. At any rate, system architect # should note that pstring doesn't influence other processes. 
# Probe the JBoss status URL with a single wget attempt.
# arg1 : when empty, wget is run through ocf_run (defined outside this
#        block; presumably it logs the failure -- confirm); when
#        non-empty, wget is run directly with its stderr discarded.
# Returns $OCF_SUCCESS when wget exits 0, $OCF_ERR_GENERIC otherwise.
isrunning_jboss()
{
    local rc

    # STATUSURL is quoted so that a URL containing glob characters
    # (for example the "?" of a query string) or blanks reaches wget
    # as one unmodified argument.
    if [ -z "$1" ]; then
        ocf_run -q -err wget -t 1 -O /dev/null "$STATUSURL"
    else
        # Retry message for restraint
        wget -t 1 -O /dev/null "$STATUSURL" 2>/dev/null
    fi
    rc=$?

    if [ $rc -eq 0 ]; then
        return $OCF_SUCCESS
    fi
    # JBoss service error
    return $OCF_ERR_GENERIC
}

# Make sure a rotatelogs process for $CONSOLE exists; if pgrep finds
# none, log a warning, call start_rotatelogs and log the outcome.
# Returns 0 when the process is already there, otherwise the status of
# the final ocf_log call.
monitor_rotatelogs()
{
    if pgrep -f "$ROTATELOGS.*$CONSOLE$ROTATELOG_SUFFIX" > /dev/null 2>&1; then
        return 0
    fi

    ocf_log warn "A rotatelogs command for $CONSOLE is not running. Restarting it."
    if start_rotatelogs; then
        ocf_log info "Restart rotatelogs process succeeded."
    else
        ocf_log warn "Restart rotatelogs process failed."
    fi
}
# Start rotatelogs in the background as $JBOSS_USER.  Its stdin is
# redirected from "$CONSOLE" and its own output is discarded.
# Returns the status of launching the background job.
start_rotatelogs()
{
    su - -s /bin/sh "$JBOSS_USER" \
        -c "$ROTATELOGS -l \"$CONSOLE$ROTATELOG_SUFFIX\" $ROTATEVALUE" \
        < "$CONSOLE" > /dev/null 2>&1 &
}

# Prepare "$CONSOLE" as a FIFO and start rotatelogs on it.
# Sets the globals CURRENT_ROTATELOG_SUFFIX and (when an existing
# console file has to be moved aside) DATE.
# Returns $OCF_ERR_GENERIC when the rotated log file cannot be touched
# by $JBOSS_USER or when the FIFO cannot be created; otherwise the
# status of start_rotatelogs.
rotate_console()
{
    # Check $CONSOLE$ROTATELOG_SUFFIX is writable or not.
    CURRENT_ROTATELOG_SUFFIX=$(date +"$ROTATELOG_SUFFIX")
    if ! su - -s /bin/sh "$JBOSS_USER" \
        -c "touch \"$CONSOLE$CURRENT_ROTATELOG_SUFFIX\"" > /dev/null 2>&1; then
        ocf_log err "$CONSOLE$CURRENT_ROTATELOG_SUFFIX is not writable."
        return $OCF_ERR_GENERIC
    fi

    # Clean up and set permissions on required files
    if [ -p "$CONSOLE" ]; then
        rm -rf "$CONSOLE"
    elif [ -e "$CONSOLE" ]; then
        DATE=$(date +"%F-%H%M%S")
        ocf_log warn "$CONSOLE already exists. It is saved as $CONSOLE-$DATE"
        mv "$CONSOLE" "$CONSOLE-$DATE"
    fi

    # Without the FIFO there is nothing for rotatelogs to read, so a
    # failed mkfifo is reported instead of being silently ignored.
    if ! mkfifo -m700 "$CONSOLE"; then
        ocf_log err "Failed to create FIFO $CONSOLE."
        return $OCF_ERR_GENERIC
    fi
    # Ownership change is best effort by design.
    chown --dereference "$JBOSS_USER" "$CONSOLE" || true

    start_rotatelogs
}
# Ask the JVM for a thread dump by sending SIGQUIT to every process
# matching $PSTRING.  Returns the status of pkill.
output_thread_dump()
{
    ocf_log info "stop_jboss[$RESOURCE_NAME]: output a JVM thread dump to $CONSOLE"
    pkill -QUIT -f "$PSTRING"
}

# Poll once a second until no process matches $PSTRING.
# arg1 : timeout in seconds; 0 means wait without limit
# arg2 : optional signal name; when given it is sent to the matching
#        processes on every iteration
# Returns 0 once no process matches, 1 when the timeout is reached.
wait_process_exit()
{
    local lapse_sec=0
    local timeout=$1
    local signal=$2

    while pgrep -f "$PSTRING" > /dev/null; do
        sleep 1
        lapse_sec=$((lapse_sec + 1))
        if [ -n "$signal" ]; then
            ocf_log info "stop_jboss[$RESOURCE_NAME]: kill jboss by SIG$signal ($lapse_sec/$timeout)"
            pkill "-$signal" -f "$PSTRING"
        else
            ocf_log info "stop_jboss[$RESOURCE_NAME]: stop NORM $lapse_sec/$timeout"
        fi
        if [ "$timeout" -ne 0 ] && [ "$lapse_sec" -ge "$timeout" ]; then
            return 1
        fi
    done
    return 0
}

# Stop sequence for JBoss 5 and earlier: run bin/shutdown.sh in the
# background, wait $SHUTDOWN_TIMEOUT seconds, then request a thread
# dump and keep sending TERM for up to $KILL_TIMEOUT seconds.
# Returns 0 when the process has exited, 1 when it is still running.
stop_jboss5()
{
    # SHUTDOWN_OPTS is left unquoted on purpose: it holds several
    # whitespace-separated options.
    if [ "$JBOSS_USER" = root ]; then
        "$JBOSS_HOME/bin/shutdown.sh" $SHUTDOWN_OPTS -S \
            >> "$CONSOLE" 2>&1 &
    else
        su - -s /bin/sh "$JBOSS_USER" \
            -c "export JAVA_HOME=\"${JAVA_HOME}\"; \
            export JBOSS_HOME=\"${JBOSS_HOME}\"; \
            \"$JBOSS_HOME/bin/shutdown.sh\" $SHUTDOWN_OPTS -S" \
            >> "$CONSOLE" 2>&1 &
    fi

    if ! wait_process_exit "$SHUTDOWN_TIMEOUT"; then
        output_thread_dump
        if ! wait_process_exit "$KILL_TIMEOUT" TERM; then
            return 1
        fi
    fi
    return 0
}

# Stop sequence for JBoss 6 and later: send TERM, wait
# $SHUTDOWN_TIMEOUT seconds, and request a thread dump on timeout.
# Returns 0 when the process has exited, 1 when it is still running.
stop_jboss6()
{
    pkill -TERM -f "$PSTRING"

    if ! wait_process_exit "$SHUTDOWN_TIMEOUT"; then
        output_thread_dump
        return 1
    fi
    return 0
}
# Print a human-readable status for the JBoss instance.
# Returns $OCF_SUCCESS only when a process matching $PSTRING exists and
# isrunning_jboss succeeds; $OCF_NOT_RUNNING in every other case.
status_jboss()
{
    # No matching process at all: the server is down.
    pgrep -f "$PSTRING" > /dev/null || {
        echo "JBoss process[$RESOURCE_NAME] is not running."
        return $OCF_NOT_RUNNING
    }

    # The process exists; it only counts as running if the status
    # check answers as well.
    if ! isrunning_jboss; then
        echo "JBoss process[$RESOURCE_NAME] is running."
        echo "But, we can not access JBoss web service."
        return $OCF_NOT_RUNNING
    fi

    echo "JBoss[$RESOURCE_NAME] is running."
    return $OCF_SUCCESS
}
See also kill_timeout. -shutdown timeout +shutdown timeout If bin/shutdown.sh doesn't stop the jboss process, then we send it TERM and QUIT signals, intermittently and once a second. After this timeout expires, if the process is still live, we use the KILL signal. See also shutdown_timeout. -stop by signal timeout +stop by signal timeout A user name to start a JBoss. -A user name to start a resource. +A user name to start a resource. URL to test in the monitor operation. -URL to test in the monitor operation. +URL to test in the monitor operation. Home directory of Java. Defaults to the environment variable JAVA_HOME. If it is not set, then define this parameter. -Home directory of Java. +Home directory of Java. Java options. -Java options. +Java options. Home directory of Jboss. -Home directory of Jboss. +Home directory of Jboss. Base directory of JBoss. This parameter is not used in JBoss5. -Base directory of JBoss. +Base directory of JBoss. With this string heartbeat matches for the right process to kill. -pkill/pgrep search string +pkill/pgrep search string JBoss start command. -JBoss start command. +JBoss start command. Start options to start Jboss with, defaults are from the Jboss-Doku. -options for jboss run.sh +options for jboss run.sh Stop options to stop Jboss with. -options for jboss shutdown.sh +options for jboss shutdown.sh Rotate console log flag. -Rotate console log flag +Rotate console log flag Console log rotation value (default is 86400 seconds). -Console log rotation value (default is 86400 seconds) +Console log rotation value (default is 86400 seconds) Rotate console log suffix. -Rotate console log suffix +Rotate console log suffix END return $OCF_SUCCESS } validate_all_jboss() { if [ ! -d "$JAVA_HOME" ]; then ocf_log err "JAVA_HOME does not exist." return $OCF_ERR_INSTALLED fi if [ ! -d "$JBOSS_HOME" ]; then ocf_log err "JBOSS_HOME does not exist." return $OCF_ERR_INSTALLED fi if [ "$JBOSS_VERSION" -gt 5 ]; then if [ ! 
# Requested action (first command-line argument).
COMMAND=$1

JBOSS_VERSION="${OCF_RESKEY_jboss_version-5}"
if ! ocf_is_decimal $JBOSS_VERSION; then
    ocf_log err "Invalid parameter value: jboss_version [$JBOSS_VERSION]"
    # This code runs at script top level, not inside a function, so
    # the script has to be terminated with exit; a top-level return is
    # rejected by bash and execution would continue with the invalid
    # version.
    exit $OCF_ERR_ARGS
fi

# Setting of the default value
if [ "$JBOSS_VERSION" -le 5 ]; then
    OCF_RESKEY_statusurl_default="http://127.0.0.1:8080"
    OCF_RESKEY_pstring_default="java -Dprogram.name=run.sh"
    OCF_RESKEY_run_command_default="${OCF_RESKEY_jboss_home}/bin/run.sh"
    OCF_RESKEY_run_opts_default="-c default"
else
    OCF_RESKEY_jboss_base_dir_default="${OCF_RESKEY_jboss_home}/standalone"
    JBOSS_BASE_DIR="${OCF_RESKEY_jboss_base_dir-${OCF_RESKEY_jboss_base_dir_default}}"
    OCF_RESKEY_statusurl_default="http://127.0.0.1:9990"
    OCF_RESKEY_pstring_default="java.*-Djboss.server.base.dir=${JBOSS_BASE_DIR}( .*)?$"
    OCF_RESKEY_run_command_default="${OCF_RESKEY_jboss_home}/bin/standalone.sh"
    OCF_RESKEY_run_opts_default=""
fi

# Each setting uses the "-" expansion (not ":-"): the default applies
# only when the OCF_RESKEY_* variable is unset, so an explicitly empty
# value is kept.
RESOURCE_NAME="${OCF_RESKEY_resource_name-${OCF_RESOURCE_INSTANCE}}"
CONSOLE="${OCF_RESKEY_console-/var/log/${RESOURCE_NAME}.log}"
SHUTDOWN_TIMEOUT="${OCF_RESKEY_shutdown_timeout-5}"
KILL_TIMEOUT="${OCF_RESKEY_kill_timeout-10}"
JBOSS_USER="${OCF_RESKEY_user-root}"
STATUSURL="${OCF_RESKEY_statusurl-${OCF_RESKEY_statusurl_default}}"
PSTRING="${OCF_RESKEY_pstring-${OCF_RESKEY_pstring_default}}"
RUN_OPTS="${OCF_RESKEY_run_opts-${OCF_RESKEY_run_opts_default}}"
SHUTDOWN_OPTS="${OCF_RESKEY_shutdown_opts--s 127.0.0.1:1099}"
ROTATELOG_FLG="${OCF_RESKEY_rotate_consolelog-false}"
ROTATEVALUE="${OCF_RESKEY_rotate_value-86400}"
ROTATELOG_SUFFIX="${OCF_RESKEY_rotate_logsuffix-.%F}"

# Exactly one argument (the action) is accepted.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi
"$COMMAND" = "meta-data" ]; then metadata_jboss exit $OCF_SUCCESS fi if [ "$COMMAND" = "help" -o "$COMMAND" = "usage" ]; then usage exit $OCF_SUCCESS fi # test if these two are set and if directories exist and if the # required scripts/binaries exist; use OCF_ERR_INSTALLED JAVA_HOME="${OCF_RESKEY_java_home-${JAVA_HOME}}" JAVA_OPTS="${OCF_RESKEY_java_opts}" JBOSS_HOME="${OCF_RESKEY_jboss_home}" RUN_COMMAND="${OCF_RESKEY_run_command-${OCF_RESKEY_run_command_default}}" LSB_STATUS_STOPPED=3 export JAVA_HOME JAVA_OPTS JBOSS_HOME JBOSS_BASE_DIR JAVA=${JAVA_HOME}/bin/java ROTATELOGS="" if ocf_is_true $ROTATELOG_FLG; then # Look for rotatelogs/rotatelogs2 if [ -x /usr/sbin/rotatelogs ]; then ROTATELOGS=/usr/sbin/rotatelogs elif [ -x /usr/sbin/rotatelogs2 ]; then ROTATELOGS=/usr/sbin/rotatelogs2 fi fi validate_all_jboss rc=$? [ "$COMMAND" = "validate-all" ] && exit $rc if [ $rc -ne 0 ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi case "$COMMAND" in start) start_jboss func_status=$? exit $func_status ;; stop) stop_jboss func_status=$? exit $func_status ;; status) status_jboss exit $? ;; monitor) monitor_jboss func_status=$? exit $func_status ;; validate-all) validate_all_jboss exit $? ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/kamailio b/heartbeat/kamailio index e200d8c6c..0968ac68c 100755 --- a/heartbeat/kamailio +++ b/heartbeat/kamailio @@ -1,688 +1,688 @@ #!/bin/bash # # OCF resource agent for Kamailio for pacemaker # # Copyright (c) 2013 FREQUENTIS AG, # Authors: Stefan Wenk # Rainer Brestan # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # OCF input parameters: # OCF_RESKEY_binary # OCF_RESKEY_conffile # OCF_RESKEY_pidfile # OCF_RESKEY_monitoring_ip # OCF_RESKEY_listen_address # OCF_RESKEY_port # OCF_RESKEY_proto # OCF_RESKEY_sipsak # OCF_RESKEY_kamctlrc # OCF_RESKEY_kamuser # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
#######################################################################
# Defaults

RESKEY_binary_default="/usr/sbin/kamailio"
RESKEY_conffile_default="/etc/kamailio/kamailio.cfg"
RESKEY_pidfile_default="/var/run/kamailio_${OCF_RESOURCE_INSTANCE}/kamailio.pid"
RESKEY_monitoring_ip_default=127.0.0.1
RESKEY_port_default=5060
RESKEY_proto_default="udptcp"
RESKEY_sipsak_default="/usr/bin/sipsak"
RESKEY_kamctlrc_default="/etc/kamailio/kamctlrc"
RESKEY_kamuser_default=""

#######################################################################

# Apply a default only when the parameter is unset ("=" without ":"),
# so an explicitly empty value is kept.  The expansions are quoted so
# that the resulting value is not word-split or glob-expanded as an
# argument of the ":" no-op.
: "${OCF_RESKEY_binary=${RESKEY_binary_default}}"
: "${OCF_RESKEY_conffile=${RESKEY_conffile_default}}"
: "${OCF_RESKEY_pidfile=${RESKEY_pidfile_default}}"
: "${OCF_RESKEY_monitoring_ip=${RESKEY_monitoring_ip_default}}"
: "${OCF_RESKEY_port=${RESKEY_port_default}}"
: "${OCF_RESKEY_proto=${RESKEY_proto_default}}"
: "${OCF_RESKEY_sipsak=${RESKEY_sipsak_default}}"
: "${OCF_RESKEY_kamctlrc=${RESKEY_kamctlrc_default}}"
: "${OCF_RESKEY_kamuser=${RESKEY_kamuser_default}}"

#######################################################################
Parameters for a third Kamailio instance: listen_address=192.168.159.128 monitoring_ip=192.168.159.128 proto=tcp port=5080 conffile=/etc/kamailio/kamailio3.cfg kamctlrc="" Resource agent for Kamailio The kamailio binary The kamailio binary The kamailio configuration file name with full path. For example, "/etc/kamailio/kamailio.cfg" , which is the default value. Make sure to use unique names in case of having multiple instances. Configuration file name with full path The kamailio PID file. The directory used must be writable by kamailio process user. Be sure to use unique name for running more than one instance. Try to use absolute path names. If empty, resource agent create a unique directory from the resource instance name for the PID file and assign it to the process user. PID file SIP IP Address of the kamailio instance used for SIP OPTIONS polling monitoring. Usually the same IP address value as for parameter listen_address should be provided. In order to respond with a 200 OK response to the SIP OOPTION requests, the kamailio.cfg file needs to contain following section: Note: The following "kamailio.cfg" code sniplet is part of an XML section. Therefore it contains two & characters, which need to be replaced with two ampersand characters within "kamailio.cfg": if (is_method("OPTIONS") && ($ru=~"sip:monitor@.*")) { ## ## If the method is an OPTIONS we are simply going to respond ## with a 200 OK. # xlog("L_INFO", "Method is an OPTIONS, probably just monitoring\n"); sl_send_reply("200", "Kamailio is alive"); exit; } Monitoring IP address used for SIP OPTIONS polling. SIP IP address the kamailio will listen on. Listening SIP address SIP port for the kamailio instance. SIP Port The protocol used for SIP proto = udp|tcp|udptcp. protocol The installation path of the sipsak tool, which is used for monitoring Kamailio via SIP OPTIONS polling. protocol The location of the "kamctlrc" file for the Kamailio instance. 
The file "kamctlrc" is the Kamailio configuration file for its "kamctl" control tool. This parameter only needs to be provided in case of using multiple Kamailio server instances on a single cluster node: In case that the parameter "kamctlrc" is not empty, this ressource agent monitors the health state of the Kamailio server via the command "kamctl monitor 1". This setting is recommended in case of using a single Kamailio server instance. In case that the parameter "kamctlrc" is empty, the ressource agent does not monitor the health state of the Kamailio server instance via the "kamctl" command. Please note that the "kamctl" control command of Kamailio 4.x does not support running multiple Kamailio instances on one host. Nevertheless this resource agent does allow multiple Kamailio instances per host. The result of the "kamctl" limitation in terms of number of Kamailio server instances is that the health check via "kamctl monitor 1" can be configured for a single Kamailio instance only. Please refer to the long description of this resoure agent for an example of parameter combinations in case that multiple instances are to be configured per cluster node. protocol The user account for kamailio process to run with. Uses the current user, if not specified or empty. There is no check, if running kamailio with the specified user account is possible. kamailio user END exit $OCF_SUCCESS } ####################################################################### ### #Check if a process with given PID is running # Parameter 1: PID ### isRunning_PID() { kill -s 0 "$1" > /dev/null 2>&1 } ### #Check if an instance with given command line is running # Parameter 1: command line. ### isRunning_cmd() { pkill -s 0 "$1" > /dev/null 2>&1 } ### # Formats the result of a command. # # Parameter 1: Exit status. # Parameter 2: Standard output (stdout). # Parameter 3: Error output (stderr). # Returns: Formatted result. 
kamailio_format_result() {
    # Prints "exit status: N[, value: OUT][, error: ERR]" on a single line.
    # Empty stdout/stderr arguments are left out of the output.
    local exitstatus="$1"
    local value="$2"
    local error="$3"

    printf '%s' "exit status: ${exitstatus}"
    if [ -n "$value" ]; then
        printf '%s' ", value: ${value}"
    fi

    if [ -n "$error" ]; then
        printf '%s' ", error: ${error}"
    fi
    echo
}

###
# Put the command line, how the kamailio process is started according
# to the configured parameters, into the variable "kam_cmd".
#
# Globals written: listen_param, listen_param1, listen_param2, kam_cmd
###
kamailio_cmd() {
    case ${OCF_RESKEY_proto} in
        udp)
            listen_param="-T -l udp:${OCF_RESKEY_listen_address}:${OCF_RESKEY_port} -l udp:127.0.0.1:${OCF_RESKEY_port}"
            ;;
        tcp)
            listen_param="-l tcp:${OCF_RESKEY_listen_address}:${OCF_RESKEY_port} -l tcp:127.0.0.1:${OCF_RESKEY_port}"
            ;;
        udptcp)
            listen_param1="-l udp:${OCF_RESKEY_listen_address}:${OCF_RESKEY_port} -l udp:127.0.0.1:${OCF_RESKEY_port}"
            listen_param2="-l tcp:${OCF_RESKEY_listen_address}:${OCF_RESKEY_port} -l tcp:127.0.0.1:${OCF_RESKEY_port}"
            listen_param="${listen_param1} ${listen_param2}"
            ;;
        *)
            listen_param="-T"
            ;;
    esac

    kam_cmd="${OCF_RESKEY_binary} -P ${OCF_RESKEY_pidfile} -f ${OCF_RESKEY_conffile} $listen_param"
}

###
# Gets the PID for the running Kamailio instance.
#
# Returns: The variable $PID contains the found PID value or an empty string.
# Exit Status: Zero if the PID file was found and this process runs under
#              the command line parameters of our instance.
#           1) if the PID file is not present and no process running under
#              our command line options is active.
#           2) in all other fatal cases, which the caller reports as
#              OCF_ERR_GENERIC. These are the following cases:
#                a) The PID file contains a PID value which does not match
#                   our instance
#                b) The PID file contains an empty string in its first line
#                c) There is no PID file, but some processes of our
#                   instance are still active
kamailio_get_pid() {
    # Start from the documented "empty string" so that a value left over from
    # an earlier call is never reported when no PID file exists.
    PID=""

    if [ -f "${OCF_RESKEY_pidfile}" ]; then
        PID=$(head -n 1 "${OCF_RESKEY_pidfile}")
        if [ -n "$PID" ]; then
            # Cross check if the PID file really contains a process of our
            # kamailio instance: the oldest process matching our command line
            # must be the one recorded in the PID file.
            kamailio_cmd
            CROSSPID=$(pgrep -o -f "${kam_cmd}")
            if [ x"$PID" = x"$CROSSPID" ]; then
                #ocf_log debug "Found kamailio process PID with value: $PID."
                return 0
            fi
            #ocf_log debug "PID file does not contain a PID of a $OCF_RESKEY_binary process!"
            return 2
        fi

        # PID file does not contain a valid PID
        rm -f "${OCF_RESKEY_pidfile}"
        return 2
    fi

    # No PID file found!
    # Check if still a process exists even though we don't have the PID any longer.
    # pgrep's PID listing is discarded: only its exit status is of interest.
    kamailio_cmd
    if pgrep -f "${kam_cmd}" >/dev/null; then
        ocf_log info "PID file does not contain a valid PID, but kamailio process is still active"
        return 2
    fi

    ocf_log info "No PID file found and our kamailio instance is not active"
    return 1
}

###
# Sends one SIP OPTIONS request to sip:monitor@<monitoring_ip>:<port>.
#
# Parameter 1: transport handed to sipsak ("udp" or "tcp").
# Parameter 2: file to which the stderr output of sipsak is appended.
# Exit Status: exit status of sipsak.
###
kamailio_sipsak_probe() {
    "${OCF_RESKEY_sipsak}" \
        -s "sip:monitor@${OCF_RESKEY_monitoring_ip}:${OCF_RESKEY_port}" \
        -H localhost --transport "$1" >/dev/null 2>>"$2"
}

###
# Determines the state of the kamailio instance.
#
# Checks, in this order: the PID file (kamailio_get_pid), whether the recorded
# PID is alive, "kamctl monitor 1" (only if the parameter kamctlrc is not
# empty) and finally a SIP OPTIONS request sent with sipsak.
#
# Exit Status: OCF_SUCCESS, OCF_NOT_RUNNING or OCF_ERR_GENERIC.
###
kamailio_status() {
    local not_running_log_level="warn"
    local errorfile error output
    local RET rc result

    # During "start" a stopped instance is the expected state, so it is only
    # logged at debug level.
    if [ "$__OCF_ACTION" = "start" ]; then
        not_running_log_level="debug"
    fi

    kamailio_get_pid >/dev/null
    RET=$?
    if [ $RET -ne 0 ]; then
        if [ $RET -eq 2 ]; then
            ocf_log $not_running_log_level "PID file does not contain a PID of a ${OCF_RESKEY_binary} process!"
            return $OCF_ERR_GENERIC
        fi
        return $OCF_NOT_RUNNING
    fi

    PID=$(head -n 1 "${OCF_RESKEY_pidfile}")
    isRunning_PID "$PID"
    RET=$?
    if [ "$RET" -ne 0 ]; then
        ocf_log $not_running_log_level "PID from $PID from ${OCF_RESKEY_pidfile} not running"
        rm -f "${OCF_RESKEY_pidfile}"
        return $OCF_NOT_RUNNING
    fi

    rc=0
    # In case that OCF_RESKEY_kamctlrc is set we perform a health check via "kamctl monitor 1"
    if [ -n "${OCF_RESKEY_kamctlrc}" ]; then
        # PID is running now but it is not safe to check via kamctl without care, because
        # the implementation analysis in the case that we kill all running processes
        # shows that in case that the fifo cannot be read, then kamctl blocks. This needs
        # to be avoided.
        # In order to be on the safe side, we run this check therefore under "timeout" control:
        timeout 3 kamctl monitor 1 | grep "Up since" >/dev/null
        rc=$?
    fi

    if [ $rc -ne 0 ]; then
        ocf_log $not_running_log_level "Kamailio is not up according to kamctl monitor!"
        return $OCF_NOT_RUNNING
    fi

    errorfile=$(mktemp)
    if [ -z "$errorfile" ]; then
        ocf_log err "Could not create a temporary file for the sipsak error output"
        return $OCF_ERR_GENERIC
    fi

    # sipsak's stdout is discarded by the probe, so there is never a value to
    # report; the variable is kept for kamailio_format_result.
    output=""
    case ${OCF_RESKEY_proto} in
        tcp)
            kamailio_sipsak_probe tcp "$errorfile"
            result=$?
            ;;
        udptcp)
            # Both transports have to answer; udp is only tried if tcp succeeded.
            kamailio_sipsak_probe tcp "$errorfile"
            result=$?
            if [ $result -eq 0 ]; then
                kamailio_sipsak_probe udp "$errorfile"
                result=$?
            fi
            ;;
        *)
            # "udp" and every unknown protocol value are probed via udp.
            kamailio_sipsak_probe udp "$errorfile"
            result=$?
            ;;
    esac

    error=$(cat "$errorfile")
    rm -f "$errorfile"

    if [ $result -ne 0 ]; then
        ocf_log $not_running_log_level "Kamailio is running, but not functional as sipsak ${OCF_RESKEY_proto} failed with $(kamailio_format_result $result "$output" "$error")"
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

###
# Monitor action: identical to the status action.
###
kamailio_monitor() {
    kamailio_status
}

###
# Start action.
#
# Creates the PID file directory if needed, starts kamailio (through "su" if
# the parameter kamuser is set) and then waits until first the PID file and
# afterwards the monitor operation report a running instance. Both waits
# have no own time limit; the loops only end on success.
#
# Exit Status: OCF_SUCCESS or OCF_ERR_GENERIC.
###
kamailio_start() {
    local errorfile error output piddir
    local result

    if kamailio_status
    then
        ocf_log info "kamailio already running."
        return $OCF_SUCCESS
    fi

    # if pidfile directory does not exist, create it with kamailio process owner
    piddir=$(dirname "${OCF_RESKEY_pidfile}")
    if [ ! -d "$piddir" ]; then
        mkdir -p "$piddir"
        if [ "$OCF_RESKEY_kamuser" != "" ]; then
            chown "${OCF_RESKEY_kamuser}" "$piddir"
        fi
    fi

    kamailio_cmd
    if [ "$OCF_RESKEY_kamuser" != "" ]; then
        kam_cmd="su -s /bin/bash $OCF_RESKEY_kamuser -c \"$kam_cmd\""
    fi

    ocf_log info "start kamailio with $kam_cmd."
    errorfile=$(mktemp)
    if [ -z "$errorfile" ]; then
        ocf_log err "Could not create a temporary file for the kamailio error output"
        return $OCF_ERR_GENERIC
    fi

    # eval is required because kam_cmd may contain the quoted "su -c" argument.
    output=$(eval "${kam_cmd}" 2>>"$errorfile")
    result=$?
    error=$(cat "$errorfile")
    rm -f "$errorfile"

    if [ $result -eq 0 ]; then
        # Wait until the PID file shows up and belongs to our instance.
        result=1
        while [ $result -ne 0 ]; do
            sleep 1
            kamailio_get_pid >/dev/null
            result=$?
        done

        ocf_log info "kamailio instance PID=$PID started."
        # check with monitor operation if running correctly
        result=$OCF_ERR_GENERIC
        while [ $result -ne $OCF_SUCCESS ]; do
            sleep 1
            kamailio_monitor
            result=$?
            ocf_log info "monitor in start returned $result"
        done

        ocf_log info "kamailio started successful."
    else
        ocf_log err "kamailio instance could not be started, $(kamailio_format_result $result "$output" "$error")"
        result=$OCF_ERR_GENERIC
    fi

    return $result
}

###
# Removes the PID file of a stopped instance.
#
# In case of a not specified pidfile (the default path is in use) the
# directory created by the start action is deleted, otherwise only the
# pidfile itself.
###
kamailio_remove_pidfile() {
    local piddir

    if [ "${OCF_RESKEY_pidfile}" = "${RESKEY_pidfile_default}" ]; then
        piddir=$(dirname "${OCF_RESKEY_pidfile}")
        rm -rf "$piddir"
    else
        rm -f "${OCF_RESKEY_pidfile}"
    fi
}

###
# Stop action.
#
# Sends SIGTERM to all processes with our command line, waits up to
# KAMAILIO_STOP_TIMEOUT seconds (operation timeout minus 7 seconds, or 20 if
# no timeout is known) and falls back to SIGKILL afterwards.
#
# Exit Status: OCF_SUCCESS, or OCF_ERR_GENERIC if the instance survived SIGKILL.
###
kamailio_stop() {
    local TRIES=0
    local result=$OCF_SUCCESS
    local RET

    kamailio_cmd

    ocf_log info "Stopping kamailio by sending SIGTERM to ${kam_cmd}"
    pkill -SIGTERM -x -f "${kam_cmd}"
    if [ $? -eq 1 ]; then
        # already stopped. no processes found
        kamailio_remove_pidfile
        return $result
    fi

    if [ "$OCF_RESKEY_CRM_meta_timeout" != "" ]; then
        KAMAILIO_STOP_TIMEOUT=$(( (OCF_RESKEY_CRM_meta_timeout/1000) - 7 ))
    else
        KAMAILIO_STOP_TIMEOUT=20
    fi

    while isRunning_cmd "${kam_cmd}" && [ "$TRIES" -lt "${KAMAILIO_STOP_TIMEOUT}" ]
    do
        sleep 1
        ocf_log info "kamailio ${kam_cmd} is still running after SIGTERM"
        TRIES=$((TRIES + 1))
    done

    isRunning_cmd "${kam_cmd}"
    RET=$?

    if [ "$RET" -eq 0 ]; then
        ocf_log info "Killing ${kam_cmd} with SIGKILL"
        TRIES=0
        pkill -SIGKILL -x -f "${kam_cmd}" > /dev/null 2>&1

        while isRunning_cmd "${kam_cmd}" && [ "$TRIES" -lt 3 ]
        do
            sleep 1
            ocf_log info "kamailio ${kam_cmd} is still running after SIGKILL"
            TRIES=$((TRIES + 1))
        done

        isRunning_cmd "${kam_cmd}"
        RET=$?
        if [ "$RET" -eq 0 ]; then
            ocf_log fatal "kamailio is still running even after SIGKILL"
            result=$OCF_ERR_GENERIC
        fi
    else
        ocf_log info "${kam_cmd} has stopped."
    fi

    kamailio_remove_pidfile
    return $result
}

###
# Validate-all action: checks the installation and the configured parameters.
#
# Exit Status: OCF_SUCCESS, OCF_ERR_CONFIGURED, or the "not installed" code
#              if the kamailio binary, sipsak or the kamctlrc file is missing.
###
kamailio_validate_all() {
    local out retcode
    # NOTE(review): OCF_NOT_INSTALLED does not appear to be among the return
    # codes provided by ocf-shellfuncs (the lxc and pgagent agents use
    # OCF_ERR_INSTALLED) - confirm. If it is unset, "return $OCF_NOT_INSTALLED"
    # is a bare "return" and reports the status of the preceding ocf_log
    # instead of an error, so fall back to OCF_ERR_INSTALLED (5 in the OCF
    # specification).
    local not_installed="${OCF_NOT_INSTALLED:-${OCF_ERR_INSTALLED:-5}}"

    # Check if kamailio configuration is valid before starting the server
    if [ ! -f "$OCF_RESKEY_binary" ]; then
        ocf_log err "File OCF_RESKEY_binary [${OCF_RESKEY_binary}] does not exist!"
        return $not_installed
    fi

    # NOTE(review): "-c" is run without "-f ${OCF_RESKEY_conffile}", so this
    # presumably checks kamailio's built-in default configuration file rather
    # than the configured one - confirm whether that is intended.
    out=$("$OCF_RESKEY_binary" -c 2>&1 > /dev/null)
    retcode=$?
    if [ "$retcode" -ne '0' ]; then
        ocf_log info "Not starting kamailio: $OCF_RESKEY_binary does not start!"
        return $OCF_ERR_CONFIGURED
    fi

    case $OCF_RESKEY_monitoring_ip in
        "")
            ocf_log err "Required parameter OCF_RESKEY_monitoring_ip is missing!"
            return $OCF_ERR_CONFIGURED
            ;;
        [0-9]*.[0-9]*.[0-9]*.[0-9]*)
            : OK
            ;;
        *)
            ocf_log err "Parameter OCF_RESKEY_monitoring_ip [$OCF_RESKEY_monitoring_ip] is not an IP address!"
            return $OCF_ERR_CONFIGURED
            ;;
    esac

    case $OCF_RESKEY_listen_address in
        "")
            # Name the parameter; its (empty) value says nothing.
            ocf_log err "Required parameter OCF_RESKEY_listen_address is missing!"
            return $OCF_ERR_CONFIGURED
            ;;
        [0-9]*.[0-9]*.[0-9]*.[0-9]*)
            : OK
            ;;
        *)
            ocf_log err "Parameter OCF_RESKEY_listen_address [$OCF_RESKEY_listen_address] not an IP address!"
            return $OCF_ERR_CONFIGURED
            ;;
    esac

    if [ ! -f "${OCF_RESKEY_sipsak}" ]; then
        ocf_log err "sipsak [${OCF_RESKEY_sipsak}] does not exist!"
        return $not_installed
    fi

    if [ -n "${OCF_RESKEY_kamctlrc}" ]; then
        if [ ! -f "${OCF_RESKEY_kamctlrc}" ]; then
            ocf_log err "kamctlrc file [${OCF_RESKEY_kamctlrc}] does not exist!"
            return $not_installed
        fi
    else
        ocf_log debug "No monitoring via kamctl monitor because the parameter [kamctlrc] is empty."
    fi

    if [ ! -f "${OCF_RESKEY_conffile}" ]; then
        ocf_log err "Kamailio configuration file provided in the parameter conffile [${OCF_RESKEY_conffile}] does not exist!"
        return $OCF_ERR_CONFIGURED
    fi

    case $OCF_RESKEY_proto in
        "")
            ocf_log err "Parameter OCF_RESKEY_proto is empty!"
            return $OCF_ERR_CONFIGURED
            ;;
        udp|tcp|udptcp)
            : OK
            ;;
        *)
            ocf_log err "Parameter value $OCF_RESKEY_proto for parameter [proto] not yet supported!"
            return $OCF_ERR_CONFIGURED
            ;;
    esac

    return $OCF_SUCCESS
}

# Exactly one argument (the action) is expected.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# Dispatch the requested action; the exit status of the action function
# becomes the exit status of the agent.
case $__OCF_ACTION in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    start|stop|status|monitor)
        kamailio_${__OCF_ACTION}
        ;;
    validate-all)
        kamailio_validate_all
        ;;
    notify)
        exit $OCF_SUCCESS
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
#   reload)     #Not supported by Kamailio, but not needed by pacemaker
#       ;;
#   recover     #Not needed by pacemaker
#       ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

exit $?
diff --git a/heartbeat/lxc b/heartbeat/lxc
index d2f544f27..b582cb229 100755
--- a/heartbeat/lxc
+++ b/heartbeat/lxc
@@ -1,374 +1,374 @@
#!/bin/bash
# Should now conform to guidelines:
# https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc
#
# LXC (Linux Containers) OCF RA.
# Used to cluster enable the start, stop and monitoring of a LXC container.
#
# Copyright (c) 2011 AkurIT.com.au, Darren Thompson
# All Rights Reserved.
#
# Without limiting the rights of the original copyright holders
# This resource is licensed under GPL version 2
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file.
Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # OCF instance parameters # OCF_RESKEY_container # OCF_RESKEY_config # OCF_RESKEY_log # OCF_RESKEY_use_screen # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_log_default="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.log" OCF_RESKEY_use_screen_default="false" : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_use_screen=${OCF_RESKEY_use_screen_default}} # Set default TRANS_RES_STATE (temporary file to "flag" if resource was stated but not stopped) TRANS_RES_STATE="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.state" meta_data() { cat < - + 0.1 Allows LXC containers to be managed by the cluster. If the container is running "init" it will also perform an orderly shutdown. It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" I have absolutly no idea how this is done with 'upstart' or 'systemd', YMMV if your container is using one of them. Manages LXC containers The unique name for this 'Container Instance' e.g. 'test1'. Container Name Absolute path to the file holding the specific configuration for this container e.g. '/etc/lxc/test1/config'. The LXC config file. Absolute path to the container log file Container log file Provides the option of capturing the 'root console' from the container and showing it on a separate screen. 
To see the screen output run 'screen -r {container name}' The default value is set to 'false', change to 'true' to activate this option Use 'screen' for container 'root console' output END } LXC_usage() { cat <${CGROUP_MOUNT_POINT}/notify_on_release return 0 } LXC_start() { # put this here as it's so long it gets messy later!!! if ocf_is_true $OCF_RESKEY_use_screen; then STARTCMD="screen -dmS ${OCF_RESKEY_container} lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log}" else STARTCMD="lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log} -d" fi LXC_status if [ $? -eq $OCF_SUCCESS ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already running" ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi ocf_log info "Starting" ${OCF_RESKEY_container} ocf_run ${STARTCMD} || exit $OCF_ERR_GENERIC # Spin on status, wait for the cluster manager to time us out if # we fail while ! LXC_status; do ocf_log info "Container ${OCF_RESKEY_container} has not started, waiting" sleep 1 done ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } LXC_stop() { local shutdown_timeout local now LXC_status if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi # If the container is running "init" and is able to perform and orderly shutdown, then it should be done. # It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. 
# On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" declare -i PID=0 declare CMD= # LXC prior 1.0.0 if ocf_version_cmp "`lxc_version`" 1.0.0 ; then # This should work for traditional 'sysvinit' and 'upstart' lxc-ps --name "${OCF_RESKEY_container}" -- -C init -o pid,comm |while read CN PID CMD ;do [ $PID -gt 1 ] || continue [ "$CMD" = "init" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\"" kill -PWR $PID done # This should work for containers using 'systemd' instead of 'init' lxc-ps --name "${OCF_RESKEY_container}" -- -C systemd -o pid,comm |while read CN PID CMD ;do [ $PID -gt 1 ] || continue [ "$CMD" = "systemd" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\"" kill -PWR $PID done else PID=$(lxc-info --name "${OCF_RESKEY_container}" -p -H) # If there is no PID the container seems to be down which # shouldn't happen. if [ $PID -eq 0 ]; then ocf_log err "${OCF_RESKEY_container} seems to run, but has no PID." exit $OCF_ERR_GENERIC fi # Rescue me. if [ $PID -eq 1 ]; then ocf_log err "${OCF_RESKEY_container} seems to run with PID 1 which cannot be." 
PID=0 CMD= else CMD=$(ps -o comm= -p $PID) fi # This should work for traditional 'sysvinit' and 'upstart' if [ "$CMD" = "init" ]; then ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\"" kill -PWR $PID fi # This should work for containers using 'systemd' instead of 'init' if [ "$CMD" = "systemd" ]; then ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\"" kill -PWR $PID fi fi # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds now=$(date +%s) shutdown_timeout=$(( $now + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $now -lt $shutdown_timeout ]; do LXC_status status=$? case $status in "$OCF_NOT_RUNNING") ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS ;; "$OCF_SUCCESS") # Container is still running, keep waiting (until # shutdown_timeout expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac now=$(date +%s) done # If the container is still running, it will be stopped now. regardless of state! 
# LXC prior 1.0.0 if ocf_version_cmp "`lxc_version`" 1.0.0 ; then ocf_run lxc-stop -n ${OCF_RESKEY_container} || exit $OCF_ERR_GENERIC else ocf_run lxc-stop -n ${OCF_RESKEY_container} -k || exit $OCF_ERR_GENERIC fi ocf_log info "Container" ${OCF_RESKEY_container} "stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS } LXC_status() { # run lxc-info with -s option for LXC-0.7.5 or later local lxc_info_opt="-s" ocf_version_cmp "`lxc_version`" 0.7.5 && lxc_info_opt="" S=`lxc-info $lxc_info_opt -n ${OCF_RESKEY_container}` ocf_log debug "State of ${OCF_RESKEY_container}: $S" if [[ "${S##* }" = "RUNNING" ]] ; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } LXC_monitor() { LXC_status && return $OCF_SUCCESS if [ -f $TRANS_RES_STATE ]; then ocf_log err "${OCF_RESKEY_container} is not running, but state file ${TRANS_RES_STATE} exists." exit $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } LXC_validate() { # Quick check that all required attributes are set if [ -z "${OCF_RESKEY_container}" ]; then ocf_log err "LXC container name not set!" exit $OCF_ERR_CONFIGURED fi if [ -z "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration filename name not set!" exit $OCF_ERR_CONFIGURED fi # Tests that apply only to non-probes if ! ocf_is_probe; then if ! [ -f "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration file \"${OCF_RESKEY_config}\" missing or not found!" 
exit $OCF_ERR_INSTALLED fi if ocf_is_true $OCF_RESKEY_use_screen; then check_binary screen fi check_binary lxc-start check_binary lxc-stop if ocf_version_cmp "`lxc_version`" 1.0.0 ; then check_binary lxc-ps fi check_binary lxc-info fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then LXC_usage exit $OCF_ERR_ARGS fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) LXC_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test LXC_validate case $__OCF_ACTION in start) LXC_start;; stop) LXC_stop;; status) LXC_status;; monitor) LXC_monitor;; validate-all) ;; *) LXC_usage ocf_log err "$0 was called with unsupported arguments: $*" exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/metadata.rng b/heartbeat/metadata.rng new file mode 100644 index 000000000..ac534db82 --- /dev/null +++ b/heartbeat/metadata.rng @@ -0,0 +1,91 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + boolean + string + second + integer + + + + + select + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 1 + + + + + + Master + Slave + + + + diff --git a/heartbeat/nfsnotify b/heartbeat/nfsnotify index b8dc1e408..6d4961f2b 100755 --- a/heartbeat/nfsnotify +++ b/heartbeat/nfsnotify @@ -1,315 +1,315 @@ #!/bin/bash # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. 
Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/ocf-directories ####################################################################### sbindir=$HA_SBIN_DIR if [ -z "$sbindir" ]; then sbindir=/usr/sbin fi SELINUX_ENABLED=-1 NFSNOTIFY_TMP_DIR="${HA_RSCTMP}/nfsnotify_${OCF_RESOURCE_INSTANCE}/" HA_STATD_PIDFILE="$NFSNOTIFY_TMP_DIR/rpc.statd_${OCF_RESOURCE_INSTANCE}.pid" HA_STATD_PIDFILE_PREV="$NFSNOTIFY_TMP_DIR/rpc.statd_${OCF_RESOURCE_INSTANCE}.pid.prev" STATD_PATH="/var/lib/nfs/statd" SM_NOTIFY_BINARY="${sbindir}/sm-notify" IS_RENOTIFY=0 meta_data() { cat < - + 1.0 This agent sends NFSv3 reboot notifications to clients which informs clients to reclaim locks. sm-notify reboot notifications Comma separated list of floating IP addresses or host names that clients use to access the nfs service. This will be used to set the source address and mon_name of the SN_NOTIFY reboot notifications. source IP addresses Additional arguments to send to the sm-notify command. By default this agent will always set sm-notify's '-f' option. When the source_host option is set, the '-v' option will be used automatically to set the proper source address. Any additional sm-notify arguments set with this option will be used in addition to the previous default arguments. sm-notify arguments END } v3notify_usage() { cat < /dev/null 2>&1 if [ $? 
-eq 0 ]; then # it is useful to know if sm-notify processes were actually left around # or not during the stop/start operation. Whether this condition is true # or false does not indicate a failure. It does indicate that # there are probably some unresponsive nfs clients out there that are keeping # the sm-notify processes retrying. ocf_log info "previous sm-notify processes terminated before $__OCF_ACTION action." fi } v3notify_stop() { killall_smnotify rm -f $HA_STATD_PIDFILE_PREV > /dev/null 2>&1 mv $HA_STATD_PIDFILE $HA_STATD_PIDFILE_PREV > /dev/null 2>&1 return $OCF_SUCCESS } check_statd_pidfile() { local binary="rpc.statd" local pidfile="$HA_STATD_PIDFILE" ocf_log debug "Checking status for ${binary}." if [ -e "$pidfile" ]; then cat /proc/$(cat $pidfile)/cmdline 2>/dev/null | grep -a "${binary}" > /dev/null 2>&1 if [ $? -eq 0 ]; then return $OCF_SUCCESS fi ocf_exit_reason "$(cat $pidfile) for $binary is no longer running, sm-notify needs to re-notify clients" return $OCF_ERR_GENERIC fi # if we don't have a pid file for rpc.statd, we have not yet sent the notifications return $OCF_NOT_RUNNING } write_statd_pid() { local binary="rpc.statd" local pidfile="$HA_STATD_PIDFILE" local pid pid=$(pgrep ${binary}) case $? in 0) ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}." mkdir -p $(dirname $pidfile) echo "$pid" > $pidfile return $OCF_SUCCESS;; 1) rm -f "$pidfile" > /dev/null 2>&1 ocf_log info "$binary is not running" return $OCF_NOT_RUNNING;; *) rm -f "$pidfile" > /dev/null 2>&1 ocf_exit_reason "Error encountered detecting pid status of $binary" return $OCF_ERR_GENERIC;; esac } copy_statd() { local src=$1 local dest=$2 if ! 
[ -d "$dest" ]; then mkdir -p "$dest" fi cp -rpfn $src/sm $src/sm.bak $src/state $dest > /dev/null 2>&1 # make sure folder ownership and selinux lables stay consistent [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$dest" [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$dest" } v3notify_start() { local rc=$OCF_SUCCESS local cur_statd local statd_backup local is_renotify=0 # monitor, see if we need to notify or not v3notify_monitor if [ $? -eq 0 ]; then return $OCF_SUCCESS fi # kill off any other sm-notify processes that might already be running. killall_smnotify # record the pid of rpc.statd. if this pid ever changes, we have to re-notify write_statd_pid rc=$? if [ $rc -ne 0 ]; then return $rc fi # if the last time we ran nfs-notify, it was with the same statd process, # consider this a re-notification. During re-notifications we do not let the # sm-notify binary have access to the real statd directory. if [ "$(cat $HA_STATD_PIDFILE)" = "$(cat $HA_STATD_PIDFILE_PREV 2>/dev/null)" ]; then ocf_log info "Renotifying clients" is_renotify=1 fi statd_backup="$STATD_PATH/nfsnotify.bu" copy_statd "$STATD_PATH" "$statd_backup" if [ -z "$OCF_RESKEY_source_host" ]; then if [ "$is_renotify" -eq 0 ]; then cur_statd="$STATD_PATH" else cur_statd="$statd_backup" fi ocf_log info "sending notifications on default source address." $SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -P $cur_statd if [ $? -ne 0 ]; then ocf_exit_reason "sm-notify execution failed, view syslog for more information" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS fi # do sm-notify for each ip for ip in `echo ${OCF_RESKEY_source_host} | sed 's/,/ /g'`; do # have the first sm-notify use the actual statd directory so the # notify list can be managed properly. if [ "$is_renotify" -eq 0 ]; then cur_statd="$STATD_PATH" # everything after the first notify we are considering a renotification # which means we don't use the real statd directory. 
is_renotify=1 else # use our copied statd directory for the remaining ip addresses cur_statd="$STATD_PATH/nfsnotify_${OCF_RESOURCE_INSTANCE}_${ip}" copy_statd "$statd_backup" "$cur_statd" fi ocf_log info "sending notifications with source address $ip" $SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -v $ip -P "$cur_statd" if [ $? -ne 0 ]; then ocf_exit_reason "sm-notify with source host set to [ $ip ] failed. view syslog for more information" return $OCF_ERR_GENERIC fi done return $OCF_SUCCESS } v3notify_monitor() { # verify rpc.statd is up, and that the rpc.statd pid is the same one we # found during the start. otherwise rpc.statd recovered and we need to notify # again. check_statd_pidfile } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) v3notify_usage exit $OCF_SUCCESS;; *) ;; esac which restorecon > /dev/null 2>&1 && selinuxenabled SELINUX_ENABLED=$? if [ $SELINUX_ENABLED -eq 0 ]; then export SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')" fi case $__OCF_ACTION in start) v3notify_start;; stop) v3notify_stop;; monitor) v3notify_monitor;; validate-all) v3notify_validate;; *) v3notify_usage exit $OCF_ERR_UNIMPLEMENTED;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/pgagent b/heartbeat/pgagent old mode 100644 new mode 100755 index 58054a7c3..3d9943b9b --- a/heartbeat/pgagent +++ b/heartbeat/pgagent @@ -1,139 +1,139 @@ #!/bin/sh # # High-Availability pgagent OCF resource agent # # Description: Starts/stops pgagent # Author: Oleg Selin # License: GNU General Public License (GPL) # # OCF parameters: # OCF_RESKEY_connection_string # OCF_RESKEY_user # OCF_RESKEY_options # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs OCF_RESKEY_executable_default="`which pgagent`" OCF_RESKEY_connection_string_default="user=postgres host=/var/run/postgresql" OCF_RESKEY_user_default="postgres" OCF_RESKEY_options_default="-r 1 -t 1" : ${OCF_RESKEY_executable="${OCF_RESKEY_executable_default}"} : ${OCF_RESKEY_connection_string="${OCF_RESKEY_connection_string_default}"} : ${OCF_RESKEY_user="${OCF_RESKEY_user_default}"} : ${OCF_RESKEY_options="${OCF_RESKEY_options_default}"} pgagent_validate_all() { check_binary pgagent ocf_log debug "executable: '$OCF_RESKEY_executable'" ocf_log debug "connection string: '$OCF_RESKEY_connection_string'" ocf_log debug "user: '$OCF_RESKEY_user'" ocf_log debug "options: '$OCF_RESKEY_options'" if [ -z "$OCF_RESKEY_connection_string" ]; then ocf_log err "Connection string is not configured!" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_user" ]; then ocf_log err "User is not configured!" exit $OCF_ERR_CONFIGURED fi getent passwd $OCF_RESKEY_user >/dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_log err "User $OCF_RESKEY_user doesn't exist"; return $OCF_ERR_CONFIGURED; fi return $OCF_SUCCESS } pgagent_start() { pgagent_validate_all nohup su - $OCF_RESKEY_user -c "'$OCF_RESKEY_executable' $OCF_RESKEY_options '$OCF_RESKEY_connection_string'" > /dev/null 2>&1 & sleep 1 if [ -n pgagent_monitor ]; then return $OCF_SUCCESS fi return $OCF_ERR_GENERIC } pgagent_stop() { pgagent_validate_all pid=`pgrep -f -x -U $OCF_RESKEY_user "$OCF_RESKEY_executable $OCF_RESKEY_options $OCF_RESKEY_connection_string"` if [ -n "$pid" ]; then ocf_run kill $pid || return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } pgagent_monitor() { if [ -z "$OCF_RESKEY_executable" ]; then return $OCF_ERR_INSTALLED fi ocf_run pgrep -f -x -U "$OCF_RESKEY_user" "$OCF_RESKEY_executable $OCF_RESKEY_options $OCF_RESKEY_connection_string" || return $OCF_NOT_RUNNING return $OCF_SUCCESS } meta_data() { cat < - + 1.0 This is a pgagent Resource Agent. 
Controls pgagent Connection string for pgagent. pgagent connection string User to run pgagent as. User to run pgagent Options for pgagent. pgagent run options, see pgagent --help for details END } pgagent_usage() { cat <&2 usage: $CMD {start|stop|status|monitor|meta-data|validate-all} $CMD is used to temporarily block ports using iptables. It can be used to blackhole a port before bringing up an IP address, and enable it after a service is started. To do that for samba, the following can be used: crm configure < 1.0 Resource script for portblock. It is used to temporarily block ports using iptables. In addition, it may allow for faster TCP reconnects for clients on failover. Use that if there are long lived TCP connections to an HA service. This feature is enabled by setting the tickle_dir parameter and only in concert with action set to unblock. Note that the tickle ACK function is new as of version 3.0.2 and hasn't yet seen widespread use. Block and unblocks access to TCP and UDP ports The protocol used to be blocked/unblocked. protocol The port number used to be blocked/unblocked. portno The action (block/unblock) to be done on the protocol::portno. action - -(try to) reset server TCP sessions when unblock stops - + If for some reason the long lived server side TCP sessions won't be cleaned up by a reconfiguration/flush/stop of whatever services this portblock protects, they would linger in the connection table, even after the IP is gone and services have been switched over to an other node. An example would be the default NFS kernel server. These "known" connections may seriously confuse and delay a later switchback. Enabling this option will cause this agent to try to get rid of these connections by injecting a temporary iptables rule to TCP-reset outgoing packets from the blocked ports, and additionally tickle them locally, just before it starts to DROP incoming packets on "unblock stop". 
+(try to) reset server TCP sessions when unblock stops + The IP address used to be blocked/unblocked. ip The shared or local directory (_must_ be absolute path) which stores the established TCP connections. Tickle directory If the tickle_dir is a local directory, then the TCP connection state file has to be replicated to other nodes in the cluster. It can be csync2 (default), some wrapper of rsync, or whatever. It takes the file name as a single argument. For csync2, set it to "csync2 -xv". Connection state file synchronization script END } # # Because this is the normal usage, we consider "block" # resources to be pseudo-resources -- that is, their status can't # be reliably determined through external means. # This is because we expect an "unblock" resource to come along # and disable us -- but we're still in some sense active... # #active_grep_pat {udp|tcp} portno,portno active_grep_pat() { w="[ ][ ]*" any="0\\.0\\.0\\.0/0" echo "^DROP${w}${1}${w}--${w}${any}${w}${3}${w}multiport${w}dports${w}${2}\>" } #chain_isactive {udp|tcp} portno,portno ip chain_isactive() { PAT=`active_grep_pat "$1" "$2" "$3"` $IPTABLES $wait -n -L INPUT | grep "$PAT" >/dev/null } save_tcp_connections() { [ -z "$OCF_RESKEY_tickle_dir" ] && return statefile=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip if [ -z "$OCF_RESKEY_sync_script" ]; then netstat -tn |awk -F '[:[:space:]]+' ' $8 == "ESTABLISHED" && $4 == "'$OCF_RESKEY_ip'" \ {printf "%s:%s\t%s:%s\n", $4,$5, $6,$7}' | dd of="$statefile".new conv=fsync && mv "$statefile".new "$statefile" else netstat -tn |awk -F '[:[:space:]]+' ' $8 == "ESTABLISHED" && $4 == "'$OCF_RESKEY_ip'" \ {printf "%s:%s\t%s:%s\n", $4,$5, $6,$7}' \ > $statefile $OCF_RESKEY_sync_script $statefile > /dev/null 2>&1 & fi } tickle_remote() { [ -z "$OCF_RESKEY_tickle_dir" ] && return echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip [ -r $f ] || return $TICKLETCP -n 3 < $f } tickle_local() { [ -z "$OCF_RESKEY_tickle_dir" ] && return 
f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip [ -r $f ] || return # swap "local" and "remote" address, # so we tickle ourselves. # We set up a REJECT with tcp-reset before we do so, so we get rid of # the no longer wanted potentially long lived "ESTABLISHED" connection # entries on the IP we are going to delet in a sec. These would get in # the way if we switch-over and then switch-back in quick succession. local i awk '{ print $2, $1; }' $f | $TICKLETCP netstat -tn | grep -Fw $OCF_RESKEY_ip || return for i in 0.1 0.5 1 2 4 ; do sleep $i awk '{ print $2, $1; }' $f | $TICKLETCP netstat -tn | grep -Fw $OCF_RESKEY_ip || break done } SayActive() { echo "$CMD DROP rule for INPUT chain [$*] is running (OK)" } SayConsideredActive() { echo "$CMD DROP rule for INPUT chain [$*] considered to be running (OK)" } SayInactive() { echo "$CMD DROP rule for INPUT chain [$*] is inactive" } #IptablesStatus {udp|tcp} portno,portno ip {block|unblock} IptablesStatus() { local rc rc=$OCF_ERR_GENERIC activewords="$CMD $1 $2 is running (OK)" if chain_isactive "$1" "$2" "$3"; then case $4 in block) SayActive $* rc=$OCF_SUCCESS ;; *) SayInactive $* rc=$OCF_NOT_RUNNING ;; esac else case $4 in block) if ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" status; then SayConsideredActive $* rc=$OCF_SUCCESS else SayInactive $* rc=$OCF_NOT_RUNNING fi ;; *) if ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" status; then SayActive $* #This is only run on real monitor events. 
save_tcp_connections rc=$OCF_SUCCESS else SayInactive $* rc=$OCF_NOT_RUNNING fi ;; esac fi return $rc } #IptablesBLOCK {udp|tcp} portno,portno ip IptablesBLOCK() { local rc=0 local try_reset=false if [ "$1/$4/$__OCF_ACTION" = tcp/unblock/stop ] && ocf_is_true $reset_local_on_unblock_stop then try_reset=true fi if chain_isactive "$1" "$2" "$3" then : OK -- chain already active else if $try_reset ; then $IPTABLES $wait -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset tickle_local fi $IPTABLES $wait -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP rc=$? if $try_reset ; then $IPTABLES $wait -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset fi fi return $rc } #IptablesUNBLOCK {udp|tcp} portno,portno ip IptablesUNBLOCK() { if chain_isactive "$1" "$2" "$3" then $IPTABLES $wait -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP else : Chain Not active fi return $? } #IptablesStart {udp|tcp} portno,portno ip {block|unblock} IptablesStart() { ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" start case $4 in block) IptablesBLOCK "$@";; unblock) IptablesUNBLOCK "$@" rc=$? tickle_remote #ignore run_tickle_tcp exit code! return $rc ;; *) usage; return 1; esac return $? } #IptablesStop {udp|tcp} portno,portno ip {block|unblock} IptablesStop() { ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" stop case $4 in block) IptablesUNBLOCK "$@";; unblock) save_tcp_connections IptablesBLOCK "$@" ;; *) usage; return 1;; esac return $? } # # Check if the port is valid, this function code is not decent, but works # CheckPort() { # Examples of valid port: "1080", "1", "0080" # Examples of invalid port: "1080bad", "0", "0000", "" echo $1 |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*' } IptablesValidateAll() { check_binary $IPTABLES case $protocol in tcp|udp) ;; *) ocf_log err "Invalid protocol $protocol!" 
exit $OCF_ERR_CONFIGURED ;; esac if CheckPort "$portno"; then : else ocf_log err "Invalid port number $portno!" exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_tickle_dir" ]; then if [ x"$action" != x"unblock" ]; then ocf_log err "Tickles are only useful with action=unblock!" exit $OCF_ERR_CONFIGURED fi if [ ! -d "$OCF_RESKEY_tickle_dir" ]; then ocf_log err "The tickle dir doesn't exist!" exit $OCF_ERR_INSTALLED fi fi case $action in block|unblock) ;; *) ocf_log err "Invalid action $action!" exit $OCF_ERR_CONFIGURED ;; esac if ocf_is_true $reset_local_on_unblock_stop; then if [ $action != unblock ] ; then ocf_log err "reset_local_on_unblock_stop is only relevant with action=unblock" exit $OCF_ERR_CONFIGURED fi if [ -z $OCF_RESKEY_tickle_dir ] ; then ocf_log warn "reset_local_on_unblock_stop works best with tickle_dir enabled as well" fi fi return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac if [ -z "$OCF_RESKEY_protocol" ]; then ocf_log err "Please set OCF_RESKEY_protocol" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_portno" ]; then ocf_log err "Please set OCF_RESKEY_portno" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_action" ]; then ocf_log err "Please set OCF_RESKEY_action" exit $OCF_ERR_CONFIGURED fi # iptables v1.4.20+ is required to use -w (wait) version=$(iptables -V | awk -F ' v' '{print $NF}') ocf_version_cmp "$version" "1.4.19.1" if [ "$?" -eq "2" ]; then wait="-w" else wait="" fi protocol=$OCF_RESKEY_protocol portno=$OCF_RESKEY_portno action=$OCF_RESKEY_action ip=$OCF_RESKEY_ip reset_local_on_unblock_stop=$OCF_RESKEY_reset_local_on_unblock_stop case $1 in start) IptablesStart $protocol $portno $ip $action ;; stop) IptablesStop $protocol $portno $ip $action ;; status|monitor) IptablesStatus $protocol $portno $ip $action ;; validate-all) IptablesValidateAll ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
diff --git a/heartbeat/pound b/heartbeat/pound index ab7dd3157..837d591ca 100755 --- a/heartbeat/pound +++ b/heartbeat/pound @@ -1,339 +1,339 @@ #!/bin/sh # # # Pound # # Description: Manage pound instances as a HA resource # # Author: Taro Matsuzawa # # License: GNU General Public License (GPL) # # See usage() for more details # # OCF instance parameters: # OCF_RESKEY_pid # OCF_RESKEY_binary # OCF_RESKEY_ctl_binary # OCF_RESKEY_socket_path # OCF_RESKEY_config # OCF_RESKEY_name # OCF_RESKEY_maxfiles # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Set default paramenter values # Set these two first, as other defaults depend on it OCF_RESKEY_name_default=${OCF_RESOURCE_INSTANCE} : ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} OCF_RESKEY_binary_default=pound OCF_RESKEY_ctl_binary_default=poundctl OCF_RESKEY_pid_default=/var/run/pound_${OCF_RESKEY_name}.pid OCF_RESKEY_socket_path_default=/var/lib/pound/pound.cfg : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_ctl_binary=${OCF_RESKEY_ctl_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_socket_path=${OCF_RESKEY_socket_path_default}} meta_data() { cat < - + 1.0 The Pound Resource Agent can manage Pound instances. Manage a Pound instance The Pound configuration file that Pound should manage, for example "/etc/pound.cfg". Pound configuration file Override the name of the instance that should be given to Pound (defaults to the resource identifier). Instance name Write the process's PID to the specified file. The default will include the specified name, i.e.: "/var/run/pound_production.pid". Unlike what this help message shows, it is most likely not necessary to change this parameter. Pidfile This is used to start Pound server. Normally use pound. 
This is used to watch Pound status via Unix socket. Normally use poundctl. Write the process's Unix socket. This parameter is same 'Control' parameter in configuration file, i.e.: Control "/var/lib/pound/pound.cfg". Determines how many files pound is allowed to open at a time. Helps to fix the 'Too many open files' error message. Allowed number of open files. END } ####################################################################### pound_usage() { cat < # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### RMQ_SERVER=/usr/sbin/rabbitmq-server RMQ_CTL=/usr/sbin/rabbitmqctl RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia" RMQ_PID_DIR="/var/run/rabbitmq" RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid" RMQ_LOG_DIR="/var/log/rabbitmq" NODENAME=$(ocf_local_nodename) # this attr represents the current active local rmq node name. # when rmq stops or the node is fenced, this attr disappears RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}" # this attr represents the last known active local rmq node name # when rmp stops or the node is fenced, the attr stays forever so # we can continue to map an offline pcmk node to it's rmq node name # equivalent. RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}" meta_data() { cat < - + 1.0 Starts cloned rabbitmq cluster instance rabbitmq clustered Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance. rabbitmqctl set_policy args END } ####################################################################### rmq_usage() { cat < /dev/null 2>&1 } rmq_local_node() { local node_name=$(rabbitmqctl status 2>&1 | sed -n -e "s/^.*[S|s]tatus of node \(.*\)\s.*$/\1/p" | tr -d "'") if [ -z "$node_name" ]; then node_name=$(cat /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null | grep "\s*RABBITMQ_NODENAME=" | awk -F= '{print $2}') fi echo "$node_name" } rmq_join_list() { cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p" } rmq_write_nodename() { local node_name=$(rmq_local_node) if [ -z "$node_name" ]; then ocf_log err "Failed to determine rabbitmq node name, exiting" exit $OCF_ERR_GENERIC fi # store the pcmknode to rmq node mapping as a transient attribute. This allows # us to retrieve the join list with a simple xpath. 
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name" # the pcmknode to rmq node mapping as a permanent attribute as well. this lets # us continue to map offline nodes to their equivalent rmq node name ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --name "$RMQ_CRM_ATTR_COOKIE_LAST_KNOWN" -v "$node_name" } rmq_delete_nodename() { # remove node-name ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -D } prepare_dir () { if [ ! -d ${1} ] ; then mkdir -p ${1} chown -R rabbitmq:rabbitmq ${1} chmod 755 ${1} fi } remove_pid () { rm -f ${RMQ_PID_FILE} > /dev/null 2>&1 } rmq_monitor() { local rc $RMQ_CTL cluster_status > /dev/null 2>&1 rc=$? case "$rc" in 0) ocf_log debug "RabbitMQ server is running normally" rmq_write_nodename return $OCF_SUCCESS ;; 2|68|69|70|75|78) ocf_log info "RabbitMQ server is not running" rmq_delete_nodename return $OCF_NOT_RUNNING ;; *) ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc" rmq_delete_nodename return $OCF_ERR_GENERIC ;; esac } rmq_init_and_wait() { local rc prepare_dir $RMQ_PID_DIR prepare_dir $RMQ_LOG_DIR remove_pid # the server startup script uses this environment variable export RABBITMQ_PID_FILE="$RMQ_PID_FILE" setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" & ocf_log info "Waiting for server to start" $RMQ_CTL wait $RMQ_PID_FILE rc=$? if [ $rc -ne $OCF_SUCCESS ]; then remove_pid ocf_log info "rabbitmq-server start failed: $rc" return $OCF_ERR_GENERIC fi rmq_monitor return $? } rmq_set_policy() { $RMQ_CTL set_policy "$@" > /dev/null 2>&1 } rmq_start_first() { local rc ocf_log info "Bootstrapping rabbitmq cluster" rmq_wipe_data rmq_init_and_wait rc=$? if [ $rc -eq 0 ]; then rc=$OCF_SUCCESS ocf_log info "cluster bootstrapped" if [ -n "$OCF_RESKEY_set_policy" ]; then # do not quote set_policy, we are passing in arguments rmq_set_policy $OCF_RESKEY_set_policy > /dev/null 2>&1 if [ $? 
-ne 0 ]; then ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy" rc=$OCF_ERR_GENERIC else ocf_log info "Policy set: $OCF_RESKEY_set_policy" fi fi else ocf_log info "failed to bootstrap cluster. Check SELINUX policy" rc=$OCF_ERR_GENERIC fi return $rc } rmq_is_clustered() { $RMQ_CTL eval 'rabbit_mnesia:is_clustered().' | grep -q true } rmq_join_existing() { local join_list="$1" local rc=$OCF_ERR_GENERIC ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes." rmq_init_and_wait if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi if rmq_is_clustered; then ocf_log info "Successfully re-joined existing rabbitmq cluster automatically" return $OCF_SUCCESS fi # unconditionally join the cluster $RMQ_CTL stop_app > /dev/null 2>&1 for node in $(echo "$join_list"); do ocf_log info "Attempting to join cluster with target node $node" $RMQ_CTL join_cluster $node if [ $? -eq 0 ]; then ocf_log info "Joined cluster by connecting to node $node, starting app" $RMQ_CTL start_app rc=$? if [ $rc -ne 0 ]; then ocf_log err "'$RMQ_CTL start_app' failed" fi break; fi done if [ "$rc" -ne 0 ]; then ocf_log info "Join process incomplete, shutting down." return $OCF_ERR_GENERIC fi ocf_log info "Successfully joined existing rabbitmq cluster" return $OCF_SUCCESS } rmq_forget_cluster_node_remotely() { local running_cluster_nodes="$1" local node_to_forget="$2" ocf_log info "Forgetting $node_to_forget via nodes [ $(echo $running_cluster_nodes | tr '\n' ' ') ]." for running_cluster_node in $running_cluster_nodes; do rabbitmqctl -n $running_cluster_node forget_cluster_node $node_to_forget if [ $? = 0 ]; then ocf_log info "Succeeded forgetting $node_to_forget via $running_cluster_node." return else ocf_log err "Failed to forget node $node_to_forget via $running_cluster_node." 
fi done } rmq_notify() { node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}" mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" # When notifications are on, this agent is going to "forget" nodes once they # leave the cluster. This is thought to resolve some issues where rabbitmq # blocks trying to sync with an offline node after a fencing action occurs. if ! [ "${mode}" = "post-stop" ]; then return $OCF_SUCCESS fi rmq_monitor if [ $? -ne $OCF_SUCCESS ]; then # only run forget when we are for sure active return $OCF_SUCCESS fi # forget each stopped rmq instance in the provided pcmk node in the list. for node in $(echo "$node_list"); do local rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $node -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)" if [ -z "$rmq_node" ]; then ocf_log warn "Unable to map pcmk node $node to a known rmq node." continue fi ocf_log notice "Forgetting stopped node $rmq_node" $RMQ_CTL forget_cluster_node $rmq_node if [ $? -ne 0 ]; then ocf_log warn "Unable to forget offline node $rmq_node." fi done return $OCF_SUCCESS } rmq_start() { local join_list="" local rc rmq_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi join_list=$(rmq_join_list) # No join list means no active instances are up. This instance # is the first, so it needs to bootstrap the rest if [ -z "$join_list" ]; then rmq_start_first rc=$? return $rc fi # first try to join without wiping mnesia data rmq_join_existing "$join_list" if [ $? -ne 0 ]; then ocf_log info "node failed to join, wiping data directory and trying again" local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)" # if the graceful join fails, use the hammer and reset all the data. rmq_stop rmq_wipe_data rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node" rmq_join_existing "$join_list" rc=$? 
# Restore users and users' permissions (if any) BaseDataDir=`dirname $RMQ_DATA_DIR` if [ -f $BaseDataDir/users.erl ] ; then rabbitmqctl eval " %% Run only if Mnesia is ready. lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso begin [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), %% Read users first {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"), Upgrade = fun ({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5}; ({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D} end, Downgrade = fun ({internal_user, A, B, C}) -> {internal_user, A, B, C}; ({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C}; %% Incompatible scheme, so we will loose user's password ('B' value) during conversion. %% Unfortunately, this case will require manual intervention - user have to run: %% rabbitmqctl change_password ({internal_user, A, B, C, _}) -> {internal_user, A, B, C} end, case WildPattern of %% Version < 3.6.0 {internal_user,'_','_','_'} -> lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Downgrade(X)) end, Users); %% Version >= 3.6.0 {internal_user,'_','_','_','_'} -> lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Upgrade(X)) end, Users) end, ok = file:delete(\"$BaseDataDir/users.erl\") end. " fi if [ -f $BaseDataDir/users_perms.erl ] ; then rabbitmqctl eval " %% Run only if Mnesia is ready. lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso begin {ok, [UsersPerms]} = file:consult(\"$BaseDataDir/users_perms.erl\"), lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user_permission, X) end, UsersPerms), ok = file:delete(\"$BaseDataDir/users_perms.erl\") end. " fi if [ $rc -ne 0 ]; then ocf_log info "node failed to join even after reseting local data. 
Check SELINUX policy" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } rmq_stop() { # Backup users and users' permissions BaseDataDir=`dirname $RMQ_DATA_DIR` rabbitmqctl eval " %% Run only if Mnesia is still available. lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso begin [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), Users = case WildPattern of %% Version < 3.6.0 {internal_user,'_','_','_'} -> mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]); %% Version >= 3.6.0 {internal_user,'_','_','_','_'} -> mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]) end, Users /= [] andalso file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])), UsersPerms = mnesia:dirty_select(rabbit_user_permission, [{{'\\\$1', {'\\\$2', '\\\$3','\\\$4'}, '\\\$5'}, [{'/=', '\\\$3', <<\"guest\">>}], ['\\\$_']}]), UsersPerms /= [] andalso file:write_file(\"$BaseDataDir/users_perms.erl\", io_lib:fwrite(\"~p.~n\", [UsersPerms])) end. " rmq_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi $RMQ_CTL stop rc=$? if [ $rc -ne 0 ]; then ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc" return $rc fi #TODO add kill logic stop_wait=1 while [ $stop_wait = 1 ]; do rmq_monitor rc=$? if [ "$rc" -eq $OCF_NOT_RUNNING ]; then stop_wait=0 break elif [ "$rc" -ne $OCF_SUCCESS ]; then ocf_log info "rabbitmq-server stop failed: $rc" exit $OCF_ERR_GENERIC fi sleep 1 done remove_pid return $OCF_SUCCESS } rmq_validate() { check_binary $RMQ_SERVER check_binary $RMQ_CTL # This resource only makes sense as a clone right now. at some point # we may want to verify the following. 
# Dispatch the requested OCF action. meta-data, usage/help and unknown
# actions exit immediately; every other action falls through so that its
# return code can be logged before the script exits with it.
case "$__OCF_ACTION" in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage|help)
		rmq_usage
		exit $OCF_SUCCESS
		;;
	start|stop|monitor|notify)
		# These actions map one-to-one onto rmq_<action> handlers.
		"rmq_${__OCF_ACTION}"
		;;
	validate-all)
		rmq_validate
		;;
	*)
		rmq_usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
#
# monitor_rsyslog: report whether an rsyslog instance for this resource runs.
#
# Counts the processes whose command line matches $PROCESS_PATTERN.
# Returns OCF_NOT_RUNNING when there is none and OCF_SUCCESS otherwise;
# more than one match is still treated as running but logged as a warning.
#
monitor_rsyslog()
{
	local -a pids
	local count

	# Intentionally unquoted: one array element per PID printed by pgrep.
	pids=( $(pgrep -f "$PROCESS_PATTERN" 2>/dev/null) )
	count=${#pids[@]}

	if [ "$count" -eq 0 ]; then
		ocf_log debug "No rsyslog process for $CONFIGFILE"
		return $OCF_NOT_RUNNING
	fi

	if [ "$count" -gt 1 ]; then
		ocf_log warn "Multiple rsyslog process for $CONFIGFILE"
	fi

	return $OCF_SUCCESS
}
#
# stop_rsyslog: stop every process matching $PROCESS_PATTERN.
#
# Sends SIGTERM first and polls once per second. If the processes are still
# alive after the grace period, SIGKILL is sent repeatedly until they are
# gone (that second phase has no bound of its own).
#
# The grace period is derived from OCF_RESKEY_CRM_meta_timeout, which the
# cluster manager supplies in MILLISECONDS. It is converted to seconds and
# shortened by 5s so the SIGKILL phase still fits inside the operation
# timeout. A missing or non-numeric value falls back to 20000 ms.
#
# Returns: OCF_SUCCESS once no matching process is left.
#
stop_rsyslog()
{
	typeset timeout_ms="${OCF_RESKEY_CRM_meta_timeout:-20000}"
	typeset term_wait
	typeset lapse_sec=0

	case "$timeout_ms" in
		''|*[!0-9]*) timeout_ms=20000;;
	esac
	# 10# forces base 10 so a value with leading zeros is not read as octal.
	term_wait=$(( 10#$timeout_ms / 1000 - 5 ))
	if [ "$term_wait" -lt 1 ]; then
		term_wait=1
	fi

	pkill -TERM -f "$PROCESS_PATTERN"

	# Phase 1: wait up to $term_wait seconds for a clean shutdown.
	while pgrep -f "$PROCESS_PATTERN" > /dev/null; do
		sleep 1
		lapse_sec=$(( lapse_sec + 1 ))
		ocf_log debug "stop_rsyslog[${OCF_RESOURCE_INSTANCE}]: stop NORM $lapse_sec/$term_wait"
		if [ "$lapse_sec" -ge "$term_wait" ]; then
			break
		fi
	done

	# Phase 2: anything still alive gets SIGKILL until it disappears.
	lapse_sec=0
	while pgrep -f "$PROCESS_PATTERN" > /dev/null; do
		pkill -KILL -f "$PROCESS_PATTERN"
		sleep 1
		lapse_sec=$(( lapse_sec + 1 ))
		ocf_log debug "stop_rsyslog[${OCF_RESOURCE_INSTANCE}]: suspend rsyslog by SIGKILL ($lapse_sec/@@@)"
	done

	return $OCF_SUCCESS
}
ocf_log debug "[${OCF_RESOURCE_INSTANCE}] Leave rsyslog stop $func_status" exit $func_status ;; status) status_rsyslog exit $? ;; monitor) monitor_rsyslog func_status=$? exit $func_status ;; validate-all) validate_all_rsyslog exit $? ;; *) usage ;; esac diff --git a/heartbeat/sg_persist b/heartbeat/sg_persist index 4d518ef0e..69866e6e5 100755 --- a/heartbeat/sg_persist +++ b/heartbeat/sg_persist @@ -1,674 +1,674 @@ #!/bin/bash # # # OCF Resource Agent compliant PERSISTENT SCSI RESERVATION resource script. # # # Copyright (c) 2011 Evgeny Nifontov and lwang@suse.com All Rights Reserved. # # "Heartbeat drbd OCF Resource Agent: 2007, Lars Marowsky-Bree" was used # as example of multistate OCF Resource Agent. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# # # OCF instance parameters # OCF_RESKEY_binary # OCF_RESKEY_devs # OCF_RESKEY_required_devs_nof # OCF_RESKEY_reservation_type # OCF_RESKEY_master_score_base # OCF_RESKEY_master_score_dev_factor # OCF_RESKEY_master_score_delay # # TODO # # 1) PROBLEM: devices which were not accessible during 'start' action, will be never registered/reserved # TODO: 'Master' and 'Salve' registers new devs in 'monitor' action # TODO: 'Master' reserves new devs in 'monitor' action ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # set default values : ${sg_persist_binary="sg_persist"} # binary name for the resource : ${devs=""} # device list : ${required_devs_nof=1} # number of required devices : ${reservation_type=1} # reservation type : ${master_score_base=0} # master score base : ${master_score_dev_factor=100} # device factor for master score : ${master_score_delay=30} # delay for master score ####################################################################### meta_data() { cat < 1.1 This resource agent manages SCSI PERSISTENT RESERVATIONS. "sg_persist" from sg3_utils is used, please see its documentation. Should be used as multistate (Master/Slave) resource Slave registers its node id ("crm_node -i") as reservation key ( --param-rk ) on each device in the "devs" list. Master reservs all devices from "devs" list with reservation "--prout-type" value from "reservation_type" parameter. Manages SCSI PERSISTENT RESERVATIONS The name of the binary that manages the resource. -the binary name of the resource +the binary name of the resource Device list. Multiple devices can be listed with blank space as separator. Shell wildcars are allowed. 
device list Minimum number of "working" devices from device list 1) existing 2) "sg_persist --read-keys \$device" works (Return code 0) resource actions "start","monitor","promote" and "validate-all" return "\$OCF_ERR_INSTALLED" if the actual number of "working" devices is less then "required_devs_nof". resource actions "stop" and "demote" tries to remove reservations and registration keys from all working devices, but always return "\$OCF_SUCCESS" minimum number of working devices reservation type reservation type master_score_base value "master_score_base" value is used in "master_score" calculation: master_score = \$master_score_base + \$master_score_dev_factor * \$working_devs if set to bigger value in sg_persist resource configuration on some node, this node will be "preferred" for master role. base master_score value Working device factor in master_score calculation each "working" device provides additional value to "master_score", so the node that sees more devices will be preferred for the "Master"-role Setting it to 0 will disable this behavior. working device factor in master_score calculation master/slave decreases/increases its master_score after delay of \$master_score_delay seconds so if some device gets inaccessible, the slave decreases its master_score first and the resource will no be watched and after this device reappears again the master increases its master_score first this can work only if the master_score_delay is bigger then monitor interval on both master and slave Setting it to 0 will disable this behavior. master_score decrease/increase delay time END exit $OCF_SUCCESS } sg_persist_init() { if ! ocf_is_root ; then ocf_log err "You must be root to perform this operation." 
#
# sg_persist_init: common set-up run before every real action.
#
# Verifies root privileges and the sg_persist binary, determines the local
# cluster node id and name via crm_node, resolves all resource parameters
# (falling back to the script defaults) and finally collects the device
# state through sg_persist_check_devs and sg_persist_get_status.
#
# Globals written: SG_PERSIST, ROLE, NOW, RESOURCE, MASTER_SCORE_VAR_NAME,
#   PENDING_VAR_NAME, CRM_NODE, NODE_ID_DEC, NODE_ID_HEX, NODE,
#   MASTER_SCORE_ATTRIBUTE, CRM_MASTER, PENDING_ATTRIBUTE, DEVS,
#   REQUIRED_DEVS_NOF, RESERVATION_TYPE, MASTER_SCORE_BASE,
#   MASTER_SCORE_DEV_FACTOR, MASTER_SCORE_DELAY
# Exits: OCF_ERR_PERM when not root; OCF_ERR_INSTALLED when the node id
#   cannot be determined or no devices are configured.
#
sg_persist_init() {

    if ! ocf_is_root ; then
        ocf_log err "You must be root to perform this operation."
        exit $OCF_ERR_PERM
    fi

    SG_PERSIST=${OCF_RESKEY_binary:-"$sg_persist_binary"}
    check_binary "$SG_PERSIST"

    ROLE=$OCF_RESKEY_CRM_meta_role
    NOW=$(date +%s)

    RESOURCE="${OCF_RESOURCE_INSTANCE}"
    MASTER_SCORE_VAR_NAME="master-${OCF_RESOURCE_INSTANCE//:/-}"
    PENDING_VAR_NAME="pending-$MASTER_SCORE_VAR_NAME"

    # only works with corosync
    CRM_NODE="${HA_SBIN_DIR}/crm_node"
    NODE_ID_DEC=$($CRM_NODE -i)

    # Validate the node id right here. Testing NODE_ID_HEX afterwards can
    # never detect a failure: printf '0x%x' with an empty argument prints
    # "0x0", so the derived value is never empty.
    case "$NODE_ID_DEC" in
        ''|*[!0-9]*)
            ocf_log err "Couldn't get node id with \"$CRM_NODE\""
            exit $OCF_ERR_INSTALLED
            ;;
    esac

    # "crm_node -l" is parsed as "<id> <name> <state>" lines; keep the name
    # of the line that starts with our own id.
    NODE=$($CRM_NODE -l | $GREP -w "^$NODE_ID_DEC")
    NODE=${NODE#$NODE_ID_DEC }
    NODE=${NODE% *}

    MASTER_SCORE_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$MASTER_SCORE_VAR_NAME --node=$NODE"
    CRM_MASTER="${HA_SBIN_DIR}/crm_master --lifetime=reboot"
    PENDING_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$PENDING_VAR_NAME --node=$NODE"

    # The hexadecimal node id is used as the reservation key.
    NODE_ID_HEX=$(printf '0x%x' "$NODE_ID_DEC")

    ocf_log debug "$RESOURCE: NODE:$NODE, ROLE:$ROLE, NODE_ID DEC:$NODE_ID_DEC HEX:$NODE_ID_HEX"

    DEVS=${OCF_RESKEY_devs:=$devs}
    REQUIRED_DEVS_NOF=${OCF_RESKEY_required_devs_nof:=$required_devs_nof}
    RESERVATION_TYPE=${OCF_RESKEY_reservation_type:=$reservation_type}
    MASTER_SCORE_BASE=${OCF_RESKEY_master_score_base:=$master_score_base}
    MASTER_SCORE_DEV_FACTOR=${OCF_RESKEY_master_score_dev_factor:=$master_score_dev_factor}
    MASTER_SCORE_DELAY=${OCF_RESKEY_master_score_delay:=$master_score_delay}

    ocf_log debug "$RESOURCE: DEVS=$DEVS"
    ocf_log debug "$RESOURCE: REQUIRED_DEVS_NOF=$REQUIRED_DEVS_NOF"
    ocf_log debug "$RESOURCE: RESERVATION_TYPE=$RESERVATION_TYPE"
    ocf_log debug "$RESOURCE: MASTER_SCORE_BASE=$MASTER_SCORE_BASE"
    ocf_log debug "$RESOURCE: MASTER_SCORE_DEV_FACTOR=$MASTER_SCORE_DEV_FACTOR"
    ocf_log debug "$RESOURCE: MASTER_SCORE_DELAY=$MASTER_SCORE_DELAY"

    # expand path wildcards (deliberately unquoted so the shell globs)
    DEVS=$(echo $DEVS)

    if [ -z "$DEVS" ]; then
        ocf_log err "\"devs\" not defined"
        exit $OCF_ERR_INSTALLED
    fi

    sg_persist_check_devs
    sg_persist_get_status
}
#
# sg_persist_check_devs: find out which configured devices exist.
#
# Globals read:    DEVS (whitespace separated paths), REQUIRED_DEVS_NOF
# Globals written: EXISTING_DEVS (array), EXISTING_DEVS_NOF
# Exits: OCF_ERR_INSTALLED when fewer devices exist than required.
#
sg_persist_check_devs() {
    local dev

    # Start from an empty list so that a repeated call (or a value inherited
    # from the environment) cannot inflate the device count.
    EXISTING_DEVS=()

    # DEVS is a whitespace separated list, hence the unquoted expansion.
    for dev in $DEVS
    do
        if [ -e "$dev" ]; then
            EXISTING_DEVS+=("$dev")
        fi
    done

    EXISTING_DEVS_NOF=${#EXISTING_DEVS[@]}
    if [ "$EXISTING_DEVS_NOF" -lt "$REQUIRED_DEVS_NOF" ]; then
        ocf_log err "Number of existing devices=$EXISTING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF"
        exit $OCF_ERR_INSTALLED
    fi
}
#
# sg_persist_echo_array: format the arguments for log output.
#
# Arguments: any number of strings
# Outputs:   one line "[0]:arg0 [1]:arg1 ..." on stdout (empty line when
#            called without arguments)
#
sg_persist_echo_array() {
    local idx=0
    local item
    local out=""

    for item in "$@"
    do
        # ${out:+...} inserts the separating blank only between entries, so
        # no trailing blank has to be trimmed afterwards.
        out="${out:+$out }[$idx]:$item"
        idx=$((idx + 1))
    done

    # Quoted on purpose: entries start with a bracket expression such as
    # "[0]:", which an unquoted expansion would glob against the current
    # directory.
    printf '%s\n' "$out"
}
#
# sg_persist_action_stop: "stop" action.
#
# With no registered devices only a debug message is logged. Otherwise the
# master-score and pending node attributes are deleted and the node's key
# is unregistered on every registered device. Failures of the individual
# commands are not evaluated.
#
# Returns: always OCF_SUCCESS.
#
sg_persist_action_stop() {

    # Guard clause: nothing registered, nothing to undo.
    if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then
        ocf_log debug "$RESOURCE stop: already no registrations"
        return $OCF_SUCCESS
    fi

    # Clear preference for becoming master
    ocf_run $MASTER_SCORE_ATTRIBUTE --delete
    ocf_run $PENDING_ATTRIBUTE --delete

    # Registering with --param-sark=0 removes our key from the device.
    for dev in ${REGISTERED_DEVS[*]}; do
        ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev
    done

    return $OCF_SUCCESS
}
"${NOW}_${MASTER_SCORE}" fi fi else sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi ;; *) ;; esac fi fi if [ $DO_MASTER_SCORE_UPDATE == "YES" ]; then ocf_run $MASTER_SCORE_ATTRIBUTE --update=$NEW_MASTER_SCORE fi if [ $DO_PENDING_UPDATE == "YES" ]; then ocf_run $PENDING_ATTRIBUTE --update=$NEW_PENDING fi if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then ocf_log debug "$RESOURCE monitor: no registrations" return $OCF_NOT_RUNNING fi if [ ${#RESERVED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then return $OCF_RUNNING_MASTER fi if [ ${#REGISTERED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then if [ $RESERVATION_TYPE -eq 7 ] || [ $RESERVATION_TYPE -eq 8 ]; then if [ ${#DEVS_WITH_RESERVATION[*]} -gt 0 ]; then return $OCF_RUNNING_MASTER else return $OCF_SUCCESS fi else return $OCF_SUCCESS fi fi ocf_log err "$RESOURCE monitor: unexpected state" return $OCF_ERR_GENERIC } sg_persist_action_promote() { if [ ${#RESERVED_DEVS[*]} -gt 0 ]; then ocf_log info "$RESOURCE promote: already master" return $OCF_SUCCESS fi for dev in ${WORKING_DEVS[*]} do reservation_key=`sg_persist_get_reservation_key $dev` case $RESERVATION_TYPE in 1|3|5|6) if [ -z "$reservation_key" ]; then ocf_run $SG_PERSIST --out --no-inquiry --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi else ocf_run $SG_PERSIST --out --no-inquiry --preempt --param-sark=$reservation_key --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi fi ;; 7|8) if [ -z "$reservation_key" ]; then ocf_run $SG_PERSIST --out --no-inquiry --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? 
#
# sg_persist_action_demote: "demote" action.
#
# Reservation types 1, 3, 5 and 6: release the reservation on every device
# this node has reserved.
# Reservation types 7 and 8: --release won't release the reservation unless
# the key is unregistered, so the node's key is unregistered on every
# registered device instead.
#
# Returns: OCF_SUCCESS when done or when there was nothing to undo,
#          OCF_ERR_GENERIC as soon as one sg_persist call fails,
#          OCF_ERR_ARGS for any other reservation type.
#
sg_persist_action_demote() {
    local -a held
    local undo_opts

    # Pick the device list and the sg_persist options for this type.
    case $RESERVATION_TYPE in
    1|3|5|6)
        held=("${RESERVED_DEVS[@]}")
        undo_opts="--release --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE"
        ;;
    7|8)
        held=("${REGISTERED_DEVS[@]}")
        undo_opts="--register --param-rk=$NODE_ID_HEX --param-sark=0"
        ;;
    *)
        return $OCF_ERR_ARGS
        ;;
    esac

    if [ ${#held[*]} -eq 0 ]; then
        ocf_log info "$RESOURCE demote: already slave"
        return $OCF_SUCCESS
    fi

    for dev in ${held[*]}; do
        ocf_run $SG_PERSIST --out --no-inquiry $undo_opts $dev
        if [ $? -ne $OCF_SUCCESS ]; then
            return $OCF_ERR_GENERIC
        fi
    done

    return $OCF_SUCCESS
}
ACTION=$1

# RESOURCE is otherwise assigned only inside sg_persist_init. Set it up
# front so that the "starting action" message below and the notify handler,
# which runs without sg_persist_init, do not log an empty resource name.
RESOURCE="${OCF_RESOURCE_INSTANCE}"

# Dispatch the requested action. Every action except meta-data and
# usage/help needs the state gathered by sg_persist_init; notify only logs
# and therefore skips it.
case $ACTION in
    meta-data)
        meta_data
        ;;
    validate-all)
        sg_persist_init
        sg_persist_action_validate_all
        exit $?
        ;;
    start|promote|monitor|stop|demote)
        ocf_log debug "$RESOURCE: starting action \"$ACTION\""
        sg_persist_init
        sg_persist_action_$ACTION
        exit $?
        ;;
    notify)
        sg_persist_action_notify
        exit $?
        ;;
    usage|help)
        sg_persist_action_usage
        exit $OCF_SUCCESS
        ;;
    *)
        sg_persist_action_usage
        exit $OCF_ERR_ARGS
        ;;
esac
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs : ${OCF_RESKEY_slapd="/usr/sbin/slapd"} : ${OCF_RESKEY_ldapsearch="ldapsearch"} : ${OCF_RESKEY_config=""} : ${OCF_RESKEY_pidfile=""} : ${OCF_RESKEY_user=""} : ${OCF_RESKEY_group=""} : ${OCF_RESKEY_services="ldap:///"} : ${OCF_RESKEY_watch_suffix=""} : ${OCF_RESKEY_ignore_suffix=""} : ${OCF_RESKEY_bind_dn=""} : ${OCF_RESKEY_password=""} : ${OCF_RESKEY_parameters=""} : ${OCF_RESKEY_stop_escalate=15} : ${OCF_RESKEY_maxfiles=""} USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}" ORIG_IFS=$IFS NEWLINE=' ' ################################################################################ usage() { echo $USAGE >&2 } meta_data() { cat < 0.1 Resource script for Stand-alone LDAP Daemon (slapd). It manages a slapd instance as an OCF resource. Manages a Stand-alone LDAP Daemon (slapd) instance Full path to the slapd binary. For example, "/usr/sbin/slapd". Full path to slapd binary Full path to the ldapsearch binary. For example, "/usr/bin/ldapsearch". Full path to ldapsearch binary Full path to a slapd configuration directory or a slapd configuration file. For example, "/etc/ldap/slapd.d" or "/etc/ldap/slapd.conf". -Full path to configuration directory or file +Full path to configuration directory or file File to read the PID from; read from olcPidFile/pidfile in config if not set. File to read PID from User name or id slapd will run with. The group id is also changed to this user's gid, unless the group parameter is used to override. User name or id slapd will run with Group name or id slapd will run with. Group name or id slapd will run with LDAP (and other scheme) URLs slapd will serve. For example, "ldap://127.0.0.1:389 ldaps:/// ldapi:///" -LDAP (and other scheme) URLs to serve +LDAP (and other scheme) URLs to serve Suffix (database backend) that will be monitored for availability. Multiple suffixes can be specified by providing a space seperated list. 
#
# watch_suffix: decide whether a suffix (database backend) is monitored.
#
# Arguments: $1 - the suffix to look up
# Globals:   OCF_RESKEY_watch_suffix, OCF_RESKEY_ignore_suffix (read);
#            both are space separated lists, entries may be wrapped in
#            single or double quotes
# Returns:   0 when the suffix has to be monitored, 1 otherwise.
#
# A non-empty watch list takes precedence: only listed suffixes are
# monitored and the ignore list is discarded. With an empty watch list
# every suffix is monitored except those on the ignore list.
#
watch_suffix()
{
  local wanted=$1
  local list
  local entry
  local on_match
  local on_miss

  if [ -n "$OCF_RESKEY_watch_suffix" ]; then
    list=$OCF_RESKEY_watch_suffix
    on_match=0
    on_miss=1
  else
    list=$OCF_RESKEY_ignore_suffix
    on_match=1
    on_miss=0
  fi

  # Compare entry by entry with a plain string test. Matching the whole
  # list against "'suffix'" with grep only ever matched a list holding a
  # single entry, and it interpreted the suffix as a regular expression.
  # $list is deliberately unquoted: it is a whitespace separated list.
  for entry in $list; do
    entry=${entry#[\'\"]}
    entry=${entry%[\'\"]}
    if [ "$entry" = "$wanted" ]; then
      return $on_match
    fi
  done

  return $on_miss
}
#
# slapd_stop: "stop" action.
#
# Looks up the pid via slapd_pid and checks it with slapd_status. A process
# that is not running counts as a successful stop. Otherwise the process is
# handed to ocf_stop_processes (TERM, escalating after
# $OCF_RESKEY_stop_escalate seconds) and the pid file is removed.
#
# Returns: OCF_SUCCESS when slapd is stopped or was not running,
#          OCF_ERR_GENERIC when the status check reports that code or when
#          ocf_stop_processes returns 1.
#
slapd_stop()
{
  local pid
  local state
  local rc

  pid=$(slapd_pid)
  # $pid stays unquoted so that an empty pid is passed as no argument at all.
  slapd_status $pid
  state=$?

  case $state in
    $OCF_NOT_RUNNING)
      ocf_log info "slapd already stopped."
      return $OCF_SUCCESS
      ;;
    $OCF_ERR_GENERIC)
      return $state
      ;;
  esac

  ocf_stop_processes TERM $OCF_RESKEY_stop_escalate $pid
  rc=$?
  if [ $rc -eq 1 ]; then
    ocf_log err "cannot stop slapd."
    return $OCF_ERR_GENERIC
  fi

  # Drop a pid file the daemon left behind.
  if [ -f "$pid_file" ]; then
    rm -f "$pid_file" >/dev/null 2>&1
  fi

  ocf_log info "slapd stopped."
  return $OCF_SUCCESS
}
#
# slapd_group_exists: check that a group is known to the system.
#
# Arguments: $1 - group name or numeric group id
# Returns:   0 when the group exists, non-zero otherwise.
#
# getent is preferred because it accepts names as well as numeric ids and
# also sees groups that do not live in /etc/group. Without getent the
# name and gid columns of /etc/group are compared directly.
#
slapd_group_exists()
{
  if command -v getent >/dev/null 2>&1; then
    getent group "$1" >/dev/null 2>&1
  else
    awk -F: -v g="$1" '$1 == g || $3 == g { found = 1 } END { exit !found }' \
      /etc/group 2>/dev/null
  fi
}

#
# slapd_validate_all: validate the environment and fill in derived values.
#
# Globals read:    slapd, ldapsearch, config
# Globals written: pid_file (read from the configuration when not set),
#                  user and group (default to the invoking user's), pid_dir
# Side effects:    creates the pid file directory when it is missing and
#                  hands it over to $user / $group.
# Returns: OCF_SUCCESS, or OCF_ERR_INSTALLED when the configuration (outside
#          of a probe), the user or the group does not exist.
#
slapd_validate_all()
{
  check_binary "$slapd"
  check_binary "$ldapsearch"

  if [ -z "$pid_file" ]; then
    if [ -d "$config" ]; then
      # Configuration directory: take olcPidFile from cn=config.ldif.
      pid_file=$(sed -ne \
               's/^olcPidFile:[[:space:]]\+\(.\+\)[[:space:]]*/\1/p' \
               "$config"/'cn=config.ldif' 2>/dev/null)
    elif [ -f "$config" ]; then
      # Configuration file: take the pidfile directive.
      pid_file=$(sed -ne \
                's/^pidfile[[:space:]]\+\(.\+\)/\1/p' \
                "$config" 2>/dev/null)
    else
      if ocf_is_probe; then
        ocf_log info "slapd configuration '$config' does not exist during probe."
      else
        ocf_exit_reason "slapd configuration '$config' does not exist."
        return $OCF_ERR_INSTALLED
      fi
    fi
  fi

  if [ -z "$user" ]; then
    user=$(id -nu 2>/dev/null)
  elif ! id "$user" >/dev/null 2>&1; then
    ocf_exit_reason "slapd user '$user' does not exist"
    return $OCF_ERR_INSTALLED
  fi

  if [ -z "$group" ]; then
    group=$(id -ng 2>/dev/null)
  elif ! slapd_group_exists "$group"; then
    ocf_exit_reason "slapd group '$group' does not exist"
    return $OCF_ERR_INSTALLED
  fi

  pid_dir=$(dirname "$pid_file")
  if [ ! -d "$pid_dir" ]; then
    mkdir -p "$pid_dir"
    chown -R "$user" "$pid_dir"
    chgrp -R "$group" "$pid_dir"
  fi

  return $OCF_SUCCESS
}
exit $state ;; validate-all) exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/syslog-ng b/heartbeat/syslog-ng index 66ad9a524..ef42bf3f1 100755 --- a/heartbeat/syslog-ng +++ b/heartbeat/syslog-ng @@ -1,367 +1,367 @@ #!/bin/bash # # Description: Manages a syslog-ng instance, provided by NTT OSSC as an # OCF High-Availability resource under Heartbeat/LinuxHA control # # Copyright (c) 2009 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ############################################################################## # OCF parameters: # OCF_RESKEY_syslog_ng_binary : Path to syslog-ng binary. # Default is "/sbin/syslog-ng" # OCF_RESKEY_configfile : Configuration file # OCF_RESKEY_start_opts : Startup options # OCF_RESKEY_kill_term_timeout: Number of seconds to await to confirm a # normal stop method # # Only OCF_RESKEY_configfile must be specified. Each of the rests # has its default value or refers OCF_RESKEY_configfile to make # its value when no explicit value is given. # # Further infomation for setup: # There are sample configurations at the end of this file. # ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs usage() { cat <<-! 
usage: $0 action action: start : start a new syslog-ng instance stop : stop the running syslog-ng instance status : return the status of syslog-ng, run or down monitor : return TRUE if the syslog-ng appears to be working. meta-data : show meta data message validate-all: validate the instance parameters ! return $OCF_ERR_UNIMPLEMENTED } metadata_syslog_ng() { cat < 1.0 This script manages a syslog-ng instance as an HA resource. Syslog-ng resource agent This parameter specifies a configuration file for a syslog-ng instance managed by this RA. -Configuration file +Configuration file This parameter specifies syslog-ng's executable file. -syslog-ng executable +syslog-ng executable This parameter specifies startup options for a syslog-ng instance managed by this RA. When no value is given, no startup options is used. Don't use option '-F'. It causes a stuck of a start action. -Start options +Start options On a stop action, a normal stop method(pkill -TERM) is firstly used. And then the confirmation of its completion is waited for the specified seconds by this parameter. The default value is 10. -Number of seconds to await to confirm a normal stop method +Number of seconds to await to confirm a normal stop method END return $OCF_SUCCESS } monitor_syslog_ng() { set -- $(pgrep -f "$PROCESS_PATTERN" 2>/dev/null) case $# in 0) ocf_log debug "No syslog-ng process for $CONFIGFILE" return $OCF_NOT_RUNNING;; 1) return $OCF_SUCCESS;; esac ocf_log warn "Multiple syslog-ng process for $CONFIGFILE" return $OCF_SUCCESS } start_syslog_ng() { monitor_syslog_ng if [[ $? = "$OCF_SUCCESS" ]]; then return $OCF_SUCCESS fi # set -- $SYSLOG_NG_OPTS # ocf_run "$SYSLOG_NG_EXE" -f "$SYSLOG_NG_CONF" "$@" # reduce to this? ocf_run "$SYSLOG_NG_EXE" -f "$CONFIGFILE" $START_OPTS ocf_status=$? if [[ "$ocf_status" != "$OCF_SUCCESS" ]]; then return $OCF_ERR_GENERIC fi while true; do monitor_syslog_ng if [[ $? 
= "$OCF_SUCCESS" ]]; then
            return $OCF_SUCCESS
        fi
        sleep 1
    done
}

# Stop every syslog-ng process matching $PROCESS_PATTERN.
# Sends SIGTERM first and waits up to $KILL_TERM_TIMEOUT seconds, then keeps
# sending SIGKILL once per second until no matching process is left.
# Returns: $OCF_SUCCESS once no matching process remains.
stop_syslog_ng()
{
    local lapse_sec=0

    pkill -TERM -f "$PROCESS_PATTERN"

    while pgrep -f "$PROCESS_PATTERN" > /dev/null; do
        sleep 1
        lapse_sec=$(( lapse_sec + 1 ))
        ocf_log debug "stop_syslog_ng[$SYSLOG_NG_NAME]: stop NORM $lapse_sec/$KILL_TERM_TIMEOUT"
        if [ "$lapse_sec" -ge "$KILL_TERM_TIMEOUT" ]; then
            break
        fi
    done

    # If the process cannot be removed, the code below never finishes (the
    # RA is killed by lrmd on timeout) and the pidfile remains; it is not
    # known whether that has any consequences.
    # 2009/09/18 Nakahira
    # If the syslog-ng process hangs, the RA waits $KILL_TERM_TIMEOUT seconds,
    # so the stop timeout of the RA should be longer than $KILL_TERM_TIMEOUT.

    lapse_sec=0
    while pgrep -f "$PROCESS_PATTERN" > /dev/null; do
        pkill -KILL -f "$PROCESS_PATTERN"
        sleep 1
        lapse_sec=$(( lapse_sec + 1 ))
        ocf_log debug "stop_syslog_ng[$SYSLOG_NG_NAME]: suspend syslog_ng by SIGKILL ($lapse_sec/@@@)"
    done

    return $OCF_SUCCESS
}

# Print a human-readable status line for the instance.
# Returns: the exit status of monitor_syslog_ng.
status_syslog_ng()
{
    # rc is local so that it does not leak into the caller's scope.
    local rc

    monitor_syslog_ng
    rc=$?
    if [ "$rc" = "$OCF_SUCCESS" ]; then
        echo "Syslog-ng service is running."
    elif [ "$rc" = "$OCF_NOT_RUNNING" ]; then
        echo "Syslog-ng service is stopped."
    else
        echo "Multiple syslog-ng process for $CONFIGFILE."
    fi
    return $rc
}

# Nothing is validated beyond logging the call; always returns $OCF_SUCCESS.
validate_all_syslog_ng()
{
    ocf_log info "validate_all_syslog_ng[$SYSLOG_NG_NAME]"
    return $OCF_SUCCESS
}

# meta-data must work before any parameter is checked.
if [[ "$1" = "meta-data" ]]; then
    metadata_syslog_ng
    exit $?
fi

CONFIGFILE="${OCF_RESKEY_configfile}"
if [[ -z "$CONFIGFILE" ]]; then
    ocf_log err "undefined parameter:configfile"
    exit $OCF_ERR_CONFIGURED
fi

# Instance name: the configuration file's base name without its extension.
SYSLOG_NG_NAME=${CONFIGFILE##*/}
SYSLOG_NG_NAME=${SYSLOG_NG_NAME%.*}

SYSLOG_NG_EXE="${OCF_RESKEY_syslog_ng_binary:-/sbin/syslog-ng}"
if [[ !
-x "$SYSLOG_NG_EXE" ]]; then
    ocf_log err "Invalid value:syslog_ng_binary:$SYSLOG_NG_EXE"
    exit $OCF_ERR_CONFIGURED
fi

START_OPTS=${OCF_RESKEY_start_opts}
PROCESS_PATTERN="$SYSLOG_NG_EXE -f $CONFIGFILE"

# The pidfile plays no role here: status is decided purely by looking for a
# running process.
KILL_TERM_TIMEOUT="${OCF_RESKEY_kill_term_timeout-10}"
ocf_is_decimal "$KILL_TERM_TIMEOUT" || {
    ocf_log err "Invalid value:kill_term_timeout:$KILL_TERM_TIMEOUT"
    exit $OCF_ERR_CONFIGURED
}

COMMAND=$1

# Dispatch the requested action; unknown actions print the usage text.
case "$COMMAND" in
    start)
        ocf_log debug "[$SYSLOG_NG_NAME] Enter syslog_ng start"
        start_syslog_ng
        func_status=$?
        ocf_log debug "[$SYSLOG_NG_NAME] Leave syslog_ng start $func_status"
        exit $func_status
        ;;
    stop)
        ocf_log debug "[$SYSLOG_NG_NAME] Enter syslog_ng stop"
        stop_syslog_ng
        func_status=$?
        ocf_log debug "[$SYSLOG_NG_NAME] Leave syslog_ng stop $func_status"
        exit $func_status
        ;;
    status)
        status_syslog_ng
        exit $?
        ;;
    monitor)
        #ocf_log debug "[$SYSLOG_NG_NAME] Enter syslog_ng monitor"
        monitor_syslog_ng
        func_status=$?
        #ocf_log debug "[$SYSLOG_NG_NAME] Leave syslog_ng monitor $func_status"
        exit $func_status
        ;;
    validate-all)
        validate_all_syslog_ng
        exit $?
        ;;
    *)
        usage
        ;;
esac

# vim: set sw=4 ts=4 :

### A sample snippet of cib.xml for a syslog-ng resource
##
#
#
#
#
#
#
#
#
#
#
#
#

### A sample syslog-ng configuration file for a log collecting host
###
### This sample is for a log collecting host by syslog-ng.
### A syslog-ng process configured by this sample accepts all messages
### from a certain network. Any message from the network is preserved into
### a file for security information. Restricting messages to "authpriv" from
### the network is done on log sending hosts. (See the sample below)
### Any internal message of the syslog-ng process is preserved into its
### dedicated file. And any "authpriv" internal message of the syslog-ng
### process is also preserved into the security information file.
###
### Change "f_incoming" to suit your environment.
### If you use it as a configuration file for the sample cib.xml above,
### save it into "/etc/syslog-ng/syslog-ng-ext.conf".
##
#options {
#    sync (0);
#    time_reopen (10);
#    log_fifo_size (1000);
#    long_hostnames (off);
#    use_dns (yes);
#    use_fqdn (no);
#    create_dirs (no);
#    keep_hostname (yes); };
#
#source s_internal { internal(); };
#source s_incoming { udp(port(514)); };
#filter f_internal { facility(authpriv); };
#filter f_incoming { netmask("172.20.0.0/255.255.192.0"); };
#
#destination d_internal { file("/var/log/syslog-ng-ext.log" perm(0640));};
#destination d_incoming {
#    file("/var/log/secure-ext.log" create_dirs(yes) perm(0640)); };
#
#log { source(s_internal); destination(d_internal); };
#log { source(s_internal); filter(f_internal); destination(d_incoming); };
#log { source(s_incoming); filter(f_incoming); destination(d_incoming); };

### A sample snippet of syslog-ng configuration file for a log sending host
###
### This sample is for a log sending host that uses syslog-ng.
###
### Replace "syslog-ng-ext" with the IP address or the hostname of your
### log collecting host and append it to "syslog-ng.conf" of each log sending
### host. See the installed default syslog-ng.conf to know what "s_sys" and
### "f_auth" are.
##
#destination d_outgoing { udp("syslog-ng-ext" port(514)); };
#log { source(s_sys); filter(f_auth); destination(d_outgoing); };

### A sample snippet of syslog configuration file for a log sending host
###
### This sample is for a log sending host that uses syslog.
###
### Replace "syslog-ng-ext" with the IP address or the hostname of your
### log collecting host and append it to "syslog.conf" of each log sending
### host.
## # authpriv.* @syslog-ng-ext diff --git a/heartbeat/tomcat b/heartbeat/tomcat index cbcd12a00..857da78c5 100755 --- a/heartbeat/tomcat +++ b/heartbeat/tomcat @@ -1,759 +1,759 @@ #!/bin/sh # # Description: Manages a Tomcat Server as an OCF High-Availability # resource under Heartbeat/LinuxHA control # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # Copyright (c) 2007 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # ####################################################################### # OCF parameters: # OCF_RESKEY_tomcat_name - The name of the resource. Default is tomcat # OCF_RESKEY_script_log - A destination of the log of this script. Default /var/log/OCF_RESKEY_tomcat_name.log # OCF_RESKEY_tomcat_stop_timeout - Time-out at the time of the stop. Default is 5. DEPRECATED # OCF_RESKEY_tomcat_suspend_trialcount - The re-try number of times awaiting a stop. Default is 10. DEPRECATED # OCF_RESKEY_tomcat_user - A user name to start a resource. # OCF_RESKEY_statusurl - URL for state confirmation. Default is http://127.0.0.1:8080 # OCF_RESKEY_max_stop_time - The max time it should take for proper shutdown. Restrictions, only Tomcat6. # OCF_RESKEY_java_home - Home directory of Java. Default is none # OCF_RESKEY_java_opts - Options to pass to Java JVM for start and stop. 
Default is none # OCF_RESKEY_catalina_home - Home directory of Tomcat. Default is none # OCF_RESKEY_catalina_base - Base directory of Tomcat. Default is OCF_RESKEY_catalina_home # OCF_RESKEY_catalina_out - Log file name of Tomcat. Default is OCF_RESKEY_catalina_base/logs/catalina.out # OCF_RESKEY_catalina_pid - A PID file name of Tomcat. Default is OCF_RESKEY_catalina_base/logs/catalina.pid # OCF_RESKEY_tomcat_start_opts - Start options of Tomcat. Default is none. # OCF_RESKEY_catalina_opts - CATALINA_OPTS environment variable. Default is none. # OCF_RESKEY_catalina_tmpdir - CATALINA_TMPDIR environment variable. Default is none. # OCF_RESKEY_catalina_rotate_log - Control catalina.out logrotation flag. Default is NO. # OCF_RESKEY_catalina_rotatetime - catalina.out logrotation time span(seconds). Default is 86400. # OCF_RESKEY_java_endorsed_dirs - JAVA_ENDORSED_DIRS environment variable. Default is none. # OCF_RESKEY_logging_config - LOGGING_CONFIG environment variable. Default is none. # OCF_RESKEY_logging_manager - LOGGING_MANAGER environment variable. Default is none. ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Use runuser if available for SELinux. if [ -x /sbin/runuser ]; then SU=runuser else SU=su fi usage() { cat <<-EOF usage: $0 action action: start start Tomcat stop stop Tomcat status return the status of Tomcat, up or down monitor return TRUE if Tomcat appears to be working. You have to have installed $WGETNAME for this to work. meta-data show meta data message validate-all validate the instance parameters EOF } isrunning_tomcat() { $WGET --tries=20 -O /dev/null $RESOURCE_STATUSURL >/dev/null 2>&1 } isalive_tomcat() { if ocf_is_true $SYSTEMD; then systemctl is-active tomcat@${TOMCAT_NAME} > /dev/null 2>&1 return $? fi # As the server stops, the PID file disappears. 
To avoid race conditions, # we will have remembered the PID of a running instance on script entry. local pid=$rememberedPID # If there is a PID file, attempt to use that if [ -f $CATALINA_PID ]; then local tmp ocf_log debug "Reading pid from $CATALINA_PID" tmp=`head -n 1 $CATALINA_PID` if [ $? -eq 0 ]; then pid=$tmp fi fi if [ -n "$pid" ] && [ "$pid" -gt 0 ]; then # Retry message for restraint ocf_log debug "Sending noop signal to $pid" kill -s 0 $pid >/dev/null 2>&1 return $? fi # No PID file false } # Check rotatelogs process and restart if it is stopped monitor_rotatelogs() { pgrep -f "$ROTATELOGS.*$CATALINA_BASE/logs/catalina_%F.log" > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log warn "A rotatelogs command for $CATALINA_BASE/logs/catalina_%F.log is not running. Restarting it." start_rotatelogs if [ $? -eq 0 ]; then ocf_log info "Restart rotatelogs process succeeded." else ocf_log warn "Restart rotatelogs process failed." fi fi } monitor_tomcat() { isalive_tomcat || return $OCF_NOT_RUNNING isrunning_tomcat || return $OCF_ERR_GENERIC if ocf_is_true ${CATALINA_ROTATE_LOG}; then # Monitor rotatelogs process and restart it if it is stopped. # And never consider rotatelogs process failure to be a monitor failure # as long as Tomcat process works fine. monitor_rotatelogs fi return $OCF_SUCCESS } start_rotatelogs() { # -s is required because tomcat5.5's login shell is /bin/false $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \ -c "$ROTATELOGS -l \"$CATALINA_BASE/logs/catalina_%F.log\" $CATALINA_ROTATETIME" \ < "$CATALINA_OUT" > /dev/null 2>&1 & } # Execute catalina.out log rotation rotate_catalina_out() { # Check catalina_%F.log is writable or not. CURRENT_ROTATELOG_SUFFIX=`date +"%F"` $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \ -c "touch \"$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log\"" > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_exit_reason "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable." 
return $OCF_ERR_GENERIC fi # Clean up and set permissions on required files rm -rf "$CATALINA_BASE"/temp/* if [ -p "$CATALINA_OUT" ]; then rm -f "$CATALINA_OUT" elif [ -e "$CATALINA_OUT" ]; then DATE=`date +"%F-%H%M%S"` ocf_log warn "$CATALINA_OUT already exists. It is saved as $CATALINA_OUT-$DATE" mv "$CATALINA_OUT" "$CATALINA_OUT-$DATE" fi mkfifo -m700 "$CATALINA_OUT" chown --dereference "$RESOURCE_TOMCAT_USER" "$CATALINA_OUT" || true start_rotatelogs } create_systemd_config() { cat<<-EOF > /etc/sysconfig/tomcat@${TOMCAT_NAME} JAVA_HOME=${JAVA_HOME} JAVA_OPTS="${JAVA_OPTS}" CATALINA_HOME=${CATALINA_HOME} CATALINA_BASE=${CATALINA_BASE} CATALINA_OUT=${CATALINA_OUT} CATALINA_OPTS="${CATALINA_OPTS}" CATALINA_TMPDIR="${CATALINA_TMPDIR}" JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}" LOGGING_CONFIG="${LOGGING_CONFIG}" LOGGING_MANAGER="${LOGGING_MANAGER}" TOMCAT_CFG=${TOMCAT_CFG} EOF } # shellcheck disable=SC2068 tomcatCommand() { if ocf_is_true $SYSTEMD; then systemctl $@ tomcat@${TOMCAT_NAME} else cat<<-END_TOMCAT_COMMAND export JAVA_HOME=${JAVA_HOME} export JAVA_OPTS="${JAVA_OPTS}" export CATALINA_HOME=${CATALINA_HOME} export CATALINA_BASE=${CATALINA_BASE} export CATALINA_OUT=${CATALINA_OUT} export CATALINA_PID=${CATALINA_PID} export CATALINA_OPTS="${CATALINA_OPTS}" export CATALINA_TMPDIR="${CATALINA_TMPDIR}" export JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}" export LOGGING_CONFIG="${LOGGING_CONFIG}" export LOGGING_MANAGER="${LOGGING_MANAGER}" export TOMCAT_CFG=${TOMCAT_CFG} $TOMCAT_START_SCRIPT $@ END_TOMCAT_COMMAND fi } # shellcheck disable=SC2068 attemptTomcatCommand() { if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then TOMCAT_CFG=$(mktemp "${HA_RSCTMP}/tomcat-tmp-XXXXX.cfg") export TOMCAT_CFG fi if ocf_is_true $SYSTEMD; then tomcatCommand $@ elif [ "$RESOURCE_TOMCAT_USER" = root ]; then "$TOMCAT_START_SCRIPT" $@ >> "$TOMCAT_CONSOLE" 2>&1 else tomcatCommand $@ | $SU - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 fi if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then rm 
-f "$TOMCAT_CFG" fi } start_tomcat() { if ocf_is_true $SYSTEMD; then create_systemd_config fi cd "$CATALINA_HOME/bin" || return $OCF_ERR_GENERIC validate_all_tomcat || exit $? monitor_tomcat if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi # Remove $CATALINA_PID if it exists rm -f $CATALINA_PID #ocf_log debug "catalina.out rotation FLG = ${CATALINA_ROTATE_LOG}" if ocf_is_true ${CATALINA_ROTATE_LOG}; then rotate_catalina_out if [ $? -eq 0 ]; then ocf_log debug "Rotate catalina.out succeeded." else ocf_exit_reason "Rotate catalina.out failed. Avoid starting tomcat without catalina.out rotation." return $OCF_ERR_GENERIC fi fi echo "`date "+%Y/%m/%d %T"`: start ===========================" >> "$TOMCAT_CONSOLE" ocf_log debug "CATALINA_OPTS value = ${CATALINA_OPTS}" attemptTomcatCommand start ${TOMCAT_START_OPTS} & while true; do monitor_tomcat if [ $? -eq $OCF_SUCCESS ]; then break fi ocf_log debug "start_tomcat[$TOMCAT_NAME]: retry monitor_tomcat" sleep 3 done return $OCF_SUCCESS } stop_tomcat() { local stop_time local RA_TIMEOUT=20 local TOMCAT_STOP_OPTS="" if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then RA_TIMEOUT=$((OCF_RESKEY_CRM_meta_timeout/1000)) fi STOP_TIMEOUT=$((RA_TIMEOUT-5)) if [ -n "$MAX_STOP_TIME" ]; then if [ $MAX_STOP_TIME -gt $RA_TIMEOUT ]; then ocf_log warn "max_stop_timeout must be shorter than the timeout of stop operation." 
fi if [ $MAX_STOP_TIME -eq 0 ]; then STOP_TIMEOUT=$RA_TIMEOUT else STOP_TIMEOUT=$MAX_STOP_TIME fi fi cd "$CATALINA_HOME/bin" memorize_pid # This lets monitoring continue to work reliably echo "`date "+%Y/%m/%d %T"`: stop ###########################" >> "$TOMCAT_CONSOLE" if [ "$TOMCAT_START_SCRIPT" = "$CATALINA_HOME/bin/catalina.sh" ]; then TOMCAT_STOP_OPTS="$STOP_TIMEOUT --force" fi stop_time=$(date +%s) attemptTomcatCommand stop $TOMCAT_STOP_OPTS lapse_sec=0 while isalive_tomcat; do sleep 1 lapse_sec=`expr $(date +%s) - $stop_time` if [ $lapse_sec -ge $STOP_TIMEOUT ]; then ocf_log debug "stop_tomcat[$TOMCAT_NAME]: stop failed, killing with SIGKILL ($lapse_sec)" kill -s KILL $rememberedPID > /dev/null 2>&1 fi done if ocf_is_true ${CATALINA_ROTATE_LOG}; then rm -f "$CATALINA_PID" "${CATALINA_OUT}" else rm -f "$CATALINA_PID" fi return $OCF_SUCCESS } metadata_tomcat() { cat < 1.0 Resource script for Tomcat. It manages a Tomcat instance as a cluster resource. Manages a Tomcat servlet environment instance The name of the resource, added as a Java parameter in JAVA_OPTS: -Dname=<tomcat_name> to Tomcat process on start. Used to ensure process is still running and must be unique. -The name of the resource +The name of the resource Log file, used during start and stop operations. -Log file +Log file Time-out for stop operation. DEPRECATED -Time-out for the stop operation. DEPRECATED +Time-out for the stop operation. DEPRECATED Maximum number of times to retry stop operation before suspending and killing Tomcat. DEPRECATED. Does not retry. -Max retry count for stop operation. DEPRECATED +Max retry count for stop operation. DEPRECATED The user who starts Tomcat. -The user who starts Tomcat +The user who starts Tomcat URL for state confirmation. -URL for state confirmation +URL for state confirmation Number of seconds to wait during a stop before drastic measures (force kill) are used on the tomcat process. 
This number MUST be less than your cluster stop timeout for the resource. The default value is five seconds before the timeout value of stop operation. When it is over this value, it stops a process in kill commands. This parameter is only effective on Tomcat 6 or later. -The max time it should take for proper shutdown. +The max time it should take for proper shutdown. Home directory of Java. -Home directory of Java +Home directory of Java Java JVM options used on start and stop. -Java options parsed to JVM, used on start and stop. +Java options parsed to JVM, used on start and stop. Home directory of Tomcat. -Home directory of Tomcat +Home directory of Tomcat Instance directory of Tomcat -Instance directory of Tomcat, defaults to catalina_home +Instance directory of Tomcat, defaults to catalina_home Log file name of Tomcat -Log file name of Tomcat, defaults to catalina_base/logs/catalina.out +Log file name of Tomcat, defaults to catalina_base/logs/catalina.out A PID file name for Tomcat. -A PID file name for Tomcat +A PID file name for Tomcat Force use of systemd when available. -Force use of systemd when available +Force use of systemd when available Absolute path to the custom tomcat start script to use. -Tomcat start script location +Tomcat start script location Tomcat start options. -Tomcat start options +Tomcat start options Catalina options, for the start operation only. -Catalina options +Catalina options Temporary directory of Tomcat -Temporary directory of Tomcat, defaults to none +Temporary directory of Tomcat, defaults to none Rotate catalina.out flag. -Rotate catalina.out flag +Rotate catalina.out flag catalina.out rotation interval (seconds). 
-catalina.out rotation interval (seconds) +catalina.out rotation interval (seconds) Java_endorsed_dirs of tomcat -Java_endorsed_dirs of Tomcat, defaults to none +Java_endorsed_dirs of Tomcat, defaults to none Logging_config of tomcat -Logging_config of Tomcat, defaults to none +Logging_config of Tomcat, defaults to none Logging_manager of tomcat -Logging_manager of Tomcat, defaults to none. +Logging_manager of Tomcat, defaults to none. END return $OCF_SUCCESS } validate_all_tomcat() { local port local rc=$OCF_SUCCESS ocf_log info "validate_all_tomcat[$TOMCAT_NAME]" check_binary $WGET if ! ocf_is_true $OCF_RESKEY_force_systemd && [ -z "${TOMCAT_START_SCRIPT}" ]; then ocf_exit_reason "No default tomcat start script detected. Please specify start script location using the 'tomcat_start_script' option" rc=$OCF_ERR_CONFIGURED fi if [ -n "$MAX_STOP_TIME" ] && [ "$MAX_STOP_TIME" -lt 0 ]; then ocf_exit_reason "max_stop_time must be set to a value greater than 0." rc=$OCF_ERR_CONFIGURED fi if echo "$RESOURCE_STATUSURL" | grep -q ":[0-9][0-9]*" ; then port=${RESOURCE_STATUSURL##*:} port=${port%%/*} ocf_log debug "Tomcat port is $port" ocf_log debug "grep port=\"$port\" $CATALINA_BASE/conf/server.xml" grep "port=\"$port\"" $CATALINA_BASE/conf/server.xml > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_exit_reason "Your configured status URL specifies a port ($port), but the server does not have a connector listening to that port in $CATALINA_BASE/conf/server.xml" rc=$OCF_ERR_INSTALLED fi fi if ocf_is_true ${CATALINA_ROTATE_LOG}; then if [ ! -x "$ROTATELOGS" ]; then ocf_exit_reason "rotatelogs command does not exist." 
rc=$OCF_ERR_INSTALLED fi fi return $rc } # As we stop tomcat, it removes it's own pid file...we still want to know what it was memorize_pid() { if [ -f $CATALINA_PID ]; then rememberedPID=$(cat $CATALINA_PID) fi } # ### tomcat RA environment variables # COMMAND=$1 TOMCAT_NAME="${OCF_RESKEY_tomcat_name-tomcat}" TOMCAT_CONSOLE="${OCF_RESKEY_script_log-/var/log/$TOMCAT_NAME.log}" RESOURCE_TOMCAT_USER="${OCF_RESKEY_tomcat_user-root}" RESOURCE_STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}" OCF_RESKEY_force_systemd_default=0 JAVA_HOME="${OCF_RESKEY_java_home}" JAVA_OPTS="${OCF_RESKEY_java_opts}" CATALINA_HOME="${OCF_RESKEY_catalina_home}" CATALINA_BASE="${OCF_RESKEY_catalina_base-${OCF_RESKEY_catalina_home}}" CATALINA_OUT="${OCF_RESKEY_catalina_out-$CATALINA_BASE/logs/catalina.out}" CATALINA_PID=$OCF_RESKEY_catalina_pid if [ -z "$CATALINA_PID" ]; then mkdir -p "${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/" if [ "${RESOURCE_TOMCAT_USER}" != "root" ]; then chown ${RESOURCE_TOMCAT_USER} "${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/" fi CATALINA_PID="${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/catalina.pid" fi MAX_STOP_TIME="${OCF_RESKEY_max_stop_time}" : ${OCF_RESKEY_force_systemd=${OCF_RESKEY_force_systemd_default}} TOMCAT_START_OPTS="${OCF_RESKEY_tomcat_start_opts}" TOMCAT_START_SCRIPT="${OCF_RESKEY_tomcat_start_script}" CATALINA_OPTS="-Dname=$TOMCAT_NAME ${OCF_RESKEY_catalina_opts}" CATALINA_TMPDIR="${OCF_RESKEY_catalina_tmpdir}" CATALINA_ROTATE_LOG="${OCF_RESKEY_catalina_rotate_log-NO}" CATALINA_ROTATETIME="${OCF_RESKEY_catalina_rotatetime-86400}" JAVA_ENDORSED_DIRS="${OCF_RESKEY_java_endorsed_dirs}" LOGGING_CONFIG="${OCF_RESKEY_logging_config}" LOGGING_MANAGER="${OCF_RESKEY_logging_manager}" if [ -z "${TOMCAT_START_SCRIPT}" ]; then if ocf_is_true $OCF_RESKEY_force_systemd && \ ps -p 1 | grep -q systemd; then SYSTEMD=1 elif [ -e "$CATALINA_HOME/bin/catalina.sh" ]; then TOMCAT_START_SCRIPT="$CATALINA_HOME/bin/catalina.sh" elif [ -e "/usr/sbin/tomcat" ]; then 
REDIRECT_DEFAULT_CONFIG=1 TOMCAT_START_SCRIPT="/usr/sbin/tomcat" elif [ -e "/usr/sbin/tomcat6" ]; then REDIRECT_DEFAULT_CONFIG=1 TOMCAT_START_SCRIPT="/usr/sbin/tomcat6" fi fi LSB_STATUS_STOPPED=3 if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case "$COMMAND" in meta-data) metadata_tomcat; exit $OCF_SUCCESS;; help|usage) usage; exit $OCF_SUCCESS;; esac if [ ! -d "$JAVA_HOME" -o ! -d "$CATALINA_HOME" -o ! -d "$CATALINA_BASE" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac ocf_exit_reason "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist." exit $OCF_ERR_INSTALLED fi export JAVA_HOME JAVA_OPTS CATALINA_HOME CATALINA_BASE CATALINA_OUT CATALINA_PID CATALINA_OPTS CATALINA_TMPDIR JAVA_ENDORSED_DIRS LOGGING_CONFIG LOGGING_MANAGER JAVA=${JAVA_HOME}/bin/java if [ ! -x "$JAVA" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac ocf_exit_reason "java command does not exist." exit $OCF_ERR_INSTALLED fi ROTATELOGS="" if ocf_is_true ${CATALINA_ROTATE_LOG}; then # Look for rotatelogs/rotatelogs2 if [ -x /usr/sbin/rotatelogs ]; then ROTATELOGS=/usr/sbin/rotatelogs elif [ -x /usr/sbin/rotatelogs2 ]; then ROTATELOGS=/usr/sbin/rotatelogs2 fi fi # # ------------------ # the main script # ------------------ # case "$COMMAND" in start) ocf_log debug "[$TOMCAT_NAME] Enter tomcat start" start_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat start $func_status" exit $func_status ;; stop) ocf_log debug "[$TOMCAT_NAME] Enter tomcat stop" stop_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat stop $func_status" exit $func_status ;; status) if monitor_tomcat; then echo tomcat instance $TOMCAT_NAME is running exit $OCF_SUCCESS else echo tomcat instance $TOMCAT_NAME is stopped exit $OCF_NOT_RUNNING fi exit $? ;; monitor) #ocf_log debug "[$TOMCAT_NAME] Enter tomcat monitor" monitor_tomcat func_status=$? 
#ocf_log debug "[$TOMCAT_NAME] Leave tomcat monitor $func_status" exit $func_status ;; meta-data) metadata_tomcat exit $? ;; validate-all) validate_all_tomcat exit $? ;; usage|help) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/varnish b/heartbeat/varnish index d56583253..84672c7d6 100755 --- a/heartbeat/varnish +++ b/heartbeat/varnish @@ -1,454 +1,454 @@ #!/bin/sh # # # Varnish # # Description: Manage varnish instances as a HA resource # # Author: Léon Keijser # # License: GNU General Public License (GPL) # # See usage() for more details # # OCF instance parameters: # OCF_RESKEY_pid # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_config # OCF_RESKEY_name # OCF_RESKEY_listen_address # OCF_RESKEY_mgmt_address # OCF_RESKEY_ttl # OCF_RESKEY_varnish_user # OCF_RESKEY_varnish_group # OCF_RESKEY_backend_type # OCF_RESKEY_backend_size # OCF_RESKEY_backend_file # OCF_RESKEY_worker_threads # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Set default paramenter values # Set these two first, as other defaults depend on it OCF_RESKEY_name_default=${OCF_RESOURCE_INSTANCE} : ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} OCF_RESKEY_binary_default=varnishd OCF_RESKEY_client_binary_default=varnishadm OCF_RESKEY_pid_default=/var/run/varnishd_${OCF_RESKEY_name}.pid OCF_RESKEY_listen_address_default=0.0.0.0:80 OCF_RESKEY_ttl_default=600 OCF_RESKEY_varnish_user_default=varnish OCF_RESKEY_varnish_group_default=varnish OCF_RESKEY_backend_type_default=malloc OCF_RESKEY_backend_size_default=1G OCF_RESKEY_backend_file_default=/var/lib/varnish/${OCF_RESKEY_name}.bin OCF_RESKEY_worker_threads_default=100,3000,120 OCF_RESKEY_maxfiles_default=131072 OCF_RESKEY_max_locked_memory_default=82000 : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_listen_address=${OCF_RESKEY_listen_address_default}} : ${OCF_RESKEY_ttl=${OCF_RESKEY_ttl_default}} : ${OCF_RESKEY_varnish_user=${OCF_RESKEY_varnish_user_default}} : ${OCF_RESKEY_varnish_group=${OCF_RESKEY_varnish_group_default}} : ${OCF_RESKEY_backend_type=${OCF_RESKEY_backend_type_default}} : ${OCF_RESKEY_backend_size=${OCF_RESKEY_backend_size_default}} : ${OCF_RESKEY_backend_file=${OCF_RESKEY_backend_file_default}} : ${OCF_RESKEY_worker_threads=${OCF_RESKEY_worker_threads_default}} : ${OCF_RESKEY_maxfiles=${OCF_RESKEY_maxfiles_default}} : ${OCF_RESKEY_max_locked_memory=${OCF_RESKEY_max_locked_memory_default}} meta_data() { cat < - + 1.0 The Varnish Resource Agent can manage several varnishd instances throughout the cluster. It does so by creating a unique PID file and requires a unique listen address and name for each instance. Manage a Varnish instance The VCL configuration file that Varnish should manage, for example "/etc/varnish/default.vcl". 
VCL file Override the name of the instance that should be given to Varnish (defaults to the resource identifier). Instance name Write the process's PID to the specified file. The default will include the specified name, i.e.: "/var/run/varnish_production.pid". Unlike what this help message shows, it is most likely not necessary to change this parameter. Listen address Listen on this address:port, for example "192.168.1.1:80" Listen address Provide a management interface, for example "127.0.0.1:2222" Management interface Specify a hard minimum time to live for cached documents. TTL Specify the name of an unprivileged user to which the child process should switch before it starts accepting connections. Unprivileged user Specify the name of an unprivileged group to which the child process should switch before it starts accepting connections. Unprivileged group Use the specified storage backend. Valid options are 'malloc' for memory and 'file' for a file backend. Backend type Specify the size of the backend. For example "1G". Backend size Specify the backend filename if you use backend_type file. For example /var/lib/varnish/mybackend.bin Backend file Start at least min but no more than max worker threads with the specified idle timeout. Syntax: min[,max[,timeout]] For example: 100,3000,120 Worker threads This is used to control Varnish via a CLI. It's currently only used to check the status of the running child process. Varnish admin utility Maximum number of open files (for ulimit -n) Max open files Locked shared memory limit (for ulimit -l) Max locked memory END } ####################################################################### varnish_usage() { cat <