diff --git a/doc/man/ra2refentry.xsl b/doc/man/ra2refentry.xsl index ac148ef6c..d0535fd36 100644 --- a/doc/man/ra2refentry.xsl +++ b/doc/man/ra2refentry.xsl @@ -1,644 +1,642 @@ resource-agents ocf heartbeat 7 | __ re-ra- - Linux-HA contributors (see the resource agent source for information about individual authors) + ClusterLabs contributors (see the resource agent source for information about individual authors) OCF resource agents - + - - - - - + + + + + - + - + - + - + - + - + - - - + + + - + Description This resource agent may be configured for native migration if available in the cluster manager. For Pacemaker, the allow-migrate="true" meta attribute enables native migration. Supported Parameters - - - - - - This resource agent does not support any parameters. - - + + + + + + This resource agent does not support any parameters. + + - - - - + + + + ( - unique, + unique, - - required - - - optional - + + required + + + optional + , - - , + + , - - , + + , - - default - - " - - - - - - " - - - - default - false - - - no default - + + default + + " + + + + + + " + + + + default + false + + + no default + ) Supported Actions - - - - - - - This resource agent does not advertise any supported actions. - - + + + + + + + This resource agent does not advertise any supported actions. + + This resource agent supports the following actions (operations): - - - Starts the resource. - - - Stops the resource. - - - Performs a status check. - - - Performs a detailed status check. - - - Promotes the resource to the Master role. - - - Demotes the resource to the Slave role. - - - Executes steps necessary for migrating the - resource - away from - the node. - - - Executes steps necessary for migrating the - resource - to - the node. - - - Performs a validation of the resource configuration. - - - Retrieves resource agent metadata (internal use only). - - - - Suggested minimum timeout: - - . - - - Suggested interval: - - . - + + + Starts the resource. + + + Stops the resource. + + + Performs a status check. + + + Performs a detailed status check. + + + Promotes the resource to the Master role. + + + Demotes the resource to the Slave role. + + + Executes steps necessary for migrating the + resource + away from + the node. + + + Executes steps necessary for migrating the + resource + to + the node. + + + Performs a validation of the resource configuration. + + + Retrieves resource agent metadata (internal use only). + + + + Suggested minimum timeout: + + . + + + Suggested interval: + + . + Example CRM Shell - The following is an example configuration for a - - resource using the - crm8 - shell: + The following is an example configuration for a + + resource using the + crm8 + shell: - primitive p_ - - - - : - - : - - - - \ + primitive p_ + + + + : + + : + + + + \ params \ - - - - \ - - - - - + + + + \ + + + + + meta allow-migrate="true" \ - - + + - - ms ms_ - - p_ - - \ + + ms ms_ + + p_ + + \ meta notify="true" interleave="true" - + = \ - " - - " + " + + " - + op \ - - =" - - " + + =" + + " Example PCS - The following is an example configuration for a - - resource using - pcs8 + The following is an example configuration for a + + resource using + pcs8 - pcs resource create p_ - - - - : - - : - - - - \ + pcs resource create p_ + + + + : + + : + + + + \ - - - - \ - - - + + + + \ + + + - --master + --master = \ - " - - " + " + + " - + op \ - - =" - - " + + =" + + " See also - - - http://www.linux-ha.org/wiki/ - - _(resource_agent) - - + + + http://clusterlabs.org/ + + diff --git a/heartbeat/Delay b/heartbeat/Delay index f9d303bf8..ab0796579 100755 --- a/heartbeat/Delay +++ b/heartbeat/Delay @@ -1,223 +1,223 @@ #!/bin/sh # # -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # This script is a test resource for introducing delay. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_startdelay # OCF_RESKEY_stopdelay # OCF_RESKEY_mondelay # # # OCF_RESKEY_startdelay defaults to 20 (seconds) # OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay # OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay # # # This is really a test resource script. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-! usage: $0 {start|stop|status|monitor|meta-data|validate-all} ! } meta_data() { cat < 1.0 This script is a test resource for introducing delay. Waits for a defined timespan How long in seconds to delay on start operation. Start delay How long in seconds to delay on stop operation. Defaults to "startdelay" if unspecified. Stop delay How long in seconds to delay on monitor operation. Defaults to "startdelay" if unspecified. Monitor delay END } Delay_stat() { ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} monitor } Delay_Status() { if Delay_stat then ocf_log info "Delay is running OK" return $OCF_SUCCESS else ocf_log info "Delay is stopped" return $OCF_NOT_RUNNING fi } Delay_Monitor() { Delay_Validate_All -q sleep $OCF_RESKEY_mondelay Delay_Status } Delay_Start() { if Delay_stat then ocf_log info "Delay already running." return $OCF_SUCCESS else Delay_Validate_All -q ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start rc=$? sleep $OCF_RESKEY_startdelay if [ $rc -ne 0 ] then return $OCF_ERR_PERM fi return $OCF_SUCCESS fi } Delay_Stop() { if Delay_stat then Delay_Validate_All -q ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop rc=$? sleep $OCF_RESKEY_stopdelay if [ $rc -ne 0 ] then return $OCF_ERR_PERM fi return $OCF_SUCCESS else ocf_log info "Delay already stopped." return $OCF_SUCCESS fi } # Check if all the arguments are valid numbers, a string is considered valid if: # 1. It does not contain any character but digits and period "."; # 2. The period "." does not occur more than once Are_Valid_Numbers() { for i in "$@"; do echo $i |grep -v [^0-9.] |grep -q -v [.].*[.] if test $? -ne 0; then return $OCF_ERR_ARGS fi done return $OCF_SUCCESS } Delay_Validate_All() { # Be quiet when specified -q option _and_ validation succeded getopts "q" option if test $option = "q"; then quiet=yes else quiet=no fi shift $(($OPTIND -1)) if Are_Valid_Numbers $OCF_RESKEY_startdelay $OCF_RESKEY_stopdelay \ $OCF_RESKEY_mondelay; then if test $quiet = "no"; then echo "Validate OK" fi # _Return_ on validation success return $OCF_SUCCESS else ocf_exit_reason "Some of the instance parameters are invalid" # _Exit_ on validation failure exit $OCF_ERR_ARGS fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_startdelay=20} : ${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay} : ${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay} case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; start) Delay_Start ;; stop) Delay_Stop ;; monitor) Delay_Monitor ;; status) Delay_Status ;; validate-all) Delay_Validate_All ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_ARGS ;; esac exit $? diff --git a/heartbeat/EvmsSCC b/heartbeat/EvmsSCC index 802bac470..21dfc7bde 100755 --- a/heartbeat/EvmsSCC +++ b/heartbeat/EvmsSCC @@ -1,216 +1,216 @@ #!/bin/sh # -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # EvmsSCC # Description: Runs evms_activate in a heartbeat cluster to activate a # EVMS shared cluster container in the cluster. # Original Author: Jo De Baer (jdebaer@novell.com) # Original Release: 06 Nov 2006 # # usage: ./EvmsSCC {start|stop|status|monitor|meta-data} # # The goal of this resource agent is to provoke the creation of device file # in /dev/emvs which correspond to EVMS2 volumes that reside in a EVMS2 shared # cluster container. As such it should be run as a clone resource in the # cluster. Logic inside the resource agent will make sure that "evms_activate" # is run on only one node in the cluster, both at cluster startup time as well # as when a node joins the cluster. # # Typically, resources that need to mount EVMS2 volumes should run after this # resource agent has finished it's run. As such those resources should be made # "dependent" on this resource agent by the cluster administrator. An example # of resources that should depend on this resource agent are Filesystem resource # agent that mount OCFS2 volumes that reside on EVMS2 volumes in a shared # EVMS2 cluster container. # # For this resource agent to do it's job correctly, evmsd must be running on # the node where the agent is started. Usually evmsd is started by the cluster # software via a respawn statement in /etc/ha.d/ha.cf. If you encounter timing # issues where evmsd is not yet started but where the cluster already starts # the EvmsSCC clone, then you should comment out the evmsd respawn statement # in /etc/ha.d/ha.cf and start evmsd on each node in the cluster via a separate # clone resource agent. The EvmsSCC resource agent cloneset should then be made # dependent to this evmsd cloneset. This will guarantee that emvsd is running # before EvmsSCC is started, on each node in the cluster. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Utilities used by this script CUT=cut EVMSACTIVATE=evms_activate usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|meta-data} EOT } meta_data() { cat < 1.0 Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. -- Resource script for EVMS shared cluster container. It runs evms_activate on one node in the cluster. Manages EVMS Shared Cluster Containers (SCCs) (deprecated) If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning END } EvmsSCC_status() { # At the moment we don't support monitoring EVMS activations. We just return "not running" to cope with the pre-start monitor call. return $OCF_NOT_RUNNING } EvmsSCC_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" case "$n_type" in pre) case "$n_op" in start) ocf_log debug "EvmsSCC: Notify: Starting node(s): $n_start." EvmsSCC_start_notify_common ;; esac ;; esac return $OCF_SUCCESS } EvmsSCC_start() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" ocf_log debug "EvmsSCC: Start: starting node(s): $n_start." EvmsSCC_start_notify_common return $OCF_SUCCESS } EvmsSCC_stop() { return $OCF_SUCCESS } EvmsSCC_start_notify_common() { local n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)} ocf_log debug "EvmsSCC: Start_Notify: I am node $n_myself." n_active="$n_active $n_start" case " $n_active " in *" $n_myself "*) ;; *) ocf_log err "EvmsSCC: $n_myself (local) not on active list!" return $OCF_ERR_GENERIC ;; esac #pick the first node from the starting list #when the cluster boots this will be one of the many booting nodes #when a node later joins the cluster, this will be the joining node local n_first=$(echo $n_start | cut -d ' ' -f 1) ocf_log debug "EvmsSCC: Start_Notify: First node in starting list is $n_first." if [ "$n_myself" = "$n_first" ] ; then ocf_log debug "EvmsSCC: Start_Notify: I am running ${EVMSACTIVATE}." while true ; do if ! ${EVMSACTIVATE} -q 2> /dev/null ; then SLEEP_TIME=$(($(ocf_maybe_random) % 40)) ocf_log info "EvmsSCC: Evms call failed - sleeping for $SLEEP_TIME seconds and then trying again." sleep $SLEEP_TIME else break fi done fi return $OCF_SUCCESS } # Check the arguments passed to this script if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi OP=$1 case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # Be obnoxious, log deprecation warning on every invocation (unless # suppressed by resource configuration). ocf_deprecated check_binary $CUT check_binary $EVMSACTIVATE case $OP in start) EvmsSCC_start ;; notify) EvmsSCC_notify ;; stop) EvmsSCC_stop ;; status|monitor) EvmsSCC_status ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem index 9baf14073..d009329cb 100755 --- a/heartbeat/Filesystem +++ b/heartbeat/Filesystem @@ -1,883 +1,883 @@ #!/bin/sh # -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # Filesystem # Description: Manages a Filesystem on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # # usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data} # # OCF parameters are as below: # OCF_RESKEY_device # OCF_RESKEY_directory # OCF_RESKEY_fstype # OCF_RESKEY_options # OCF_RESKEY_statusfile_prefix # OCF_RESKEY_run_fsck # OCF_RESKEY_fast_stop # OCF_RESKEY_force_clones # #OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0 # Or a -U or -L option for mount, or an NFS mount specification #OCF_RESKEY_directory : the mount point for the filesystem #OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2 #OCF_RESKEY_options : options to be given to the mount command via -o #OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring #OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no #OCF_RESKEY_fast_stop : fast stop: yes(default)/no #OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts # for each brick in a glusterfs setup # # # This assumes you want to manage a filesystem on a shared (SCSI) bus, # on a replicated device (such as DRBD), or a network filesystem (such # as NFS or Samba). # # Do not put this filesystem in /etc/fstab. This script manages all of # that for you. # # NOTE: If 2 or more nodes mount the same file system read-write, and # that file system is not designed for that specific purpose # (such as GFS or OCFS2), and is not a network file system like # NFS or Samba, then the filesystem is going to become # corrupted. # # As a result, you should use this together with the stonith # option and redundant, independent communications paths. # # If you don't do this, don't blame us when you scramble your # disk. ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults DFLT_STATUSDIR=".Filesystem_status/" # Variables used by multiple methods HOSTOS=`uname` # The status file is going to an extra directory, by default # prefix=${OCF_RESKEY_statusfile_prefix} : ${prefix:=$DFLT_STATUSDIR} suffix="${OCF_RESOURCE_INSTANCE}" [ "$OCF_RESKEY_CRM_meta_clone" ] && suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone" suffix="${suffix}_`uname -n`" STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|meta-data} EOT } meta_data() { cat < 1.1 Resource script for Filesystem. It manages a Filesystem on a shared storage medium. The standard monitor operation of depth 0 (also known as probe) checks if the filesystem is mounted. If you want deeper tests, set OCF_CHECK_LEVEL to one of the following values: 10: read first 16 blocks of the device (raw read) This doesn't exercise the filesystem at all, but the device on which the filesystem lives. This is noop for non-block devices such as NFS, SMBFS, or bind mounts. 20: test if a status file can be written and read The status file must be writable by root. This is not always the case with an NFS mount, as NFS exports usually have the "root_squash" option set. In such a setup, you must either use read-only monitoring (depth=10), export with "no_root_squash" on your NFS server, or grant world write permissions on the directory where the status file is to be placed. Manages filesystem mounts The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. block device The mount point for the filesystem. mount point The type of filesystem to be mounted. filesystem type Any extra options to be given as -o options to mount. For bind mounts, add "bind" here and set fstype to "none". We will do the right thing for options such as "bind,ro". options The prefix to be used for a status file for resource monitoring with depth 20. If you don't specify this parameter, all status files will be created in a separate directory. status file prefix Specify how to decide whether to run fsck or not. "auto" : decide to run fsck depending on the fstype(default) "force" : always run fsck regardless of the fstype "no" : do not run fsck ever. run_fsck Normally, we expect no users of the filesystem and the stop operation to finish quickly. If you cannot control the filesystem users easily and want to prevent the stop action from failing, then set this parameter to "no" and add an appropriate timeout for the stop operation. fast stop The use of a clone setup for local filesystems is forbidden by default. For special setups like glusterfs, cloning a mount of a local device with a filesystem like ext4 or xfs independently on several nodes is a valid use case. Only set this to "true" if you know what you are doing! allow running as a clone, regardless of filesystem type This option allows specifying how to handle processes that are currently accessing the mount directory. "true" : Default value, kill processes accessing mount point "safe" : Kill processes accessing mount point using methods that avoid functions that could potentially block during process detection "false" : Do not kill any processes. The 'safe' option uses shell logic to walk the /procs/ directory for pids using the mount point while the default option uses the fuser cli tool. fuser is known to perform operations that can potentially block if unresponsive nfs mounts are in use on the system. Kill processes before unmount END } # # Make sure the kernel does the right thing with the FS buffers # This function should be called after unmounting and before mounting # It may not be necessary in 2.4 and later kernels, but it shouldn't hurt # anything either... # # It's really a bug that you have to do this at all... # flushbufs() { if have_binary $BLOCKDEV ; then if [ "$blockdevice" = "yes" ] ; then $BLOCKDEV --flushbufs $1 return $? fi fi return 0 } # Take advantage of /etc/mtab if present, use portable mount command # otherwise. Normalize format to "dev mountpoint fstype". is_bind_mount() { echo "$options" | grep -w bind >/dev/null 2>&1 } list_mounts() { local inpf="" if [ -e "/proc/mounts" ] && ! is_bind_mount; then inpf=/proc/mounts elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then inpf=/etc/mtab fi if [ "$inpf" ]; then cut -d' ' -f1,2,3 < $inpf else $MOUNT | cut -d' ' -f1,3,5 fi } determine_blockdevice() { if [ $blockdevice = "yes" ]; then return fi # Get the current real device name, if possible. # (specified devname could be -L or -U...) case "$FSTYPE" in nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|none) : ;; *) DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1` if [ -b "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Lists all filesystems potentially mounted under a given path, # excluding the path itself. list_submounts() { list_mounts | grep " $1/" | cut -d' ' -f2 | sort -r } # kernels < 2.6.26 can't handle bind remounts bind_kernel_check() { echo "$options" | grep -w ro >/dev/null 2>&1 || return uname -r | awk -F. ' $1==2 && $2==6 { sub("[^0-9].*","",$3); if ($3<26) exit(1); }' [ $? -ne 0 ] && ocf_log warn "kernel `uname -r` cannot handle read only bind mounts" } bind_mount() { if is_bind_mount && [ "$options" != "-o bind" ] then bind_kernel_check bind_opts=`echo $options | sed 's/bind/remount/'` $MOUNT $bind_opts $MOUNTPOINT else true # make sure to return OK fi } is_option() { echo $OCF_RESKEY_options | grep -w "$1" >/dev/null 2>&1 } is_fsck_needed() { case $OCF_RESKEY_run_fsck in force) true;; no) false;; ""|auto) case $FSTYPE in ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs) false;; *) true;; esac;; *) ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'" OCF_RESKEY_run_fsck="auto" is_fsck_needed;; esac } fstype_supported() { local support="$FSTYPE" local rc if [ "X${HOSTOS}" != "XOpenBSD" ];then # skip checking /proc/filesystems for obsd return $OCF_SUCCESS fi if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then : No FSTYPE specified, rely on the system has the right file-system support already return $OCF_SUCCESS fi # support fuse-filesystems (e.g. GlusterFS) case $FSTYPE in fuse.*|glusterfs|rozofs) support="fuse";; esac grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -eq 0 ]; then # found the fs type return $OCF_SUCCESS fi # if here, we should attempt to load the module and then # check the if the filesystem support exists again. $MODPROBE $support >/dev/null if [ $? -ne 0 ]; then ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module" return $OCF_ERR_INSTALLED fi # It is possible for the module to load and not be complete initialized # before we check /proc/filesystems again. Give this a few trys before # giving up entirely. for try in $(seq 5); do grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -eq 0 ] ; then # yes. found the filesystem after doing the modprobe return $OCF_SUCCESS fi ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again" sleep 1 done ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems" return $OCF_ERR_INSTALLED } # # START: Start up the filesystem # Filesystem_start() { # See if the device is already mounted. if Filesystem_status >/dev/null 2>&1 ; then ocf_log info "Filesystem $MOUNTPOINT is already mounted." return $OCF_SUCCESS fi fstype_supported || exit $OCF_ERR_INSTALLED # Check the filesystem & auto repair. # NOTE: Some filesystem types don't need this step... Please modify # accordingly if [ $blockdevice = "yes" ]; then if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" exit $OCF_ERR_INSTALLED fi if is_fsck_needed; then ocf_log info "Starting filesystem check on $DEVICE" if [ -z "$FSTYPE" ]; then $FSCK -p $DEVICE else $FSCK -t $FSTYPE -p $DEVICE fi # NOTE: if any errors at all are detected, it returns non-zero # if the error is >= 4 then there is a big problem if [ $? -ge 4 ]; then ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE" return $OCF_ERR_GENERIC fi fi fi [ -d "$MOUNTPOINT" ] || ocf_run mkdir -p $MOUNTPOINT if [ ! -d "$MOUNTPOINT" ] ; then ocf_exit_reason "Couldn't find directory [$MOUNTPOINT] to use as a mount point" exit $OCF_ERR_INSTALLED fi flushbufs $DEVICE # Mount the filesystem. case "$FSTYPE" in none) $MOUNT $options $DEVICE $MOUNTPOINT && bind_mount ;; "") $MOUNT $options $DEVICE $MOUNTPOINT ;; *) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;; esac if [ $? -ne 0 ]; then ocf_exit_reason "Couldn't mount device [$DEVICE] as $MOUNTPOINT" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # end of Filesystem_start get_pids() { local dir=$1 local procs local mmap_procs if ocf_is_true "$FORCE_UNMOUNT"; then if [ "X${HOSTOS}" = "XOpenBSD" ];then fstat | grep $dir | awk '{print $3}' else $FUSER -m $dir 2>/dev/null fi elif [ "$FORCE_UNMOUNT" = "safe" ]; then procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}') mmap_procs=$(grep " ${dir}" /proc/[0-9]*/maps | awk -F/ '{print $3}') printf "${procs}\n${mmap_procs}" | sort | uniq fi } signal_processes() { local dir=$1 local sig=$2 local pids pid # fuser returns a non-zero return code if none of the # specified files is accessed or in case of a fatal # error. pids=$(get_pids "$dir") if [ -z "$pids" ]; then ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'" return fi for pid in $pids; do ocf_log info "sending signal $sig to: `ps -f $pid | tail -1`" kill -s $sig $pid done } try_umount() { local SUB=$1 $UMOUNT $umount_force $SUB list_mounts | grep -q " $SUB " >/dev/null 2>&1 || { ocf_log info "unmounted $SUB successfully" return $OCF_SUCCESS } return $OCF_ERR_GENERIC } fs_stop() { local SUB=$1 timeout=$2 sig cnt for sig in TERM KILL; do cnt=$((timeout/2)) # try half time with TERM while [ $cnt -gt 0 ]; do try_umount $SUB && return $OCF_SUCCESS ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig" signal_processes $SUB $sig cnt=$((cnt-1)) sleep 1 done done return $OCF_ERR_GENERIC } # # STOP: Unmount the filesystem # Filesystem_stop() { # See if the device is currently mounted Filesystem_status >/dev/null 2>&1 if [ $? -eq $OCF_NOT_RUNNING ]; then # Already unmounted, wonderful. rc=$OCF_SUCCESS else # Wipe the status file, but continue with a warning if # removal fails -- the file system might be read only if [ $OCF_CHECK_LEVEL -eq 20 ]; then rm -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log warn "Failed to remove status file ${STATUSFILE}." fi fi # Determine the real blockdevice this is mounted on (if # possible) prior to unmounting. determine_blockdevice # For networked filesystems, there's merit in trying -f: case "$FSTYPE" in nfs4|nfs|cifs|smbfs) umount_force="-f" ;; esac # Umount all sub-filesystems mounted under $MOUNTPOINT/ too. local timeout for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do ocf_log info "Trying to unmount $SUB" if ocf_is_true "$FAST_STOP"; then timeout=6 else timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"} timeout=$((timeout/1000)) fi fs_stop $SUB $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "Couldn't unmount $SUB, giving up!" fi done fi flushbufs $DEVICE return $rc } # end of Filesystem_stop # # STATUS: is the filesystem mounted or not? # Filesystem_status() { if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then rc=$OCF_SUCCESS msg="$MOUNTPOINT is mounted (running)" else rc=$OCF_NOT_RUNNING msg="$MOUNTPOINT is unmounted (stopped)" fi # Special case "monitor" to check whether the UUID cached and # on-disk still match? case "$OP" in status) ocf_log info "$msg";; esac return $rc } # end of Filesystem_status # Note: the read/write tests below will stall in case the # underlying block device (or in the case of a NAS mount, the # NAS server) has gone away. In that case, if I/O does not # return to normal in time, the operation hits its timeout # and it is up to the CRM to initiate appropriate recovery # actions (such as fencing the node). # # MONITOR 10: read the device # Filesystem_monitor_10() { if [ "$blockdevice" = "no" ] ; then ocf_log warn "$DEVICE is not a block device, monitor 10 is noop" return $OCF_SUCCESS fi dd_opts="iflag=direct bs=4k count=1" err_output=`dd if=$DEVICE $dd_opts 2>&1 >/dev/null` if [ $? -ne 0 ]; then ocf_exit_reason "Failed to read device $DEVICE" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # MONITOR 20: write and read a status file # Filesystem_monitor_20() { if [ "$blockdevice" = "no" ] ; then # O_DIRECT not supported on cifs/smbfs dd_opts="oflag=sync bs=4k conv=fsync,sync" else # Writing to the device in O_DIRECT mode is imperative # to bypass caches. dd_opts="oflag=direct,sync bs=4k conv=fsync,sync" fi status_dir=`dirname $STATUSFILE` [ -d "$status_dir" ] || mkdir -p "$status_dir" err_output=`echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1` if [ $? -ne 0 ]; then ocf_exit_reason "Failed to write status file ${STATUSFILE}" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi test -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_exit_reason "Cannot stat the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi cat ${STATUSFILE} > /dev/null if [ $? -ne 0 ]; then ocf_exit_reason "Cannot read the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } Filesystem_monitor() { Filesystem_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then case "$OCF_CHECK_LEVEL" in 10) Filesystem_monitor_10; rc=$?;; 20) Filesystem_monitor_20; rc=$?;; *) ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL" rc=$OCF_ERR_CONFIGURED ;; esac fi return $rc } # end of Filesystem_monitor # # VALIDATE_ALL: Are the instance parameters valid? # FIXME!! The only part that's useful is the return code. # This code always returns $OCF_SUCCESS (!) # Filesystem_validate_all() { if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then ocf_log warn "Mountpoint $MOUNTPOINT does not exist" fi # Check if the $FSTYPE is workable # NOTE: Without inserting the $FSTYPE module, this step may be imprecise # TODO: This is Linux specific crap. if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then cut -f2 /proc/filesystems |grep -q ^$FSTYPE$ if [ $? -ne 0 ]; then modpath=/lib/modules/`uname -r` moddep=$modpath/modules.dep # Do we have $FSTYPE in modules.dep? cut -d' ' -f1 $moddep |grep -q "^$modpath.*$FSTYPE\.k\?o:$" if [ $? -ne 0 ]; then ocf_log info "It seems we do not have $FSTYPE support" fi fi fi # If we are supposed to do monitoring with status files, then # we need a utility to write in O_DIRECT mode. if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary dd # Note: really old coreutils version do not support # the "oflag" option for dd. We don't check for that # here. In case dd does not support oflag, monitor is # bound to fail, with dd spewing an error message to # the logs. On such systems, we must do without status # file monitoring. fi #TODO: How to check the $options ? return $OCF_SUCCESS } # # set the blockdevice variable to "no" or "yes" # set_blockdevice_var() { blockdevice=no # these are definitely not block devices case $FSTYPE in nfs4|nfs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs) return;; esac if `is_option "loop"`; then return fi case $DEVICE in -*) # Oh... An option to mount instead... Typically -U or -L ;; /dev/null) # Special case for BSC blockdevice=yes ;; *) if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" fi if [ ! -d "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Check the arguments passed to this script if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # Check the OCF_RESKEY_ environment variables... FORCE_UNMOUNT="yes" if [ -n "${OCF_RESKEY_force_unmount}" ]; then FORCE_UNMOUNT=$OCF_RESKEY_force_unmount fi DEVICE=$OCF_RESKEY_device FSTYPE=$OCF_RESKEY_fstype if [ ! -z "$OCF_RESKEY_options" ]; then options="-o $OCF_RESKEY_options" fi FAST_STOP=${OCF_RESKEY_fast_stop:="yes"} OP=$1 # These operations do not require instance parameters case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac if [ x = x"$DEVICE" ]; then ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed" exit $OCF_ERR_CONFIGURED fi set_blockdevice_var # Normalize instance parameters: # It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". # But the output of `mount` and /proc/mounts do not. if [ -z "$OCF_RESKEY_directory" ]; then if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then ocf_exit_reason "Please specify the directory" exit $OCF_ERR_CONFIGURED fi else MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//') : ${MOUNTPOINT:=/} # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/" # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll # kill the whole system. Is that a good idea? fi # Check to make sure the utilites are found if [ "X${HOSTOS}" != "XOpenBSD" ];then check_binary $MODPROBE check_binary $FUSER fi check_binary $FSCK check_binary $MOUNT check_binary $UMOUNT if [ "$OP" != "monitor" ]; then ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" fi case $OP in status) Filesystem_status exit $? ;; monitor) Filesystem_monitor exit $? ;; validate-all) Filesystem_validate_all exit $? ;; stop) Filesystem_stop exit $? ;; esac CLUSTERSAFE=0 is_option "ro" && CLUSTERSAFE=2 case $FSTYPE in nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs) CLUSTERSAFE=1 # this is kind of safe too ;; # add here CLUSTERSAFE=0 for all filesystems which are not # cluster aware and which, even if when mounted read-only, # could still modify parts of it such as journal/metadata ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) if ocf_is_true "$OCF_RESKEY_force_clones"; then CLUSTERSAFE=2 else CLUSTERSAFE=0 # these are not allowed fi ;; esac if ocf_is_clone; then case $CLUSTERSAFE in 0) ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" ocf_log err "DO NOT RUN IT AS A CLONE!" ocf_log err "Politely refusing to proceed to avoid data corruption." exit $OCF_ERR_CONFIGURED ;; 2) ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!" if ocf_is_true "$OCF_RESKEY_force_clones"; then ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so." else ocf_log warn "But we'll let it run because it is mounted read-only." ocf_log warn "Please make sure that it's meta data is read-only too!" fi ;; esac fi case $OP in start) Filesystem_start ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/ICP b/heartbeat/ICP index c427e7a39..8f187e082 100755 --- a/heartbeat/ICP +++ b/heartbeat/ICP @@ -1,296 +1,296 @@ #!/bin/sh # # # ICP # # Description: Manages an ICP Vortex clustered host drive as an HA resource # # # Author: Lars Marowsky-Bree -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 SuSE Linux AG # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 LinuxSCSI::0:0 ICP::c0h1::/dev/sdb1 LVM::myvolname # # Notice that you will need to get the utility "icpclucon" from the ICP # support to use this. # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_driveid # OCF_RESKEY_device ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # ICPCLUCON=/usr/sbin/icpclucon # usage() { methods=`ICP_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages an ICP Vortex clustered host drive. The 'start' operation reserves the given host drive. The 'stop' operation releses the given host drive. The 'status' operation reports whether the host drive is reserved. The 'monitor' operation reports whether the host drive is reserved. The 'validate-all' operation reports whether OCF instance parameters are valid. The 'methods' operation reports on the methods $0 supports ! } meta_data() { cat < 1.0 Resource script for ICP. It Manages an ICP Vortex clustered host drive as an HA resource. Manages an ICP Vortex clustered host drive The ICP cluster drive ID. ICP cluster drive ID The device name. device END } # # methods: What methods/operations do we support? # ICP_methods() { cat <<-! start stop status monitor methods validate-all meta-data usage ! } ICP_status() { local icp_out icp_out=$($ICPCLUCON -v -status $1) if [ $? -ne 0 ]; then ocf_log "err" "Hostdrive not reserved by us." return $OCF_ERR_GENERIC fi if expr match "$icp_out" \ '.*Drive is reserved by this host.*' >/dev/null 2>&1 ; then ocf_log "info" "Volume $1 is reserved by us." return $OCF_SUCCESS elif expr match "$icp_out" \ '.*Drive is not reserved by any host.*' >/dev/null 2>&1 ; then ocf_log "err" "Volume $1 not reserved by any host." return $OCF_NOT_RUNNING else ocf_log "err" "Unknown output from icpclucon. Assuming we do not have a reservation:" ocf_log "err" "$icp_out" return $OCF_NOT_RUNNING fi } ICP_report_status() { if ICP_status $1 ; then echo "$1: running" return $OCF_SUCCESS else echo "$1: not running" return $OCF_NOT_RUNNING fi } # # Monitor the host drive - does it really seem to be working? # # ICP_monitor() { if ICP_status $1 then return $? else ocf_log "err" "ICP host drive $1 is offline" return $OCF_NOT_RUNNING fi } Clear_bufs() { $BLOCKDEV --flushbufs $1 } # # Enable ICP host drive # ICP_start() { ocf_log "info" "Activating host drive $1" ocf_run $ICPCLUCON -v -reserve $1 if [ $? -ne 0 ]; then ocf_log "info" "Forcing reservation of $1" ocf_run $ICPCLUCON -v -force $1 || return $OCF_ERR_GENERIC fi if ICP_status $1 then : OK # A reservation isn't as prompt as it should be sleep 3 return $OCF_SUCCESS else ocf_log "err" "ICP: $1 was not reserved correctly" return $OCF_ERR_GENERIC fi } # # Release the ICP host drive # ICP_stop() { ocf_log "info" "Releasing ICP host drive $1" ocf_run $ICPCLUCON -v -release $1 || return $OCF_ERR_GENERIC ocf_log "info" "Verifying reservation" if ICP_status $1 ; then ocf_log "err" "ICP: $1 was not released correctly" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } ICP_validate_all() { check_binary $BLOCKDEV check_binary $ICPCLUCON $ICPCLUCON -v -status $driveid >/dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log err "Invalid driveid $driveid" exit $OCF_ERR_ARGS fi if [ ! -b $device ]; then ocf_log err "Device $device is not a block device" exit $OCF_ERR_ARGS fi # Do not know how to check the association of $device with $driveid. return $OCF_SUCCESS } # # 'main' starts here... # if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations do not require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; methods) ICP_methods exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if [ -z "$OCF_RESKEY_driveid" ] then ocf_log err "Please specify OCF_RESKEY_driveid" exit $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_device" ]; then ocf_log err "Please specify OCF_RESKEY_device" exit $OCF_ERR_ARGS fi driveid=$OCF_RESKEY_driveid device=$OCF_RESKEY_device # What kind of method was invoked? case "$1" in start) ICP_validate_all ICP_start $driveid Clear_bufs $device exit $?;; stop) ICP_stop $driveid Clear_bufs $device exit $?;; status) ICP_report_status $driveid exit $?;; monitor) ICP_monitor $driveid exit $?;; validate-all) ICP_validate_all exit $?;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/IPaddr b/heartbeat/IPaddr index 8ada6c4d2..8c9fb20f3 100755 --- a/heartbeat/IPaddr +++ b/heartbeat/IPaddr @@ -1,892 +1,892 @@ #!/bin/sh # # License: GNU General Public License (GPL) -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # # This script manages IP alias IP addresses # # It can add an IP alias, or remove one. # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds an IP alias. # # Surprisingly, the "stop" arg removes one. :-) # # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_lvs_support ( e.g. true, on, 1 ) # OCF_RESKEY_ARP_INTERVAL_MS # OCF_RESKEY_ARP_REPEAT # OCF_RESKEY_ARP_BACKGROUND (e.g. yes ) # OCF_RESKEY_ARP_NETMASK # OCF_RESKEY_local_start_script # OCF_RESKEY_local_stop_script # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs SENDARP=$HA_BIN/send_arp FINDIF=$HA_BIN/findif VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ####################################################################### SYSTYPE="`uname -s`" case "$SYSTYPE" in SunOS) # `uname -r` = 5.9 -> SYSVERSION = 9 SYSVERSION="`uname -r | cut -d. -f 2`" ;; Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac meta_data() { cat < 1.0 This script manages IP alias IP addresses It can add an IP alias, or remove one. Manages virtual IPv4 addresses (portable version) The IPv4 address to be configured in dotted quad notation, for example "192.168.1.1". IPv4 address The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. If you can not assign any static IP address on the interface, modify this kernel parameter: sysctl -w net.ipv4.conf.all.promote_secondaries=1 (or per device) Network interface The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). If unspecified, the script will also try to determine this from the routing table. Netmask Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address You can specify an additional label for your IP address here. Interface label Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Enable support for LVS DR Script called when the IP is released Script called when the IP is released Script called when the IP is added Script called when the IP is added milliseconds between ARPs milliseconds between gratuitous ARPs How many gratuitous ARPs to send out when bringing up a new address repeat count run in background (no longer any reason to do this) run in background netmask for ARP - in nonstandard hexadecimal format. netmask for ARP END exit $OCF_SUCCESS } # The 'ping' command takes highly OS-dependent arguments, so this # function creates a suitable argument list for the host OS's 'ping'. # We use a subset of its functionality: # 1. single packet # 2. reasonable timeout (say 1 second) # # arguments: # $1: IP address to ping # result string: # arguments for ping command # # If more flexibility is needed, they could be specified in the environment # to this function, to adjust the resulting 'ping' arguments. # David Lee May 2007 pingargs() { _baseip=$1 _timeout=1 # seconds _pktcount=1 _systype="`uname -s`" case $_systype in Linux) # Default is perpetual ping: need "-c $_pktcount". # -c count -t timetolive -q(uiet) -n(umeric) -W timeout _pingargs="-c $_pktcount -q -n $_baseip" ;; SunOS) # Default is immediate (or timeout) return. _pingargs="$_baseip $_timeout" ;; *) _pingargs="-c $_pktcount $_baseip" ;; esac echo "$_pingargs" } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # lvs_remove_conflicting_loopback() { ipaddr="$1" ifname="$2" ocf_log info "Removing conflicting loopback $ifname." if echo $ifname > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." fi if [ ! -z "${OCF_RESKEY_local_stop_script}" ]; then if [ -x "${OCF_RESKEY_local_stop_script}" ]; then ${OCF_RESKEY_local_stop_script} $* fi fi delete_interface "$ifname" "$ipaddr" # Forcibly remove the route (if it exists) to the loopback. delete_route "$ipaddr" } # # On Linux systems the (hidden) loopback interface may # need to be restored if it has been taken down previously # by lvs_remove_conflicting_loopback() # lvs_restore_loopback() { ipaddr="$1" if [ ! -s "$VLDIR/$ipaddr" ]; then return fi ifname=`cat "$VLDIR/$ipaddr"` ocf_log info "Restoring loopback IP Address $ipaddr on $ifname." CMD="OCF_RESKEY_cidr_netmask=32 OCF_RESKEY_ip=$1 OCF_RESKEY_nic=$ifname $FINDIF" if NICINFO=`eval $CMD` NICINFO=`echo $NICINFO | tr " " " " | tr -s " "` then netmask_text=`echo "$NICINFO" | cut -f3 -d " "` broadcast=`echo "$NICINFO" | cut -f5 -d " "` else echo "ERROR: $CMD failed (rc=$rc)" exit $OCF_ERR_GENERIC fi add_interface "$ipaddr" "$ifname" "$ifname" $netmask_text $broadcast rm -f "$VLDIR/$ipaddr" } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface_solaris() { ipaddr="$1" $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | while read ifname linkstuff do : ifname = $ifname read inet addr junk : inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done case $ifname in *:*) ;; *) continue;; esac # This doesn't look right for a box with multiple NICs. # It looks like it always selects the first interface on # a machine. Yet, we appear to use the results for this case too... ifname=`echo "$ifname" | sed s'%:$%%'` case $addr in addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;; $ipaddr) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } find_interface_bsd() { $IFCONFIG $IFCONFIG_A_OPT | awk -v ip_addr="$ipaddr" ' /UP,/ && $0 ~ /^[a-z]+[0-9]:/ { if_name=$1; sub(":$","",if_name); } $1 == "inet" && $2 == ip_addr { print if_name exit(0) }' } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface_generic() { ipaddr="$1" $IFCONFIG $IFCONFIG_A_OPT | while read ifname linkstuff do : Read gave us ifname = $ifname read inet addr junk : Read gave us inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done case $ifname in *:*) ifname=`echo $ifname | sed 's/:$//'`;; *) continue;; esac : "comparing $ipaddr to $addr (from ifconfig)" case $addr in addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;; $ipaddr) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface() { ipaddr="$1" case "$SYSTYPE" in SunOS) NIC=`find_interface_solaris $ipaddr`;; *BSD) NIC=`find_interface_bsd $ipaddr`;; *) NIC=`find_interface_generic $ipaddr`;; esac echo $NIC return $OCF_SUCCESS; } # # Find an unused interface/alias name for us to use for new IP alias # The argument is an IP address, and the output # is an aliased interface name (e.g., "eth0:0", "dc0", "le0:0"). # find_free_interface() { NIC="$1" if [ "X$NIC" = "X" ]; then ocf_log err "No free interface found for $OCF_RESKEY_ip" return $OCF_ERR_GENERIC; fi NICBASE="$VLDIR/IPaddr-$NIC" touch "$NICBASE" case "$SYSTYPE" in *BSD) echo $NIC; return $OCF_SUCCESS;; SunOS) j=1 IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \ grep "^$NIC:[0-9]" | sed 's%: .*%%'`;; *) j=0 IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \ grep "^$NIC:[0-9]" | sed 's% .*%%'` TRYADRCNT=`ls "${NICBASE}:"* 2>/dev/null | wc -w | tr -d ' '` if [ -f "${NICBASE}:${TRYADRCNT}" ]; then : OK else j="${TRYADRCNT}" fi ;; esac IFLIST=" `echo $IFLIST` " while [ $j -lt 512 ] do case $IFLIST in *" "$NIC:$j" "*) ;; *) NICLINK="$NICBASE:$j" if ln "$NICBASE" "$NICLINK" 2>/dev/null then echo "$NIC:$j" return $OCF_SUCCESS fi ;; esac j=`expr $j + 1` done return $OCF_ERR_GENERIC } delete_route () { ipaddr="$1" case "$SYSTYPE" in SunOS) return 0;; *BSD) CMD="$ROUTE -n delete -host $ipaddr";; *) CMD="$ROUTE -n del -host $ipaddr";; esac $CMD return $? } delete_interface () { ifname="$1" ipaddr="$2" case "$SYSTYPE" in SunOS) if [ "$SYSVERSION" -ge 8 ] ; then CMD="$IFCONFIG $ifname unplumb" else CMD="$IFCONFIG $ifname 0 down" fi;; Darwin*) CMD="$IFCONFIG $ifname $ipaddr delete";; *BSD) CMD="$IFCONFIG $ifname inet $ipaddr delete";; *) CMD="$IFCONFIG $ifname down";; esac ocf_log info "$CMD" $CMD return $? } add_interface () { ipaddr="$1" iface_base="$2" iface="$3" netmask="$4" broadcast="$5" if [ $# != 5 ]; then ocf_log err "Insufficient arguments to add_interface: $*" exit $OCF_ERR_ARGS fi case "$SYSTYPE" in SunOS) if [ "$SYSVERSION" -ge 8 ] ; then $IFCONFIG $iface plumb rc=$? if [ $rc -ne 0 ] ; then echo "ERROR: '$IFCONFIG $iface plumb' failed." return $rc fi fi # At Solaris 10, this single-command version sometimes broke. # Almost certainly an S10 bug. # CMD="$IFCONFIG $iface inet $ipaddr $text up" # So hack the following workaround: CMD="$IFCONFIG $iface inet $ipaddr" CMD="$CMD && $IFCONFIG $iface netmask $netmask" CMD="$CMD && $IFCONFIG $iface up" ;; *BSD) # netmask is always set to 255.255.255.255 for an alias CMD="$IFCONFIG $iface inet $ipaddr netmask 255.255.255.255 alias";; *) CMD="$IFCONFIG $iface $ipaddr netmask $netmask broadcast $broadcast";; esac # Use "eval $CMD" (not "$CMD"): it might be a chain of two or more commands. ocf_log info "eval $CMD" eval $CMD rc=$? if [ $rc != 0 ]; then echo "ERROR: eval $CMD failed (rc=$rc)" fi return $rc } # # Remove the IP alias for the requested IP address... # ip_stop() { SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" NIC=`find_interface $OCF_RESKEY_ip` if [ -f "$SENDARPPIDFILE" ]; then cat "$SENDARPPIDFILE" | xargs kill rm -f "$SENDARPPIDFILE" fi if [ -z "$NIC" ]; then : Requested interface not in use return $OCF_SUCCESS fi if [ ${OCF_RESKEY_lvs_support} = 1 ]; then case $NIC in lo*) : Requested interface is on loopback return $OCF_SUCCESS;; esac fi delete_route "$OCF_RESKEY_ip" delete_interface "$NIC" "$OCF_RESKEY_ip" rc=$? if [ ${OCF_RESKEY_lvs_support} = 1 ]; then lvs_restore_loopback "$OCF_RESKEY_ip" fi # remove lock file... rm -f "$VLDIR/IPaddr-$NIC" if [ $rc != 0 ]; then ocf_log warn "IP Address $OCF_RESKEY_ip NOT released: rc=$rc" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # Add an IP alias for the requested IP address... # # It could be that we already have taken it, in which case it should # do nothing. # ip_start() { # # Do we already service this IP address? # ip_status_internal if [ $? = $OCF_SUCCESS ]; then # Nothing to do, the IP is already active return $OCF_SUCCESS; fi NIC_unique=`find_free_interface $OCF_RESKEY_nic` if [ -n "$NIC_unique" ]; then : OK got interface [$NIC_unique] for $OCF_RESKEY_ip else return $OCF_ERR_GENERIC fi # This logic is mostly to support LVS (If I understand it correctly) if [ ${OCF_RESKEY_lvs_support} = 1 ]; then NIC_current=`find_interface $OCF_RESKEY_ip` case $NIC_unique in lo*) if [ x"$NIC_unique" = x"$NIC_current" ]; then # Its already "running" and not moving, nothing to do. ocf_log err "Could not find a non-loopback device to move $OCF_RESKEY_ip to" return $OCF_ERR_GENERIC fi;; *) lvs_remove_conflicting_loopback "$OCF_RESKEY_ip" "$NIC_current";; esac fi if [ ! -z "${OCF_RESKEY_local_start_script}" ]; then if [ -x "${OCF_RESKEY_local_start_script}" ]; then ${OCF_RESKEY_local_start_script} $* fi fi add_interface "$OCF_RESKEY_ip" "$OCF_RESKEY_nic" "$NIC_unique" \ "$OCF_RESKEY_cidr_netmask" "$OCF_RESKEY_broadcast" rc=$? if [ $rc != 0 ]; then ocf_log err "Could not add $OCF_RESKEY_ip to $OCF_RESKEY_nic: rc=$rc" return $rc fi # The address is active, now notify others about it using sendarp if [ "$SYSTYPE" = "DarwinBSD" -a "$NIC_unique" = "lo0" ]; then # Darwin can't send ARPs on loopback devices SENDARP="x$SENDARP" # Prevent the binary from being found fi if [ -x $SENDARP ]; then TARGET_INTERFACE=`echo $NIC_unique | sed 's%:.*%%'` SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" ARGS="-i $OCF_RESKEY_ARP_INTERVAL_MS -r $OCF_RESKEY_ARP_REPEAT" ARGS="$ARGS -p $SENDARPPIDFILE $TARGET_INTERFACE $OCF_RESKEY_ip" ARGS="$ARGS auto $OCF_RESKEY_ip $OCF_RESKEY_ARP_NETMASK" ocf_log debug "Sending Gratuitous Arp for $OCF_RESKEY_ip on $NIC_unique [$TARGET_INTERFACE]" case $OCF_RESKEY_ARP_BACKGROUND in yes) ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?" & ) >&2 ;; *) $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?";; esac fi ip_status_internal return $? } ip_status_internal() { NIC=`find_interface "$OCF_RESKEY_ip"` if [ "x$NIC" = x ]; then return $OCF_NOT_RUNNING elif [ "${OCF_RESKEY_lvs_support}" = "1" ]; then case $NIC in lo*) return $OCF_NOT_RUNNING;; *) return $OCF_SUCCESS;; esac else if [ x$OCF_RESKEY_nic != x ]; then simple_OCF_NIC=`echo $OCF_RESKEY_nic | awk -F: '{print $1}'` simple_NIC=`echo $NIC | awk -F: '{print $1}'` if [ $simple_OCF_NIC != $simple_NIC ]; then ocf_log err "$OCF_RESKEY_ip is running an interface ($simple_NIC) instead of the configured one ($simple_OCF_NIC)" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS fi } ip_status() { ip_status_internal rc=$? if [ $rc = $OCF_SUCCESS ]; then echo "running" elif [ $rc = $OCF_NOT_RUNNING ]; then echo "stopped" else echo "unknown" fi return $rc; } # # Determine if this IP address is really being served, or not. # Note that we must distinguish if *we're* serving it locally... # ip_monitor() { ip_status_internal rc=$? if [ $OCF_CHECK_LEVEL = 0 -o $rc != 0 ]; then return $rc fi ocf_log info "Checking IP stack" PINGARGS="`pingargs $OCF_RESKEY_ip`" for j in 1 2 3 4 5 6 7 8 9 10; do MSG=`$PING $PINGARGS 2>&1` if [ $? = 0 ]; then return $OCF_SUCCESS fi done ocf_log err "$MSG" return $OCF_ERR_GENERIC } is_positive_integer() { ocf_is_decimal $1 && [ $1 -ge 1 ] if [ $? = 0 ]; then return 1 fi return 0 } ip_validate_all() { : ${OCF_RESKEY_ARP_BACKGROUND=yes} : ${OCF_RESKEY_ARP_NETMASK=ffffffffffff} : ${OCF_RESKEY_ARP_INTERVAL_MS=500} : ${OCF_RESKEY_ARP_REPEAT=10} check_binary $AWK check_binary $IFCONFIG check_binary $ROUTE check_binary $PING if is_positive_integer $OCF_RESKEY_ARP_INTERVAL_MS then ocf_log err "Invalid parameter value: ARP_INTERVAL_MS [$OCF_RESKEY_ARP_INTERVAL_MS]" return $OCF_ERR_ARGS fi if is_positive_integer $OCF_RESKEY_ARP_REPEAT then ocf_log err "Invalid parameter value: ARP_REPEAT [$OCF_RESKEY_ARP_REPEAT]" return $OCF_ERR_ARGS fi : ${OCF_RESKEY_lvs_support=0} if [ "$SYSTYPE" = "Linux" -o "$SYSTYPE" = "SunOS" ]; then : else if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then ocf_log err "$SYSTYPE does not support LVS" return $OCF_ERR_GENERIC fi fi case $OCF_RESKEY_ip in "") ocf_log err "Required parameter OCF_RESKEY_ip is missing" return $OCF_ERR_CONFIGURED;; [0-9]*.[0-9]*.[0-9]*.*[0-9]) : OK;; *) ocf_log err "Parameter OCF_RESKEY_ip [$OCF_RESKEY_ip] not an IP address" return $OCF_ERR_CONFIGURED;; esac # Unconditionally do this? case $OCF_RESKEY_nic in *:*) OCF_RESKEY_nic=`echo $OCF_RESKEY_nic | sed 's/:.*//'` ;; esac NICINFO=`$FINDIF` rc=$? if [ $rc != 0 ]; then ocf_log err "$FINDIF failed [rc=$rc]." return $OCF_ERR_GENERIC fi tmp=`echo "$NICINFO" | cut -f1` if [ "x$OCF_RESKEY_nic" = "x" ] then ocf_log info "Using calculated nic for ${OCF_RESKEY_ip}: $tmp" OCF_RESKEY_nic=$tmp elif [ x$tmp != x${OCF_RESKEY_nic} ] then ocf_log err "Invalid parameter value: nic [$OCF_RESKEY_nic] Calculated nic: [$tmp]" return $OCF_ERR_ARGS fi tmp=`echo "$NICINFO" | cut -f2 | cut -d ' ' -f2` if [ "x$OCF_RESKEY_cidr_netmask" != "x$tmp" ] then ocf_log info "Using calculated netmask for ${OCF_RESKEY_ip}: $tmp" fi # Always use the calculated version becuase it might have been specified # using CIDR notation which not every system accepts OCF_RESKEY_netmask=$tmp OCF_RESKEY_cidr_netmask=$tmp; export OCF_RESKEY_cidr_netmask tmp=`echo "$NICINFO" | cut -f3 | cut -d ' ' -f2` if [ "x$OCF_RESKEY_broadcast" = "x" ] then ocf_log debug "Using calculated broadcast for ${OCF_RESKEY_ip}: $tmp" OCF_RESKEY_broadcast=$tmp elif [ x$tmp != x${OCF_RESKEY_broadcast} ]; then ocf_log err "Invalid parameter value: broadcast [$OCF_RESKEY_broadcast] Calculated broadcast: [$tmp]" return $OCF_ERR_ARGS fi return $OCF_SUCCESS } usage() { echo $USAGE >&2 return $1 } if [ $# -ne 1 ]; then usage $OCF_ERR_ARGS fi : ${OCF_RESKEY_lvs_support=0} # Normalize the value of lvs_support if [ "${OCF_RESKEY_lvs_support}" = "true" \ -o "${OCF_RESKEY_lvs_support}" = "on" \ -o "${OCF_RESKEY_lvs_support}" = "yes" \ -o "${OCF_RESKEY_lvs_support}" = "1" ]; then OCF_RESKEY_lvs_support=1 else OCF_RESKEY_lvs_support=0 fi # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. So, don't remove this aliasing code! if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi case $1 in meta-data) meta_data;; start) ip_validate_all && ip_start;; stop) ip_stop;; status) ip_status;; monitor) ip_monitor;; validate-all) ip_validate_all;; usage) usage $OCF_SUCCESS;; *) usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr index 33c5be62f..08fd8a623 100755 --- a/heartbeat/IPsrcaddr +++ b/heartbeat/IPsrcaddr @@ -1,503 +1,503 @@ #!/bin/sh # # Description: IPsrcaddr - Preferred source address modification # # Author: John Sutton -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: SCL Internet # # Based on the IPaddr script. # # This script manages the preferred source address associated with # packets which originate on the localhost and are routed through the # default route. By default, i.e. without the use of this script or # similar, these packets will carry the IP of the primary i.e. the # non-aliased interface. This can be a nuisance if you need to ensure # that such packets carry the same IP irrespective of which host in # a redundant cluster they actually originate from. # # It can add a preferred source address, or remove one. # # usage: IPsrcaddr {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds a preferred source address. # # Surprisingly, the "stop" arg removes it. :-) # # NOTES: # # 1) There must be one and not more than 1 default route! Mainly because # I can't see why you should have more than one. And if there is more # than one, we would have to box clever to find out which one is to be # modified, or we would have to pass its identity as an argument. # # 2) The script depends on Alexey Kuznetsov's ip utility from the # iproute aka iproute2 package. # # 3) No checking is done to see if the passed in IP address can # reasonably be associated with the interface on which the default # route exists. So unless you want to deliberately spoof your source IP, # check it! Normally, I would expect that your haresources looks # something like: # # nodename ip1 ip2 ... ipN IPsrcaddr::ipX # # where ipX is one of the ip1 to ipN. # # OCF parameters are as below: # OCF_RESKEY_ipaddress ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0" CMDCHANGE="$IP2UTIL route change to " SYSTYPE="`uname -s`" usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 Resource script for IPsrcaddr. It manages the preferred source address modification. Manages the preferred source address for outgoing IP packets The IP address. IP address The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). Netmask END } errorexit() { ocf_exit_reason "$*" exit $OCF_ERR_GENERIC } # # We can distinguish 3 cases: no preferred source address, a # preferred source address exists which matches that specified, and one # exists but doesn't match that specified. srca_read() returns 1,0,2 # respectively. # # The output of route show is something along the lines of: # # default via X.X.X.X dev eth1 src Y.Y.Y.Y # # where the src clause "src Y.Y.Y.Y" may or may not be present WS="[`echo -en ' \t'`]" OCTET="[0-9]\{1,3\}" IPADDR="\($OCTET\.\)\{3\}$OCTET" SRCCLAUSE="src$WS$WS*\($IPADDR\)" MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)" FINDIF=$HA_BIN/findif # findif needs that to be set export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress srca_read() { # Capture the default route - doublequotes prevent word splitting... DEFROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed" # ... so we can make sure there is only 1 default route [ 1 -eq `echo "$DEFROUTE" | wc -l` ] || \ errorexit "more than 1 default route exists" # But there might still be no default route [ -z "$DEFROUTE" ] && errorexit "no default route exists" # Sed out the source ip address if it exists SRCIP=`echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p"` # and what remains after stripping out the source ip address clause ROUTE_WO_SRC=`echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/"` [ -z "$SRCIP" ] && return 1 [ $SRCIP = $1 ] && return 0 return 2 } # # Add (or change if it already exists) the preferred source address # The exit code should conform to LSB exit codes. # srca_start() { srca_read $1 rc=$? if [ $rc = 0 ]; then rc=$OCF_SUCCESS ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)" else ip route replace $NETWORK dev $INTERFACE src $1 || \ errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed" $CMDCHANGE $ROUTE_WO_SRC src $1 || \ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed" rc=$? fi return $rc } # # Remove (if it exists) the preferred source address. # If one exists but it's not the same as the one specified, that's # an error. Maybe that's the wrong behaviour because if this fails # then when IPaddr releases the associated interface (if there is one) # your default route will also get dropped ;-( # The exit code should conform to LSB exit codes. # srca_stop() { srca_read $1 rc=$? if [ $rc = 1 ]; then # We do not have a preferred source address for now ocf_log info "No preferred source address defined, nothing to stop" exit $OCF_SUCCESS fi [ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address" ip route replace $NETWORK dev $INTERFACE || \ errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed" $CMDCHANGE $ROUTE_WO_SRC || \ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed" return $? } srca_status() { srca_read $1 case $? in 0) echo "OK" return $OCF_SUCCESS;; 1) echo "No preferred source address defined" return $OCF_NOT_RUNNING;; 2) echo "Preferred source address has incorrect value" return $OCF_ERR_GENERIC;; esac } # A not reliable IP address checking function, which only picks up those _obvious_ violations... # # It accepts IPv4 address in dotted quad notation, for example "192.168.1.1" # # 100% confidence whenever it reports "negative", # but may get false "positive" answer. # CheckIP() { ip="$1" case $ip in *[!0-9.]*) #got invalid char false;; .*|*.) #begin or end by ".", which is invalid false;; *..*) #consecutive ".", which is invalid false;; *.*.*.*.*) #four decimal dots, which is too many false;; *.*.*.*) #exactly three decimal dots, candidate, evaluate each field local IFS=. set -- $ip if ( [ $1 -le 254 ] && [ $2 -le 254 ] && [ $3 -le 254 ] && [ $4 -le 254 ] ) then if [ $1 -eq 127 ]; then ocf_exit_reason "IP address [$ip] is a loopback address, thus can not be preferred source address" exit $OCF_ERR_CONFIGURED fi else true fi ;; *) #less than three decimal dots false;; esac return $? # This return is unnecessary, this comment too :) } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface_solaris() { $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | while read ifname linkstuff do : ifname = $ifname read inet addr junk : inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done # This doesn't look right for a box with multiple NICs. # It looks like it always selects the first interface on # a machine. Yet, we appear to use the results for this case too... ifname=`echo "$ifname" | sed s'%:*$%%'` case $addr in addr:$BASEIP) echo $ifname; return $OCF_SUCCESS;; $BASEIP) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface_generic() { local iface=`$IP2UTIL -o -f inet addr show | grep "\ $BASEIP" \ | cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$'` if [ -z "$iface" ]; then return $OCF_ERR_GENERIC else echo $iface return $OCF_SUCCESS fi } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface() { case "$SYSTYPE" in SunOS) IF=`find_interface_solaris $BASEIP` ;; *) IF=`find_interface_generic $BASEIP` ;; esac echo $IF return $OCF_SUCCESS; } ip_status() { BASEIP="$1" case "$SYSTYPE" in Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac case "$SYSTYPE" in *BSD) $IFCONFIG $IFCONFIG_A_OPT | grep "inet.*[: ]$BASEIP " >/dev/null 2>&1 if [ $? = 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi;; Linux|SunOS) IF=`find_interface "$BASEIP"` if [ -z "$IF" ]; then return $OCF_NOT_RUNNING fi case $IF in lo*) ocf_exit_reason "IP address [$BASEIP] is served by loopback, thus can not be preferred source address" exit $OCF_ERR_CONFIGURED ;; *)return $OCF_SUCCESS;; esac ;; *) if [ -z "$IF" ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi;; esac } srca_validate_all() { if [ -z "$OCF_RESKEY_ipaddress" ]; then # usage ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!" return $OCF_ERR_CONFIGURED fi if ! [ "x$SYSTYPE" = "xLinux" ]; then # checks after this point are only relevant for linux. return $OCF_SUCCESS fi check_binary $AWK check_binary $IFCONFIG # The IP address should be in good shape if CheckIP "$ipaddress"; then : else ocf_exit_reason "Invalid IP address [$ipaddress]" return $OCF_ERR_CONFIGURED fi if ocf_is_probe; then return $OCF_SUCCESS fi # We should serve this IP address of course if ip_status "$ipaddress"; then : else ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations do not require the OCF instance parameters to be set case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac ipaddress="$OCF_RESKEY_ipaddress" srca_validate_all rc=$? if [ $rc -ne $OCF_SUCCESS ]; then case $1 in # if we can't validate the configuration during a stop, that # means the resources isn't configured correctly. There's no way # to actually stop the resource in this situation because there's # no way it could have even started. Return success here # to indicate that the resource is not running, otherwise the # stop action will fail causing the node to be fenced just because # of a mis configuration. stop) exit $OCF_SUCCESS;; *) exit $rc;; esac fi findif_out=`$FINDIF -C` rc=$? [ $rc -ne 0 ] && { ocf_exit_reason "[$FINDIF -C] failed" exit $rc } INTERFACE=`echo $findif_out | awk '{print $1}'` NETWORK=`ip route list dev $INTERFACE scope link match $ipaddress|grep -o '^[^ ]*'` case $1 in start) srca_start $ipaddress ;; stop) srca_stop $ipaddress ;; status) srca_status $ipaddress ;; monitor) srca_status $ipaddress ;; validate-all) srca_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # # Version 0.3 2002/11/04 17:00:00 John Sutton # Name changed from IPsrcroute to IPsrcaddr and now reports errors # using ha_log rather than on stderr. # # Version 0.2 2002/11/02 17:00:00 John Sutton # Changed status output to "OK" to satisfy ResourceManager's # we_own_resource() function. # # Version 0.1 2002/11/01 17:00:00 John Sutton # First effort but does the job? # diff --git a/heartbeat/LVM b/heartbeat/LVM index 75cd6fae8..79c279127 100755 --- a/heartbeat/LVM +++ b/heartbeat/LVM @@ -1,713 +1,713 @@ #!/bin/sh # # # LVM # # Description: Manages an LVM volume as an HA resource # # # Author: Alan Robertson -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # This code significantly inspired by the LVM resource # in FailSafe by Lars Marowsky-Bree # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_volgrpname # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { methods=`LVM_methods` methods=`echo $methods | tr ' ' '|'` cat < 1.0 Resource script for LVM. It manages an Linux Volume Manager volume (LVM) as an HA resource. Controls the availability of an LVM Volume Group The name of volume group. Volume group name If set, the volume group will be activated exclusively. This option works one of two ways. If the volume group has the cluster attribute set, then the volume group will be activated exclusively using clvmd across the cluster. If the cluster attribute is not set, the volume group will be activated exclusively using a tag and the volume_list filter. When the tag option is in use, the volume_list in lvm.con must be initialized. This can be as simple as setting 'volume_list = []' depending on your setup. Exclusive activation If "exclusive" is set on a non clustered volume group, this overrides the tag to be used. Exclusive activation tag If set, the volume group will be activated partially even with some physical volumes missing. It helps to set to true when using mirrored logical volumes. Activate VG partially when missing PVs EOF } # # methods: What methods/operations do we support? # LVM_methods() { cat < /dev/null 2>&1 if [ $? -ne 0 ]; then return fi ## # Now check to see if the initrd has been updated. # If not, the machine could boot and activate the VG outside # the control of pacemaker ## if [ "$(find /boot -name *.img -newer /etc/lvm/lvm.conf)" = "" ]; then ocf_log warn "LVM: Improper setup detected" ocf_log warn "* initrd image needs to be newer than lvm.conf" # While dangerous if not done the first time, there are many # cases where we don't simply want to fail here. Instead, # keep warning until the user remakes the initrd - or has # it done for them by upgrading the kernel. # # initrd can be updated using this command. # dracut -H -f /boot/initramfs-$(uname -r).img $(uname -r) # fi } ## # does this vg have our tag ## check_tags() { local owner=`vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' '` if [ -z "$owner" ]; then # No-one owns this VG yet return 1 fi if [ "$OUR_TAG" = "$owner" ]; then # yep, this is ours return 0 fi # some other tag is set on this vg return 2 } strip_tags() { local i for i in `vgs --noheadings -o tags $OCF_RESKEY_volgrpname | sed s/","/" "/g`; do ocf_log info "Stripping tag, $i" # LVM version 2.02.98 allows changing tags if PARTIAL vgchange --deltag $i $OCF_RESKEY_volgrpname done if [ ! -z `vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' '` ]; then ocf_exit_reason "Failed to remove ownership tags from $OCF_RESKEY_volgrpname" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } set_tags() { check_tags case $? in 0) # we already own it. return $OCF_SUCCESS ;; 2) # other tags are set, strip them before setting if ! strip_tags; then return $OCF_ERR_GENERIC fi ;; *) : ;; esac vgchange --addtag $OUR_TAG $OCF_RESKEY_volgrpname if [ $? -ne 0 ]; then ocf_exit_reason "Failed to add ownership tag to $OCF_RESKEY_volgrpname" return $OCF_ERR_GENERIC fi ocf_log info "New tag \"$OUR_TAG\" added to $OCF_RESKEY_volgrpname" return $OCF_SUCCESS } # # Return LVM status (silently) # LVM_status() { local rc=1 loglevel="debug" # Set the log level of the error message if [ "X${2}" = "X" ]; then loglevel="err" if ocf_is_probe; then loglevel="warn" else if [ ${OP_METHOD} = "stop" ]; then loglevel="info" fi fi fi if [ -d /dev/$1 ]; then test "`cd /dev/$1 && ls`" != "" rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "VG $1 with no logical volumes is not supported by this RA!" fi fi if [ $rc -ne 0 ]; then ocf_log $loglevel "LVM Volume $1 is not available (stopped)" rc=$OCF_NOT_RUNNING else case $(get_vg_mode) in 1) # exclusive with tagging. # If vg is running, make sure the correct tag is present. Otherwise we # can not guarantee exclusive activation. if ! check_tags; then ocf_exit_reason "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\"" rc=$OCF_ERR_GENERIC fi # make sure the environment for tags activation is still valid if ! verify_tags_environment; then rc=$OCF_ERR_GENERIC fi # let the user know if their initrd is older than lvm.conf. check_initrd_warning ;; *) : ;; esac fi if [ "X${2}" = "X" ]; then # status call return return $rc fi # Report on LVM volume status to stdout... if [ $rc -eq 0 ]; then echo "Volume $1 is available (running)" else echo "Volume $1 is not available (stopped)" fi return $rc } get_activate_options() { local options="-a" case $(get_vg_mode) in 0) options="${options}ly";; 1) options="${options}y --config activation{volume_list=[\"@${OUR_TAG}\"]}";; 2) options="${options}ey";; esac if ocf_is_true "$OCF_RESKEY_partial_activation" ; then options="${options} --partial" fi # for clones (clustered volume groups), we'll also have to force # monitoring, even if disabled in lvm.conf. if ocf_is_clone; then options="$options --monitor y" fi echo $options } ## # Attempt to deactivate vg cluster wide and then start the vg exclusively ## retry_exclusive_start() { local vgchange_options="$(get_activate_options)" # Deactivate each LV in the group one by one cluster wide set -- $(lvs -o name,attr --noheadings $OCF_RESKEY_volgrpname 2> /dev/null) while [ $# -ge 2 ]; do case $2 in ????ao*) # open LVs cannot be deactivated. return $OCF_ERR_GENERIC;; *) if ! lvchange -an $OCF_RESKEY_volgrpname/$1; then ocf_exit_reason "Unable to perform required deactivation of $OCF_RESKEY_volgrpname/$1 before starting" return $OCF_ERR_GENERIC fi ;; esac shift 2 done ocf_run vgchange $vgchange_options $OCF_RESKEY_volgrpname } # # Enable LVM volume # LVM_start() { local vgchange_options="$(get_activate_options)" local vg=$1 local clvmd=0 # TODO: This MUST run vgimport as well ocf_log info "Activating volume group $vg" if [ "$LVM_MAJOR" -eq "1" ]; then ocf_run vgscan $vg else ocf_run vgscan fi case $(get_vg_mode) in 2) clvmd=1 ;; 1) if ! set_tags; then return $OCF_ERR_GENERIC fi ;; *) : ;; esac if ! ocf_run vgchange $vgchange_options $vg; then if [ $clvmd -eq 0 ]; then return $OCF_ERR_GENERIC fi # Failure to exclusively activate cluster vg.: # This could be caused by a remotely active LV, Attempt # to disable volume group cluster wide and try again. # Allow for some settling sleep 5 if ! retry_exclusive_start; then return $OCF_ERR_GENERIC fi fi if LVM_status $vg; then : OK Volume $vg activated just fine! return $OCF_SUCCESS else ocf_exit_reason "LVM: $vg did not activate correctly" return $OCF_NOT_RUNNING fi } # # Disable the LVM volume # LVM_stop() { local res=$OCF_ERR_GENERIC local vgchange_options="-aln" local vg=$1 if ! vgs $vg > /dev/null 2>&1; then ocf_log info "Volume group $vg not found" return $OCF_SUCCESS fi ocf_log info "Deactivating volume group $vg" case $(get_vg_mode) in 1) vgchange_options="-an" ;; esac for i in $(seq 10) do ocf_run vgchange $vgchange_options $vg res=$? if LVM_status $vg; then ocf_exit_reason "LVM: $vg did not stop correctly" res=1 fi if [ $res -eq 0 ]; then break fi res=$OCF_ERR_GENERIC ocf_log warn "$vg still Active" ocf_log info "Retry deactivating volume group $vg" sleep 1 which udevadm > /dev/null 2>&1 && udevadm settle --timeout=5 done case $(get_vg_mode) in 1) if [ $res -eq 0 ]; then strip_tags res=$? fi ;; esac return $res } # # Check whether the OCF instance parameters are valid # LVM_validate_all() { check_binary $AWK ## # lvmetad is a daemon that caches lvm metadata to improve the # performance of LVM commands. This daemon should never be used when # volume groups exist that are being managed by the cluster. The lvmetad # daemon introduces a response lag, where certain LVM commands look like # they have completed (like vg activation) when in fact the command # is still in progress by the lvmetad. This can cause reliability issues # when managing volume groups in the cluster. For Example, if you have a # volume group that is a dependency for another application, it is possible # the cluster will think the volume group is activated and attempt to start # the application before volume group is really accesible... lvmetad is bad. ## lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1 if [ $? -eq 0 ]; then # for now warn users that lvmetad is enabled and that they should disable it. In the # future we may want to consider refusing to start, or killing the lvmetad daemon. ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process" fi ## # Off-the-shelf tests... ## VGOUT=`vgck ${VOLUME} 2>&1` if [ $? -ne 0 ]; then # Inconsistency might be due to missing physical volumes, which doesn't # automatically mean we should fail. If partial_activation=true then # we should let start try to handle it, or if no PVs are listed as # "unknown device" then another node may have marked a device missing # where we have access to all of them and can start without issue. if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then if vgs -o pv_name --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep -E "unknown device|Couldn't find device|Device mismatch detected" > /dev/null 2>&1; then if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then # We are missing devices and cannot activate partially ocf_exit_reason "Volume group [$VOLUME] has devices missing. Consider partial_activation=true to attempt to activate partially" exit $OCF_ERR_GENERIC else # We are missing devices but are allowed to activate partially. # Assume that caused the vgck failure and carry on ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action." fi fi # else the vg is partial but all devices are accounted for, so another # node must have marked the device missing. Proceed. else # vgck failure was for something other than missing devices ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi fi ## # Does the Volume Group exist? ## if [ "$LVM_MAJOR" = "1" ]; then VGOUT=`vgdisplay ${VOLUME} 2>&1` else VGOUT=`vgdisplay -v ${VOLUME} 2>&1` fi if [ $? -ne 0 ]; then ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi ## # If exclusive activation is not enabled, then # further checking of proper setup is not necessary ## if ! ocf_is_true "$OCF_RESKEY_exclusive"; then return $OCF_SUCCESS; fi ## # Having cloned lvm resources with exclusive vg activation makes no sense at all. ## if ocf_is_clone; then ocf_exit_reason "cloned lvm resources can not be activated exclusively" exit $OCF_ERR_CONFIGURED fi ## # Make sure the cluster attribute is set and clvmd is up when exclusive # activation is enabled. Otherwise we can't exclusively activate the volume group. ## case $(get_vg_mode) in 1) # exclusive activation using tags if ! verify_tags_environment; then exit $OCF_ERR_GENERIC fi ;; 2) # exclusive activation with clvmd ## # verify is clvmd running ## if ! ps -C clvmd > /dev/null 2>&1; then ocf_exit_reason "$OCF_RESKEY_volgrpname has the cluster attribute set, but 'clvmd' is not running" exit $OCF_ERR_GENERIC fi ;; *) : ;; esac return $OCF_SUCCESS } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS;; methods) LVM_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if [ -z "$OCF_RESKEY_volgrpname" ] then ocf_exit_reason "You must identify the volume group name!" exit $OCF_ERR_CONFIGURED fi # Get the LVM version number, for this to work we assume(thanks to panjiam): # # LVM1 outputs like this # # # vgchange --version # vgchange: Logical Volume Manager 1.0.3 # Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10) # # LVM2 and higher versions output in this format # # # vgchange --version # LVM version: 2.00.15 (2004-04-19) # Library version: 1.00.09-ioctl (2004-03-31) # Driver version: 4.1.0 LVM_VERSION=`vgchange --version 2>&1 | \ $AWK '/Logical Volume Manager/ {print $5"\n"; exit; } /LVM version:/ {printf $3"\n"; exit;}'` rc=$? if ( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] ) then ocf_exit_reason "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?" exit $OCF_ERR_INSTALLED fi LVM_MAJOR="${LVM_VERSION%%.*}" VOLUME=$OCF_RESKEY_volgrpname OP_METHOD=$1 if [ -n "$OCF_RESKEY_tag" ]; then OUR_TAG=$OCF_RESKEY_tag fi # What kind of method was invoked? case "$1" in start) LVM_validate_all LVM_start $VOLUME exit $?;; stop) LVM_stop $VOLUME exit $?;; status) LVM_status $VOLUME $1 exit $?;; monitor) LVM_status $VOLUME exit $?;; validate-all) LVM_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/LinuxSCSI b/heartbeat/LinuxSCSI index ce033c00b..89fed6b74 100755 --- a/heartbeat/LinuxSCSI +++ b/heartbeat/LinuxSCSI @@ -1,314 +1,314 @@ #!/bin/sh # # # LinuxSCSI # # Description: Enables/Disables SCSI devices to protect them from being # used by mistake # # # Author: Alan Robertson -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 IBM # # CAVEATS: See the usage message for some important warnings # # usage: ./LinuxSCSI (start|stop|status|monitor|meta-data|validate-all|methods) # # OCF parameters are as below: # OCF_RESKEY_scsi # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 LinuxSCSI:0:0:11 # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### zeropat="[ 0]0" PROCSCSI=/proc/scsi/scsi usage() { cat < 1.0 Deprecation warning: This agent makes use of Linux SCSI hot-plug functionality which has been superseded by SCSI reservations. It is deprecated and may be removed from a future release. See the scsi2reservation and sfex agents for alternatives. -- This is a resource agent for LinuxSCSI. It manages the availability of a SCSI device from the point of view of the linux kernel. It make Linux believe the device has gone away, and it can make it come back again. Enables and disables SCSI devices through the kernel SCSI hot-plug subsystem (deprecated) The SCSI instance to be managed. SCSI instance If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning EOF } scsi_methods() { cat <>$PROCSCSI echo "scsi add-single-device $host $channel $target $lun" >>$PROCSCSI if scsi_status "$1" then return $OCF_SUCCESS else ocf_log err "SCSI device $1 not active!" return $OCF_ERR_GENERIC fi } # # stop: Disable the given SCSI device in the kernel # scsi_stop() { parseinst "$1" # [ $target = error ] && exit 1 echo "scsi remove-single-device $host $channel $target $lun" >>$PROCSCSI if scsi_status "$1" then ocf_log err "SCSI device $1 still active!" return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi } # # status: is the given device now available? # scsi_status() { parseinst "$1" # [ $target = error ] && exit 1 [ $channel -eq 0 ] && channel=$zeropat [ $target -eq 0 ] && target=$zeropat [ $lun -eq 0 ] && lun=$zeropat greppat="Host: *scsi$host *Channel: *$channel *Id: *$target *Lun: *$lun" grep -i "$greppat" $PROCSCSI >/dev/null if [ $? -eq 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } # # validate_all: Check the OCF instance parameters # scsi_validate_all() { parseinst $instance return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then ocf_log err "Parameter number error." usage exit $OCF_ERR_GENERIC fi #if # [ -z "$OCF_RESKEY_scsi" ] && [ "X$1" = "Xmethods" ] #then # scsi_methods # exit #? #fi case $1 in methods) scsi_methods exit $OCF_SUCCESS ;; meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac # Be obnoxious, log deprecation warning on every invocation (unless # suppressed by resource configuration). ocf_deprecated if [ -z "$OCF_RESKEY_scsi" ] then ocf_log err "You have to set a valid scsi id at least!" # usage exit $OCF_ERR_GENERIC fi instance=$OCF_RESKEY_scsi case $1 in start) scsi_start $instance ;; stop) scsi_stop $instance ;; status|monitor) if scsi_status $instance then ocf_log info "SCSI device $instance is running" return $OCF_SUCCESS else ocf_log info "SCSI device $instance is stopped" exit $OCF_NOT_RUNNING fi ;; validate-all) scsi_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/Raid1 b/heartbeat/Raid1 index 7cf658b59..bef2606cf 100755 --- a/heartbeat/Raid1 +++ b/heartbeat/Raid1 @@ -1,556 +1,556 @@ #!/bin/sh # # # License: GNU General Public License (GPL) -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # # Raid1 # Description: Manages a Linux software RAID device on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # RAID patches: http://people.redhat.com/mingo/raid-patches/ # Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3 # Sympathetic Ear: mailto:linux-raid@vger.kernel.org # # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} # # # EXAMPLE config file /etc/raidtab.md0 # This file must exist on both machines! # # raiddev /dev/md0 # raid-level 1 # nr-raid-disks 2 # chunk-size 64k # persistent-superblock 1 # #nr-spare-disks 0 # device /dev/sda1 # raid-disk 0 # device /dev/sdb1 # raid-disk 1 # # EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf) # # DEVICE /dev/sdb1 /dev/sdc1 # ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799 ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } meta_data() { cat < 1.0 This resource agent manages Linux software RAID (MD) devices on a shared storage medium. It uses mdadm(8) to start, stop, and monitor the MD devices. Raidtools are supported, but deprecated. See https://raid.wiki.kernel.org/index.php/Linux_Raid for more information. Manages Linux software RAID (MD) devices on shared storage The RAID configuration file, e.g. /etc/mdadm.conf. RAID config file One or more block devices to use, space separated. Alternatively, set to "auto" to manage all devices specified in raidconf. block device The value for the homehost directive; this is an mdadm feature to protect RAIDs against being activated by accident. It is recommended to create RAIDs managed by the cluster with "homehost" set to a special value, so they are not accidentially auto-assembled by nodes not supposed to own them. Homehost for mdadm If processes or kernel threads are using the array, it cannot be stopped. We will try to stop processes, first by sending TERM and then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL. The lsof(8) program is required to get the list of array users. Of course, the kernel threads cannot be stopped this way. If the processes are critical for data integrity, then set this parameter to false. Note that in that case the stop operation will fail and the node will be fenced. force stop processes using the array Wait until udevd creates a device in the start operation. On a normally loaded host this should happen quickly, but you may be unlucky. If you are not using udev set this to "no". udev Activating the same md RAID array on multiple nodes at the same time will result in data corruption and thus is forbidden by default. A safe example could be an array that is only named identically across all nodes, but is in fact distinct. Only set this to "true" if you know what you are doing! force ability to run as a clone END } udev_settle() { if ocf_is_true $WAIT_FOR_UDEV; then udevadm settle $* fi } list_conf_arrays() { test -f $RAIDCONF || { ocf_exit_reason "$RAIDCONF gone missing!" exit $OCF_ERR_GENERIC } grep ^ARRAY $RAIDCONF | awk '{print $2}' } forall() { local func=$1 local checkall=$2 local mddev rc=0 for mddev in $RAIDDEVS; do $func $mddev rc=$(($rc | $?)) [ "$checkall" = all ] && continue [ $rc -ne 0 ] && return $rc done return $rc } are_arrays_stopped() { local rc mddev for mddev in $RAIDDEVS; do raid1_monitor_one $mddev rc=$? [ $rc -ne $OCF_NOT_RUNNING ] && break done test $rc -eq $OCF_NOT_RUNNING } md_assemble() { local mddev=$1 $MDADM --assemble $mddev --config=$RAIDCONF $MDADM_HOMEHOST udev_settle --exit-if-exists=$mddev } # # START: Start up the RAID device # raid1_start() { local rc raid1_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then # md already online, nothing to do. return $OCF_SUCCESS fi if [ $rc -ne $OCF_NOT_RUNNING ]; then # If the array is in a broken state, this agent doesn't # know how to repair that. ocf_exit_reason "$RAIDDEVS in a broken state; cannot start (rc=$rc)" return $OCF_ERR_GENERIC fi if [ $HAVE_RAIDTOOLS = "true" ]; then # Run raidstart to start up the RAID array $RAIDSTART --configfile $RAIDCONF $MDDEV else forall md_assemble all fi raid1_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS else ocf_exit_reason "Couldn't start RAID for $RAIDDEVS" return $OCF_ERR_GENERIC fi } # # STOP: stop the RAID device # mark_readonly() { local mddev=$1 local rc ocf_log info "Attempting to mark array $mddev readonly" $MDADM --readonly $mddev --config=$RAIDCONF rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to set $mddev readonly (rc=$rc)" fi return $rc } mknod_raid1_stop() { # first create a block device file, then try to stop the # array local rc n tmp_block_file n=`echo $1 | sed 's/[^0-9]*//'` if ! ocf_is_decimal "$n"; then ocf_log warn "could not get the minor device number from $1" return 1 fi tmp_block_file="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-`basename $1`" rm -f $tmp_block_file ocf_log info "block device file $1 missing, creating one in order to stop the array" mknod $tmp_block_file b 9 $n $MDADM --stop $tmp_block_file --config=$RAIDCONF --wait-clean -W rc=$? rm -f $tmp_block_file return $rc } raid1_stop_one() { ocf_log info "Stopping array $1" if [ -b "$1" ]; then $MDADM --stop $1 --config=$RAIDCONF --wait-clean -W && return else # newer mdadm releases can stop arrays when given the # basename; try that first $MDADM --stop `basename $1` --config=$RAIDCONF --wait-clean -W && return # otherwise create a block device file mknod_raid1_stop $1 fi } get_users_pids() { local mddev=$1 local outp l ocf_log debug "running lsof to list $mddev users..." outp=`lsof $mddev | tail -n +2` echo "$outp" | awk '{print $2}' | sort -u echo "$outp" | while read l; do ocf_log warn "$l" done } stop_raid_users() { local pids pids=`forall get_users_pids all | sort -u` if [ -z "$pids" ]; then ocf_log warn "lsof reported no users holding arrays" return 2 else ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids fi } stop_arrays() { if [ $HAVE_RAIDTOOLS = "true" ]; then $RAIDSTOP --configfile $RAIDCONF $MDDEV else forall raid1_stop_one all fi } showusers() { local disk for disk; do if have_binary lsof; then ocf_log info "running lsof to list $disk users..." ocf_run -warn lsof $disk fi if [ -d /sys/block/$disk/holders ]; then ocf_log info "ls -l /sys/block/$disk/holders" ocf_run -warn ls -l /sys/block/$disk/holders fi done } raid1_stop() { local rc # See if the MD device is already cleanly stopped: if are_arrays_stopped; then return $OCF_SUCCESS fi # Turn off raid if ! stop_arrays; then if ocf_is_true $FORCESTOP; then if have_binary lsof; then stop_raid_users case $? in 2) false;; *) stop_arrays;; esac else ocf_log warn "install lsof(8) to list users holding the disk" false fi else false fi fi rc=$? if [ $rc -ne 0 ]; then ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)" showusers $RAIDDEVS if [ $HAVE_RAIDTOOLS != "true" ]; then forall mark_readonly all fi return $OCF_ERR_GENERIC fi if are_arrays_stopped; then return $OCF_SUCCESS fi ocf_exit_reason "RAID $RAIDDEVS still active after stop command!" return $OCF_ERR_GENERIC } # # monitor: a less noisy status # raid1_monitor_one() { local mddev=$1 local md=`echo $mddev | sed 's,/dev/,,'` local rc local TRY_READD=0 local pbsize # check if the md device exists first # but not if we are in the stop operation # device existence is important only for the running arrays if [ "$__OCF_ACTION" != "stop" -a ! -b $mddev ]; then ocf_log info "$mddev is not a block device" return $OCF_NOT_RUNNING fi if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then ocf_log info "$md not found in /proc/mdstat" return $OCF_NOT_RUNNING fi if [ $HAVE_RAIDTOOLS != "true" ]; then $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$? case $rc in 0) ;; 1) ocf_log warn "$mddev has at least one failed device." TRY_READD=1 ;; 2) ocf_exit_reason "$mddev has failed." return $OCF_ERR_GENERIC ;; 4) ocf_exit_reason "mdadm failed on $mddev." return $OCF_ERR_GENERIC ;; *) ocf_exit_reason "mdadm returned an unknown result ($rc)." return $OCF_ERR_GENERIC ;; esac fi if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \ -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then ocf_log info "Attempting recovery sequence to re-add devices on $mddev:" $MDADM $mddev --fail detached $MDADM $mddev --remove failed $MDADM $mddev --re-add missing # TODO: At this stage, there's nothing to actually do # here. Either this worked or it did not. fi pbsize=`(blockdev --getpbsz $mddev || stat -c "%o" $mddev) 2>/dev/null` if [ -z "$pbsize" ]; then ocf_log warn "both blockdev and stat could not get the block size (will use 4k)" pbsize=4096 # try with 4k fi if ! dd if=$mddev count=1 bs=$pbsize of=/dev/null \ iflag=direct >/dev/null 2>&1 ; then ocf_exit_reason "$mddev: I/O error on read" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } raid1_monitor() { forall raid1_monitor_one } # # STATUS: is the raid device online or offline? # raid1_status() { # See if the MD device is online local rc raid1_monitor rc=$? if [ $rc -ne $OCF_SUCCESS ]; then echo "stopped" else echo "running" fi return $rc } raid1_validate_all() { return $OCF_SUCCESS } PROC_CLEANUP_TIME=3 if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac RAIDCONF="$OCF_RESKEY_raidconf" MDDEV="$OCF_RESKEY_raiddev" FORCESTOP="${OCF_RESKEY_force_stop:-1}" WAIT_FOR_UDEV="${OCF_RESKEY_udev:-1}" if [ -z "$RAIDCONF" ] ; then ocf_exit_reason "Please set OCF_RESKEY_raidconf!" exit $OCF_ERR_CONFIGURED fi if [ ! -r "$RAIDCONF" ] ; then ocf_exit_reason "Configuration file [$RAIDCONF] does not exist, or can not be opend!" exit $OCF_ERR_INSTALLED fi if [ -z "$MDDEV" ] ; then ocf_exit_reason "Please set OCF_RESKEY_raiddev to the Raid device you want to control!" exit $OCF_ERR_CONFIGURED fi if ocf_is_clone && ! ocf_is_true "$OCF_RESKEY_force_clones"; then ocf_exit_reason "md RAID arrays are NOT safe to run as a clone!" ocf_log err "Please read the comment on the force_clones parameter." exit $OCF_ERR_CONFIGURED fi if ocf_is_true $WAIT_FOR_UDEV && ! have_binary udevadm; then if [ "$__OCF_ACTION" = "start" ]; then ocf_log warn "either install udevadm or set udev to false" ocf_log info "setting udev to false!" fi WAIT_FOR_UDEV=0 fi if ! ocf_is_true $WAIT_FOR_UDEV; then export MDADM_NO_UDEV=1 fi if ocf_is_true $FORCESTOP && ! have_binary lsof; then ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..." fi HAVE_RAIDTOOLS=false if have_binary $MDADM >/dev/null 2>&1 ; then if [ -n "$OCF_RESKEY_homehost" ]; then MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}" else MDADM_HOMEHOST="" fi else check_binary $RAIDSTART HAVE_RAIDTOOLS=true fi if [ $HAVE_RAIDTOOLS = true ]; then if [ "$MDDEV" = "auto" ]; then ocf_exit_reason "autoconf supported only with mdadm!" exit $OCF_ERR_INSTALLED elif [ `echo $MDDEV|wc -w` -gt 1 ]; then ocf_exit_reason "multiple devices supported only with mdadm!" exit $OCF_ERR_INSTALLED fi fi if [ "$MDDEV" = "auto" ]; then RAIDDEVS=`list_conf_arrays` else RAIDDEVS="$MDDEV" fi # At this stage, # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, # otherwise we have raidtools (raidstart and raidstop) # Look for how we are called case "$1" in start) raid1_start ;; stop) raid1_stop ;; status) raid1_status ;; monitor) raid1_monitor ;; validate-all) raid1_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/SendArp b/heartbeat/SendArp index b67404f24..9d0b48726 100755 --- a/heartbeat/SendArp +++ b/heartbeat/SendArp @@ -1,267 +1,267 @@ #!/bin/sh # # # Copyright (c) 2006, Huang Zhen # Converting original heartbeat RA to OCF RA. # # Copyright (C) 2004 Horms # # Based on IPaddr2: Copyright (C) 2003 Tuomo Soini # # License: GNU General Public License (GPL) -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # # This script send out gratuitous Arp for an IP address # # It can be used _instead_ of the IPaddr2 or IPaddr resource # to send gratuitous arp for an IP address on a given interface, # without adding the address to that interface. I.e. if for # some reason you want to send gratuitous arp for addresses # managed by IPaddr2 or IPaddr on an additional interface. # # OCF parameters are as below: # OCF_RESKEY_ip # OCF_RESKEY_nic # # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs SENDARP=$HA_BIN/send_arp SENDARPPIDDIR=${HA_RSCTMP} BASEIP="$OCF_RESKEY_ip" INTERFACE="$OCF_RESKEY_nic" RESIDUAL="" SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP" BACKGROUND=${OCF_RESKEY_background:-"yes"} # Set default values : ${ARP_INTERVAL_MS=200} # milliseconds between ARPs : ${ARP_REPEAT=5} # repeat count : ${ARP_BACKGROUND=$BACKGROUND} # no to run in foreground : ${ARP_NETMASK=ffffffffffff} # netmask for ARP ####################################################################### sendarp_meta_data() { cat < 1.0 This RA can be used _instead_ of the IPaddr2 or IPaddr RA to send gratuitous ARP for an IP address on a given interface, without adding the address to that interface. For example, if for some resaon you wanted to send gratuitous ARP for addresses managed by IPaddr2 or IPaddr on an additional interface. Broadcasts unsolicited ARP announcements The IP address for sending ARP packet. IP address The NIC for sending ARP packet. NIC Send ARPs in background. Set to false if you want to test if sending ARPs succeeded. Send ARPs in background END } ####################################################################### sendarp_usage() { cat < 1.0 Resource script for ServeRAID. It enables/disables shared ServeRAID merge groups. Enables and disables shared ServeRAID merge groups The adapter number of the ServeRAID adapter. serveraid The logical drive under consideration. mergegroup END } ServeRAID_methods() { cat <<-! start stop status validate-all methods usage meta-data ! } ServeRAIDSCSI="/proc/scsi/ips" IPS=ipssend proc_scsi=/proc/scsi/scsi parseinst() { sr_adapter=error sr_mergegroup=error hostid=error sr_logicaldrivenumber=error if [ $# -ne 2 ] then ocf_log err "Invalid ServeRAID instance: $*" exit $OCF_ERR_ARGS fi PerlScript='next unless /^Host/; $_ .= <>.<>; print "$1 " if /SERVERAID/ and /Proces/ and /scsi(\d+)/' # Get the list of host ids of the ServeRAID host adapters hostlist=`$PERL -ne "${PerlScript}" <$proc_scsi` # Figure the host id of the desired ServeRAID adapter hostid=`echo $hostlist | cut -d' ' -f$1` if [ ! -f "$ServeRAIDSCSI/$hostid" ] then ocf_log err "No such ServeRAID adapter: $1" exit $OCF_ERR_ARGS fi case $2 in [1-8]);; *) ocf_log err "Invalid Shared Merge Group Number: $2" exit $OCF_ERR_ARGS;; esac sr_adapter=$1 sr_mergegroup=$2 CheckRaidLevel return $? } SRLogicalDriveConfig() { $IPS getconfig $sr_adapter ld } MergeGroupToSCSI_ID() { PerlScript="while (<>) { /logical drive number *([0-9]+)/i && (\$ld=\$1); /part of merge group *: *$sr_mergegroup *\$/i && print \$ld - 1, \"\n\"; }" ID=`SRLogicalDriveConfig | $PERL -e "$PerlScript"` case $ID in [0-9]*) echo "$ID"; return 0;; *) return 1;; esac } MergeGroupRaidLevel() { PerlScript="while (<>) { /RAID level *: *([0-9]+[A-Za-z]*)/i && (\$ld=\$1); /part of merge group *: *$sr_mergegroup *\$/i && print \$ld, \"\n\"; }" Level=`SRLogicalDriveConfig | $PERL -e "$PerlScript"` case $Level in ?*) echo "$Level"; return 0;; *) return 1;; esac } CheckRaidLevel() { RAIDlevel=`MergeGroupRaidLevel` case $RAIDlevel in *5*) ocf_log err "ServeRAID device $sr_adapter $sr_mergegroup is RAID level $RAIDlevel" ocf_log err "This level of ServeRAID RAID is not supported for failover by the firmware." exit $OCF_ERR_GENERIC;; esac return $OCF_SUCCESS } ReleaseSCSI() { targetid=`MergeGroupToSCSI_ID` echo "${SCSI}remove-single-device $hostid 0 $targetid 0" > $proc_scsi } AddSCSI() { targetid=`MergeGroupToSCSI_ID` echo "${SCSI}add-single-device $hostid 0 $targetid 0" > $proc_scsi } # # start: Enable the given ServeRAID device # ServeRAID_start() { if ServeRAID_status $serveraid $mergegroup then ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running." return $OCF_SUCCESS else if # # Normally we do a MERGE PARTNER, but if we still own the drive for # some reason, then we'll need to do a MERGE OWN instead... # out=`$IPS MERGE $sr_adapter $sr_mergegroup PARTNER 2>&1` if [ $? -eq $srsuccess ] then ocf_log info "$out" else ocf_run $IPS MERGE $sr_adapter $sr_mergegroup OWN fi then : OK All is well! targetid=`MergeGroupToSCSI_ID` sr_logicaldrivenumber=`expr $targetid + 1` #run $IPS SYNCH $sr_adapter $sr_logicaldrivenumber & # This version of the SYNCH command requires the 6.10 or later # ServeRAID support CD. # To avoid issues when called by lrmd, redirect stdout->stderr. # Use () to create a subshell to make the redirection be synchronized. ( ocf_run $IPS SYNCH $sr_adapter $sr_mergegroup & ) >&2 AddSCSI else return $OCF_ERR_GENERIC fi fi if ServeRAID_status "$@" then return $OCF_SUCCESS else ocf_log err "ServeRAID device $1 not active!" exit $OCF_ERR_GENERIC fi } # # stop: Disable the given ServeRAID device # ServeRAID_stop() { parseinst "$@" ReleaseSCSI if ocf_run $IPS UNMERGE $sr_adapter $sr_mergegroup then : UNMERGE $sr_adapter $sr_mergegroup worked fi if ServeRAID_status "$@" then ocf_log err "ServeRAID device $* is still active!" return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi } # # status: is the given device now available? # ServeRAID_status() { parseinst "$@" # # The output we're looking for # Part of merge group : 2 # SRLogicalDriveConfig \ | grep -i "part of merge group[ ]*: *$sr_mergegroup *\$" >/dev/null } # # validate_all: are the OCF instance parameters valid? # ServeRAID_validate_all() { check_binary $PERL # parseinst() will do all the work... parseinst "$@" return $? } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; # # methods: What methods do we support? # methods) ServeRAID_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if ( [ -z "$OCF_RESKEY_serveraid" ] || [ -z "$OCF_RESKEY_mergegroup" ] ) then ocf_log err "You have to set the OCF_RESKEY_serveraid and OCF_RESKEY_mergegroup\n enviroment virables before running $0 !" # usage exit $OCF_ERR_GENERIC fi : Right Number of arguments.. serveraid=$OCF_RESKEY_serveraid mergegroup=$OCF_RESKEY_mergegroup # Look for the start, stop, status, or methods calls... case "$1" in stop) ServeRAID_stop $serveraid $mergegroup exit $?;; start) ServeRAID_start $serveraid $mergegroup exit $?;; status|monitor) if ServeRAID_status $serveraid $mergegroup then ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running." exit $OCF_SUCCESS else ocf_log debug "ServeRAID merge group $serveraid $mergegroup is stopped." exit $OCF_NOT_RUNNING fi exit $?;; validate-all) ServeRAID_validate_all $serveraid $mergegroup exit $?;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/VIPArip b/heartbeat/VIPArip index 01c6c994f..12804dffb 100755 --- a/heartbeat/VIPArip +++ b/heartbeat/VIPArip @@ -1,302 +1,302 @@ #!/bin/sh # # License: GNU General Public License (GPL) -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # Author: Huang Zhen # Copyright (c) 2006 International Business Machines # # Virtual IP Address by RIP2 protocol. # This script manages IP alias in different subnet with quagga/ripd. # It can add an IP alias, or remove one. # # The quagga package should be installed to run this RA # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds an IP alias. # Surprisingly, the "stop" arg removes one. :-) # # OCF parameters are as below # OCF_RESKEY_ip The IP address in different subnet # OCF_RESKEY_nic The nic for broadcast the route information # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs RIPDCONF=$HA_RSCTMP/VIPArip-ripd.conf ZEBRA=/usr/sbin/zebra RIPD=/usr/sbin/ripd USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ####################################################################### meta_data() { cat < 1.0 Virtual IP Address by RIP2 protocol. This script manages IP alias in different subnet with quagga/ripd. It can add an IP alias, or remove one. Manages a virtual IP address through RIP2 The IPv4 address in different subnet, for example "192.168.1.1". The IP address in different subnet The nic for broadcast the route information. The ripd uses this nic to broadcast the route informaton to others The nic for broadcast the route information Absolute path to the zebra binary. zebra binary Absolute path to the ripd binary. ripd binary END exit $OCF_SUCCESS } usage() { echo $USAGE >&2 } new_config_file() { echo new_config_file $1 $2 $3 cat >$RIPDCONF < $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF } add_ip() { echo add_ip $1 sed "s/ip_tag/ip_tag\naccess-list private permit $1\/32/g" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF } del_ip() { echo del_ip $1 sed "/$1/d" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF if $GREP "access-list private permit" $RIPDCONF>/dev/null then echo some other IP is running reload_config else stop_quagga echo remove $RIPDCONF rm $RIPDCONF fi } add_nic() { echo add_nic $1 if $GREP "network $1" $RIPDCONF >/dev/null then echo the nic is already in the config file else sed "s/nic_tag/nic_tag\n no passive-interface $1\n network $1\n distribute-list private out $1\n distribute-list private in $1/g" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF fi } reload_config() { echo reload_config echo $RIPDCONF: cat $RIPDCONF echo killall -SIGHUP ripd killall -SIGHUP ripd } start_quagga() { echo start_quagga echo $RIPDCONF: cat $RIPDCONF echo $ZEBRA -d $ZEBRA -d echo $RIPD -d -f $RIPDCONF $RIPD -d -f $RIPDCONF } stop_quagga() { echo stop_quagga echo $RIPDCONF: cat $RIPDCONF echo killall -SIGTERM ripd killall -SIGTERM ripd echo killall -SIGTERM zebra killall -SIGTERM zebra } start_rip_ip() { echo start_rip_ip check_params if [ x"$OCF_RESKEY_nic" = x ] then echo OCF_RESKEY_nic is null, set to eth0 OCF_RESKEY_nic="eth0" fi status_rip_ip case $? in $OCF_SUCCESS) ocf_log info "already running" exit $OCF_SUCCESS ;; $OCF_NOT_RUNNING) ;; *) ocf_log info "state undefined, stopping first" stop_rip_ip ;; esac $IP2UTIL addr add $OCF_RESKEY_ip/32 dev lo if [ -f "$RIPDCONF" ] then # there is a config file, add new data(IP,nic,metric) # to the existing config file. add_ip $OCF_RESKEY_ip add_nic $OCF_RESKEY_nic set_metric 1 reload_config echo sleep 3 sleep 3 set_metric 3 reload_config else new_config_file $OCF_RESKEY_ip $OCF_RESKEY_nic 1 start_quagga echo sleep 3 sleep 3 set_metric 3 reload_config fi return $OCF_SUCCESS } stop_rip_ip() { echo stop_rip_ip check_params status_rip_ip if [ $? = $OCF_NOT_RUNNING ] then exit $OCF_SUCCESS fi $IP2UTIL addr del $OCF_RESKEY_ip dev lo echo sleep 2 sleep 2 del_ip $OCF_RESKEY_ip return $OCF_SUCCESS } status_rip_ip() { check_params if $IP2UTIL addr | $GREP $OCF_RESKEY_ip >/dev/null then if $GREP $OCF_RESKEY_ip $RIPDCONF >/dev/null then if pidof ripd >/dev/null then return $OCF_SUCCESS fi fi return $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi [ x != x"$OCF_RESKEY_zebra_binary" ] && ZEBRA=$OCF_RESKEY_zebra_binary [ x != x"$OCF_RESKEY_ripd_binary" ] && RIPD=$OCF_RESKEY_ripd_binary case $1 in start) start_rip_ip;; stop) stop_rip_ip;; status) status_rip_ip;; monitor) status_rip_ip;; validate-all) check_binary $IP2UTIL exit $OCF_SUCCESS;; meta-data) meta_data;; usage) usage; exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain index 188b7c3ab..0b614f5f3 100755 --- a/heartbeat/VirtualDomain +++ b/heartbeat/VirtualDomain @@ -1,842 +1,842 @@ #!/bin/sh # -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # Resource Agent for domains managed by the libvirt API. # Requires a running libvirt daemon (libvirtd). # # (c) 2008-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all} # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_migration_downtime_default=0 OCF_RESKEY_migration_speed_default=0 OCF_RESKEY_force_stop_default=0 OCF_RESKEY_autoset_utilization_cpu_default="true" OCF_RESKEY_autoset_utilization_hv_memory_default="true" OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 )) OCF_RESKEY_CRM_meta_timeout_default=90000 OCF_RESKEY_save_config_on_stop_default=false OCF_RESKEY_sync_config_on_stop_default=false : ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}} : ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}} : ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} : ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} : ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} : ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}} : ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}} : ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}} : ${OCF_RESKEY_sync_config_on_stop=${OCF_RESKEY_sync_config_on_stop_default}} if ocf_is_true ${OCF_RESKEY_sync_config_on_stop}; then OCF_RESKEY_save_config_on_stop="true" fi ####################################################################### ## I'd very much suggest to make this RA use bash, ## and then use magic $SECONDS. ## But for now: NOW=$(date +%s) usage() { echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}" } VirtualDomain_meta_data() { cat < 1.1 Resource agent for a virtual domain (a.k.a. domU, virtual machine, virtual environment etc., depending on context) managed by libvirtd. Manages virtual domains through the libvirt virtualization framework Absolute path to the libvirt configuration file, for this virtual domain. Virtual domain configuration file Hypervisor URI to connect to. See the libvirt documentation for details on supported URI formats. The default is system dependent. Determine the system's default uri by running 'virsh --quiet uri'. Hypervisor URI Always forcefully shut down ("destroy") the domain on stop. The default behavior is to resort to a forceful shutdown only after a graceful shutdown attempt has failed. You should only set this to true if your virtual domain (or your virtualization backend) does not support graceful shutdown. Always force shutdown on stop Transport used to connect to the remote hypervisor while migrating. Please refer to the libvirt documentation for details on transports available. If this parameter is omitted, the resource will use libvirt's default transport to connect to the remote hypervisor. Remote hypervisor transport Define max downtime during live migration in milliseconds Live migration downtime Define live migration speed per resource in MiB/s Live migration speed Use a dedicated migration network. The migration URI is composed by adding this parameters value to the end of the node name. If the node name happens to be an FQDN (as opposed to an unqualified host name), insert the suffix immediately prior to the first period (.) in the FQDN. At the moment Qemu/KVM and Xen migration via a dedicated network is supported. Note: Be sure this composed host name is locally resolveable and the associated IP is reachable through the favored network. See also the migrate_options parameter below. Migration network host name suffix Extra virsh options for the guest live migration. You can also specify here --migrateuri if the calculated migrate URI is unsuitable for your environment. If --migrateuri is set then migration_network_suffix and migrateport are effectively ignored. Use "%n" as the placeholder for the target node name. Please refer to the libvirt documentation for details on guest migration. live migrate options To additionally monitor services within the virtual domain, add this parameter with a list of scripts to monitor. Note: when monitor scripts are used, the start and migrate_from operations will complete only when all monitor scripts have completed successfully. Be sure to set the timeout of these operations to accommodate this delay. space-separated list of monitor scripts If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it into the CPU utilization of the resource when the monitor is executed. Enable auto-setting the CPU utilization of the resource If set true, the agent will detect the number of *Max memory* from virsh, and put it into the hv_memory utilization of the resource when the monitor is executed. Enable auto-setting the hv_memory utilization of the resource This port will be used in the qemu migrateuri. If unset, the port will be a random highport. Port for migrateuri Changes to a running VM's config are normally lost on stop. This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter. Save running VM's config back to its config file Setting this automatically enables save_config_on_stop. When enabled this parameter instructs the RA to call csync2 -x to synchronize the file to all nodes. csync2 must be properly set up for this to work. Save running VM's config back to its config file Path to the snapshot directory where the virtual machine image will be stored. When this parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot directory when stopped. If on start a state file is present for the domain, the domain will be restored to the same state it was in right before it stopped last. This option is incompatible with the 'force_stop' option. Restore state on start/stop EOF } set_util_attr() { local attr=$1 val=$2 local cval outp cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) if [ $? -ne 0 ] && [ -z "$cval" ]; then crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -e "not connected" > /dev/null 2>&1 if [ $? -eq 0 ]; then ocf_log debug "Unable to set utilization attribute, cib is not available" return fi fi if [ "$cval" != "$val" ]; then outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) || ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp" fi } update_utilization() { local dom_cpu dom_mem if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/CPU\(s\)/{print $2}') test -n "$dom_cpu" && set_util_attr cpu $dom_cpu fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}') test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" fi } get_emulator() { local emulator="" emulator=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') if [ -z "$emulator" ] && [ -e "$EMULATOR_STATE" ]; then emulator=$(cat $EMULATOR_STATE) fi if [ -z "$emulator" ]; then emulator=$(cat ${OCF_RESKEY_config} | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') fi if [ -n "$emulator" ]; then basename $emulator fi } update_emulator_cache() { local emulator emulator=$(get_emulator) if [ -n "$emulator" ]; then echo $emulator > $EMULATOR_STATE fi } # attempt to check domain status outside of libvirt using the emulator process pid_status() { local rc=$OCF_ERR_GENERIC local emulator=$(get_emulator) # An emulator is not required, so only report message in debug mode local loglevel="debug" if ocf_is_probe; then loglevel="notice" fi case "$emulator" in qemu-kvm|qemu-dm|qemu-system-*) rc=$OCF_NOT_RUNNING ps awx | grep -E "[q]emu-(kvm|dm|system).*-name $DOMAIN_NAME " > /dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi ;; libvirt_lxc) rc=$OCF_NOT_RUNNING ps awx | grep -E "[l]ibvirt_lxc.*-name $DOMAIN_NAME " > /dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi ;; # This can be expanded to check for additional emulators *) # We may be running xen with PV domains, they don't # have an emulator set. try xl list or xen-lists if have_binary xl; then rc=$OCF_NOT_RUNNING xl list $DOMAIN_NAME >/dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi elif have_binary xen-list; then rc=$OCF_NOT_RUNNING xen-list $DOMAIN_NAME 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi else ocf_log $loglevel "Unable to determine emulator for $DOMAIN_NAME" fi ;; esac if [ $rc -eq $OCF_SUCCESS ]; then ocf_log debug "Virtual domain $DOMAIN_NAME is currently running." elif [ $rc -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running." fi return $rc } VirtualDomain_status() { local try=0 rc=$OCF_ERR_GENERIC status="no state" while [ "$status" = "no state" ]; do try=$(($try + 1 )) status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z') case "$status" in *"error:"*"domain not found"|*"error:"*"failed to get domain"*|"shut off") # shut off: domain is defined, but not started, will not happen if # domain is created but not defined # "Domain not found" or "failed to get domain": domain is not defined # and thus not started ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)" rc=$OCF_NOT_RUNNING ;; running|paused|idle|blocked|"in shutdown") # running: domain is currently actively consuming cycles # paused: domain is paused (suspended) # idle: domain is running but idle # blocked: synonym for idle used by legacy Xen versions # in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed. ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_SUCCESS ;; ""|*"failed to "*"connect to the hypervisor"*|"no state") # Empty string may be returned when virsh does not # receive a reply from libvirtd. # "no state" may occur when the domain is currently # being migrated (on the migration target only), or # whenever virsh can't reliably obtain the domain # state. status="no state" if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then # During the stop operation, we want to bail out # quickly, so as to be able to force-stop (destroy) # the domain if necessary. ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out." return $OCF_ERR_GENERIC; elif [ "$__OCF_ACTION" = "monitor" ]; then pid_status rc=$? if [ $rc -ne $OCF_ERR_GENERIC ]; then # we've successfully determined the domains status outside of libvirt return $rc fi else # During all other actions, we just wait and try # again, relying on the CRM/LRM to time us out if # this takes too long. ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying." fi sleep 1 ;; *) # any other output is unexpected. ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!" sleep 1 ;; esac done return $rc } # virsh undefine removes configuration files if they are in # directories which are managed by libvirt. such directories # include also subdirectories of /etc (for instance # /etc/libvirt/*) which may be surprising. VirtualDomain didn't # include the undefine call before, hence this wasn't an issue # before. # # There seems to be no way to find out which directories are # managed by libvirt. # verify_undefined() { local tmpf if virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null | grep -wqs "$DOMAIN_NAME" then tmpf=$(mktemp -t vmcfgsave.XXXXXX) if [ ! -r "$tmpf" ]; then ocf_log warn "unable to create temp file, disk full?" # we must undefine the domain virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 else cp -p $OCF_RESKEY_config $tmpf virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 [ -f $OCF_RESKEY_config ] || cp -f $tmpf $OCF_RESKEY_config rm -f $tmpf fi fi } VirtualDomain_start() { local snapshotimage if VirtualDomain_status; then ocf_log info "Virtual domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi snapshotimage="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" if [ -n "$OCF_RESKEY_snapshot" -a -f "$snapshotimage" ]; then virsh restore $snapshotimage if [ $? -eq 0 ]; then rm -f $snapshotimage return $OCF_SUCCESS fi ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory." return $OCF_ERR_GENERIC fi # Make sure domain is undefined before creating. # The 'create' command guarantees that the domain will be # undefined on shutdown, but requires the domain to be undefined. # if a user defines the domain # outside of this agent, we have to ensure that the domain # is restored to an 'undefined' state before creating. verify_undefined virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config} rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi while ! VirtualDomain_monitor; do sleep 1 done return $OCF_SUCCESS } force_stop() { local out ex translate local status=0 ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." out=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1) ex=$? translate=$(echo $out|tr 'A-Z' 'a-z') echo >&2 "$translate" case $ex$translate in *"error:"*"domain is not running"*|*"error:"*"domain not found"*|\ *"error:"*"failed to get domain"*) : ;; # unexpected path to the intended outcome, all is well [!0]*) ocf_exit_reason "forced stop failed" return $OCF_ERR_GENERIC ;; 0*) while [ $status != $OCF_NOT_RUNNING ]; do VirtualDomain_status status=$? done ;; esac return $OCF_SUCCESS } sync_config(){ ocf_log info "Syncing $DOMAIN_NAME config file with csync2 -x ${OCF_RESKEY_config}" if ! csync2 -x ${OCF_RESKEY_config}; then ocf_log warn "Syncing ${OCF_RESKEY_config} failed."; fi } save_config(){ CFGTMP=$(mktemp -t vmcfgsave.XXX) virsh $VIRSH_OPTIONS dumpxml --inactive --security-info ${DOMAIN_NAME} > ${CFGTMP} if [ -s ${CFGTMP} ]; then if ! cmp -s ${CFGTMP} ${OCF_RESKEY_config}; then if virt-xml-validate ${CFGTMP} domain 2>/dev/null ; then ocf_log info "Saving domain $DOMAIN_NAME to ${OCF_RESKEY_config}. Please make sure it's present on all nodes or sync_config_on_stop is on." if cat ${CFGTMP} > ${OCF_RESKEY_config} ; then ocf_log info "Saved $DOMAIN_NAME domain's configuration to ${OCF_RESKEY_config}." if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then sync_config fi else ocf_log warn "Moving ${CFGTMP} to ${OCF_RESKEY_config} failed." fi else ocf_log warn "Domain $DOMAIN_NAME config failed to validate after dump. Skipping config update." fi fi else ocf_log warn "Domain $DOMAIN_NAME config has 0 size. Skipping config update." fi rm -f ${CFGTMP} } VirtualDomain_stop() { local i local status local shutdown_timeout local needshutdown=1 VirtualDomain_status status=$? case $status in $OCF_SUCCESS) if ocf_is_true $OCF_RESKEY_force_stop; then # if force stop, don't bother attempting graceful shutdown. force_stop return $? fi ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}." if [ -n "$OCF_RESKEY_snapshot" ]; then virsh save $DOMAIN_NAME "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" if [ $? -eq 0 ]; then needshutdown=0 else ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop" fi fi # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi # issue the shutdown if save state didn't shutdown for us if [ $needshutdown -eq 1 ]; then # Issue a graceful shutdown request virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} fi # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $NOW -lt $shutdown_timeout ]; do VirtualDomain_status status=$? case $status in $OCF_NOT_RUNNING) # This was a graceful shutdown. return $OCF_SUCCESS ;; $OCF_SUCCESS) # Domain is still running, keep # waiting (until shutdown_timeout # expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac NOW=$(date +%s) done ;; $OCF_NOT_RUNNING) ocf_log info "Domain $DOMAIN_NAME already stopped." return $OCF_SUCCESS esac # OK. Now if the above graceful shutdown hasn't worked, kill # off the domain with destroy. If that too does not work, # have the LRM time us out. force_stop } mk_migrateuri() { local target_node local migrate_target local hypervisor target_node="$OCF_RESKEY_CRM_meta_migrate_target" # A typical migration URI via a special migration network looks # like "tcp://bar-mig:49152". The port would be randomly chosen # by libvirt from the range 49152-49215 if omitted, at least since # version 0.7.4 ... if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}" # Hostname might be a FQDN migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") case $hypervisor in qemu) # For quiet ancient libvirt versions a migration port is needed # and the URI must not contain the "//". Newer versions can handle # the "bad" URI. echo "tcp:${migrate_target}:${OCF_RESKEY_migrateport}" ;; xen) echo "xenmigr://${migrate_target}" ;; *) ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}." ;; esac fi } VirtualDomain_migrate_to() { local rc local target_node local remoteuri local transport_suffix local migrateuri local migrate_opts local migrate_pid target_node="$OCF_RESKEY_CRM_meta_migrate_target" if VirtualDomain_status; then # Find out the remote hypervisor to connect to. That is, turn # something like "qemu://foo:9999/system" into # "qemu+tcp://bar:9999/system" if [ -n "${OCF_RESKEY_migration_transport}" ]; then transport_suffix="+${OCF_RESKEY_migration_transport}" fi # User defined migrateuri or do we make one? migrate_opts="$OCF_RESKEY_migrate_options" if echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then migrateuri=`echo "$migrate_opts" | sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g"` migrate_opts=`echo "$migrate_opts" | sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\2/"` else migrateuri=`mk_migrateuri` fi # Scared of that sed expression? So am I. :-) remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi # Live migration speed limit if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})." virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed} fi # OK, we know where to connect to. Now do the actual migration. ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)." virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri & migrate_pid=${!} # Live migration downtime interval # Note: You can set downtime only while live migration is in progress if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then sleep 2 ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})." virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime} fi wait ${migrate_pid} rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc" return $OCF_ERR_GENERIC else ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded." return $OCF_SUCCESS fi else ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } VirtualDomain_migrate_from() { while ! VirtualDomain_monitor; do sleep 1 done ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded." # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi return $OCF_SUCCESS } VirtualDomain_monitor() { # First, check the domain status. If that returns anything other # than $OCF_SUCCESS, something is definitely wrong. VirtualDomain_status rc=$? if [ ${rc} -eq ${OCF_SUCCESS} ]; then # OK, the generic status check turned out fine. Now, if we # have monitor scripts defined, run them one after another. for script in ${OCF_RESKEY_monitor_scripts}; do script_output="$($script 2>&1)" script_rc=$? if [ ${script_rc} -ne ${OCF_SUCCESS} ]; then # A monitor script returned a non-success exit # code. Stop iterating over the list of scripts, log a # warning message, and propagate $OCF_ERR_GENERIC. ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}" rc=$OCF_ERR_GENERIC break else ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}" fi done fi update_emulator_cache update_utilization # Save configuration on monitor as well, so we will have a better chance of # having fresh and up to date config files on all nodes. if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi return ${rc} } VirtualDomain_validate_all() { if ocf_is_true $OCF_RESKEY_force_stop && [ -n "$OCF_RESKEY_snapshot" ]; then ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together." return $OCF_ERR_CONFIGURED fi # check if we can read the config file (otherwise we're unable to # deduce $DOMAIN_NAME from it, see below) if [ ! -r $OCF_RESKEY_config ]; then if ocf_is_probe; then ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe." elif [ "$__OCF_ACTION" = "stop" ]; then ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped." else ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or not readable." fi return $OCF_ERR_INSTALLED fi if [ -z $DOMAIN_NAME ]; then ocf_exit_reason "Unable to determine domain name." return $OCF_ERR_INSTALLED fi # Check if csync2 is available when config tells us we might need it. if ocf_is_true $OCF_RESKEY_sync_config_on_stop; then check_binary csync2 fi # Check if migration_speed is a decimal value if ! ocf_is_decimal ${OCF_RESKEY_migration_speed}; then ocf_exit_reason "migration_speed has to be a decimal value" return $OCF_ERR_CONFIGURED fi # Check if migration_downtime is a decimal value if ! ocf_is_decimal ${OCF_RESKEY_migration_downtime}; then ocf_exit_reason "migration_downtime has to be a decimal value" return $OCF_ERR_CONFIGURED fi } VirtualDomain_getconfig() { # Grab the virsh uri default, but only if hypervisor isn't set : ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)} # Set options to be passed to virsh: VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet" # Retrieve the domain name from the xml file. DOMAIN_NAME=`egrep '[[:space:]]*.*[[:space:]]*$' ${OCF_RESKEY_config} 2>/dev/null | sed -e 's/[[:space:]]*\(.*\)<\/name>[[:space:]]*$/\1/'` EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state" } OCF_REQUIRED_PARAMS="config" OCF_REQUIRED_BINARIES="virsh sed" ocf_rarun $* diff --git a/heartbeat/WAS b/heartbeat/WAS index a46cdd9be..3c7469328 100755 --- a/heartbeat/WAS +++ b/heartbeat/WAS @@ -1,572 +1,572 @@ #!/bin/sh # # # WAS # # Description: Manages a Websphere Application Server as an HA resource # # # Author: Alan Robertson -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_config # (WAS-configuration file, used for the single server edition of WAS) # OCF_RESKEY_port # (WAS--port-number, used for the advanced edition of WAS) ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### WASDIR=/opt/WebSphere/AppServer if [ ! -d $WASDIR ] then WASDIR=/usr/WebSphere/AppServer fi STARTTIME=300 # 5 minutes DEFAULT_WASPORTS="9080" # # WASBIN=$WASDIR/bin DEFAULT=$WASDIR/config/server-cfg.xml # # Print usage message # usage() { methods=`WAS_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-END usage: $0 ($methods) For the single server edition of WAS, you have to set the following enviroment virable: OCF_RESKEY_config (WAS-configuration file) For the advanced edition of WAS, you have to set the following enviroment virable: OCF_RESKEY_port (WAS--port-number) $0 manages a Websphere Application Server (WAS) as an HA resource The 'start' operation starts WAS. The 'stop' operation stops WAS. The 'status' operation reports whether WAS is running The 'monitor' operation reports whether the WAS seems to be working (httpd also needs to be working for this case) The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_config or OCF_RESKEY_port) is valid The 'methods' operation reports on the methods $0 supports This is known to work with the Single Server edition of Websphere, and is believed to work with the Advanced edition too. Since the Advanced Edition has no configuration file (it's in a the database) you need to give a port number instead of a configuration file for this config parameter. The default configuration file for the single server edition is: $DEFAULT The default snoop-port for the advanced edition is: $DEFAULT_WASPORTS The start and stop operations must be run as root. The status operation will report a pid of "-" for the WAS root process using unless it is run as root. If you don't have xmllint on your system, parsing of WAS configuration files is very primitive. In this case, the port specification we need from the XML config file has to be on the same line as the first part of the tag. We run servlet/snoop on the first transport port listed in the config file for the "monitor" operation. END } meta_data() { cat < 1.0 Resource script for WAS. It manages a Websphere Application Server (WAS) as an HA resource. Manages a WebSphere Application Server instance The WAS-configuration file. configration file The WAS-(snoop)-port-number. port END } # # Reformat the XML document in a sort of canonical form # if we can. If we don't have xmllint, we just cat it out # and hope for the best ;-) # xmlcat() { if [ "X$XMLcat" = X ] then XMLcat=`which xmllint 2>/dev/null` if [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] then XMLcat=cat else XMLcat="$XMLcat --recover --format" fi fi for j in "$@" do ${XMLcat} "$j" done } # #This is a bit skanky, but it works anyway... # # # # # # It's not really skanky if we can find xmllint on the system, because it # reformats tags so they are all on one line, which is all we we need... # # # Get the numbers of the ports WAS should be listening on... # # If we don't have xmllint around, then the applicationserver and the # port= specification have to be on the same line in the XML config file. # GetWASPorts() { case $1 in [0-9]*) echo "$1" | tr ',' '\012';; *) xmlcat $1 | grep -i 'transports.*applicationserver:HTTPTransport' | grep port= | sed -e 's%.*port= *"* *%%' \ -e 's%[^0-9][^0-9]*.*$%%' # Delete up to port=, throw away optional quote and optional # white space. # Throw away everything after the first non-digit. # This should leave us the port number all by itself... esac } # # We assume that the first port listed in the # is the one we should run servlet/snoop on. # GetWASSnoopPort() { GetWASPorts "$@" | head -n1 } # # Return information on the processname/id for the WAS ports # # pid/java is the expected output. Several lines, one per port... # # WASPortInfo() { pat="" once=yes PortCount=0 for j in $* do case $pat in "") pat="$j";; *) pat="$pat|$j";; esac PortCount=`expr $PortCount + 1` done netstat -ltnp 2>/dev/null| egrep -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%' } # # Return the number of WAS ports which are open # CheckWASPortsInUse() { count=`WASPortInfo "$@" | wc -l` echo $count } # # Return the pid(s) of the processes that have WAS ports open # WASPIDs() { WASPortInfo "$@" | sort -u | cut -f1 -d/ } # # The version of ps that returns all processes and their (long) args # It's only used by WAS_procs, which isn't used for anything ;-) # ps_long() { ps axww } # # The total set of WAS processes (single server only) # WAS_procs() { ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1 } # # methods: What methods/operations do we support? # WAS_methods() { cat <<-! start stop status methods validate-all meta-data usage ! if have_binary $WGET then echo monitor fi } # # Return WAS status (silently) # WAS_status() { WASPorts=`GetWASPorts $1` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) false;; *) true;; esac } # # Report on WAS status to stdout... # WAS_report_status() { WASPorts=`GetWASPorts $1` PortCount=`echo $WASPorts | wc -w` PortCount=`echo $PortCount` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) ocf_log debug "WAS: server $1 is stopped."; return $OCF_NOT_RUNNING;; *) pids=`WASPIDs $WASPorts` if [ $PortsInUse -ge $PortCount ] then ocf_log debug "WAS: server $1 is running (pid" $pids "et al)." else ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports." fi return $OCF_SUCCESS;; esac } # # Monitor WAS - does it really seem to be working? # # For this we invoke the snoop applet via wget. # # This is actually faster than WAS_status above... # WAS_monitor() { trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0 tmpfile=`maketempfile` || return 1 SnoopPort=`GetWASSnoopPort $1` output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/servlet/snoop 2>&1` rc=$? if [ $rc -eq 0 ] then if grep -i 'user-agent.*Wget' $tmpfile >/dev/null then : OK else ocf_log "err" "WAS: $1: no user-agent from snoop application" rc=$OCF_ERR_GENERIC fi else ocf_log "err" "WAS: $1: wget failure: $output" rc=$OCF_ERR_GENERIC fi return $rc } # # Start WAS instance # WAS_start() { # Launch Arguments: # # -configFile # -nodeName # -serverName # -oltEnabled # -oltHost # -oltPort # -debugEnabled # -jdwpPort # -debugSource # -serverTrace # -serverTraceFile # -script [] # -platform # -noExecute # -help if [ -x $WASBIN/startServer.sh ] then cmd="$WASBIN/startServer.sh -configFile $1" else cmd="$WASBIN/startupServer.sh" fi if ocf_run $cmd then if WAS_wait_4_start $STARTTIME "$@" then #true return $OCF_SUCCESS else ocf_log "err" "WAS server $1 did not start correctly" return $OCF_ERR_GENERIC fi else #false return $OCF_ERR_GENERIC fi } # # Wait for WAS to actually start up. # # It seems to take between 30 and 60 seconds for it to # start up on a trivial WAS instance. # WAS_wait_4_start() { max=$1 retries=0 shift while [ $retries -lt $max ] do if WAS_status "$@" then return $OCF_SUCCESS else sleep 1 fi retries=`expr $retries + 1` done WAS_status "$@" } # # Shut down WAS # WAS_stop() { # They don't return good return codes... # And, they seem to allow anyone to stop WAS (!) if [ -x $WASBIN/stopServer.sh ] then ocf_run $WASBIN/stopServer.sh -configFile $1 else WASPorts=`GetWASPorts $1` kill `WASPIDs $WASPorts` fi if WAS_status $1 then ocf_log "err" "WAS: $1 did not stop correctly" #false return $OCF_ERR_GENERIC else #true return $OCF_SUCCESS fi } # # Check if the port is valid # CheckPort() { ocf_is_decimal "$1" && [ $1 -gt 0 ] } WAS_validate_all() { if [ -x $WASBIN/startServer.sh ]; then # $arg should be config file if [ ! -f "$arg" ]; then ocf_log err "Configuration file [$arg] does not exist" exit $OCF_ERR_ARGS fi # $arg should specify a valid port number at the very least local WASPorts=`GetWASPorts $arg` if [ -z "$WASPorts" ]; then ocf_log err "No port number specified in configuration file [$arg]" exit $OCF_ERR_CONFIGURED fi local port local have_valid_port=false for port in $WASPorts; do if CheckPort $port; then have_valid_port=true break fi done if [ "false" = "$have_valid_port" ]; then ocf_log err "No valid port number specified in configuration file [$arg]" exit $OCF_ERR_CONFIGURED fi elif [ -x $WASBIN/startupServer.sh ]; then # $arg should be port number if CheckPort "$arg"; then ocf_log err "Port number is required but [$arg] is not valid port number" exit $OCF_ERR_ARGS fi else # Do not know hot to validate_all ocf_log warn "Do not know how to validate-all, assuming validation OK" return $OCF_SUCCESS fi } # # 'main' starts here... # if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # # Supply default configuration parameter(s) # if ( [ -z $OCF_RESKEY_config ] && [ -z $OCF_RESKEY_port ] ) then if [ -f $DEFAULT ] then arg=$DEFAULT else arg=$DEFAULT_WASPORTS fi elif [ ! -z $OCF_RESKEY_config ] then arg=$OCF_RESKEY_config else arg=$OCF_RESKEY_port fi if [ ! -f $arg ] then case $arg in [0-9]*) ;; # ignore port numbers... *) ocf_log "err" "WAS configuration file $arg does not exist!" usage exit $OCF_ERR_ARGS;; esac fi # What kind of method was invoked? case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; start) WAS_start $arg exit $?;; stop) WAS_stop $arg exit $?;; status) WAS_report_status $arg exit $?;; monitor) WAS_monitor $arg exit $?;; validate-all) WAS_validate_all $arg exit $?;; methods) WAS_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/WAS6 b/heartbeat/WAS6 index 560ae602f..b3e7e2245 100755 --- a/heartbeat/WAS6 +++ b/heartbeat/WAS6 @@ -1,546 +1,546 @@ #!/bin/sh # WAS6 # # Description: Manages a Websphere Application Server as an HA resource # # # Author: Ru Xiang Min -# Support: linux-ha@lists.linux-ha.org +# Support: users@clusterlabs.org # License: GNU General Public License (GPL) # Copyright: (C) 2006 International Business Machines China, Ltd., Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/IBM/WebSphere/AppServer/profiles/default/config/cells/Node01Cell/nodes/Node01/serverindex.xml # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_profile # (WAS profile name, used for the single server edition of WAS6) ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### WAS_DIR=/opt/IBM/WebSphere/AppServer if [ ! -d $WAS_DIR ] then WAS_DIR=/usr/IBM/WebSphere/AppServer fi STARTTIME=300 # 5 minutes DEFAULT_WASPORTS="9080" # # WAS_BIN=$WAS_DIR/bin DEFAULT=default # # Print usage message # usage() { methods=`WAS_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-END usage: $0 ($methods) For the single server edition of WAS6, you have to set the following enviroment virable: OCF_RESKEY_profile (WAS profile name) $0 manages a Websphere Application Server 6(WAS6) as an HA resource The 'start' operation starts WAS6. The 'stop' operation stops WAS6. The 'status' operation reports whether WAS6 is running The 'monitor' operation reports whether the WAS6 seems to be working (httpd also needs to be working for this case) The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_profileName ) is valid The 'methods' operation reports on the methods $0 supports This is known to work with the Single Server edition of Websphere. The default profile name for the single server edition is: $DEFAULT The start and stop operations must be run as root. The status operation will report a pid of "-" for the WAS root process using unless it is run as root. If you don't have xmllint on your system, parsing of WAS configuration files is very primitive. We run servlet/snoop on the seventh transport port listed in the config file for the "monitor" operation. END } meta_data() { cat < 1.0 Resource script for WAS6. It manages a Websphere Application Server (WAS6) as an HA resource. Manages a WebSphere Application Server 6 instance The WAS profile name. profile name END } # # Reformat the XML document in a sort of canonical form # if we can. If we don't have xmllint, we just cat it out # and hope for the best ;-) # xmlcat() { if [ "X$XMLcat" = X ] then XMLcat=`which xmllint 2>/dev/null` if [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] then XMLcat=cat else XMLcat="$XMLcat --recover --format" fi fi for j in "$@" do ${XMLcat} "$j" done } # #This is a bit skanky, but it works anyway... # # It's not really skanky if we can find xmllint on the system, because it # reformats tags so they are all on one line, which is all we we need... # # # Get the numbers of the ports WAS should be listening on... # # If we don't have xmllint around, then the applicationserver and the # port= specification have to be on the same line in the XML config file. # GetWASPorts() { case $1 in [0-9]*) echo "$1" | tr ',' '\012';; *) xmlcat ${WAS_DIR}/profiles/${WAS_PROFILE_NAME}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml | grep port= | sed -e 's%.*port= *"* *%%' \ -e 's%[^0-9][^0-9]*.*$%%' # Delete up to port=, throw away optional quote and optional # white space. # Throw away everything after the first non-digit. # This should leave us the port number all by itself... esac } # # We assume that the seventh port listed in the serverindex.xml # is the one we should run servlet/snoop on. # GetWASSnoopPort() { GetWASPorts "$@" | sed -n '7p' } # # Return information on the processname/id for the WAS ports # # pid/java is the expected output. Several lines, one per port... # # WASPortInfo() { pat="" once=yes PortCount=0 for j in $* do case $pat in "") pat="$j";; *) pat="$pat|$j";; esac PortCount=`expr $PortCount + 1` done netstat -ltnp 2>/dev/null| egrep -i "($pat) .*LISTEN" | sed 's%.*LISTEN *%%' } # # Return the number of WAS ports which are open # CheckWASPortsInUse() { count=`WASPortInfo "$@" | wc -l` echo $count } # # Return the pid(s) of the processes that have WAS ports open # WASPIDs() { WASPortInfo "$@" | sort -u | cut -f1 -d/ } # # The version of ps that returns all processes and their (long) args # It's only used by WAS_procs, which isn't used for anything ;-) # ps_long() { ps axww } # # The total set of WAS processes (single server only) # WAS_procs() { ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1 } # # methods: What methods/operations do we support? # WAS_methods() { cat <<-! start stop status methods validate-all meta-data usage ! if have_binary $WGET then echo " monitor" fi } # # Return WAS status (silently) # WAS_status() { WASPorts=`GetWASPorts $1` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) false;; *) true;; esac } # # Report on WAS status to stdout... # WAS_report_status() { WASPorts=`GetWASPorts $1` PortCount=`echo $WASPorts | wc -w` PortCount=`echo $PortCount` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) ocf_log debug "WAS: server $1 is stopped."; return $OCF_NOT_RUNNING;; *) pids=`WASPIDs $WASPorts` if [ $PortsInUse -ge $PortCount ] then ocf_log debug "WAS: server $1 is running (pid" $pids "et al)." else ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports." fi return $OCF_SUCCESS;; esac } # # Monitor WAS - does it really seem to be working? # # For this we invoke the snoop applet via wget. # # This is actually faster than WAS_status above... # WAS_monitor() { trap '[ -z "$tmpfile" || rmtempfile "$tmpfile"' 0 tmpfile=`maketempfile` || exit 1 SnoopPort=`GetWASSnoopPort $1` output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/snoop 2>&1` rc=$? if [ $rc -eq 0 ] then if grep -i 'user-agent.*Wget' $tmpfile >/dev/null then : OK else ocf_log "err" "WAS: $1: no user-agent from snoop application" rc=$OCF_ERR_GENERIC fi else ocf_log "err" "WAS: $1: wget failure: $output" rc=$OCF_ERR_GENERIC fi return $rc } # # Start WAS instance # WAS_start() { # Launch Arguments: # -nowait # -quiet # -logfile # -replacelog # -trace # -script [