diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem index 61b191064..6d6aad91f 100755 --- a/heartbeat/Filesystem +++ b/heartbeat/Filesystem @@ -1,1131 +1,1131 @@ #!/bin/sh # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Filesystem # Description: Manages a Filesystem on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # # usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data} # # OCF parameters are as below: # OCF_RESKEY_device # OCF_RESKEY_directory # OCF_RESKEY_fstype # OCF_RESKEY_options # OCF_RESKEY_statusfile_prefix # OCF_RESKEY_run_fsck # OCF_RESKEY_fast_stop # OCF_RESKEY_force_clones # #OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0 # Or a -U or -L option for mount, or an NFS mount specification #OCF_RESKEY_directory : the mount point for the filesystem #OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2 #OCF_RESKEY_options : options to be given to the mount command via -o #OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring #OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no #OCF_RESKEY_fast_stop : fast stop: yes(default)/no #OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts # for each brick in a glusterfs setup # # # This assumes you want to manage a filesystem on a shared (SCSI) bus, # on a replicated device (such as DRBD), or a network filesystem (such # as NFS or Samba). # # Do not put this filesystem in /etc/fstab. This script manages all of # that for you. # # NOTE: If 2 or more nodes mount the same file system read-write, and # that file system is not designed for that specific purpose # (such as GFS or OCFS2), and is not a network file system like # NFS or Samba, then the filesystem is going to become # corrupted. 
# # As a result, you should use this together with the stonith # option and redundant, independent communications paths. # # If you don't do this, don't blame us when you scramble your # disk. ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults DFLT_STATUSDIR=".Filesystem_status/" # Variables used by multiple methods HOSTOS=`uname` # The status file is going to an extra directory, by default # prefix=${OCF_RESKEY_statusfile_prefix} : ${prefix:=$DFLT_STATUSDIR} suffix="${OCF_RESOURCE_INSTANCE}" [ "$OCF_RESKEY_CRM_meta_clone" ] && suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone" suffix="${suffix}_`uname -n`" STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|meta-data} EOT } meta_data() { cat < 1.1 Resource script for Filesystem. It manages a Filesystem on a shared storage medium. The standard monitor operation of depth 0 (also known as probe) checks if the filesystem is mounted. If you want deeper tests, set OCF_CHECK_LEVEL to one of the following values: 10: read first 16 blocks of the device (raw read) This doesn't exercise the filesystem at all, but the device on which the filesystem lives. This is noop for non-block devices such as NFS, SMBFS, or bind mounts. 20: test if a status file can be written and read The status file must be writable by root. This is not always the case with an NFS mount, as NFS exports usually have the "root_squash" option set. In such a setup, you must either use read-only monitoring (depth=10), export with "no_root_squash" on your NFS server, or grant world write permissions on the directory where the status file is to be placed. Manages filesystem mounts The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. 
block device The mount point for the filesystem. mount point The type of filesystem to be mounted. filesystem type Any extra options to be given as -o options to mount. For bind mounts, add "bind" here and set fstype to "none". We will do the right thing for options such as "bind,ro". options The prefix to be used for a status file for resource monitoring with depth 20. If you don't specify this parameter, all status files will be created in a separate directory. status file prefix Specify how to decide whether to run fsck or not. "auto" : decide to run fsck depending on the fstype(default) "force" : always run fsck regardless of the fstype "no" : do not run fsck ever. run_fsck Normally, we expect no users of the filesystem and the stop operation to finish quickly. If you cannot control the filesystem users easily and want to prevent the stop action from failing, then set this parameter to "no" and add an appropriate timeout for the stop operation. fast stop -The usage of a clone setup for local filesystems is forbidden +The use of a clone setup for local filesystems is forbidden by default. For special setups like glusterfs, cloning a mount -of a local device with a filesystem like ext4 or xfs, independently -on several nodes is a valid use-case. +of a local device with a filesystem like ext4 or xfs independently +on several nodes is a valid use case. Only set this to "true" if you know what you are doing! allow running as a clone, regardless of filesystem type END } # # Make sure the kernel does the right thing with the FS buffers # This function should be called after unmounting and before mounting # It may not be necessary in 2.4 and later kernels, but it shouldn't hurt # anything either... # # It's really a bug that you have to do this at all... # flushbufs() { if have_binary $BLOCKDEV ; then if [ "$blockdevice" = "yes" ] ; then $BLOCKDEV --flushbufs $1 return $? fi fi return 0 } # Take advantage of /etc/mtab if present, use portable mount command # otherwise. 
Normalize format to "dev mountpoint fstype". is_bind_mount() { echo "$options" | grep -w bind >/dev/null 2>&1 } list_mounts() { local inpf="" if [ -e "/proc/mounts" ] && ! is_bind_mount; then inpf=/proc/mounts elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then inpf=/etc/mtab fi if [ "$inpf" ]; then cut -d' ' -f1,2,3 < $inpf else $MOUNT | cut -d' ' -f1,3,5 fi } determine_blockdevice() { if [ $blockdevice = "yes" ]; then return fi # Get the current real device name, if possible. # (specified devname could be -L or -U...) case "$FSTYPE" in nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|none) ;; *) DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1` if [ -b "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Lists all filesystems potentially mounted under a given path, # excluding the path itself. list_submounts() { list_mounts | grep " $1/" | cut -d' ' -f2 | sort -r } ocfs2_del_cache() { if [ -e "$_OCFS2_uuid_cache" ]; then rm -f $_OCFS2_uuid_cache fi } ocfs2_cleanup() { # We'll never see the post-stop notification. We're gone now, # have unmounted, and thus should remove the membership. # # (Do so regardless of whether we were unmounted already, # because the admin might have manually unmounted but not # cleared up the membership directory. Bad admin, no cookie.) # if [ ! -d "$OCFS2_FS_ROOT" ]; then ocf_log info "$OCFS2_FS_ROOT: Filesystem membership already gone." else ocf_log info "$OCFS2_FS_ROOT: Removing membership directory." rm -rf $OCFS2_FS_ROOT/ fi ocfs2_del_cache } ocfs2_fetch_uuid() { mounted.ocfs2 -d $DEVICE|tail -1|awk '{print $3}'|tr -d -- -|tr '[a-z]' '[A-Z]' } ocfs2_set_uuid() { _OCFS2_uuid_cache="$HA_RSCTMP/Filesystem.ocfs2_uuid.$(echo $DEVICE|tr / .)" if [ "$OP" != "start" -a -e "$_OCFS2_uuid_cache" ]; then # Trust the cache. 
OCFS2_UUID=$(cat $_OCFS2_uuid_cache 2>/dev/null) return 0 fi OCFS2_UUID=$(ocfs2_fetch_uuid) if [ -n "$OCFS2_UUID" -a "$OCFS2_UUID" != "UUID" ]; then # UUID valid: echo $OCFS2_UUID > $_OCFS2_uuid_cache return 0 fi # Ok, no UUID still, but that's alright for stop, because it # very likely means we never got started - if [ "$OP" = "stop" ]; then ocf_log warn "$DEVICE: No UUID; assuming never started!" OCFS2_UUID="UUID_NOT_SET" return 0 fi # Everything else - wrong: ocf_log err "$DEVICE: Could not determine ocfs2 UUID for device." exit $OCF_ERR_GENERIC } ocfs2_init() { # Check & initialize the OCFS2 specific variables. # This check detects whether the special/legacy hooks to # integrate OCFS2 with user-space clustering on SLES10 need to # be activated. # Newer kernels >= 2.6.28, with OCFS2+openAIS+Pacemaker, do # not need this: OCFS2_SLES10="" if [ "X$HA_cluster_type" = "Xcman" ]; then return elif [ "X$HA_cluster_type" != "Xopenais" ]; then if grep -q "SUSE Linux Enterprise Server 10" /etc/SuSE-release >/dev/null 2>&1 ; then OCFS2_SLES10="yes" ocf_log info "$DEVICE: Enabling SLES10 compatibility mode for OCFS2." else ocf_log err "$DEVICE: ocfs2 is not compatible with your environment." exit $OCF_ERR_CONFIGURED fi else return fi if [ $OP != "stop" ]; then if [ -z "$OCF_RESKEY_CRM_meta_clone" ]; then ocf_log err "ocfs2 must be run as a clone." exit $OCF_ERR_GENERIC fi fi if [ $blockdevice = "no" ]; then ocf_log err "$DEVICE: ocfs2 needs a block device instead." exit $OCF_ERR_GENERIC fi for f in "$OCF_RESKEY_ocfs2_configfs" /sys/kernel/config/cluster /configfs/cluster ; do if [ -n "$f" -a -d "$f" ]; then OCFS2_CONFIGFS="$f" break fi done if [ ! -d "$OCFS2_CONFIGFS" ]; then ocf_log err "ocfs2 needs configfs mounted." 
exit $OCF_ERR_GENERIC fi ocfs2_set_uuid if [ -n "$OCF_RESKEY_ocfs2_cluster" ]; then OCFS2_CLUSTER=$(echo $OCF_RESKEY_ocfs2_cluster) else OCFS2_CLUSTER=$(find "$OCFS2_CONFIGFS" -maxdepth 1 -mindepth 1 -type d -printf %f 2>/dev/null) set -- $OCFS2_CLUSTER local n; n="$#" if [ $n -gt 1 ]; then ocf_log err "$OCFS2_CLUSTER: several clusters found." exit $OCF_ERR_GENERIC fi if [ $n -eq 0 ]; then ocf_log err "$OCFS2_CONFIGFS: no clusters found." exit $OCF_ERR_GENERIC fi fi OCFS2_CLUSTER_ROOT="$OCFS2_CONFIGFS/$OCFS2_CLUSTER" if [ ! -d "$OCFS2_CLUSTER_ROOT" ]; then ocf_log err "$OCFS2_CLUSTER: Cluster doesn't exist. Maybe o2cb hasn't been run?" exit $OCF_ERR_GENERIC fi OCFS2_FS_ROOT=$OCFS2_CLUSTER_ROOT/heartbeat/$OCFS2_UUID } # kernels < 2.6.26 can't handle bind remounts bind_kernel_check() { echo "$options" | grep -w ro >/dev/null 2>&1 || return uname -r | awk -F. ' $1==2 && $2==6 { sub("[^0-9].*","",$3); if ($3<26) exit(1); }' [ $? -ne 0 ] && ocf_log warn "kernel `uname -r` cannot handle read only bind mounts" } bind_mount() { if is_bind_mount && [ "$options" != "-o bind" ] then bind_kernel_check bind_opts=`echo $options | sed 's/bind/remount/'` $MOUNT $bind_opts $MOUNTPOINT else true # make sure to return OK fi } is_option() { echo $OCF_RESKEY_options | grep -w "$1" >/dev/null 2>&1 } is_fsck_needed() { case $OCF_RESKEY_run_fsck in force) true;; no) false;; ""|auto) case $FSTYPE in ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs) false;; *) true;; esac;; *) ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'" OCF_RESKEY_run_fsck="auto" is_fsck_needed;; esac } # # START: Start up the filesystem # Filesystem_start() { if [ -n "$OCFS2_SLES10" ]; then # "start" now has the notification data available; that # we're being started means we didn't get the # pre-notification, because we weren't running, so # process the information now first. 
ocf_log info "$OCFS2_UUID: Faking pre-notification on start." OCF_RESKEY_CRM_meta_notify_type="pre" OCF_RESKEY_CRM_meta_notify_operation="start" Filesystem_notify fi # See if the device is already mounted. if Filesystem_status >/dev/null 2>&1 ; then ocf_log info "Filesystem $MOUNTPOINT is already mounted." return $OCF_SUCCESS fi if [ "X${HOSTOS}" != "XOpenBSD" ];then if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then : No FSTYPE specified, rely on the system has the right file-system support already else local support="$FSTYPE" # support fuse-filesystems (e.g. GlusterFS) case $FSTYPE in glusterfs) support="fuse";; esac grep -w "$support"'$' /proc/filesystems >/dev/null || $MODPROBE $support >/dev/null grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -ne 0 ] ; then ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems" return $OCF_ERR_INSTALLED fi fi fi # Check the filesystem & auto repair. # NOTE: Some filesystem types don't need this step... Please modify # accordingly if [ $blockdevice = "yes" ]; then if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" exit $OCF_ERR_INSTALLED fi if is_fsck_needed; then ocf_log info "Starting filesystem check on $DEVICE" if [ -z "$FSTYPE" ]; then $FSCK -p $DEVICE else $FSCK -t $FSTYPE -p $DEVICE fi # NOTE: if any errors at all are detected, it returns non-zero # if the error is >= 4 then there is a big problem if [ $? -ge 4 ]; then ocf_log err "Couldn't sucessfully fsck filesystem for $DEVICE" return $OCF_ERR_GENERIC fi fi fi [ -d "$MOUNTPOINT" ] || ocf_run mkdir -p $MOUNTPOINT if [ ! -d "$MOUNTPOINT" ] ; then ocf_log err "Couldn't find directory [$MOUNTPOINT] to use as a mount point" exit $OCF_ERR_INSTALLED fi flushbufs $DEVICE # Mount the filesystem. case "$FSTYPE" in none) $MOUNT $options $DEVICE $MOUNTPOINT && bind_mount ;; "") $MOUNT $options $DEVICE $MOUNTPOINT ;; *) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;; esac if [ $? 
-ne 0 ]; then ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT" if [ -n "$OCFS2_SLES10" ]; then ocfs2_cleanup fi return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # end of Filesystem_start Filesystem_notify() { # Process notifications; this is the essential glue level for # giving user-space membership events to a cluster-aware # filesystem. Right now, only OCFS2 is supported. # # When we get a pre-start notification, we set up all the nodes # which will be active in our membership for the filesystem. # (For the resource to be started, this happens at the time of # the actual 'start' operation.) # # At a post-start, actually there's nothing to do for us really, # but no harm done in re-syncing either. # # pre-stop is meaningless; we can't remove any node yet, it # first needs to unmount. # # post-stop: the node is removed from the membership of the # other nodes. # # Note that this expects that the base cluster is already # active; ie o2cb has been started and populated # $OCFS2_CLUSTER_ROOT/node/ already. This can be achieved by # simply having o2cb run on all nodes by the CRM too. This # probably ought to be mentioned somewhere in the to be written # documentation. ;-) # if [ -z "$OCFS2_SLES10" ]; then # One of the cases which shouldn't occur; it should have # been caught much earlier. Still, you know ... ocf_log err "$DEVICE: Please only enable notifications for SLES10 OCFS2 mounts." # Yes, in theory this is a configuration error, but # simply discarding them allows users to switch from the # SLES10 stack to the new one w/o downtime. # Ignoring the notifications is harmless, afterall, and # they can simply disable them in their own time. 
return $OCF_SUCCESS fi local n_type; n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op; n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active; n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop; n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start; n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" ocf_log info "$OCFS2_UUID: notify: $n_type for $n_op" ocf_log info "$OCFS2_UUID: notify active: $n_active" ocf_log info "$OCFS2_UUID: notify stop: $n_stop" ocf_log info "$OCFS2_UUID: notify start: $n_start" case "$n_type" in pre) case "$n_op" in stop) ocf_log info "$OCFS2_UUID: ignoring pre-notify for stop." return $OCF_SUCCESS ;; start) # These are about to become active; prepare to # communicate with them. # Duplicate removal - start can contain nodes # already on the active list, confusing the # script later on: for UNAME in $n_active; do n_start=`echo ${n_start} | sed s/$UNAME//` done # Merge pruned lists again: n_active="$n_active $n_start" ;; esac ;; post) case "$n_op" in stop) # remove unames from notify_stop_uname; these have been # stopped and can no longer be considered active. for UNAME in $n_stop; do n_active=`echo ${n_active} | sed s/$UNAME//` done ;; start) if [ "$n_op" = "start" ]; then ocf_log info "$OCFS2_UUID: ignoring post-notify for start." return $OCF_SUCCESS fi ;; esac ;; esac ocf_log info "$OCFS2_UUID: post-processed active: $n_active" local n_myself; n_myself=${HA_CURHOST:-$(uname -n | tr '[A-Z]' '[a-z]')} ocf_log info "$OCFS2_UUID: I am node $n_myself." case " $n_active " in *" $n_myself "*) ;; *) ocf_log err "$OCFS2_UUID: $n_myself (local) not on active list!" return $OCF_ERR_GENERIC ;; esac if [ -d "$OCFS2_FS_ROOT" ]; then entry_prefix=$OCFS2_FS_ROOT/ for entry in $OCFS2_FS_ROOT/* ; do n_fs="${entry##$entry_prefix}" # ocf_log info "$OCFS2_UUID: Found current node $n_fs" case " $n_active " in *" $n_fs "*) # Construct a list of nodes which are present # already in the membership. 
n_exists="$n_exists $n_fs" ocf_log info "$OCFS2_UUID: Keeping node: $n_fs" ;; *) # Node is in the membership currently, but not on our # active list. Must be removed. if [ "$n_op" = "start" ]; then ocf_log warn "$OCFS2_UUID: Removing nodes on start" fi ocf_log info "$OCFS2_UUID: Removing dead node: $n_fs" if ! rm -f $entry ; then ocf_log err "$OCFS2_UUID: Removal of $n_fs failed!" fi ;; esac done else ocf_log info "$OCFS2_UUID: heartbeat directory doesn't exist yet, creating." mkdir -p $OCFS2_FS_ROOT fi ocf_log info "$OCFS2_UUID: Existing node list: $n_exists" # (2) for entry in $n_active ; do # ocf_log info "$OCFS2_UUID: Expected active node: $entry" case " $n_exists " in *" $entry "*) ocf_log info "$OCFS2_UUID: Already active: $entry" ;; *) if [ "$n_op" = "stop" ]; then ocf_log warn "$OCFS2_UUID: Adding nodes on stop" fi ocf_log info "$OCFS2_UUID: Activating node: $entry" if ! ln -s $OCFS2_CLUSTER_ROOT/node/$entry $OCFS2_FS_ROOT/$entry ; then ocf_log err "$OCFS2_CLUSTER_ROOT/node/$entry: failed to link" fi ;; esac done } signal_processes() { local dir=$1 local sig=$2 local pids pid # fuser returns a non-zero return code if none of the # specified files is accessed or in case of a fatal # error. 
pids=$( if [ "X${HOSTOS}" = "XOpenBSD" ];then fstat | grep $dir | awk '{print $3}' else $FUSER -m $dir 2>/dev/null fi ) if [ -z "$pids" ]; then ocf_log info "No processes on $dir were signalled" return fi for pid in $pids; do ocf_log info "sending signal $sig to: `ps -f $pid | tail -1`" kill -s $sig $pid done } try_umount() { local SUB=$1 $UMOUNT $umount_force $SUB list_mounts | grep -q " $SUB " >/dev/null 2>&1 || { ocf_log info "unmounted $SUB successfully" return $OCF_SUCCESS } return $OCF_ERR_GENERIC } fs_stop() { local SUB=$1 timeout=$2 sig cnt for sig in TERM KILL; do cnt=$((timeout/2)) # try half time with TERM while [ $cnt -gt 0 ]; do try_umount $SUB && return $OCF_SUCCESS ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig" signal_processes $SUB $sig cnt=$((cnt-1)) sleep 1 done done return $OCF_ERR_GENERIC } # # STOP: Unmount the filesystem # Filesystem_stop() { # See if the device is currently mounted Filesystem_status >/dev/null 2>&1 if [ $? -eq $OCF_NOT_RUNNING ]; then # Already unmounted, wonderful. rc=$OCF_SUCCESS else # Wipe the status file, but continue with a warning if # removal fails -- the file system might be read only if [ $OCF_CHECK_LEVEL -eq 20 ]; then rm -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log warn "Failed to remove status file ${STATUSFILE}." fi fi # Determine the real blockdevice this is mounted on (if # possible) prior to unmounting. determine_blockdevice # For networked filesystems, there's merit in trying -f: case "$FSTYPE" in nfs4|nfs|cifs|smbfs) umount_force="-f" ;; esac # Umount all sub-filesystems mounted under $MOUNTPOINT/ too. local timeout for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do ocf_log info "Trying to unmount $SUB" if ocf_is_true "$FAST_STOP"; then timeout=6 else timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"} timeout=$((timeout/1000)) fi fs_stop $SUB $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_log err "Couldn't unmount $SUB, giving up!" 
fi done fi flushbufs $DEVICE # Yes I know the next blob is ugly, sorry. if [ $rc -eq $OCF_SUCCESS ]; then if [ "$FSTYPE" = "ocfs2" ]; then ocfs2_init if [ -n "$OCFS2_SLES10" ]; then ocfs2_cleanup fi fi fi return $rc } # end of Filesystem_stop # # STATUS: is the filesystem mounted or not? # Filesystem_status() { if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then rc=$OCF_SUCCESS msg="$MOUNTPOINT is mounted (running)" else rc=$OCF_NOT_RUNNING msg="$MOUNTPOINT is unmounted (stopped)" fi # TODO: For ocfs2, or other cluster filesystems, should we be # checking connectivity to other nodes here, or the IO path to # the storage? # Special case "monitor" to check whether the UUID cached and # on-disk still match? case "$OP" in status) ocf_log info "$msg";; esac return $rc } # end of Filesystem_status # Note: the read/write tests below will stall in case the # underlying block device (or in the case of a NAS mount, the # NAS server) has gone away. In that case, if I/O does not # return to normal in time, the operation hits its timeout # and it is up to the CRM to initiate appropriate recovery # actions (such as fencing the node). # # MONITOR 10: read the device # Filesystem_monitor_10() { if [ "$blockdevice" = "no" ] ; then ocf_log warn "$DEVICE is not a block device, monitor 10 is noop" return $OCF_SUCCESS fi dd_opts="iflag=direct bs=4k count=1" err_output=`dd if=$DEVICE $dd_opts 2>&1 >/dev/null` if [ $? -ne 0 ]; then ocf_log err "Failed to read device $DEVICE" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # MONITOR 20: write and read a status file # Filesystem_monitor_20() { if [ "$blockdevice" = "no" ] ; then # O_DIRECT not supported on cifs/smbfs dd_opts="oflag=sync bs=4k conv=fsync,sync" else # Writing to the device in O_DIRECT mode is imperative # to bypass caches. 
dd_opts="oflag=direct,sync bs=4k conv=fsync,sync" fi status_dir=`dirname $STATUSFILE` [ -d "$status_dir" ] || mkdir -p "$status_dir" err_output=` echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1` if [ $? -ne 0 ]; then ocf_log err "Failed to write status file ${STATUSFILE}" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi test -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log err "Cannot stat the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi cat ${STATUSFILE} > /dev/null if [ $? -ne 0 ]; then ocf_log err "Cannot read the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } Filesystem_monitor() { Filesystem_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then case "$OCF_CHECK_LEVEL" in 10) Filesystem_monitor_10; rc=$?;; 20) Filesystem_monitor_20; rc=$?;; *) ocf_log err "unsupported monitor level $OCF_CHECK_LEVEL" rc=$OCF_ERR_CONFIGURED ;; esac fi return $rc } # end of Filesystem_monitor # # VALIDATE_ALL: Are the instance parameters valid? # FIXME!! The only part that's useful is the return code. # This code always returns $OCF_SUCCESS (!) # Filesystem_validate_all() { if [ -n $MOUNTPOINT -a ! -d $MOUNTPOINT ]; then ocf_log warn "Mountpoint $MOUNTPOINT does not exist" fi # Check if the $FSTYPE is workable # NOTE: Without inserting the $FSTYPE module, this step may be imprecise # TODO: This is Linux specific crap. if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then cut -f2 /proc/filesystems |grep -q ^$FSTYPE$ if [ $? -ne 0 ]; then modpath=/lib/modules/`uname -r` moddep=$modpath/modules.dep # Do we have $FSTYPE in modules.dep? cut -d' ' -f1 $moddep |grep -q "^$modpath.*$FSTYPE\.k\?o:$" if [ $? -ne 0 ]; then ocf_log info "It seems we do not have $FSTYPE support" fi fi fi # If we are supposed to do monitoring with status files, then # we need a utility to write in O_DIRECT mode. 
if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary dd # Note: really old coreutils version do not support # the "oflag" option for dd. We don't check for that # here. In case dd does not support oflag, monitor is # bound to fail, with dd spewing an error message to # the logs. On such systems, we must do without status # file monitoring. fi #TODO: How to check the $options ? return $OCF_SUCCESS } # # set the blockdevice variable to "no" or "yes" # set_blockdevice_var() { blockdevice=no # these are definitely not block devices case $FSTYPE in nfs4|nfs|smbfs|cifs|none|glusterfs|ceph) return;; esac if `is_option "loop"`; then return fi case $DEVICE in -*) # Oh... An option to mount instead... Typically -U or -L ;; /dev/null) # Special case for BSC blockdevice=yes ;; *) if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" fi if [ ! -d "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Check the arguments passed to this script if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # Check the OCF_RESKEY_ environment variables... DEVICE=$OCF_RESKEY_device FSTYPE=$OCF_RESKEY_fstype if [ ! -z "$OCF_RESKEY_options" ]; then options="-o $OCF_RESKEY_options" fi FAST_STOP=${OCF_RESKEY_fast_stop:="yes"} OP=$1 # These operations do not require instance parameters case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac if [ x = x"$DEVICE" ]; then ocf_log err "Please set OCF_RESKEY_device to the device to be managed" exit $OCF_ERR_CONFIGURED fi set_blockdevice_var # Normalize instance parameters: # It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". # But the output of `mount` and /proc/mounts do not. 
if [ -z "$OCF_RESKEY_directory" ]; then if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then ocf_log err "Please specify the directory" exit $OCF_ERR_CONFIGURED fi else MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//') : ${MOUNTPOINT:=/} # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/" # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll # kill the whole system. Is that a good idea? fi # Check to make sure the utilites are found if [ "X${HOSTOS}" != "XOpenBSD" ];then check_binary $MODPROBE check_binary $FUSER fi check_binary $FSCK check_binary $MOUNT check_binary $UMOUNT if [ "$OP" != "monitor" ]; then ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" fi # These operations do not require the clone checking + OCFS2 # initialization. case $OP in status) Filesystem_status exit $? ;; monitor) Filesystem_monitor exit $? ;; validate-all) Filesystem_validate_all exit $? ;; stop) Filesystem_stop exit $? ;; esac CLUSTERSAFE=0 is_option "ro" && CLUSTERSAFE=2 case $FSTYPE in ocfs2) ocfs2_init CLUSTERSAFE=1 ;; nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph) CLUSTERSAFE=1 # this is kind of safe too ;; # add here CLUSTERSAFE=0 for all filesystems which are not # cluster aware and which, even if when mounted read-only, # could still modify parts of it such as journal/metadata ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) if ocf_is_true "$OCF_RESKEY_force_clones"; then CLUSTERSAFE=2 else CLUSTERSAFE=0 # these are not allowed fi ;; esac if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then case $CLUSTERSAFE in 0) ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" ocf_log err "DO NOT RUN IT AS A CLONE!" ocf_log err "Politely refusing to proceed to avoid data corruption." exit $OCF_ERR_CONFIGURED ;; 2) ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!" if ocf_is_true "$OCF_RESKEY_force_clones"; then ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so." 
else ocf_log warn "But we'll let it run because it is mounted read-only." ocf_log warn "Please make sure that it's meta data is read-only too!" fi ;; esac fi case $OP in start) Filesystem_start ;; notify) Filesystem_notify ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 index 782a4df08..b645288ed 100755 --- a/heartbeat/IPaddr2 +++ b/heartbeat/IPaddr2 @@ -1,1068 +1,1068 @@ #!/bin/sh # # $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $ # # OCF Resource Agent compliant IPaddr2 script. # # Based on work by Tuomo Soini, ported to the OCF RA API by Lars # Marowsky-Brée. Implements Cluster Alias IP functionality too. # # Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff # # # Copyright (c) 2003 Tuomo Soini # Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# # # TODO: # - There ought to be an ocf_run_cmd function which does all logging, # timeout handling etc for us # - Make this the standard IP address agent on Linux; the other # platforms simply should ignore the additional parameters OR can use # the legacy heartbeat resource script... # - Check LVS <-> clusterip incompatibilities. # # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_iflabel # OCF_RESKEY_mac # OCF_RESKEY_clusterip_hash # OCF_RESKEY_arp_interval # OCF_RESKEY_arp_count # OCF_RESKEY_arp_bg # OCF_RESKEY_arp_mac # # OCF_RESKEY_CRM_meta_clone # OCF_RESKEY_CRM_meta_clone_max ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/findif.sh # Defaults OCF_RESKEY_lvs_support_default=false OCF_RESKEY_lvs_ipv6_addrlabel_default=false OCF_RESKEY_lvs_ipv6_addrlabel_value_default=99 OCF_RESKEY_clusterip_hash_default="sourceip-sourceport" OCF_RESKEY_unique_clone_address_default=false OCF_RESKEY_arp_interval_default=200 OCF_RESKEY_arp_count_default=5 OCF_RESKEY_arp_bg_default=true OCF_RESKEY_arp_mac_default="ffffffffffff" : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} : ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}} : ${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}} : ${OCF_RESKEY_clusterip_hash=${OCF_RESKEY_clusterip_hash_default}} : ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}} : ${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}} : ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}} : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} : ${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}} ####################################################################### SENDARP=$HA_BIN/send_arp SENDUA=$HA_BIN/send_ua FINDIF=findif 
VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip} ####################################################################### meta_data() { cat < 1.0 This Linux-specific resource manages IP alias IP addresses. It can add an IP alias, or remove one. In addition, it can implement Cluster Alias IP functionality if invoked as a clone resource. If used as a clone, you should explicitly set clone-node-max >= 2, and/or clone-max < number of nodes. In case of node failure, clone instances need to be re-allocated on surviving nodes. -Which would not be possible, if there is already an instance on those nodes, +This would not be possible if there is already an instance on those nodes, and clone-node-max=1 (which is the default). Manages virtual IPv4 and IPv6 addresses (Linux specific version) The IPv4 (dotted quad notation) or IPv6 address (colon hexadecimal notation) example IPv4 "192.168.1.1". example IPv6 "2001:db8:DC28:0:0:FC57:D4C8:1FFF". IPv4 or IPv6 address The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. If you want a label, see the iflabel parameter. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. If you can not assign any static IP address on the interface, modify this kernel parameter: sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device) Network interface The netmask for the interface in CIDR format (e.g., 24 and not 255.255.255.0) If unspecified, the script will also try to determine this from the routing table. CIDR netmask Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address You can specify an additional label for your IP address here. 
This label is appended to your interface name. A label can be specified in nic parameter but it is deprecated. If a label is specified in nic name, this parameter has no effect. Interface label Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Notes for IPv6: It is not necessary to enable this option on IPv6. Instead, enable 'lvs_ipv6_addrlabel' option for LVS-DR usage on IPv6. Enable support for LVS DR Enable adding IPv6 address label so IPv6 traffic originating from -the address' interface does not use this address as the source. +the address's interface does not use this address as the source. This is necessary for LVS-DR health checks to realservers to work. Without it, the most recently added IPv6 address (probably the address added by IPaddr2) will be used as the source address for IPv6 traffic from that interface and since that address exists on loopback on the realservers, the realserver -response to pings/connections will not never leave its loopback. +response to pings/connections will never leave its loopback. See RFC3484 for the detail of the source address selection. See also 'lvs_ipv6_addrlabel_value' parameter. -Enables adding IPv6 address label. +Enable adding IPv6 address label. Specify IPv6 address label value used when 'lvs_ipv6_addrlabel' is enabled. The value should be an unused label in the policy table which is shown by 'ip addrlabel list' command. You would rarely need to change this parameter. IPv6 address label value. Set the interface MAC address explicitly. Currently only used in case of the Cluster IP Alias. Leave empty to chose automatically. Cluster IP MAC address Specify the hashing algorithm used for the Cluster IP functionality. 
Cluster IP hashing function -If true, add the clone ID to the supplied value of ip to create +If true, add the clone ID to the supplied value of IP to create a unique address to manage Create a unique address for cloned instances Specify the interval between unsolicited ARP packets in milliseconds. ARP packet interval in ms Number of unsolicited ARP packets to send. ARP packet count -Whether or not to send the arp packets in the background. +Whether or not to send the ARP packets in the background. ARP from background MAC address to send the ARP packets to. You really shouldn't be touching this. ARP MAC The program to send ARP packets with on start. For infiniband interfaces, default is ipoibarping. If ipoibarping is not available, set this to send_arp. ARP sender Flush the routing table on stop. This is for applications which use the cluster IP address and which run on the same physical host that the IP address lives on. The Linux kernel may force that application to take a shortcut to the local loopback interface, instead of the interface the address is really bound to. Under those circumstances, an application may, somewhat unexpectedly, continue to use connections for some time even after the IP address is deconfigured. Set this parameter in order to immediately disable said shortcut when the IP address goes away. Flush kernel routing table on stop END exit $OCF_SUCCESS } ip_init() { local rc if [ X`uname -s` != "XLinux" ]; then ocf_log err "IPaddr2 only supported Linux." exit $OCF_ERR_INSTALLED fi if [ X"$OCF_RESKEY_ip" = "X" ]; then ocf_log err "IP address (the ip parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi if case $__OCF_ACTION in start|stop) ocf_is_root;; *) true;; esac then : YAY! else ocf_log err "You must be root for $__OCF_ACTION operation." exit $OCF_ERR_PERM fi BASEIP="$OCF_RESKEY_ip" BRDCAST="$OCF_RESKEY_broadcast" NIC="$OCF_RESKEY_nic" # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. 
Don't remove this aliasing code! if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi NETMASK="$OCF_RESKEY_cidr_netmask" IFLABEL="$OCF_RESKEY_iflabel" IF_MAC="$OCF_RESKEY_mac" IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1} IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1` if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then ocf_log err "LVS and load sharing do not go together well" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then : else ocf_log err "Invalid OCF_RESKEY_incarnations_max_global [$IP_INC_GLOBAL], should be positive integer" exit $OCF_ERR_CONFIGURED fi echo $OCF_RESKEY_ip | grep -qs ":" if [ $? -ne 0 ];then FAMILY=inet if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then ocf_log err "IPv4 does not support lvs_ipv6_addrlabel" exit $OCF_ERR_CONFIGURED fi else FAMILY=inet6 if ocf_is_true $OCF_RESKEY_lvs_support ;then ocf_log err "The IPv6 does not support lvs_support" exit $OCF_ERR_CONFIGURED fi if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then if ocf_is_decimal "$OCF_RESKEY_lvs_ipv6_addrlabel_value" && [ $OCF_RESKEY_lvs_ipv6_addrlabel_value -ge 0 ]; then : else ocf_log err "Invalid lvs_ipv6_addrlabel_value [$OCF_RESKEY_lvs_ipv6_addrlabel_value], should be positive integer" exit $OCF_ERR_CONFIGURED fi fi fi # support nic:iflabel format in nic parameter case $NIC in *:*) IFLABEL=`echo $NIC | sed 's/[^:]*://'` NIC=`echo $NIC | sed 's/:.*//'` # only the base name should be passed to findif OCF_RESKEY_nic=$NIC ;; esac # $FINDIF takes its parameters from the environment # NICINFO=`$FINDIF` rc=$? 
if [ $rc -eq 0 ] then NICINFO=`echo "$NICINFO" | sed -e 's/netmask\ //;s/broadcast\ //'` NIC=`echo "$NICINFO" | cut -d" " -f1` NETMASK=`echo "$NICINFO" | cut -d" " -f2` BRDCAST=`echo "$NICINFO" | cut -d" " -f3` else # findif couldn't find the interface if ocf_is_probe; then ocf_log info "[$FINDIF] failed" exit $OCF_NOT_RUNNING elif [ "$__OCF_ACTION" = stop ]; then ocf_log warn "[$FINDIF] failed" exit $OCF_SUCCESS else ocf_log err "[$FINDIF] failed" exit $rc fi fi SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" if [ -n "$IFLABEL" ]; then IFLABEL=${NIC}:${IFLABEL} fi if [ "$IP_INC_GLOBAL" -gt 1 ] && ! ocf_is_true "$OCF_RESKEY_unique_clone_address"; then IP_CIP="yes" IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}" if [ -z "$IF_MAC" ]; then # Choose a MAC # 1. Concatenate some input together # 2. This doesn't need to be a cryptographically # secure hash. # 3. Drop everything after the first 6 octets (12 chars) # 4. Delimit the octets with ':' # 5. Make sure the first octet is odd, # so the result is a multicast MAC IF_MAC=`echo $OCF_RESKEY_ip $NETMASK $BRDCAST | \ md5sum | \ sed -e 's#\(............\).*#\1#' \ -e 's#..#&:#g; s#:$##' \ -e 's#^\(.\)[02468aAcCeE]#\11#'` fi IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$OCF_RESKEY_ip" fi } # # Find out which interfaces serve the given IP address and netmask. # The arguments are an IP address and a netmask. # Its output are interface names devided by spaces (e.g., "eth0 eth1"). 
# find_interface() { local ipaddr="$1" local netmask="$2" # # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces # local iface="`$IP2UTIL -o -f $FAMILY addr show \ | grep "\ $ipaddr/$netmask" \ | cut -d ' ' -f2 \ | grep -v '^ipsec[0-9][0-9]*$'`" echo "$iface" return 0 } # # Delete an interface # delete_interface () { ipaddr="$1" iface="$2" netmask="$3" CMD="$IP2UTIL -f $FAMILY addr delete $ipaddr/$netmask dev $iface" ocf_run $CMD || return $OCF_ERR_GENERIC if ocf_is_true $OCF_RESKEY_flush_routes; then ocf_run $IP2UTIL route flush cache fi if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then delete_ipv6_addrlabel $ipaddr fi return $OCF_SUCCESS } # # Add an interface # add_interface () { local cmd msg ipaddr netmask broadcast iface label ipaddr="$1" netmask="$2" broadcast="$3" iface="$4" label="$5" if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then add_ipv6_addrlabel $ipaddr fi cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask dev $iface" msg="Adding $FAMILY address $ipaddr/$netmask to device $iface" if [ "$broadcast" != "none" ]; then cmd="$IP2UTIL -f $FAMILY addr add $ipaddr/$netmask brd $broadcast dev $iface" msg="Adding $FAMILY address $ipaddr/$netmask with broadcast address $broadcast to device $iface" fi if [ ! -z "$label" ]; then cmd="$cmd label $label" msg="${msg} (with label $label)" fi ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC msg="Bringing device $iface up" cmd="$IP2UTIL link set $iface up" ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC return $OCF_SUCCESS } # # Delete a route # delete_route () { prefix="$1" iface="$2" CMD="$IP2UTIL route delete $prefix dev $iface" ocf_log info "$CMD" $CMD return $? } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. 
#
# TODO: This is very ugly and should be controlled by an additional
# instance parameter. Or even: multi-state, with the IP only being
# "active" on the master!?
#
# Arguments: IP address, netmask, broadcast address, interface name.
#
remove_conflicting_loopback() {
    ipaddr="$1"
    netmask="$2"
    broadcast="$3"
    ifname="$4"

    ocf_log info "Removing conflicting loopback $ifname."
    # Save the four fields in the order add_interface expects them, so
    # restore_loopback can pass the file content straight back.
    if
        echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr"
    then
        : Saved loopback information in $VLDIR/$ipaddr
    else
        ocf_log err "Could not save conflicting loopback $ifname." \
        "it will not be restored."
    fi
    delete_interface "$ipaddr" "$ifname" "$netmask"
    # Forcibly remove the route (if it exists) to the loopback.
    delete_route "$ipaddr" "$ifname"
}

#
# On Linux systems the (hidden) loopback interface may
# need to be restored if it has been taken down previously
# by remove_conflicting_loopback()
#
# Argument: the IP address whose saved definition should be re-applied.
# Does nothing when no non-empty save file exists for that address.
#
restore_loopback() {
    ipaddr="$1"

    if [ -s "$VLDIR/$ipaddr" ]; then
        ifinfo=`cat "$VLDIR/$ipaddr"`
        ocf_log info "Restoring loopback IP Address " \
            "$ifinfo."
        # Deliberately unquoted: the saved line is split into the
        # positional arguments of add_interface.
        add_interface $ifinfo
        rm -f "$VLDIR/$ipaddr"
    fi
}

# Add an IPv6 address label for the given address, using the label value
# from lvs_ipv6_addrlabel_value. A failure is logged as a warning only.
add_ipv6_addrlabel() {
    local cmd ipaddr value
    ipaddr="$1"
    value="$OCF_RESKEY_lvs_ipv6_addrlabel_value"
    cmd="$IP2UTIL addrlabel add prefix $ipaddr label $value"
    ocf_log info "Adding IPv6 address label prefix $ipaddr label $value"
    ocf_run $cmd || ocf_log warn "$cmd failed."
}

# Remove the IPv6 address label added by add_ipv6_addrlabel.
delete_ipv6_addrlabel() {
    local cmd ipaddr value
    ipaddr="$1"
    value="$OCF_RESKEY_lvs_ipv6_addrlabel_value"
    cmd="$IP2UTIL addrlabel del prefix $ipaddr label $value"
    ocf_run $cmd
    # an error can be ignored
}

# Succeeds when the link information of $NIC contains "link/infiniband".
is_infiniband() {
    $IP2UTIL link show $NIC | grep link/infiniband >/dev/null
}

#
# Run send_arp to note peers about new mac address
#
run_send_arp() {
    ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip auto not_used not_used"
    # In Cluster IP mode the cluster MAC (colons removed) replaces "auto".
    if [ "x$IP_CIP" = "xyes" ] ; then
        if [ x = "x$IF_MAC" ] ; then
            MY_MAC=auto
        else
            MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'`
        fi
        ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used"
    fi
    ocf_log info "$SENDARP $ARGS"
    if ocf_is_true $OCF_RESKEY_arp_bg; then
        # NOTE(review): the ">&2" after "&" is parsed as a separate, empty
        # command, not as a redirection of the subshell - confirm intent.
        ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2
    else
        $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps"
    fi
}

#
# Run send_ua to send ICMPv6 Unsolicited Neighbor Advertisements.
#
run_send_ua() {
    local i

    # Wait until the allocated IPv6 address gets ready by checking
    # "tentative" flag is disappeared, otherwise send_ua can not
    # send the unsolicited advertisement requests.
    # At most five checks are made, one second apart.
    for i in 1 2 3 4 5; do
        $IP2UTIL -o -f $FAMILY addr show dev $NIC \
            | grep -q -e "$OCF_RESKEY_ip/$NETMASK .* tentative"
        [ $? -ne 0 ] && break
        if [ $i -eq 5 ]; then
            ocf_log warn "$OCF_RESKEY_ip still has 'tentative' status. (ignored)"
            break
        fi
        sleep 1
    done

    ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC"
    ocf_log info "$SENDUA $ARGS"
    $SENDUA $ARGS || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements."
}

#
# Run ipoibarping to note peers about new Infiniband address
#
run_send_ib_arp() {
    ARGS="-q -c $OCF_RESKEY_arp_count -U -I $NIC $OCF_RESKEY_ip"
    ocf_log info "ipoibarping $ARGS"
    if ocf_is_true $OCF_RESKEY_arp_bg; then
        # NOTE(review): the ">&2" after "&" is parsed as a separate, empty
        # command, not as a redirection of the subshell - confirm intent.
        (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2
    else
        ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps"
    fi
}

# Do we already serve this IP address on the given $NIC?
#
# The answer is printed on stdout; the return status is always 0.
#
# returns:
# ok = served (for CIP: + hash bucket)
# partial = served and no hash bucket (CIP only)
# partial2 = served and no CIP iptables rule
# no = nothing
#
ip_served() {
    if [ -z "$NIC" ]; then # no nic found or specified
        echo "no"
        return 0
    fi

    cur_nic="`find_interface $OCF_RESKEY_ip $NETMASK`"

    if [ -z "$cur_nic" ]; then
        echo "no"
        return 0
    fi

    if [ -z "$IP_CIP" ]; then
        for i in $cur_nic; do
            # only mark as served when on the same interfaces as $NIC
            [ "$i" = "$NIC" ] || continue
            echo "ok"
            return 0
        done
        # There used to be logic here to pretend "not served",
        # if ${OCF_RESKEY_lvs_support} was enabled, and the IP was
        # found active on "lo*" only. With lvs_support on, you should
        # have NIC != lo, so that's already filtered
        # by the continue above.

        echo "no"
        return 0
    fi

    # Special handling for the CIP:
    if [ ! -e $IP_CIP_FILE ]; then
        echo "partial2"
        return 0
    fi
    # The file holds a comma separated list; look for this clone's number
    # as a complete list element.
    if egrep -q "(^|,)${IP_INC_NO}(,|$)" $IP_CIP_FILE ; then
        echo "ok"
        return 0
    else
        echo "partial"
        return 0
    fi

    # Not reached: both branches above return.
    exit $OCF_ERR_GENERIC
}

#######################################################################

# NOTE(review): everything between "cat <" and "$IP_CIP_FILE" below looks
# like it was lost when this text was extracted (the usage here-document,
# the end of ip_usage and the opening of ip_start). The remaining tokens
# are reproduced exactly as found - TODO restore from the original agent.
ip_usage() {
    cat <$IP_CIP_FILE
    fi

    if [ "$ip_status" = "no" ]; then
        # With LVS support, a copy of the address living on loopback is
        # moved out of the way before the address is added to $NIC.
        if ocf_is_true ${OCF_RESKEY_lvs_support}; then
            for i in `find_interface $OCF_RESKEY_ip 32`; do
                case $i in
                lo*)
                    remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo
                    ;;
                esac
            done
        fi

        add_interface $OCF_RESKEY_ip $NETMASK ${BRDCAST:-none} $NIC $IFLABEL

        if [ $? -ne 0 ]; then
            # NOTE(review): add_interface keeps its command in a local
            # "cmd"; $CMD is only assigned by delete_interface and
            # delete_route, so this message may be empty or stale.
            ocf_log err "$CMD failed."
            exit $OCF_ERR_GENERIC
        fi
    fi

    case $NIC in
    lo*)
        : no need to run send_arp on loopback
        ;;
    *)
        if [ $FAMILY = "inet" ];then
            $ARP_SEND_FUN
        else
            if [ -x $SENDUA ]; then
                run_send_ua
            fi
        fi
        ;;
    esac
    exit $OCF_SUCCESS
}

#
# ip_stop: remove the address from this node. Always ends in "exit".
#
ip_stop() {
    local ip_del_if="yes"
    if [ -n "$IP_CIP" ]; then
        # Cluster IPs need special processing when the last bucket
        # is removed from the node... take a lock to make sure only one
        # process executes that code
        ocf_take_lock $CIP_lockfile
        ocf_release_lock_on_exit $CIP_lockfile
    fi

    # Stop a send_arp that may still be running from an earlier start.
    if [ -f "$SENDARPPIDFILE" ] ; then
        kill `cat "$SENDARPPIDFILE"`
        if [ $? -ne 0 ]; then
            ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip"
        else
            ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip"
            rm -f "$SENDARPPIDFILE"
        fi
    fi
    local ip_status=`ip_served`
    ocf_log info "IP status = $ip_status, IP_CIP=$IP_CIP"

    if [ $ip_status = "no" ]; then
        : Requested interface not in use
        exit $OCF_SUCCESS
    fi

    if [ -n "$IP_CIP" ] && [ $ip_status != "partial2" ]; then
        if [ $ip_status = "partial" ]; then
            exit $OCF_SUCCESS
        fi
        # Give up this clone's bucket; if the file then reads back empty,
        # the iptables rule is deleted as well.
        echo "-$IP_INC_NO" >$IP_CIP_FILE
        if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then
            ocf_log info $OCF_RESKEY_ip, $IP_CIP_HASH
            i=1
            # The rule is deleted once per possible --local-node value.
            while [ $i -le $IP_INC_GLOBAL ]; do
                ocf_log info $i
                $IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
                    --new \
                    --clustermac $IF_MAC \
                    --total-nodes $IP_INC_GLOBAL \
                    --local-node $i \
                    --hashmode $IP_CIP_HASH
                i=`expr $i + 1`
            done
        else
            # Other buckets are still served here: keep the address.
            ip_del_if="no"
        fi
    fi

    if [ "$ip_del_if" = "yes" ]; then
        delete_interface $OCF_RESKEY_ip $NIC $NETMASK
        if [ $? -ne 0 ]; then
            exit $OCF_ERR_GENERIC
        fi

        if ocf_is_true ${OCF_RESKEY_lvs_support}; then
            restore_loopback "$OCF_RESKEY_ip"
        fi
    fi

    exit $OCF_SUCCESS
}

#
# ip_monitor: map the ip_served answer to an OCF status.
# Note that the "not running" case exits while the other cases return.
#
ip_monitor() {
    # TODO: Implement more elaborate monitoring like checking for
    # interface health maybe via a daemon like FailSafe etc...

    local ip_status=`ip_served`
    case $ip_status in
    ok)
        return $OCF_SUCCESS
        ;;
    partial|no|partial2)
        exit $OCF_NOT_RUNNING
        ;;
    *)
        # Errors on this interface?
return $OCF_ERR_GENERIC
        ;;
    esac
}

# make sure that we have something to send ARPs with
#
# Sets ARP_SEND_FUN to the function that announces the address:
# run_send_arp (default) or run_send_ib_arp. An explicit arp_sender
# parameter wins; otherwise infiniband interfaces prefer ipoibarping and
# fall back to send_arp when ipoibarping is not available.
set_send_arp_program() {
    ARP_SEND_FUN=run_send_arp
    if [ -n "$OCF_RESKEY_arp_sender" ]; then
        case "$OCF_RESKEY_arp_sender" in
        send_arp)
            check_binary $SENDARP
            ;;
        ipoibarping)
            check_binary ipoibarping
            ARP_SEND_FUN=run_send_ib_arp
            ;;
        *)
            ocf_log err "unrecognized arp_sender value: $OCF_RESKEY_arp_sender"
            exit $OCF_ERR_CONFIGURED
            ;;
        esac
    else
        if is_infiniband; then
            ARP_SEND_FUN=run_send_ib_arp
            if ! have_binary ipoibarping; then
                [ "$__OCF_ACTION" = start ] &&
                    ocf_log warn "using send_arp for infiniband because ipoibarping is not available (set arp_sender to \"send_arp\" to suppress this message)"
                check_binary $SENDARP
                ARP_SEND_FUN=run_send_arp
            fi
        fi
    fi
}

#
# ip_validate: check the required binaries and the parameters that
# ip_init does not cover. Exits with $OCF_ERR_CONFIGURED on a bad
# parameter; returns normally when everything is acceptable.
#
ip_validate() {
    check_binary $IP2UTIL
    IP_CIP=

    ip_init

    set_send_arp_program

    if [ -n "$IP_CIP" ]; then
        check_binary $IPTABLES
        check_binary $MODPROBE
    fi

    # $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init,
    # do not bother here.

    if ocf_is_true "$OCF_RESKEY_unique_clone_address" &&
        ! ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
        ocf_log err "unique_clone_address makes sense only with meta globally_unique set"
        exit $OCF_ERR_CONFIGURED
    fi

    if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then
        :
    else
        ocf_log err "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]"
        exit $OCF_ERR_CONFIGURED
    fi

    if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then
        :
    else
        ocf_log err "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]"
        exit $OCF_ERR_CONFIGURED
    fi

    if [ -n "$IP_CIP" ]; then

        local valid=1

        case $IP_CIP_HASH in
        sourceip|sourceip-sourceport|sourceip-sourceport-destport)
            ;;
        *)
            ocf_log err "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]"
            exit $OCF_ERR_CONFIGURED
            ;;
        esac

        if ocf_is_true ${OCF_RESKEY_lvs_support}; then
            # Fixed: this used to call the non-existent "ecf_log".
            ocf_log err "LVS and load sharing not advised to try"
            exit $OCF_ERR_CONFIGURED
        fi

        # Six two-character octets with one separator character between
        # them; the second character of the first octet must be an odd
        # hex digit (multicast MAC).
        case $IF_MAC in
        [0-9a-zA-Z][13579bBdDfF][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z])
            ;;
        *)
            valid=0
            ;;
        esac

        if [ $valid -eq 0 ]; then
            ocf_log err "Invalid IF_MAC [$IF_MAC]"
            exit $OCF_ERR_CONFIGURED
        fi

    fi
}

# With unique_clone_address, every clone instance manages its own address:
# the clone number is added to the last octet of the configured address.
# The final command below continues on the next line, hence the trailing
# backslash.
if ocf_is_true "$OCF_RESKEY_unique_clone_address"; then
    prefix=`echo $OCF_RESKEY_ip | awk -F. '{print $1"."$2"."$3}'`
    suffix=`echo $OCF_RESKEY_ip | awk -F. \
'{print $4}'`
    suffix=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + $suffix`
    OCF_RESKEY_ip="$prefix.$suffix"
fi

# meta-data and usage/help are answered before any validation runs.
case $__OCF_ACTION in
meta-data)  meta_data
            ;;
usage|help) ip_usage
            exit $OCF_SUCCESS
            ;;
esac

ip_validate

# Dispatch the requested action.
case $__OCF_ACTION in
start)      ip_start
            ;;
stop)       ip_stop
            ;;
status)     ip_status=`ip_served`
            if [ $ip_status = "ok" ]; then
                echo "running"
                exit $OCF_SUCCESS
            else
                echo "stopped"
                exit $OCF_NOT_RUNNING
            fi
            ;;
monitor)    ip_monitor
            ;;
validate-all)
            # ip_validate has already run above; nothing more to do.
            ;;
*)          ip_usage
            exit $OCF_ERR_UNIMPLEMENTED
            ;;
esac
# vi:sw=4:ts=8:
diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index d74126a91..40865001d 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -1,570 +1,570 @@
#!/bin/sh
#
# Support:      linux-ha@lists.linux-ha.org
# License:      GNU General Public License (GPL)
#
#   Resource Agent for domains managed by the libvirt API.
#   Requires a running libvirt daemon (libvirtd).
#
#   (c) 2008-2010 Florian Haas, Dejan Muhamedagic,
#                 and Linux-HA contributors
#
# usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}
#
#######################################################################
# Initialization:

# Use a caller-supplied OCF_FUNCTIONS_DIR if one is set, otherwise fall
# back to ${OCF_ROOT}/lib/heartbeat, then source the shell function library.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Defaults
# Note: the hypervisor default runs "virsh --quiet uri" each time the
# script is loaded, whatever the requested action is.
OCF_RESKEY_force_stop_default=0
OCF_RESKEY_hypervisor_default="$(virsh --quiet uri)"
OCF_RESKEY_autoset_utilization_cpu_default="true"
OCF_RESKEY_autoset_utilization_hv_memory_default="true"
# A port in the range 49152..49215, picked from ocf_maybe_random.
OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 ))

: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_hypervisor=${OCF_RESKEY_hypervisor_default}}
: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}}
: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}}
: ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}}
#######################################################################

## I'd very much suggest to make this RA use bash,
## and then use magic $SECONDS.
## But for now:
NOW=$(date +%s)

# Print the one-line usage summary.
usage() {
    echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}"
}

# NOTE(review): the here-document that follows appears to have lost its
# "<<EOF" opener and all of its XML markup. It is reproduced exactly as
# found - TODO restore it from the original agent before using this text.
meta_data() {
    cat < 1.1 Resource agent for a virtual domain (a.k.a. domU, virtual machine, virtual environment etc., depending on context) managed by libvirtd. Manages virtual domains through the libvirt virtualization framework Absolute path to the libvirt configuration file, for this virtual domain. Virtual domain configuration file Hypervisor URI to connect to. See the libvirt documentation for details on supported URI formats. The default is system dependent. Hypervisor URI Always forcefully shut down ("destroy") the domain on stop. The default behavior is to resort to a forceful shutdown only after a graceful shutdown attempt has failed. You should only set this to true if your virtual domain (or your virtualization backend) does not support graceful shutdown. Always force shutdown on stop Transport used to connect to the remote hypervisor while migrating. Please refer to the libvirt documentation for details on transports available.
If this parameter is omitted, the resource will use libvirt's default transport to connect to the remote hypervisor. Remote hypervisor transport Use a dedicated migration network. The migration URI is composed by adding this parameters value to the end of the node name. If the node name happens to be an FQDN (as opposed to an unqualified host name), insert the suffix immediately prior to the first period (.) in the FQDN. At the moment Qemu/KVM and Xen migration via a dedicated network is supported. Note: Be sure this composed host name is locally resolveable and the associated IP is reachable through the favored network. Migration network host name suffix To additionally monitor services within the virtual domain, add this parameter with a list of scripts to monitor. Note: when monitor scripts are used, the start and migrate_from operations will complete only when all monitor scripts have completed successfully. Be sure to set the timeout of these operations to accommodate this delay. space-separated list of monitor scripts If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it -into the cpu utilization of the resource when the monitor is executed. +into the CPU utilization of the resource when the monitor is executed. -Enable auto setting the cpu utilization of the resource +Enable auto-setting the CPU utilization of the resource If set true, the agent will detect the number of *Max memory* from virsh, and put it into the hv_memory utilization of the resource when the monitor is executed. -Enable auto setting the hv_memory utilization of the resource +Enable auto-setting the hv_memory utilization of the resource This port will be used in the qemu migrateuri. If unset, the port will be a random highport. 
Port for migrateuri EOF } set_util_attr() { local attr=$1 val=$2 local cval outp cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) if [ "$cval" != "$val" ]; then outp=`crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1` || ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp" fi } update_utilization() { local dom_cpu dom_mem if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} | awk '/CPU\(s\)/{print $2}') test -n "$dom_cpu" && set_util_attr cpu $dom_cpu fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} | awk '/Max memory/{printf("%d", $3/1024)}') test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" fi } # Set options to be passed to virsh: VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet" # A state file where we record the domain name: STATEFILE="${HA_RSCTMP}/VirtualDomain-${OCF_RESOURCE_INSTANCE}.state" VirtualDomain_Define() { local virsh_output local domain_name # Note: passing in the domain name from outside the script is # intended for testing and debugging purposes only. Don't do this # in production, instead let the script figure out the domain name # from the config file. You have been warned. if [ -z "$DOMAIN_NAME" ]; then # Spin until we have a domain name while true; do virsh_output=$((virsh ${VIRSH_OPTIONS} define ${OCF_RESKEY_config}) 2>&1) domain_name=`echo "$virsh_output" | sed -n -e 's/Domain \(.*\) defined from .*$/\1/p'` if [ -n "$domain_name" ]; then break; fi domain_name=`echo $virsh_output | sed -n -e "s/.* '\(.*\)' already exists .*/\1/p"` if [ -n "$domain_name" ]; then break; fi ocf_log debug "Domain not defined yet, probably unable to connect to hypervisor. Retrying." sleep 1 done echo "$domain_name" > $STATEFILE ocf_log info "Domain name \"$domain_name\" saved to $STATEFILE." 
else ocf_log warn "Domain name ${DOMAIN_NAME} already defined, overriding configuration file ${OCF_RESKEY_config}. You should do this for testing only." fi } VirtualDomain_Cleanup_Statefile() { rm -f $STATEFILE || ocf_log warn "Failed to remove $STATEFILE during $__OCF_ACTION." } VirtualDomain_Status() { local try=0 rc=$OCF_ERR_GENERIC status="no state" while [ "$status" = "no state" ]; do try=$(($try + 1 )) status="`virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME`" case "$status" in "shut off") # shut off: domain is defined, but not started ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_NOT_RUNNING ;; running|paused|idle|blocked|"in shutdown") # running: domain is currently actively consuming cycles # paused: domain is paused (suspended) # idle: domain is running but idle # blocked: synonym for idle used by legacy Xen versions # in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed. ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_SUCCESS ;; ""|"no state") # Empty string may be returned when virsh does not # receive a reply from libvirtd. # "no state" may occur when the domain is currently # being migrated (on the migration target only), or # whenever virsh can't reliably obtain the domain # state. status="no state" if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then # During the stop operation, we want to bail out # quickly, so as to be able to force-stop (destroy) # the domain if necessary. ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out." return $OCF_ERR_GENERIC; else # During all other actions, we just wait and try # again, relying on the CRM/LRM to time us out if # this takes too long. ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying." sleep 1 fi ;; *) # any other output is unexpected. ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!" 
;; esac done return $rc } VirtualDomain_Start() { if VirtualDomain_Status; then ocf_log info "Virtual domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi virsh $VIRSH_OPTIONS start ${DOMAIN_NAME} rc=$? if [ $rc -ne 0 ]; then ocf_log error "Failed to start virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi while ! VirtualDomain_Monitor; do sleep 1 done return $OCF_SUCCESS } VirtualDomain_Stop() { local i local status local shutdown_timeout local out ex VirtualDomain_Status status=$? case $status in $OCF_SUCCESS) if ! ocf_is_true $OCF_RESKEY_force_stop; then # Issue a graceful shutdown request ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}." virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $NOW -lt $shutdown_timeout ]; do VirtualDomain_Status status=$? case $status in $OCF_NOT_RUNNING) # This was a graceful shutdown. Clean # up and return. VirtualDomain_Cleanup_Statefile return $OCF_SUCCESS ;; $OCF_SUCCESS) # Domain is still running, keep # waiting (until shutdown_timeout # expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac NOW=$(date +%s) done fi ;; $OCF_NOT_RUNNING) ocf_log info "Domain $DOMAIN_NAME already stopped." return $OCF_SUCCESS esac # OK. Now if the above graceful shutdown hasn't worked, kill # off the domain with destroy. If that too does not work, # have the LRM time us out. ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1) ex=$? echo >&2 "$out" # unconditionally clean up. 
VirtualDomain_Cleanup_Statefile
    # Classify the outcome of "virsh destroy" from its exit status
    # immediately followed by its output.
    case $ex$out in
        *"error:"*"domain is not running"*)
            : ;; # unexpected path to the intended outcome, all is well
        [!0]*)
            return $OCF_ERR_GENERIC ;;
        0*)
            # destroy succeeded: poll until the domain is reported stopped.
            while [ $status != $OCF_NOT_RUNNING ]; do
                VirtualDomain_Status
                status=$?
            done ;;
    esac
    return $OCF_SUCCESS
}

# Live-migrate the locally running domain to the node named in
# OCF_RESKEY_CRM_meta_migrate_target.
# Returns $OCF_SUCCESS after a successful migration, $OCF_ERR_GENERIC when
# the migration fails or the domain is not active on this node.
VirtualDomain_Migrate_To() {
    local target_node
    local remoteuri
    local transport_suffix
    local migrateuri
    local migrateport
    local migrate_target
    local hypervisor

    target_node="$OCF_RESKEY_CRM_meta_migrate_target"

    if VirtualDomain_Status; then
        # Find out the remote hypervisor to connect to. That is, turn
        # something like "qemu://foo:9999/system" into
        # "qemu+tcp://bar:9999/system"
        if [ -n "${OCF_RESKEY_migration_transport}" ]; then
            transport_suffix="+${OCF_RESKEY_migration_transport}"
        fi
        # A typical migration URI via a special migration network looks
        # like "tcp://bar-mig:49152". The port would be randomly chosen
        # by libvirt from the range 49152-49215 if omitted, at least since
        # version 0.7.4 ...
        if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then
            # Driver name: everything before the first "+" or ":".
            hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}"
            # Hostname might be a FQDN
            migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")
            case $hypervisor in
                qemu)
                    # For quite ancient libvirt versions a migration port is needed
                    # and the URI must not contain the "//". Newer versions can handle
                    # the "bad" URI.
                    migrateuri="tcp:${migrate_target}:${OCF_RESKEY_migrateport}"
                    ;;
                xen)
                    migrateuri="xenmigr://${migrate_target}"
                    ;;
                *)
                    ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}."
                    ;;
            esac
        fi
        # Scared of that sed expression? So am I. :-)
        # It keeps scheme, optional port and path of the hypervisor URI,
        # appends the transport to the scheme and swaps in the target node.
        remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,")

        # OK, we know where to connect to. Now do the actual migration.
        ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using remote hypervisor URI ${remoteuri} ${migrateuri})."
        virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} ${migrateuri}
        rc=$?
        if [ $rc -ne 0 ]; then
            ocf_log err "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc"
            return $OCF_ERR_GENERIC
        else
            ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded."
            VirtualDomain_Cleanup_Statefile
            return $OCF_SUCCESS
        fi
    else
        ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!"
        return $OCF_ERR_GENERIC
    fi
}

# Target side of a migration: poll once per second until the monitor
# succeeds. There is no timeout of its own in this function.
VirtualDomain_Migrate_From() {
    while ! VirtualDomain_Monitor; do
        sleep 1
    done
    ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."
    return $OCF_SUCCESS
}

# Monitor the domain: domain state first, then every configured monitor
# script. Utilization attributes are refreshed on every call.
VirtualDomain_Monitor() {
    # First, check the domain status. If that returns anything other
    # than $OCF_SUCCESS, something is definitely wrong.
    VirtualDomain_Status
    rc=$?
    if [ ${rc} -eq ${OCF_SUCCESS} ]; then
        # OK, the generic status check turned out fine. Now, if we
        # have monitor scripts defined, run them one after another.
        for script in ${OCF_RESKEY_monitor_scripts}; do
            script_output="$($script 2>&1)"
            script_rc=$?
            if [ ${script_rc} -ne ${OCF_SUCCESS} ]; then
                # A monitor script returned a non-success exit
                # code. Stop iterating over the list of scripts, log a
                # warning message, and propagate $OCF_ERR_GENERIC.
                ocf_log warn "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
                rc=$OCF_ERR_GENERIC
                break
            else
                ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}"
            fi
        done
    fi

    update_utilization

    return ${rc}
}

# Check the required binaries and the "config" parameter.
VirtualDomain_Validate_All() {
    # Required binaries:
    for binary in virsh sed; do
        check_binary $binary
    done

    if [ -z $OCF_RESKEY_config ]; then
        ocf_log error "Missing configuration parameter \"config\"."
return $OCF_ERR_CONFIGURED fi # check if we can read the config file (otherwise we're unable to # deduce $DOMAIN_NAME from it, see below) if [ ! -r $OCF_RESKEY_config ]; then if ocf_is_probe; then ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe." elif [ "$__OCF_ACTION" = "stop" ]; then ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped." else ocf_log error "Configuration file $OCF_RESKEY_config does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test VirtualDomain_Validate_All || exit $? # During a probe, it is permissible for the config file to not be # readable (it might be on shared storage not available during the # probe). In that case, VirtualDomain_Define can't work and we're # unable to get the domain name. Thus, we also can't check whether the # domain is running. The only thing we can do here is to assume that # it is not running. if [ ! -r $OCF_RESKEY_config ]; then ocf_is_probe && exit $OCF_NOT_RUNNING [ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS fi # Define the domain on startup, and re-define whenever someone deleted # the state file, or touched the config. if [ ! -e $STATEFILE ] || [ $OCF_RESKEY_config -nt $STATEFILE ]; then VirtualDomain_Define fi # By now, we should definitely be able to read from the state file. # If not, something went wrong. if [ ! -r $STATEFILE ]; then ocf_log err "$STATEFILE not found or unreadable. This is unexpected. Cannot determine domain name." exit $OCF_ERR_GENERIC fi # Finally, retrieve the domain name from the state file. DOMAIN_NAME=`cat $STATEFILE 2>/dev/null` if [ -z $DOMAIN_NAME ]; then ocf_log err "$STATEFILE is empty. This is unexpected. Cannot determine domain name." 
exit $OCF_ERR_GENERIC fi case $1 in start) VirtualDomain_Start ;; stop) VirtualDomain_Stop ;; migrate_to) VirtualDomain_Migrate_To ;; migrate_from) VirtualDomain_Migrate_From ;; status) VirtualDomain_Status ;; monitor) VirtualDomain_Monitor ;; validate-all) ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/apache b/heartbeat/apache index a313372b7..26b3223f8 100755 --- a/heartbeat/apache +++ b/heartbeat/apache @@ -1,552 +1,552 @@ #!/bin/sh # # High-Availability Apache/IBMhttp control script # # apache (aka IBMhttpd) # # Description: starts/stops apache web servers. # # Author: Alan Robertson # Sun Jiang Dong # # Support: linux-ha@lists.linux-ha.org # # License: GNU General Public License (GPL) # # Copyright: (C) 2002-2005 International Business Machines # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf # node1 10.0.0.170 IBMhttpd # # Our parsing of the Apache config files is very rudimentary. # It'll work with lots of different configurations - but not every # possible configuration. # # Patches are being accepted ;-) # # OCF parameters: # OCF_RESKEY_configfile # OCF_RESKEY_httpd # OCF_RESKEY_port # OCF_RESKEY_statusurl # OCF_RESKEY_options # OCF_RESKEY_testregex # OCF_RESKEY_client # OCF_RESKEY_testurl # OCF_RESKEY_testregex10 # OCF_RESKEY_testconffile # OCF_RESKEY_testname # OCF_RESKEY_envfiles : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/apache-conf.sh . 
${OCF_FUNCTIONS_DIR}/http-mon.sh HA_VARRUNDIR=${HA_VARRUN} ####################################################################### # # Configuration options - usually you don't need to change these # ####################################################################### # IBMHTTPD=/opt/IBMHTTPServer/bin/httpd HTTPDLIST="/sbin/httpd2 /usr/sbin/httpd2 /usr/sbin/apache2 /sbin/httpd /usr/sbin/httpd /usr/sbin/apache $IBMHTTPD" MPM=/usr/share/apache2/find_mpm if [ -x $MPM ] then HTTPDLIST="$HTTPDLIST `$MPM 2>/dev/null`" fi LOCALHOST="http://localhost" HTTPDOPTS="-DSTATUS" DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf DEFAULT_NORMCONFIG="/etc/apache2/httpd.conf" # # You can also set # HTTPD # PORT # STATUSURL # CONFIGFILE # in this section if what we're doing doesn't work for you... # # End of Configuration options ####################################################################### CMD=`basename $0` # The config-file-pathname is the pathname to the configuration # file for this web server. Various appropriate defaults are # assumed if no config file is specified. If this command is # invoked as *IBM*, then the default config file name is # $DEFAULT_IBMCONFIG, otherwise the default config file # will be $DEFAULT_NORMCONFIG. usage() { cat <<-! usage: $0 action action: start start the web server stop stop the web server status return the status of web server, run or down monitor return TRUE if the web server appears to be working. For this to be supported you must configure mod_status and give it a server-status URL. You have to have installed either curl or wget for this to work. meta-data show meta data message validate-all validate the instance parameters ! } # # return TRUE if a process with given PID is running # ProcessRunning() { ApachePID=$1 # Use /proc if it looks like it's here... if [ -d /proc -a -d /proc/1 ] then [ -d /proc/$ApachePID ] else # This assumes we're running as root... 
kill -s 0 "$ApachePID" >/dev/null 2>&1 fi } silent_status() { if [ -f $PidFile ] then ProcessRunning `cat $PidFile` else : No pid file false fi } # May be useful to add other distros in future validate_default_config() { if [ -e /etc/SuSE-release ]; then validate_default_suse_config else return 0 fi } # When using the default /etc/apache2/httpd.conf on SUSE, the file # /etc/apache2/sysconfig.d/include.conf is required to be present, # but this is only generated if you run the apache init script # (with contents derived from /etc/sysconfig/apache2). So, here, # if we're using the default system config file and it requires # that include, we run "/etc/init.d/apache2 configtest" to ensure # the relevant config is generated and valid. We're also taking # this opportunity to enable mod_status if it's not present. validate_default_suse_config() { if [ "$CONFIGFILE" = "$DEFAULT_NORMCONFIG" ] && \ grep -Eq '^Include[[:space:]]+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE" then [ -x "/usr/sbin/a2enmod" ] && ocf_run -q /usr/sbin/a2enmod status ocf_run -q /etc/init.d/apache2 configtest return else return 0 fi } apache_start() { if silent_status then ocf_log info "$CMD already running (pid $ApachePID)" return $OCF_SUCCESS fi validate_default_config || return $OCF_ERR_CONFIGURED # https://bugs.launchpad.net/ubuntu/+source/apache2/+bug/603211 [ -d /var/run/apache2 ] || mkdir /var/run/apache2 if [ -z $PIDFILE_DIRECTIVE ]; then ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE else ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE -c "PidFile $PidFile" fi tries=0 while : # wait until the user set timeout do apache_monitor ec=$? 
if [ $ec -eq $OCF_NOT_RUNNING ] then tries=`expr $tries + 1` ocf_log info "waiting for apache $CONFIGFILE to come up" sleep 1 else break fi done if [ $ec -ne 0 ] && silent_status; then apache_stop fi return $ec } apache_stop() { if silent_status then if kill $ApachePID then tries=0 while ProcessRunning $ApachePID && [ $tries -lt 10 ] do sleep 1 kill $ApachePID >/dev/null ocf_log info "Killing apache PID $ApachePID" tries=`expr $tries + 1` done else ocf_log warn "Killing apache PID $ApachePID FAILED." fi if ProcessRunning $ApachePID then ocf_log info "$CMD still running ($ApachePID)." false else ocf_log info "$CMD stopped." fi else ocf_log info "$CMD is not running." fi for sig in SIGTERM SIGHUP SIGKILL ; do if pgrep -f $HTTPD.*$CONFIGFILE >/dev/null ; then pkill -$sig -f $HTTPD.*$CONFIGFILE >/dev/null ocf_log info "apache children were signalled ($sig)" sleep 1 else break fi done } apache_monitor_10() { if [ "$TESTCONFFILE" ]; then readtestconf < $TESTCONFFILE else test_url="$TESTURL" test_regex="$TESTREGEX10" fi whattorun=`gethttpclient` fixtesturl is_testconf_sane || return $OCF_ERR_CONFIGURED if $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi } apache_monitor_basic() { if ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi } apache_monitor() { silent_status if [ $? -ne 0 ]; then ocf_log info "$CMD not running" return $OCF_NOT_RUNNING fi ourhttpclient=`findhttpclient` # we'll need one if [ -z "$ourhttpclient" ]; then ocf_log err "could not find a http client; make sure that either wget or curl is available" return $OCF_ERR_INSTALLED fi case `ocf_check_level 10` in 0) apache_monitor_basic;; 10) apache_monitor_10;; esac } apache_meta_data(){ cat < 1.0 -This is the resource agent for the Apache web server. +This is the resource agent for the Apache Web server. 
This resource agent operates both version 1.x and version 2.x Apache servers. The start operation ends with a loop in which monitor is repeatedly called to make sure that the server started and that it is operational. Hence, if the monitor operation does not succeed within the start operation timeout, the apache resource will end with an error status. The monitor operation by default loads the server status page which depends on the mod_status module and the corresponding configuration file (usually /etc/apache2/mod_status.conf). Make sure that the server status page works and that the access is allowed *only* from localhost (address 127.0.0.1). See the statusurl and testregex attributes for more details. See also http://httpd.apache.org/ -Manages an Apache web server instance +Manages an Apache Web server instance The full pathname of the Apache configuration file. This file is parsed to provide defaults for various other resource agent parameters. configuration file path The full pathname of the httpd binary (optional). httpd binary path A port number that we can probe for status information using the statusurl. This will default to the port number found in the configuration file, or 80, if none can be found in the configuration file. httpd port The URL to monitor (the apache server status page by default). If left unspecified, it will be inferred from the apache configuration file. If you set this, make sure that it succeeds *only* from the localhost (127.0.0.1). Otherwise, it may happen that the cluster complains about the resource being active on multiple nodes. url name Regular expression to match in the output of statusurl. Case insensitive. monitor regular expression Client to use to query to Apache. If not specified, the RA will try to find one on the system. Currently, wget and curl are supported. For example, you can set this parameter to "curl" if you prefer that to wget. http client URL to test. 
If it does not start with "http", then it's considered to be relative to the Listen address. test url Regular expression to match in the output of testurl. Case insensitive. extended monitor regular expression A file which contains test configuration. Could be useful if you have to check more than one web application or in case sensitive info should be passed as arguments (passwords). Furthermore, using a config file is the only way to specify certain parameters. Please see README.webapps for examples and file description. test configuration file Name of the test within the test configuration file. test name Extra options to apply when starting apache. See man httpd(8). command line options Files (one or more) which contain extra environment variables. If you want to prevent script from reading the default file, set this parameter to empty string. environment settings files We will try to detect if the URL (for monitor) is IPv6, but if that doesn't work set this to true to enforce IPv6. use ipv6 with http clients END return $OCF_SUCCESS } apache_validate_all() { if CheckPort $PORT; then # We are sure to succeed here, since we forced $PORT to be valid in GetParams() : OK else ocf_log err "Port number $PORT is invalid!" return $OCF_ERR_INSTALLED fi case $STATUSURL in http://*) ;; *) ocf_log err "Invalid STATUSURL $STATUSURL" return $OCF_ERR_CONFIGURED ;; esac if [ ! -x $HTTPD ]; then ocf_log err "HTTPD $HTTPD not found or is not an executable!" return $OCF_ERR_INSTALLED fi if [ ! -f $CONFIGFILE ]; then # We are sure to succeed here, since we have parsed $CONFIGFILE before getting here ocf_log err "Configuration file $CONFIGFILE not found!" 
return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } find_httpd_prog() { case $0 in *IBM*) HTTPD=$IBMHTTPD DefaultConfig=$DEFAULT_IBMCONFIG;; *) HTTPD= for h in $HTTPDLIST do if [ -f $h -a -x $h ] then HTTPD=$h break fi done # Let the user know that the $HTTPD used is not the one (s)he specified via $OCF_RESKEY_httpd if [ "X$OCF_RESKEY_httpd" != X -a "X$HTTPD" != X ] then ocf_log info "Using $HTTPD as HTTPD" fi DefaultConfig=$DEFAULT_NORMCONFIG;; esac } apache_getconfig() { # these variables are global HTTPD="$OCF_RESKEY_httpd" PORT="$OCF_RESKEY_port" STATUSURL="$OCF_RESKEY_statusurl" CONFIGFILE="$OCF_RESKEY_configfile" OPTIONS="$OCF_RESKEY_options" CLIENT=${OCF_RESKEY_client} TESTREGEX=${OCF_RESKEY_testregex:-''} TESTURL="$OCF_RESKEY_testurl" TESTREGEX10=${OCF_RESKEY_testregex10} TESTCONFFILE="$OCF_RESKEY_testconffile" TESTNAME="$OCF_RESKEY_testname" : ${OCF_RESKEY_envfiles="/etc/apache2/envvars"} source_envfiles $OCF_RESKEY_envfiles if [ "X$HTTPD" = X -o ! -f "$HTTPD" -o ! -x "$HTTPD" ] then find_httpd_prog fi CONFIGFILE=${CONFIGFILE:-$DefaultConfig} httpd_basename=`basename $HTTPD` case $httpd_basename in *-*) httpd_basename=`echo "$httpd_basename" | sed -e 's%\-.*%%'`;; esac GetParams $CONFIGFILE } OCF_REQUIRED_PARAMS="" OCF_REQUIRED_BINARIES="" ocf_rarun $* # vim:sw=2:ts=8: diff --git a/heartbeat/dhcpd b/heartbeat/dhcpd index c272b119e..835a78835 100755 --- a/heartbeat/dhcpd +++ b/heartbeat/dhcpd @@ -1,545 +1,545 @@ #!/bin/sh # # Resource Agent for managing dhcpd resources. # # License: GNU General Public License (GPL) # (c) 2011-2012 Chris Bowlby, # # A fair amount of this script has been pulled from the official 0dhcpd # init script. Those portions have been integrated into this script to # ensure consistent behavior between the resource agent and the # original script. The copyrights and original authors are credited # as follows: # # Copyright (c) 1996, 1997, 1998 S.u.S.E. 
GmbH # Copyright (c) 1998, 1999, 2000, 2001 SuSE GmbH # Copyright (c) 2002, 2003 SuSE Linux AG # Copyright (c) 2004-2008 SUSE LINUX Products GmbH, Nuernberg, Germany. # # Author(s) : Rolf Haberrecker , 1997-1999 # Peter Poeml , 2000-2006 # Marius Tomaschewski , 2006-2010 # # and Linux-HA contributors # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_binary_default="dhcpd" OCF_RESKEY_pid_default="/var/run/dhcpd.pid" OCF_RESKEY_user_default=dhcpd OCF_RESKEY_group_default=nogroup OCF_RESKEY_config_default="" OCF_RESKEY_chrooted_default="true" OCF_RESKEY_chrooted_path_default="/var/lib/dhcp" OCF_RESKEY_leases_default="/db/dhcpd.leases" OCF_RESKEY_interface_default="" OCF_RESKEY_includes_default="" : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} : ${OCF_RESKEY_chrooted=${OCF_RESKEY_chrooted_default}} : ${OCF_RESKEY_chrooted_path=${OCF_RESKEY_chrooted_path_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_leases=${OCF_RESKEY_leases_default}} : ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}} : ${OCF_RESKEY_includes=${OCF_RESKEY_includes_default}} # To enable support for different versions of dhcp, we need # to know what version we are being run against. DHCP_VERSION_MAJOR=`$OCF_RESKEY_binary --version 2>&1 | awk -F- '{print $3}' | awk -F. '{print $1}' | sed s/^[a-zA-Z]//g` # These files are always copied by default to ensure the chroot environment works. DEFAULT_FILE_LIST="/etc/gai.conf /etc/nsswitch.conf /etc/resolv.conf /etc/host.conf /etc/hosts /etc/localtime /dev/urandom" usage() { cat < 0.1 Manage an ISC DHCP server service in a chroot environment. - Chrooted ISC DHCP Server resource agent. + Chrooted ISC DHCP server resource agent. The absolute path to the DHCP server configuration file. 
Configuration file Configure the dhcpd service to run in a chrooted or non-chrooted mode. Enable chroot mode The absolute path of the chrooted DHCP environment. The chrooted path The binary for the DHCP server process. An absolute path definition is not required, but can be used to override environment path. dhcpd binary The system user the DHCP server process will run as when it is chrooted. dhcpd owner The system group the DHCP server process will run as when it is chrooted. dhcpd group owner The network interface(s) the DHCP server process will bind to. A blank value will bind the process to all interfaces. Network Interface This parameter provides a means to copy include files into the chrooted environment. If a dhcpd.conf file contains a line similar to this: include "/etc/named.keys"; Then an admin also has to tell the dhcpd RA that this file should be pulled into the chrooted environment. This is a space delimited list. - Include Files + Include files The leases database file, relative to chrooted_path. Leases file The path and filename of the PID file. It is relative to chrooted_path. PID file EOF } # Validate most critical parameters dhcpd_validate_all() { check_binary $OCF_RESKEY_binary if ! ocf_is_probe; then # Test for the appropriate configuration files depending on if # chroot mode is enabled. if ocf_is_true $OCF_RESKEY_chrooted ; then if ! test -e "$OCF_RESKEY_chrooted_path"; then ocf_log err "Path $OCF_RESKEY_chrooted_path does not exist." return $OCF_ERR_INSTALLED fi if test -n "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config"; then ocf_log err "Configuration file $OCF_RESKEY_chrooted_path/$OCF_RESKEY_config doesn't exist" return $OCF_ERR_INSTALLED fi else if test -n "$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_config"; then ocf_log err "Configuration file $OCF_RESKEY_config doesn't exist" return $OCF_ERR_INSTALLED fi fi fi if ! 
getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then ocf_log err "User $OCF_RESKEY_user doesn't exist" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } # dhcpd_monitor. Send a request to dhcpd and check response. dhcpd_monitor() { # Assume chrooted mode is being used, but if not update the PIDF # variable to point to the non-chrooted PID file. PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ! ocf_is_true $OCF_RESKEY_chrooted ; then PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` fi ocf_pidfile_status $PIDF >/dev/null 2>&1 || return $OCF_NOT_RUNNING return $OCF_SUCCESS } # Initialize Chroot dhcpd_initialize_chroot() { # If we are running the initialization for the first time, we need to make # the new chrooted folder, in case we are not using the same default. if ! [ -d $OCF_RESKEY_chrooted_path ] ; then ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use." fi # Make sure all sub-paths are created if something went wrong during # a partial run. for i in db dev etc lib64 var/run; do mkdir -p $OCF_RESKEY_chrooted_path/$i done # If we are running version 4 of the dhcp server, we need to mount a proc partition. if [ $DHCP_VERSION_MAJOR -ge 4 ] ; then mkdir -p $OCF_RESKEY_chrooted_path/proc if ! [ -e $OCF_RESKEY_chrooted_path/proc/net/dev ] ; then mount -t proc -o ro proc $OCF_RESKEY_chrooted_path/proc > /dev/null 2>&1 fi fi # If the folder to store the PID file does not exist, make it. if ! [ -d "$OCF_RESKEY_chrooted_path`dirname $OCF_RESKEY_pid`" ] ; then mkdir -p "$OCF_RESKEY_chrooted_path`dirname $OCF_RESKEY_pid`" fi # Ensure all permissions are in place if the folder was re-created. chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_chrooted_path/`dirname $OCF_RESKEY_leases` chown -R $OCF_RESKEY_user:$OCF_RESKEY_group "$OCF_RESKEY_chrooted_path/`dirname $OCF_RESKEY_pid`" ## If there is no conf file, we can't initialize the chrooted ## environment, return with "program not configured" if ! 
[ -f $OCF_RESKEY_config ] ; then ocf_log err "dhcpd has not been configured." return $OCF_ERR_CONFIGURED fi # If the leases file does not exist, create it, as this is a fresh install. if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases fi # Remove the random device. test -e "$OCF_RESKEY_chrooted_path/dev/urandom" && rm -f $OCF_RESKEY_chrooted_path/dev/urandom # Test for the existance of the defined include files, and append # them to the list of files to be copied. for i in $OCF_RESKEY_includes ; do if [ -e $i ] ; then DEFAULT_FILE_LIST="$DEFAULT_FILE_LIST $i" else ocf_log err "include file $i does not exist" return $OCF_ERR_INSTALLED fi done # Ensure all "modified" non-chrooted configuration files are copied into the chrooted environment. for i in $OCF_RESKEY_config $DEFAULT_FILE_LIST; do # First, lets make sure the directory exists within the chrooted environment. if test -d "$i" ; then mkdir -p $OCF_RESKEY_chrooted_path/$i elif test -e "$i" ; then mkdir -p "`dirname $OCF_RESKEY_chrooted_path/$i`" fi # Next, we copy the configuration file into place. cp -aL "$i" "$OCF_RESKEY_chrooted_path/${i%/*}/" > /dev/null 2>&1 || { ocf_log err "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } done libdir=$(basename $(echo /var/lib/dhcp/lib*)) if test -x /usr/bin/ldd ; then get_ldd_deps() { ldd_wl="\/$libdir\/lib" ldd_bl="\/$libdir\/libc\." 
/usr/bin/ldd "$1" | while read a b c d ; do [ -n "$c" ] || continue [[ $c =~ $ldd_wl ]] || continue [[ $c =~ $ldd_bl ]] && continue echo $c done } else get_ldd_deps() { :; } fi cplibs=`for i in /$libdir/libresolv.so.* /$libdir/libnss_*.so.* /$libdir/libpthread.so.0 /$libdir/libdl.so.2 do if [ -s "$i" ] ; then echo "$i" get_ldd_deps "$i" fi done | sort -u` for i in $cplibs ; do if [ -s "$i" ]; then cp -pL "$i" "/var/lib/dhcp/$libdir/" || { ocf_log err "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } fi done return $OCF_SUCCESS } # Initialize a non-chroot environment dhcpd_initialize() { ## If there is no conf file, we can't start a dhcp service. if ! [ -f $OCF_RESKEY_config ] ; then ocf_log err "dhcpd has not been configured." return $OCF_ERR_CONFIGURED fi # As with the standard DHCP init script, we can still use the # chrooted default path for storing the leases file. This behavior # is consistent with the existing /etc/init.d/dhcpd script. if ! [ -d $OCF_RESKEY_chrooted_path ] ; then ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use." fi # If the leases file does not exist, create it, as this is a fresh install. if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases fi # if the PID storage path does not exist, make it, and setup the permissions. # NOTE: This part of the script has a potential security flaw, in that if someone # puts in /var/run as the path, it will change ownership to the dhcpd user # and group. However, all that would do is allow that user to view the contents # of the files, which they can do now anyway. If this becomes an issue, I can work # in some changes. # We need to append "dhcpd" to the path for the PID file storage folder, because # if /var/run is used, that folders permissions can not be changed, otherwise it affects # more then just one application. if ! 
[ -d `dirname $OCF_RESKEY_pid`/dhcpd ] ; then mkdir -p `dirname $OCF_RESKEY_pid`/dhcpd if [ -n "$OCF_RESKEY_user" -a "x$OCF_RESKEY_user" != "xroot" ] ; then chown $OCF_RESKEY_user `dirname $OCF_RESKEY_pid`/dhcpd fi if [ -n "$OCF_RESKEY_group" -a "x$OCF_RESKEY_group" != "xwheel" ] ; then chgrp $OCF_RESKEY_group `dirname $OCF_RESKEY_pid`/dhcpd fi fi return $OCF_SUCCESS } # Start dhcpd_start() { # Lets make sure we are not already running. if dhcpd_monitor; then ocf_log info "dhcpd already running" return $OCF_SUCCESS fi # Only initialize the chrooted path(s) if chroot mode is enabled. if ocf_is_true $OCF_RESKEY_chrooted ; then dhcpd_initialize_chroot || { ocf_log err "Could not fully initialize the chroot environment." ; return $OCF_ERR_INSTALLED; } else dhcpd_initialize || { ocf_log err "Could not fully initialize the runtime environment." ; return $OCF_ERR_INSTALLED; } fi dhcpd_validate_all || exit # Define an empty string variable, to ensure it exists when needed. DHCPD_ARGS="" # To ensure consistent behavior with the standard DHCPD init script, # use the chrooted default path for storing a leases file, when not in # a chrooted enviroment. if ocf_is_true $OCF_RESKEY_chrooted ; then DHCPD_ARGS="$DHCPD_ARGS -chroot $OCF_RESKEY_chrooted_path -lf $OCF_RESKEY_leases" else DHCPD_ARGS="$DHCPD_ARGS -lf $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases" fi if [ -n "$OCF_RESKEY_user" ]; then DHCPD_ARGS="$DHCPD_ARGS -user $OCF_RESKEY_user" fi if [ -n "$OCF_RESKEY_group" ]; then DHCPD_ARGS="$DHCPD_ARGS -group $OCF_RESKEY_group" fi # If there is a pid file containing a pid, the machine might have crashed. pid files in # /var/run are always cleaned up at boot time, but this is not the case for the pid file in # the chroot jail. Therefore, an old pid file may exist. This is only a problem if it # incidentally contains the pid of a running process. If this process is not a 'dhcpd', # we remove the pid. (dhcpd itself only checks whether the pid is alive or not.) 
PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ocf_is_true $OCF_RESKEY_chrooted ; then ocf_log info "Starting dhcpd [chroot] service." DHCPD_ARGS="$DHCPD_ARGS -pf $OCF_RESKEY_pid" else ocf_log info "Starting dhcpd [non-chroot] service." PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` DHCPD_ARGS="$DHCPD_ARGS -pf $PIDF" fi test -e "$PIDF" && rm -f $PIDF ocf_run $OCF_RESKEY_binary -cf $OCF_RESKEY_config $DHCPD_ARGS $OCF_RESKEY_interface || return $OCF_ERR_INSTALLED while ! dhcpd_monitor; do sleep .1 ocf_log info "waiting for dhcpd to start" return $OCF_SUCCESS done if ocf_is_true $OCF_RESKEY_chrooted ; then ocf_log info "dhcpd [chrooted] has started." else ocf_log info "dhcpd [non-chrooted] has started." fi return $OCF_SUCCESS } # Stop dhcpd_stop () { local timeout local timewait local rc dhcpd_monitor rc=$? case "$rc" in "$OCF_SUCCESS") # Currently running, and is expected behaviour. ;; "$OCF_NOT_RUNNING") # Currently not running, therefore nothing to do. ocf_log info "dhcpd already stopped" return $OCF_SUCCESS ;; esac PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid" if ! ocf_is_true $OCF_RESKEY_chrooted ; then PIDF=`dirname $OCF_RESKEY_pid`/dhcpd/`basename $OCF_RESKEY_pid` fi kill `cat $PIDF` # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timewait=$((OCF_RESKEY_CRM_meta_timeout/1500)) sleep 0.1; timeout=0 # Sleep here for .1 sec to let dhcpd finish. while dhcpd_monitor ; do if [ $timeout -ge $timewait ]; then break else sleep 1 timeout=`expr $timeout + 1` fi done #If still up if dhcpd_monitor 2>&1; then ocf_log err "dhcpd is still up! Trying kill -s KILL" kill -s SIGKILL `cat $PIDF` fi # If we are running a dhcp server v4 or higher, unmount the proc partition. if [ $DHCP_VERSION_MAJOR -ge 4 ] ; then # We only want to unmount proc in a chrooted environment, else we could # cause other issues. 
if ocf_is_true $OCF_RESKEY_chrooted ; then umount $OCF_RESKEY_chrooted_path/proc > /dev/null 2>&1 fi fi rm -f $PIDF ocf_log info "dhcpd stopped" return $OCF_SUCCESS } # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) dhcpd_meta_data exit $OCF_SUCCESS ;; validate-all) dhcpd_validate_all exit $OCF_SUCCESS ;; usage|help) dhcpd_usage exit $OCF_SUCCESS ;; esac # Translate each action into the appropriate function call case $__OCF_ACTION in start) dhcpd_start;; stop) dhcpd_stop;; monitor) dhcpd_monitor;; *) dhcpd_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor index 77775c264..b85d7fc2e 100755 --- a/heartbeat/ethmonitor +++ b/heartbeat/ethmonitor @@ -1,454 +1,454 @@ #!/bin/sh # # OCF Resource Agent compliant script. # Monitor the vitality of a local network interface. # # Based on the work by Robert Euhus and Lars Marowsky-Brée. # # Transfered from Ipaddr2 into ethmonitor by Alexander Krauth # # Copyright (c) 2011 Robert Euhus, Alexander Krauth, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # OCF parameters are as below # # OCF_RESKEY_interface # OCF_RESKEY_multiplicator # OCF_RESKEY_name # OCF_RESKEY_repeat_count # OCF_RESKEY_repeat_interval # OCF_RESKEY_pktcnt_timeout # OCF_RESKEY_arping_count # OCF_RESKEY_arping_timeout # OCF_RESKEY_arping_cache_entries # # TODO: Check against IPv6 # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.2 Monitor the vitality of a local network interface. -You may setup this RA as a clone resource to monitor the network interfaces on different nodes, with the same interface name. -This is not related to the IP adress or the network on which a interface is configured. +You may set up this RA as a clone resource to monitor the network interfaces on different nodes, with the same interface name. +This is not related to the IP address or the network on which a interface is configured. You may use this RA to move resources away from a node, which has a faulty interface or prevent moving resources to such a node. This gives you independend control of the resources, without involving cluster intercommunication. But it requires your nodes to have more than one network interface. The resource configuration requires a monitor operation, because the monitor does the main part of the work. -In addition to the resource configuration, you need to configure some location contraints, based on a CIB attribute value. +In addition to the resource configuration, you need to configure some location constraints, based on a CIB attribute value. The name of the attribute value is configured in the 'name' option of this RA. 
Example constraint configuration: location loc_connected_node my_resource_grp \ rule $id="rule_loc_connected_node" -INF: ethmonitor eq 0 The ethmonitor works in 3 different modes to test the interface vitality. 1. call ip to see if the link status is up (if link is down -> error) -2. call ip an watch the RX counter (if packages come around in a certain time -> success) -3. call arping to check wether any of the IPs found in the lokal ARP cache answers an ARP REQUEST (one answer -> success) +2. call ip and watch the RX counter (if packages come around in a certain time -> success) +3. call arping to check whether any of the IPs found in the local ARP cache answers an ARP REQUEST (one answer -> success) 4. return error Monitors network interfaces The name of the network interface which should be monitored (e.g. eth0). Network interface name The name of the CIB attribute to set. This is the name to be used in the constraints. Defaults to "ethmonitor-'interface_name'". Attribute name Multiplier for the value of the CIB attriobute specified in parameter name. Multiplier for result variable Specify how often the interface will be monitored, before the status is set to failed. You need to set the timeout of the monitoring operation to at least repeat_count * repeat_interval Monitor repeat count Specify how long to wait in seconds between the repeat_counts. Monitor repeat interval in seconds Timeout for the RX packet counter. Stop listening for packet counter changes after the given number of seconds. packet counter timeout Number of ARP REQUEST packets to send for every IP. Usually one ARP REQUEST (arping) is send Number of arpings per IP Time in seconds to wait for ARP REQUESTs (all packets of arping_count). This is to limit the time for arp requests, to be able to send requests to more than one node, without running in the monitor operation timeout. Timeout for arpings per IP Maximum number of IPs from ARP cache list to check for ARP REQUEST (arping) answers. 
Newest entries are tried first. Number of ARP cache entries to try END exit $OCF_SUCCESS } # # Return true, if the interface exists # is_interface() { # # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces # local iface=`$IP2UTIL -o -f inet addr show | grep " $1 " \ | cut -d ' ' -f2 | sort -u | grep -v '^ipsec[0-9][0-9]*$'` [ "$iface" != "" ] } if_init() { local rc if [ X"$OCF_RESKEY_interface" = "X" ]; then ocf_log err "Interface name (the interface parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi NIC="$OCF_RESKEY_interface" if is_interface $NIC then case "$NIC" in *:*) ocf_log err "Do not specify a virtual interface : $OCF_RESKEY_interface" exit $OCF_ERR_CONFIGURED;; *) ;; esac else case $__OCF_ACTION in validate-all) ocf_log err "Interface $NIC does not exist" exit $OCF_ERR_CONFIGURED;; *) ocf_log warn "Interface $NIC does not exist" ## It might be a bond interface which is temporarily not available, therefore we want to continue here ;; esac fi : ${OCF_RESKEY_multiplier:="1"} if ! ocf_is_decimal "$OCF_RESKEY_multiplier"; then ocf_log err "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]" exit $OCF_ERR_CONFIGURED fi ATTRNAME=${OCF_RESKEY_name:-"ethmonitor-$NIC"} REP_COUNT=${OCF_RESKEY_repeat_count:-5} if ! ocf_is_decimal "$REP_COUNT" -o [ $REP_COUNT -lt 1 ]; then ocf_log err "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]" exit $OCF_ERR_CONFIGURED fi REP_INTERVAL_S=${OCF_RESKEY_repeat_interval:-10} if ! ocf_is_decimal "$REP_INTERVAL_S"; then ocf_log err "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]" exit $OCF_ERR_CONFIGURED fi : ${OCF_RESKEY_pktcnt_timeout:="5"} if ! ocf_is_decimal "$OCF_RESKEY_pktcnt_timeout"; then ocf_log err "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]" exit $OCF_ERR_CONFIGURED fi : ${OCF_RESKEY_arping_count:="1"} if ! 
ocf_is_decimal "$OCF_RESKEY_arping_count"; then ocf_log err "Invalid OCF_RESKEY_arping_count [$OCF_RESKEY_arping_count]" exit $OCF_ERR_CONFIGURED fi : ${OCF_RESKEY_arping_timeout:="1"} if ! ocf_is_decimal "$OCF_RESKEY_arping_timeout"; then ocf_log err "Invalid OCF_RESKEY_arping_timeout [$OCF_RESKEY_arping_count]" exit $OCF_ERR_CONFIGURED fi : ${OCF_RESKEY_arping_cache_entries:="5"} if ! ocf_is_decimal "$OCF_RESKEY_arping_cache_entries"; then ocf_log err "Invalid OCF_RESKEY_arping_cache_entries [$OCF_RESKEY_arping_cache_entries]" exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } # get the link status on $NIC # asks ip about running (up) interfaces, returns the number of matching interface names that are up get_link_status () { $IP2UTIL -o link show up dev "$NIC" | grep -v 'NO-CARRIER' | grep -c "$NIC" } # returns the number of received rx packets on $NIC get_rx_packets () { ocf_log debug "$IP2UTIL -o -s link show dev $NIC" $IP2UTIL -o -s link show dev "$NIC" \ | sed 's/.* RX: [^0-9]*[0-9]* *\([0-9]*\) .*/\1/' # the first number after RX: ist the # of bytes , # the second is the # of packets received } # watch for packet counter changes for max. OCF_RESKEY_pktcnt_timeout seconds # returns immedeately with return code 0 if any packets were received # otherwise 1 is returned watch_pkt_counter () { local RX_PACKETS_NEW local RX_PACKETS_OLD RX_PACKETS_OLD="`get_rx_packets`" for n in `seq $(( $OCF_RESKEY_pktcnt_timeout * 10 ))`; do sleep 0.1 RX_PACKETS_NEW="`get_rx_packets`" ocf_log debug "RX_PACKETS_OLD: $RX_PACKETS_OLD RX_PACKETS_NEW: $RX_PACKETS_NEW" if [ "$RX_PACKETS_OLD" -ne "$RX_PACKETS_NEW" ]; then ocf_log debug "we received some packets." return 0 fi done return 1 } # returns list of cached ARP entries for $NIC # sorted by age ("last confirmed") # max. 
OCF_RESKEY_arping_cache_entries entries get_arp_list () { $IP2UTIL -s neighbour show dev $NIC \ | sort -t/ -k2,2n | cut -d' ' -f1 \ | head -n $OCF_RESKEY_arping_cache_entries # the "used" entries in `ip -s neighbour show` are: # "last used"/"last confirmed"/"last updated" } # arping the IP given as argument $1 on $NIC # until OCF_RESKEY_arping_count answers are received do_arping () { # TODO: add the source IP # TODO: check for diffenrent arping versions out there arping -q -c $OCF_RESKEY_arping_count -w $OCF_RESKEY_arping_timeout -I $NIC $1 # return with the exit code of the arping command return $? } # # Check the interface depending on the level given as parameter: $OCF_RESKEY_check_level # # 09: check for nonempty ARP cache # 10: watch for packet counter changes # # 19: check arping_ip_list # 20: check arping ARP cache entries # # 30: watch for packet counter changes in promiscios mode # # If unsuccessfull in levels 18 and above, # the tests for higher check levels are run. # if_check () { # always check link status first link_status="`get_link_status`" ocf_log debug "link_status: $link_status (1=up, 0=down)" [ $link_status -eq 0 ] && return $OCF_NOT_RUNNING # watch for packet counter changes ocf_log debug "watch for packet counter changes" watch_pkt_counter && return $OCF_SUCCESS # check arping ARP cache entries ocf_log debug "check arping ARP cache entries" for ip in `get_arp_list`; do do_arping $ip && return $OCF_SUCCESS done # watch for packet counter changes in promiscios mode # ocf_log debug "watch for packet counter changes in promiscios mode" # be sure switch off promiscios mode in any case # TODO: check first, wether promisc is already on and leave it untouched. 
# trap "$IP2UTIL link set dev $NIC promisc off; exit" INT TERM EXIT # $IP2UTIL link set dev $NIC promisc on # watch_pkt_counter && return $OCF_SUCCESS # $IP2UTIL link set dev $NIC promisc off # trap - INT TERM EXIT # looks like it's not working (for whatever reason) return $OCF_NOT_RUNNING } ####################################################################### if_usage() { cat < /dev/null` sleep $sleep_time 2> /dev/null runs=$(($runs + 1)) fi if [ $mon_rc -eq $OCF_SUCCESS -a $runs -ne 0 ]; then ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error" fi done ocf_log debug "Monitoring return code: $mon_rc" if [ $mon_rc -eq $OCF_SUCCESS ]; then set_cib_value 1 attr_rc=$? else ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed." set_cib_value 0 attr_rc=$? fi ## The resource should not fail, if the interface is down. It should fail, if the update of the CIB variable has errors. ## To react on the interface failure you must use constraints based on the CIB variable value, not on the resource itself. exit $attr_rc } if_validate() { check_binary $IP2UTIL check_binary arping if_init } case $__OCF_ACTION in meta-data) meta_data ;; usage|help) if_usage exit $OCF_SUCCESS ;; esac if_validate case $__OCF_ACTION in start) ha_pseudo_resource $OCF_RESOURCE_INSTANCE start exit $? ;; stop) attrd_updater -D -n $ATTRNAME ha_pseudo_resource $OCF_RESOURCE_INSTANCE stop exit $? ;; monitor|status) if_monitor exit $? ;; validate-all) exit $? ;; *) if_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit index a75d888ba..7128c2544 100755 --- a/heartbeat/iSCSILogicalUnit +++ b/heartbeat/iSCSILogicalUnit @@ -1,522 +1,522 @@ #!/bin/bash # # # iSCSILogicalUnit OCF RA. Exports and manages iSCSI Logical Units. 
# # (c) 2009-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults # Set a default implementation based on software installed if have_binary ietadm; then OCF_RESKEY_implementation_default="iet" elif have_binary tgtadm; then OCF_RESKEY_implementation_default="tgt" elif have_binary lio_node; then OCF_RESKEY_implementation_default="lio" fi : ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}} # Use a default SCSI ID and SCSI SN that is unique across the cluster, # and persistent in the event of resource migration. # SCSI IDs are limited to 24 bytes, but only 16 bytes are known to be # supported by all iSCSI implementations this RA cares about. Thus, # for a default, use the first 16 characters of # $OCF_RESOURCE_INSTANCE. 
OCF_RESKEY_scsi_id_default="${OCF_RESOURCE_INSTANCE:0:16}" : ${OCF_RESKEY_scsi_id=${OCF_RESKEY_scsi_id_default}} # To have a reasonably unique default SCSI SN, use the first 8 bytes # of an MD5 hash of of $OCF_RESOURCE_INSTANCE sn=`echo -n "${OCF_RESOURCE_INSTANCE}" | openssl md5 | sed -e 's/(stdin)= //'` OCF_RESKEY_scsi_sn_default=${sn:0:8} : ${OCF_RESKEY_scsi_sn=${OCF_RESKEY_scsi_sn_default}} # set 0 as a default value for lio iblock device number OCF_RESKEY_lio_iblock_default=0 OCF_RESKEY_lio_iblock=${OCF_RESKEY_lio_iblock:-$OCF_RESKEY_lio_iblock_default} ####################################################################### meta_data() { cat < 0.9 Manages iSCSI Logical Unit. An iSCSI Logical unit is a subdivision of an SCSI Target, exported via a daemon that speaks the iSCSI protocol. Manages iSCSI Logical Units (LUs) The iSCSI target daemon implementation. Must be one of "iet", "tgt", or "lio". If unspecified, an implementation is selected based on the availability of management utilities, with "iet" being tried first, then "tgt", then "lio". iSCSI target daemon implementation The iSCSI Qualified Name (IQN) that this Logical Unit belongs to. iSCSI target IQN The Logical Unit number (LUN) exposed to initiators. Logical Unit number (LUN) The path to the block device exposed. Some implementations allow this to be a regular file, too. Block device (or file) path The SCSI ID to be configured for this Logical Unit. The default is the resource name, truncated to 24 bytes. SCSI ID The SCSI serial number to be configured for this Logical Unit. The default is a hash of the resource name, truncated to 8 bytes. SCSI serial number The SCSI vendor ID to be configured for this Logical Unit. SCSI vendor ID The SCSI product ID to be configured for this Logical Unit. SCSI product ID Additional LU parameters. A space-separated list of "name=value" pairs which will be passed through to the iSCSI daemon's management interface. 
The supported parameters are implementation dependent. Neither the name nor the value may contain whitespace. List of iSCSI LU parameters Allowed initiators. A space-separated list of initiators allowed to connect to this lun. Initiators may be listed in any syntax the target implementation allows. If this parameter is empty or not set, access to this lun will not be allowed from any initiator, if target is not in demo mode. -This parameter is only necessary, when using LIO. +This parameter is only necessary when using LIO. List of iSCSI initiators allowed to connect to this lun. LIO iblock device name, a number starting from 0. Using distinct values here avoids a warning in LIO "LEGACY: SHARED HBA"; and it is necessary when using multiple LUNs started at the same time (eg. on node failover) to prevent a race condition in tcm_core on mkdir() in /sys/kernel/config/target/core/. LIO iblock device number END } ####################################################################### iSCSILogicalUnit_usage() { cat < # (C) 2007 Novell Inc. All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # See usage() and meta_data() below for more details... # # OCF instance parameters: # OCF_RESKEY_portal: the iSCSI portal address or host name (required) # OCF_RESKEY_target: the iSCSI target (required) # OCF_RESKEY_iscsiadm: iscsiadm program path (optional) # OCF_RESKEY_discovery_type: discovery type (optional; default: sendtargets) # OCF_RESKEY_try_recovery: wait for iSCSI recovery in monitor (optional; default: false) # # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_udev_default="yes" OCF_RESKEY_iscsiadm_default="iscsiadm" OCF_RESKEY_discovery_type_default="sendtargets" OCF_RESKEY_try_recovery_default="false" : ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}} : ${OCF_RESKEY_iscsiadm=${OCF_RESKEY_iscsiadm_default}} : ${OCF_RESKEY_discovery_type=${OCF_RESKEY_discovery_type_default}} usage() { methods=`iscsi_methods` methods=`echo $methods | tr ' ' '|'` cat < 1.0 OCF Resource Agent for iSCSI. Add (start) or remove (stop) iSCSI targets. Manages a local iSCSI initiator and its connections to iSCSI targets The iSCSI portal address in the form: {ip_address|hostname}[":"port] Portal address The iSCSI target IQN. Target IQN Target discovery type. Check the open-iscsi documentation for supported discovery types. Target discovery type open-iscsi administration utility binary. iscsiadm binary If the next resource depends on the udev creating a device then we wait until it is finished. On a normally loaded host this should be done quickly, but you may be unlucky. If you are not using udev set this to "no", otherwise we will spin in a loop until a timeout occurs. 
udev If the iSCSI session exists but is currently inactive/broken, which is most probably due to network problems, the iSCSI layer will try to recover. If this parameter is set to true, we'll wait for the recovery to succeed. In that case the monitor operation can only time out so you should set the monitor op timeout attribute appropriately. -on error wait for iSCSI recovery in monitor +On error wait for iSCSI recovery in monitor EOF } iscsi_methods() { cat <= "2.0-872" changed discovery semantics # see http://www.mail-archive.com/open-iscsi@googlegroups.com/msg04883.html # there's a new discoverydb command which should be used instead discovery open_iscsi_discovery() { local output local discovery_variant="discovery" local options="" local cmd local version=`$iscsiadm --version | awk '{print $3}'` ocf_version_cmp "$version" "2.0-871" if [ $? -eq 2 ]; then # newer than 2.0-871? discovery_variant="discoverydb" [ "$discovery_type" = "sendtargets" ] && options="-D" fi cmd="$iscsiadm -m $discovery_variant -p $OCF_RESKEY_portal -t $discovery_type $options" output=`$cmd` if [ $? 
-ne 0 -o x = "x$output" ]; then [ x != "x$output" ] && { ocf_log err "$cmd FAILED" echo "$output" } return 3 fi PORTAL=`echo "$output" | awk -v target="$OCF_RESKEY_target" ' $NF==target{ if( NF==3 ) portal=$2; # sles compat mode else portal=$1; sub(",.*","",portal); print portal; }'` case `echo "$PORTAL" | wc -w` in 0) #target not found echo "$output" ocf_log err "target $OCF_RESKEY_target not found at portal $OCF_RESKEY_portal" return 1 ;; 1) #we're ok return 0 ;; *) # handle multihome hosts reporting multiple portals for p in $PORTAL; do if [ "$OCF_RESKEY_portal" = "$p" ]; then PORTAL="$OCF_RESKEY_portal" return 0 fi done echo "$output" ocf_log err "sorry, can't handle multihomed hosts unless you specify the portal exactly" return 2 ;; esac } open_iscsi_add() { $iscsiadm -m node -p $1 -T $2 -l } open_iscsi_get_session_id() { local target="$1" $iscsiadm -m session 2>/dev/null | grep "$target$" | awk '{print $2}' | tr -d '[]' } open_iscsi_remove() { local target="$1" local session_id session_id=`open_iscsi_get_session_id "$target"` if [ "$session_id" ]; then $iscsiadm -m session -r $session_id -u else ocf_log err "cannot find session id for target $target" return 1 fi } open_iscsi_status() { local target="$1" local session_id conn_state outp local prev_state local recov recov=${2:-$OCF_RESKEY_try_recovery} session_id=`open_iscsi_get_session_id "$target"` prev_state="" [ -z "$session_id" ] && return 1 while :; do outp=`$iscsiadm -m session -r $session_id -P 1` || return 2 conn_state=`echo "$outp" | sed -n '/Connection State/s/.*: //p'` # some drivers don't return connection state, in that case # we'll assume that we're still connected case "$conn_state" in "LOGGED IN") [ -n "$msg_logged" ] && ocf_log info "connection state $conn_state. Session restored." return 0;; "Unknown"|"") # this is also probably OK [ -n "$msg_logged" ] && ocf_log info "connection state $conn_state. Session restored." return 0;; *) # failed if [ "$__OCF_ACTION" != stop ] && ! 
ocf_is_probe && ocf_is_true $recov; then if [ "$conn_state" != "$prev_state" ]; then ocf_log warning "connection state $conn_state, waiting for recovery..." prev_state="$conn_state" fi sleep 1 else ocf_log err "iscsiadm output: $outp" return 2 fi ;; esac done } disk_discovery() { $discovery # discover and setup the real portal string (address) case $? in 0) ;; 1|2) exit $OCF_ERR_GENERIC ;; 3) if ! is_iscsid_running; then [ $setup_rc -eq 1 ] && ocf_log warning "iscsid.startup probably not correctly set in /etc/iscsi/iscsid.conf" exit $OCF_ERR_INSTALLED fi exit $OCF_ERR_GENERIC ;; esac } # # NB: this is udev specific! # wait_for_udev() { dev=/dev/disk/by-path/ip-$PORTAL-iscsi-$OCF_RESKEY_target while :; do ls $dev* >/dev/null 2>&1 && break ocf_log warning "waiting for udev to create $dev" sleep 1 done } iscsi_status() { $disk_status $OCF_RESKEY_target $* case $? in 0) return $OCF_SUCCESS;; 1) return $OCF_NOT_RUNNING;; 2) return $OCF_ERR_GENERIC;; esac } iscsi_start() { iscsi_status case $? in $OCF_SUCCESS) ocf_log info "iscsi $PORTAL $OCF_RESKEY_target already running" return $OCF_SUCCESS ;; $OCF_NOT_RUNNING) $add_disk $PORTAL $OCF_RESKEY_target || return $OCF_ERR_GENERIC case "$OCF_RESKEY_udev" in [Yy]es) wait_for_udev || return $OCF_ERR_GENERIC ;; *) ;; esac ;; *) # the session exists, but it's broken ocf_log warning "iscsi $PORTAL $OCF_RESKEY_target in failed state" ;; esac iscsi_status 1 # enforce wait if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi } iscsi_stop() { iscsi_status if [ $? -ne $OCF_NOT_RUNNING ] ; then $remove_disk $OCF_RESKEY_target || return $OCF_ERR_GENERIC iscsi_status if [ $? -ne $OCF_NOT_RUNNING ] ; then return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi else ocf_log info "iscsi $OCF_RESKEY_target already stopped" return $OCF_SUCCESS fi } iscsi_monitor() { if $disk_status $OCF_RESKEY_target; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } # # 'main' starts here... 
# if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; methods) iscsi_methods exit $OCF_SUCCESS;; esac if [ x = "x$OCF_RESKEY_target" ]; then ocf_log err "target parameter not set" exit $OCF_ERR_CONFIGURED fi if [ x = "x$OCF_RESKEY_portal" ]; then ocf_log err "portal parameter not set" exit $OCF_ERR_CONFIGURED fi case `uname` in Linux) setup=open_iscsi_setup ;; *) ocf_log info "platform `uname` may not be supported" setup=open_iscsi_setup ;; esac LSB_STATUS_STOPPED=3 $setup setup_rc=$? if [ $setup_rc -gt 1 ]; then ocf_log info "iscsi initiator utilities not installed or not setup" case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $OCF_ERR_INSTALLED;; esac fi if [ `id -u` != 0 ]; then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi # which method was invoked? case "$1" in start) discovery_type=${OCF_RESKEY_discovery_type} disk_discovery iscsi_start ;; stop) iscsi_stop ;; status) iscsi_status rc=$? case $rc in $OCF_SUCCESS) echo iscsi target $OCF_RESKEY_target running ;; $OCF_NOT_RUNNING) echo iscsi target $OCF_RESKEY_target stopped ;; *) echo iscsi target $OCF_RESKEY_target failed ;; esac exit $rc ;; monitor) iscsi_status ;; validate-all) # everything already validated # just exit successfully here. 
exit $OCF_SUCCESS;; *) iscsi_methods exit $OCF_ERR_UNIMPLEMENTED;; esac # # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/heartbeat/jboss b/heartbeat/jboss index 247924ca6..8cc5c86e9 100755 --- a/heartbeat/jboss +++ b/heartbeat/jboss @@ -1,500 +1,500 @@ #!/bin/sh # # Description: Manages a Jboss Server as an OCF High-Availability # resource under Heartbeat/LinuxHA control # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # Copyright (c) 2009 Bauer Systems KG / Stefan Schluppeck # ####################################################################### # OCF parameters: # OCF_RESKEY_resource_name - The name of the resource. Default is ${OCF_RESOURCE_INSTANCE} # why not let the RA log through lrmd? # 2009/09/09 Nakahira: # jboss_console is used to record output of the "run.sh". # The log of "Run.sh" should not be output to ha-log because it is so annoying. # OCF_RESKEY_console - A destination of the log of jboss run and shutdown script. Default is /var/log/${OCF_RESKEY_resource_name}.log # OCF_RESKEY_shutdown_timeout - Time-out at the time of the stop. Default is 5 # OCF_RESKEY_kill_timeout - The re-try number of times awaiting a stop. Default is 10 # OCF_RESKEY_user - A user name to start a JBoss. Default is root # OCF_RESKEY_statusurl - URL for state confirmation. 
Default is http://127.0.0.1:8080 # OCF_RESKEY_java_home - Home directory of the Java. Default is ${JAVA_HOME} # OCF_RESKEY_java_opts - Options for Java. # OCF_RESKEY_jboss_home - Home directory of Jboss. Default is None # is it possible to devise this string from options? I'm afraid # that allowing users to set this could be error prone. # 2009/09/09 Nakahira: # It is difficult to set it automatically because jboss_pstring # greatly depends on the environment. At any rate, system architect # should note that pstring doesn't influence other processes. # OCF_RESKEY_pstring - String Jboss will found in procceslist. Default is "java -Dprogram.name=run.sh" # OCF_RESKEY_run_opts - Options for jboss to run. Default is "-c default -l lpg4j" # OCF_RESKEY_shutdown_opts - Options for jboss to shutdonw. Default is "-s 127.0.0.1:1099" # OCF_RESKEY_rotate_consolelog - Control console log logrotation flag. Default is false. # OCF_RESKEY_rotate_value - console log logrotation value. Default is 86400 span(seconds). # OCF_RESKEY_rotate_logsuffix - Control console log logrotation suffix. Default is .%F. ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs usage() { cat <<-! usage: $0 action action: start start jboss stop stop the jboss status return the status of jboss, run or down monitor return TRUE if the jboss appears to be working. You have to have installed $WGETNAME for this to work. meta-data show meta data message validate-all validate the instance parameters ! return $OCF_ERR_ARGS } isrunning_jboss() { local rc if [ -z "$1" ];then ocf_run -q -err wget -t 1 -O /dev/null $STATUSURL else # Retry message for restraint wget -t 1 -O /dev/null $STATUSURL 2>/dev/null fi rc=$? if [ $rc -eq 0 ]; then return $OCF_SUCCESS fi # JBoss service error return $OCF_ERR_GENERIC } monitor_jboss() { if ! 
pgrep -f "$PSTRING" > /dev/null; then return $OCF_NOT_RUNNING fi isrunning_jboss $1 } rotate_console() { # Look for rotatelogs/rotatelogs2 if [ -x /usr/sbin/rotatelogs ]; then ROTATELOGS=/usr/sbin/rotatelogs elif [ -x /usr/sbin/rotatelogs2 ]; then ROTATELOGS=/usr/sbin/rotatelogs2 else ocf_log warn "rotatelogs command not found." return 1 fi # Clean up and set permissions on required files rm -rf "$CONSOLE" mkfifo -m700 "$CONSOLE" chown --dereference "$JBOSS_USER" "$CONSOLE" || true su - -s /bin/sh $JBOSS_USER \ -c "$ROTATELOGS -l \"$CONSOLE$ROTATELOG_SUFFIX\" $ROTATEVALUE" \ < "$CONSOLE" > /dev/null 2>&1 & } start_jboss() { monitor_jboss start if [ $? = $OCF_SUCCESS ]; then ocf_log info "JBoss already running." return $OCF_SUCCESS fi if ocf_is_true $ROTATELOG_FLG; then rotate_console if [ $? = 0 ]; then ocf_log debug "Rotate console log succeeded." else ocf_log warn "Rotate console log failed. Starting jboss without console log rotation." fi fi ocf_log info "Starting JBoss[$RESOURCE_NAME]" if [ "$JBOSS_USER" = root ]; then "$JBOSS_HOME/bin/run.sh" $RUN_OPTS \ >> "$CONSOLE" 2>&1 & else su - -s /bin/bash "$JBOSS_USER" \ -c "export JAVA_HOME=${JAVA_HOME}; \ export JAVA_OPTS=\"${JAVA_OPTS}\"; \ export JBOSS_HOME=${JBOSS_HOME}; \ $JBOSS_HOME/bin/run.sh $RUN_OPTS" \ >> "$CONSOLE" 2>&1 & fi while true; do monitor_jboss start if [ $? 
= $OCF_SUCCESS ]; then break fi ocf_log info "start_jboss[$RESOURCE_NAME]: retry monitor_jboss" sleep 3 done return $OCF_SUCCESS } stop_jboss() { ocf_log info "Stopping JBoss[$RESOURCE_NAME]" if [ "$JBOSS_USER" = root ]; then "$JBOSS_HOME/bin/shutdown.sh" $SHUTDOWN_OPTS -S \ >> "$CONSOLE" 2>&1 & else su - -s /bin/bash "$JBOSS_USER" \ -c "export JAVA_HOME=${JAVA_HOME}; \ export JBOSS_HOME=${JBOSS_HOME}; \ $JBOSS_HOME/bin/shutdown.sh $SHUTDOWN_OPTS -S" \ >> "$CONSOLE" 2>&1 & fi lapse_sec=0 while pgrep -f "$PSTRING" > /dev/null; do sleep 1 lapse_sec=`expr $lapse_sec + 1` ocf_log info "stop_jboss[$RESOURCE_NAME]: stop NORM $lapse_sec/$SHUTDOWN_TIMEOUT" if [ $lapse_sec -ge $SHUTDOWN_TIMEOUT ]; then break fi done if pgrep -f "$PSTRING" > /dev/null; then ocf_log info "stop_jboss[$RESOURCE_NAME]: output a JVM thread dump to $CONSOLE" pkill -QUIT -f "$PSTRING" lapse_sec=0 while true; do sleep 1 lapse_sec=`expr $lapse_sec + 1` ocf_log info "stop_jboss[$RESOURCE_NAME]: kill jboss by SIGTERM ($lapse_sec/$KILL_TIMEOUT)" pkill -TERM -f "$PSTRING" if pgrep -f "$PSTRING" > /dev/null; then if [ $lapse_sec -ge $KILL_TIMEOUT ]; then break fi else break fi done fi # If the JBoss process hangs, JBoss RA waits $SHUTDOWN_TIMEOUT # seconds and tries kill TERM and QUIT for $KILL_TIMEOUT seconds. # The stop timeout of RA should be # longer than $SHUTDOWN_TIMEOUT + $KILL_TIMEOUT. lapse_sec=0 while pgrep -f "$PSTRING" > /dev/null; do sleep 1 lapse_sec=`expr $lapse_sec + 1` ocf_log info "stop_jboss[$RESOURCE_NAME]: kill jboss by SIGKILL ($lapse_sec/@@@)" pkill -KILL -f "$PSTRING" done if ocf_is_true $ROTATELOG_FLG; then rm -f "${CONSOLE}" fi return $OCF_SUCCESS } status_jboss() { if ! pgrep -f "$PSTRING" > /dev/null; then echo "JBoss process[$RESOURCE_NAME] is not running." return $OCF_NOT_RUNNING fi if isrunning_jboss; then echo "JBoss[$RESOURCE_NAME] is running." return $OCF_SUCCESS else echo "JBoss process[$RESOURCE_NAME] is running." echo "But, we can not access JBoss web service." 
return $OCF_NOT_RUNNING fi } metadata_jboss() { cat < 1.0 Resource script for Jboss. It manages a Jboss instance as an HA resource. Manages a JBoss application server instance The name of the resource. Defaults to the name of the resource instance. The name of the resource A destination of the log of jboss run and shutdown script. jboss log path Timeout for jboss bin/shutdown.sh. We wait for this timeout to expire, then send the TERM and QUIT signals. Finally, the KILL signal is used to terminate the jboss process. You should set the timeout for the stop operation to a value bigger than the sum of the timeout parameters. See also kill_timeout. shutdown timeout If bin/shutdown.sh doesn't stop the jboss process, then we send it TERM and QUIT signals, intermittently and once a second. After this timeout expires, if the process is still live, we use the KILL signal. See also shutdown_timeout. stop by signal timeout A user name to start a JBoss. A user name to start a resource. URL to test in the monitor operation. URL to test in the monitor operation. Home directory of Java. Defaults to the environment variable JAVA_HOME. If it is not set, then define this parameter. Home directory of Java. Java options. Java options. Home directory of Jboss. Home directory of Jboss. With this string heartbeat matches for the right process to kill. pkill/pgrep search string Start options to start Jboss with, defaults are from the Jboss-Doku. options for jboss run.sh Stop options to stop Jboss with. options for jboss shutdown.sh Rotate console log flag. Rotate console log flag -console log rotation value (default is 86400 seconds). +Console log rotation value (default is 86400 seconds). -console log rotation value (default is 86400 seconds) +Console log rotation value (default is 86400 seconds) Rotate console log suffix. 
Rotate console log suffix END return $OCF_SUCCESS } validate_all_jboss() { ocf_log info "validate_all_jboss[$RESOURCE_NAME]" return $OCF_SUCCESS } COMMAND=$1 RESOURCE_NAME="${OCF_RESKEY_resource_name-${OCF_RESOURCE_INSTANCE}}" CONSOLE="${OCF_RESKEY_console-/var/log/${RESOURCE_NAME}.log}" SHUTDOWN_TIMEOUT="${OCF_RESKEY_shutdown_timeout-5}" KILL_TIMEOUT="${OCF_RESKEY_kill_timeout-10}" JBOSS_USER="${OCF_RESKEY_user-root}" STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}" PSTRING="${OCF_RESKEY_pstring-java -Dprogram.name=run.sh}" RUN_OPTS="${OCF_RESKEY_run_opts--c default -l lpg4j}" SHUTDOWN_OPTS="${OCF_RESKEY_shutdown_opts--s 127.0.0.1:1099}" ROTATELOG_FLG="${OCF_RESKEY_rotate_consolelog-false}" ROTATEVALUE="${OCF_RESKEY_rotate_value-86400}" ROTATELOG_SUFFIX="${OCF_RESKEY_rotate_logsuffix-.%F}" if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi if [ "$COMMAND" = "meta-data" ]; then metadata_jboss exit $OCF_SUCCESS fi if [ "$COMMAND" = "help" -o "$COMMAND" = "usage" ]; then usage exit $OCF_SUCCESS fi # test if these two are set and if directories exist and if the # required scripts/binaries exist; use OCF_ERR_INSTALLED JAVA_HOME="${OCF_RESKEY_java_home-${JAVA_HOME}}" JAVA_OPTS="${OCF_RESKEY_java_opts}" JBOSS_HOME="${OCF_RESKEY_jboss_home}" LSB_STATUS_STOPPED=3 if [ ! -d "$JAVA_HOME" -o ! -d "$JBOSS_HOME" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac ocf_log err "JAVA_HOME or JBOSS_HOME does not exist." exit $OCF_ERR_INSTALLED fi export JAVA_HOME JAVA_OPTS JBOSS_HOME JAVA=${JAVA_HOME}/bin/java if [ ! -x "$JAVA" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac ocf_log err "java command does not exist." exit $OCF_ERR_INSTALLED fi case "$COMMAND" in start) start_jboss func_status=$? exit $func_status ;; stop) stop_jboss func_status=$? exit $func_status ;; status) status_jboss exit $? 
;; monitor) monitor_jboss func_status=$? exit $func_status ;; validate-all) validate_all_jboss exit $? ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/mysql-proxy b/heartbeat/mysql-proxy index 48c6df3c1..024e97e0d 100755 --- a/heartbeat/mysql-proxy +++ b/heartbeat/mysql-proxy @@ -1,719 +1,719 @@ #!/bin/sh # # Resource script for MySQL Proxy # # Description: Manages MySQL Proxy as an OCF resource in # an high-availability setup. # # Tested with MySQL Proxy 0.8.1 and 0.8.3 on Debian 6.0. # # Based on the mysql and Pure-Ftpd OCF resource agents. # # Author: Raoul Bhatia : Original Author # License: GNU General Public License (GPL) # # # usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data} # # The "start" arg starts a MySQL Proxy instance # # The "stop" arg stops it. # # TODO # * add in-depth monitoring by querying the mysql-proxy admin port # # Test via # (note: this did not work with MySQL Proxy 0.8.1 and ocf-tester from resource-agents 3.9.2 on Debian 6.0) # # * /usr/sbin/ocf-tester -n mp -o binary="/usr/sbin/mysql-proxy" -o defaults_file="" -o parameters="--proxy-skip-profiling" \ # -o admin_address="127.0.0.1:4041" -o admin_username="root" -o admin_password="la" -o admin_lua_script="/usr/lib/mysql-proxy/lua/admin.lua" \ # -o proxy_backend_addresses="192.168.100.200:42006" -o proxy_address="/var/run/mysqld/mysqld.sock" /usr/lib/ocf/resource.d/heartbeat/mysql-proxy # # # OCF parameters: # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_defaults_file # OCF_RESKEY_proxy_backend_addresses # OCF_RESKEY_proxy_read_only_backend_addresses # OCF_RESKEY_proxy_address # OCF_RESKEY_log_level # OCF_RESKEY_keepalive # OCF_RESKEY_plugins # OCF_RESKEY_admin_address # OCF_RESKEY_admin_username # OCF_RESKEY_admin_password # OCF_RESKEY_admin_lua_script # OCF_RESKEY_test_table # OCF_RESKEY_test_user # OCF_RESKEY_test_passwd # OCF_RESKEY_parameters # OCF_RESKEY_pidfile # 
########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs : ${OCF_RESKEY_binary="/usr/sbin/mysql-proxy"} : ${OCF_RESKEY_client_binary="mysql"} : ${OCF_RESKEY_defaults_file=""} : ${OCF_RESKEY_proxy_backend_addresses="127.0.0.1:3306"} : ${OCF_RESKEY_proxy_read_only_backend_addresses=""} : ${OCF_RESKEY_proxy_address=":4040"} : ${OCF_RESKEY_log_level=""} : ${OCF_RESKEY_keepalive=""} : ${OCF_RESKEY_plugins=""} : ${OCF_RESKEY_admin_address="127.0.0.1:4041"} : ${OCF_RESKEY_admin_username=""} : ${OCF_RESKEY_admin_password=""} : ${OCF_RESKEY_admin_lua_script=""} : ${OCF_RESKEY_test_table="mysql.user"} : ${OCF_RESKEY_test_user=""} : ${OCF_RESKEY_test_passwd=""} : ${OCF_RESKEY_parameters=""} : ${OCF_RESKEY_pidfile="${HA_RSCTMP}/mysql-proxy-${OCF_RESOURCE_INSTANCE}.pid"} USAGE="Usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data}" ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 0.1 This script manages MySQL Proxy as an OCF resource in a high-availability setup. The default monitor operation will verify that mysql-proxy is running. The level 10 monitor operation is left out intentionally for possible future enhancements in conjunction with the admin plugin. -The level 20 monitor operation will perform a SELECT on a given table to verify that the connection to a backend-server is actually working. +The level 20 monitor operation will perform a SELECT on a given table to verify that the connection to a back-end server is actually working. Tested with MySQL Proxy 0.8.1 and 0.8.3 on Debian 6.0. Manages a MySQL Proxy instance Full path to the MySQL Proxy binary. For example, "/usr/sbin/mysql-proxy". Full path to MySQL Proxy binary -Location of the MySQL client binary +Location of the MySQL client binary. MySQL client binary Full path to a MySQL Proxy configuration file. 
For example, "/etc/mysql-proxy.conf". Full path to configuration file -Address:port of the remote backend-servers (default: 127.0.0.1:3306). +Address:port of the remote back-end servers (default: 127.0.0.1:3306). -MySQL Proxy backend-servers +MySQL Proxy back-end servers Address:port of the remote (read only) slave-server (default: ). -MySql Proxy read only backend-servers +MySQL Proxy read-only back-end servers -Listening address:port of the proxy-server (default: :4040). +Listening address:port of the proxy server (default: :4040). You can also specify a socket like "/tmp/mysql-proxy.sock". MySQL Proxy listening address Log all messages of level (error|warning|info|message|debug|) or higher. An empty value disables logging. MySQL Proxy log level. Try to restart the proxy if it crashed (default: ). Valid values: true or false. An empty value equals "false". Use keepalive option Whitespace separated list of plugins to load (default: ). Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy plugins Listening address:port of the admin plugin (default: 127.0.0.1:4041). Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy admin plugin listening address Username for the admin plugin (default: ). Required since MySQL Proxy 0.8.1, if the admin plugin is loaded. Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy admin plugin username Password for the admin plugin (default: ). Required since MySQL Proxy 0.8.1, if the admin plugin is loaded. Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. MySQL Proxy admin plugin password Script to execute by the admin plugin. Required since MySQL Proxy 0.8.1, if the admin plugin is loaded. Note: The admin plugin will be auto-loaded in case you specify an admin_* parameter. 
MySQL Proxy admin plugin lua script Table to be tested in monitor statement (in database.table notation) MySQL test table MySQL test user MySQL test user MySQL test user password MySQL test user password The MySQL Proxy daemon may be called with additional parameters. Specify any of them here. MySQL Proxy additional parameters PID file PID file END } isRunning() { kill -s 0 "$1" 2>/dev/null } mysqlproxy_status() { local PID if [ -f "${pidfile}" ]; then # MySQL Proxy is probably running PID=`head -n 1 "${pidfile}"` if [ ! -z "$PID" ] ; then isRunning "$PID" return $? fi fi # MySQL Proxy is not running false } mysqlproxy_start() { local PARAM_PREFIX OPTIONS local p pa pba proba local pid_dir socket_dir # if MySQL Proxy is running return success if mysqlproxy_status ; then ocf_log info "MySQL Proxy already running." return $OCF_SUCCESS fi PARAM_PREFIX='' # MySQL Proxy plugins to load # @TODO check if the plugins are actually available? if ocf_is_true $plugin_support; then for p in $plugins; do PARAM_PREFIX="$PARAM_PREFIX --plugins=$p" done fi # check if the MySQL Proxy defaults-file exist if [ -f "$defaults_file" ]; then PARAM_PREFIX="$PARAM_PREFIX --defaults-file=$defaults_file" fi # set log-level if [ ! -z "$log_level" ]; then PARAM_PREFIX="$PARAM_PREFIX --log-level=$log_level" fi # set keepalive if [ "$keepalive" = "true" ]; then PARAM_PREFIX="$PARAM_PREFIX --keepalive" fi # honor admin_* options if [ ! -z "$admin_username" ]; then PARAM_PREFIX="$PARAM_PREFIX --admin-username=$admin_username" fi if [ ! -z "$admin_password" ]; then PARAM_PREFIX="$PARAM_PREFIX --admin-password=$admin_password" fi if [ ! -z "$admin_lua_script" ]; then PARAM_PREFIX="$PARAM_PREFIX --admin-lua-script=$admin_lua_script" fi # make sure that the pid directory exists pid_dir=`dirname $pidfile` if [ ! -d $pid_dir ] ; then ocf_log info "Creating PID directory '$pid_dir'." 
mkdir -p $pid_dir #chown $OCF_RESKEY_user:$OCF_RESKEY_group $pid_dir # c/p from mysql ra; currently not needed fi # split multiple proxy-address options. # currently unsupported but let us hope for the future ;) for pa in $proxy_address; do [ -z "$pa" ] && continue OPTIONS=" $OPTIONS --proxy-address=$pa" # if $pa contains a slash, we are dealing with a socket # make sure that the socket directory exists if echo "$pa" | grep -q '/' ; then socket_dir=`dirname $pa` if [ ! -d $socket_dir ] ; then ocf_log info "Creating socket directory '$socket_dir'." mkdir -p $socket_dir #chown $OCF_RESKEY_user:$OCF_RESKEY_group $socket_dir # c/p from mysql ra; currently not needed fi fi done # split multiple proxy-backend-addresses options. for pba in $proxy_backend_addresses; do [ -z "$pba" ] && continue OPTIONS=" $OPTIONS --proxy-backend-addresses=$pba" done # split multiple proxy-backend-addresses options. for proba in $proxy_read_only_backend_addresses; do [ -z "$proba" ] && continue OPTIONS=" $OPTIONS --proxy-read-only-backend-addresses=$proba" done # build $OPTIONS and add admin-address and pidfile OPTIONS="$PARAM_PREFIX $OPTIONS --admin-address=$admin_address --pid-file=${pidfile}" # add additional parameters if [ -n "$parameters" ]; then OPTIONS="$OPTIONS $parameters" fi # start MySQL Proxy #start-stop-daemon --start --quiet --pidfile $pidfile --make-pidfile --name mysql-proxy --startas $binary -b -- $OPTIONS $binary --daemon $OPTIONS ret=$? if [ $ret -ne 0 ]; then ocf_log err "MySQL Proxy returned error: " $ret return $OCF_ERR_GENERIC fi # @TODO add an initial monitoring action? return $OCF_SUCCESS } mysqlproxy_stop() { local ret local pa if mysqlproxy_status ; then #start-stop-daemon --stop --quiet --retry 3 --exec $binary --pidfile $pidfile /bin/kill `cat "${pidfile}"` ret=$? 
if [ $ret -ne 0 ]; then ocf_log err "MySQL Proxy returned an error while stopping: " $ret return $OCF_ERR_GENERIC fi # grant some time for shutdown and recheck sleep 1 if mysqlproxy_status ; then ocf_log err "MySQL Proxy failed to stop." return $OCF_ERR_GENERIC fi # remove dangling socketfile, if specified for pa in $proxy_address; do if [ -S "$pa" ]; then ocf_log info "Removing dangling socket file '$pa'." rm -f "$pa" fi done # remove dangling pidfile if [ -f "${pidfile}" ]; then ocf_log info "Removing dangling pidfile '${pidfile}'." rm -f "${pidfile}" fi fi return $OCF_SUCCESS } mysqlproxy_reload() { # @TODO check if pidfile is empty # PID=`head -n 1 "${pidfile}"` # if [ ! -z "$PID" ] ; then if mysqlproxy_status; then ocf_log info "Reloading MySQL Proxy." kill -s HUP `cat ${pidfile}` fi } mysqlproxy_monitor() { local rc if [ "${OCF_RESKEY_CRM_meta_interval:-0}" -eq "0" ]; then # in case of probe, monitor operation is surely treated as # under suspension. This will call start operation. # (c/p from ocf:heartbeat:sfex) mysqlproxy_validate_all rc=$? [ $rc -ne 0 ] && return $rc fi if ! mysqlproxy_status ; then return $OCF_NOT_RUNNING fi if [ $OCF_CHECK_LEVEL -eq 20 ]; then mysqlproxy_monitor_20 rc=$? [ $rc -ne 0 ] && return $rc fi return $OCF_SUCCESS } mysqlproxy_monitor_20() { local rc local mysql_options pa local mysql_server_parameter mysql_server_host mysql_server_port if [ -z "$OCF_RESKEY_test_table" -o -z "$OCF_RESKEY_test_user" -a -z "$OCF_RESKEY_test_passwd" ]; then ocf_log warn "Missing proper configuration for OCF_CHECK_LEVEL=20 (test_table=[$OCF_RESKEY_test_table] test_user=[$OCF_RESKEY_test_user] test_password=[$OCF_RESKEY_test_passwd]). Not running in-depth monitoring." 
return $OCF_SUCCESS fi mysql_options="--connect_timeout=10 --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd" # cycle each address for pa in $proxy_address; do # build correct connect parameter if [ -S "$pa" ]; then # we need to monitor a mysql socket mysql_server_parameter="--socket=$pa" else # we need to monitor a host address mysql_server_parameter="" # split host:port # @TODO correctly handle IPv6 address # @TODO correctly handle 0.0.0.0 address mysql_server_host=`echo $pa | cut -d : -f 1` mysql_server_port=`echo $pa | cut -d : -f 2` if [ -n $mysql_server_host ]; then mysql_server_parameter="$mysql_server_parameter --host=$mysql_server_host" fi if [ -n $mysql_server_port ]; then mysql_server_parameter="$mysql_server_parameter --port=$mysql_server_port" fi fi # Check for test table ocf_run $mysql $mysql_server_parameter $mysql_options \ -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table" rc=$? if [ $rc -ne 0 ]; then ocf_log err "Failed to select from $OCF_RESKEY_test_table: " $rc return $OCF_ERR_GENERIC fi done return $OCF_SUCCESS } mysqlproxy_validate_all() { # local variables local config_error=0 # check that the MySQL Proxy binary exists and can be executed check_binary $binary # check MySQL client binary only if in-depth monitoring is requested # do not break backwards compatibility otherwise if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary $mysql fi # check for valid log-level echo $log_level | egrep -q "^(error|warning|info|message|debug|)$" if [ $? -ne 0 ]; then ocf_log err "MySQL Proxy log level '$log_level' not in valid range error|warning|info|message|debug" return $OCF_ERR_CONFIGURED fi # if we're running MySQL Proxy > 0.8.1 and there is any admin parameter set, # explicitly load the admin (and the proxy) plugin. # (version 0.8.2 does not load the admin plugin by default anymore) ocf_version_cmp "$version" "0.8.1" ret=$? 
if [ $ret -eq 2 ]; then # simple check: concat all parameters and check if the string has non-zero length if [ -n "$admin_username$admin_password$admin_lua_script$admin_address" ]; then plugins="proxy admin" has_plugin_admin=1 else has_plugin_admin=0 fi fi # check for required admin_* parameters for 0.8.1 and 0.8.2 (with admin module) # translated: if (version == 0.8.1 or (version > 0.8.1 and has_plugin_admin)) if [ $ret -eq 1 -o \( $ret -eq 2 -a $has_plugin_admin -eq 1 \) ]; then if [ -z "$admin_username" ]; then ocf_log err "Missing required parameter \"admin_username\"" config_error=1 fi if [ -z "$admin_password" ]; then ocf_log err "Missing required parameter \"admin_password\"" config_error=1 fi if [ -z "$admin_lua_script" ]; then ocf_log err "Missing required parameter \"admin_lua_script\"" config_error=1 fi # check if the admin_lua_script, if specified, exists if [ -n "$admin_lua_script" -a ! -e "$admin_lua_script" ]; then ocf_log err "MySQL Proxy admin lua script '$admin_lua_script' does not exist or is not readable." fi fi # issue a warning during start if the user wants to load a plugin # but this version of MySQL Proxy does not support the plugin architecture. if [ -n "$plugins" ] && ocf_is_false "$plugin_support" && [ $__OCF_ACTION = 'start' ]; then ocf_log warn "You are running MySQL Proxy version '$version'. This version does not support the plugin architecture. Please use version 0.7.0 or later to load the plugins '$plugins'." 
fi # exit in case we have found relevant config errors if [ $config_error -eq 1 ]; then exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } # # Main # if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi pidfile=$OCF_RESKEY_pidfile binary=$OCF_RESKEY_binary defaults_file=$OCF_RESKEY_defaults_file proxy_backend_addresses=$OCF_RESKEY_proxy_backend_addresses proxy_read_only_backend_addresses=$OCF_RESKEY_proxy_read_only_backend_addresses admin_address=$OCF_RESKEY_admin_address admin_username=$OCF_RESKEY_admin_username admin_password=$OCF_RESKEY_admin_password admin_lua_script=$OCF_RESKEY_admin_lua_script proxy_address=$OCF_RESKEY_proxy_address log_level=$OCF_RESKEY_log_level keepalive=$OCF_RESKEY_keepalive plugins=`echo $OCF_RESKEY_plugins | tr "[:space:]" "\n" | sort -u` mysql=$OCF_RESKEY_client_binary parameters=$OCF_RESKEY_parameters plugin_support=false has_plugin_admin=0 # 0 because this simplifies the if statements # debugging stuff #echo OCF_RESKEY_binary=$OCF_RESKEY_binary >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_defaults_file=$OCF_RESKEY_defaults_file >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_proxy_backend_addresses=$OCF_RESKEY_proxy_backend_addresses >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_proxy_read_only_backend_addresses=$OCF_RESKEY_proxy_read_only_backend_addresses >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_proxy_address=$OCF_RESKEY_proxy_address >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_log_level=$OCF_RESKEY_log_level >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_keepalive=$OCF_RESKEY_keepalive >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_admin_address=$OCF_RESKEY_admin_address >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_admin_username=$OCF_RESKEY_admin_username >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_admin_password=$OCF_RESKEY_admin_password >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo 
OCF_RESKEY_admin_lua_script=$OCF_RESKEY_admin_lua_script >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_parameters=$OCF_RESKEY_parameters >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE #echo OCF_RESKEY_pidfile=$OCF_RESKEY_pidfile >> /tmp/prox_conf_$OCF_RESOURCE_INSTANCE # handle some parameters before performing any additional checks case $1 in meta-data) meta_data exit $? ;; usage) usage exit $OCF_SUCCESS ;; esac # determine MySQL Proxy version check_binary $binary version=`$binary --version | grep ^mysql-proxy | awk '{print $NF}'` # version 0.7.0 (and later) support the plugin architecture and load the admin plugin by default # version 0.8.1 loads admin plugin by default and requires the admin parameters to be set # version 0.8.2 does not load the admin plugin by default anymore ocf_version_cmp "$version" "0.7.0" ret=$? if [ $ret -eq 1 -o $ret -eq 2 ]; then plugin_support=true has_plugin_admin=1 fi # perform action case $1 in start) mysqlproxy_validate_all && mysqlproxy_start exit $? ;; stop) mysqlproxy_validate_all && mysqlproxy_stop exit $? ;; reload) mysqlproxy_reload exit $? ;; status) if mysqlproxy_status; then ocf_log info "MySQL Proxy is running." exit $OCF_SUCCESS else ocf_log info "MySQL Proxy is stopped." exit $OCF_NOT_RUNNING fi ;; monitor) mysqlproxy_monitor exit $? ;; validate-all) mysqlproxy_validate_all exit $? ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver index aaaf27a2f..2f62df4b1 100755 --- a/heartbeat/nfsserver +++ b/heartbeat/nfsserver @@ -1,484 +1,484 @@ #!/bin/sh # nfsserver # # Description: Manages nfs server as OCF resource # by hxinwei@gmail.com # License: GNU General Public License v2 (GPLv2) and later if [ -n "$OCF_DEBUG_LIBRARY" ]; then . $OCF_DEBUG_LIBRARY else : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi DEFAULT_INIT_SCRIPT="/etc/init.d/nfsserver" DEFAULT_NOTIFY_CMD="/sbin/sm-notify" DEFAULT_NOTIFY_FOREGROUND="false" DEFAULT_RPCPIPEFS_DIR="/var/lib/nfs/rpc_pipefs" EXEC_MODE=0 SELINUX_ENABLED=-1 STATD_PATH="/var/lib/nfs" STATD_DIR="" nfsserver_meta_data() { cat < 1.0 Nfsserver helps to manage the Linux nfs server as a failover-able resource in Linux-HA. It depends on Linux specific NFS implementation details, so is considered not portable to other platforms yet. Manages an NFS server The default init script shipped with the Linux distro. The nfsserver resource agent offloads the start/stop/monitor work to the init script because the procedure to start/stop/monitor nfsserver varies on different Linux distro. In the event that this option is not set, this agent will attempt to use an init script at this location, ${DEFAULT_INIT_SCRIPT}, or detect a systemd unit-file to use in the event that no init script is detected. Init script for nfsserver -The tool to send out NSM reboot notification, it should be either sm-notify or rpc.statd. +The tool to send out NSM reboot notification; it should be either sm-notify or rpc.statd. Failover of nfsserver can be considered as rebooting to different machines. -The nfsserver resource agent use this command to notify all clients about the happening of failover. +The nfsserver resource agent uses this command to notify all clients about the occurrence of failover. The tool to send out notification. Keeps the notify tool attached to its controlling terminal and running in the foreground. Keeps the notify tool running in the foreground. Specifies the length of sm-notify retry time, in minutes, to continue retrying notifications to unresponsive hosts. If this option is not specified, sm-notify attempts to send notifications for 15 minutes. Specifying a value of 0 causes sm-notify to continue sending notifications to unresponsive peers until it is manually killed. 
-Specifies the length of sm-notify retry time(minutes). +Specifies the length of sm-notify retry time (minutes). The nfsserver resource agent will save nfs related information in this specific directory. And this directory must be able to fail-over before nfsserver itself. Directory to store nfs server related information. Comma separated list of floating IP addresses used to access the nfs service IP addresses. -The mount point for the sunrpc file system. Default is $DEFAULT_RPCPIPEFS_DIR . -This script will mount(bind) nfs_shared_infodir on /var/lib/nfs/ (can not be changed), +The mount point for the sunrpc file system. Default is $DEFAULT_RPCPIPEFS_DIR. +This script will mount (bind) nfs_shared_infodir on /var/lib/nfs/ (cannot be changed), and this script will mount the sunrpc file system on $DEFAULT_RPCPIPEFS_DIR (default, can be changed by this parameter). -If you want to move only rpc_pipefs/ (e.g. to keep rpc_pipefs/ local) from default , please set this value. +If you want to move only rpc_pipefs/ (e.g. to keep rpc_pipefs/ local) from default, please set this value. The mount point for the sunrpc file system. END return $OCF_SUCCESS } nfsserver_usage() { cat < /dev/null 2>&1 && selinuxenabled SELINUX_ENABLED=$? if [ $SELINUX_ENABLED -eq 0 ]; then export SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')" fi ## # EXEC_MODE values # 1 user init script or default init script # 2 systemd # # On error, this function will terminate the process # with error code $OCF_ERR_INSTALLED ## set_exec_mode() { ## # If EXEC_MODE is already set, we don't need to run this function again. ## if [ $EXEC_MODE -ne 0 ]; then return 0; fi ## # If the user defined an init script, It must exist for us to continue ## if [ -n "$OCF_RESKEY_nfs_init_script" ]; then # check_binary will exit the process if init script does not exist check_binary ${OCF_RESKEY_nfs_init_script} EXEC_MODE=1 return 0 fi ## # Check to see if the default init script exists, if so we'll use that. 
## if which $DEFAULT_INIT_SCRIPT > /dev/null 2>&1; then OCF_RESKEY_nfs_init_script=$DEFAULT_INIT_SCRIPT EXEC_MODE=1 return 0 fi ## # Last of all, attempt systemd. ## if which systemctl > /dev/null 2>&1; then if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep nfs-lock > /dev/null; then EXEC_MODE=2 return 0 fi fi ocf_log err "No init script or systemd unit file detected for nfs server" exit $OCF_ERR_INSTALLED } nfs_systemd_exec() { local cmd=$1 local server_res local lock_res if [ "$cmd" = "stop" ]; then systemctl $cmd nfs-server.service server_res=$? systemctl $cmd nfs-lock.service lock_res=$? else systemctl $cmd nfs-lock.service lock_res=$? systemctl $cmd nfs-server.service server_res=$? fi if [ $lock_res -ne $server_res ]; then # If one is running and the other isn't, or for whatever other reason # the return code's aren't the same, this is bad. ocf_log err "Systemd services nfs-lock and nfs-server are not in the same state after attempting $cmd command" return $OCF_ERR_GENERIC fi return $server_res } ## # wrapper for init script and systemd calls. ## nfs_exec() { local cmd=$1 set_exec_mode case $EXEC_MODE in 1) ${OCF_RESKEY_nfs_init_script} $cmd;; 2) nfs_systemd_exec $cmd;; esac } nfsserver_monitor () { fn=`mktemp` nfs_exec status > $fn 2>&1 rc=$? 
ocf_log debug `cat $fn` rm -f $fn #Adapte LSB status code to OCF return code if [ $rc -eq 0 ]; then return $OCF_SUCCESS elif [ $rc -eq 3 ]; then return $OCF_NOT_RUNNING else return $OCF_ERR_GENERIC fi } prepare_directory () { [ -d "$fp" ] || mkdir -p $fp [ -d "$rpcpipefs_make_dir" ] || mkdir -p $rpcpipefs_make_dir [ -d "$fp/v4recovery" ] || mkdir -p $fp/v4recovery [ -d "$fp/$STATD_DIR" ] || mkdir -p "$fp/$STATD_DIR" [ -d "$fp/$STATD_DIR/sm" ] || mkdir -p "$fp/$STATD_DIR/sm" [ -d "$fp/$STATD_DIR/sm.ha" ] || mkdir -p "$fp/$STATD_DIR/sm.ha" [ -d "$fp/$STATD_DIR/sm.bak" ] || mkdir -p "$fp/$STATD_DIR/sm.bak" [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown -R rpcuser.rpcuser "$fp/$STATD_DIR" [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp" } is_bound () { if mount | grep -q "on $1 type"; then return 0 fi return 1 } bind_tree () { if is_bound /var/lib/nfs; then ocf_log debug "$fp is already bound to /var/lib/nfs" return 0 fi mount --bind $fp /var/lib/nfs [ $SELINUX_ENABLED -eq 0 ] && restorecon /var/lib/nfs } unbind_tree () { if `mount | grep -q " on $rpcpipefs_umount_dir"`; then umount -t rpc_pipefs $rpcpipefs_umount_dir fi if is_bound /var/lib/nfs; then umount /var/lib/nfs fi } nfsserver_start () { if nfsserver_monitor; then ocf_log debug "NFS server is already started" return $OCF_SUCCESS fi prepare_directory bind_tree rm -rf $STATD_PATH/sm.ha/* > /dev/null 2>&1 cp -rf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1 ocf_log info "Starting NFS server ..." fn=`mktemp` nfs_exec start > $fn 2>&1 rc=$? 
ocf_log debug `cat $fn` rm -f $fn if [ $rc -ne 0 ]; then ocf_log err "Failed to start NFS server" return $rc fi #Notify the nfs server has been moved or rebooted #The init script do that already, but with the hostname, which may be ignored by client #we have to do it again with the nfs_ip local opts case ${OCF_RESKEY_nfs_notify_cmd##*/} in sm-notify) # run in foreground, if requested if ocf_is_true "$OCF_RESKEY_nfs_notify_foreground"; then opts="-d" fi if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then opts="$opts -m $OCF_RESKEY_nfs_smnotify_retry_time" fi opts="$opts -f -v" ;; rpc.statd) if ocf_is_true "$OCF_RESKEY_nfs_notify_foreground"; then opts="-F" fi opts="$opts -n" ;; esac rm -rf $STATD_PATH/sm.ha.save > /dev/null 2>&1 cp -rf $STATD_PATH/sm.ha $STATD_PATH/sm.ha.save > /dev/null 2>&1 for ip in `echo ${OCF_RESKEY_nfs_ip} | sed 's/,/ /g'`; do ${OCF_RESKEY_nfs_notify_cmd} $opts $ip -P $STATD_PATH/sm.ha rm -rf $STATD_PATH/sm.ha > /dev/null 2>&1 cp -rf $STATD_PATH/sm.ha.save $STATD_PATH/sm.ha > /dev/null 2>&1 done ocf_log info "NFS server started" return $OCF_SUCCESS } nfsserver_stop () { ocf_log info "Stopping NFS server ..." fn=`mktemp` nfs_exec stop > $fn 2>&1 rc=$? ocf_log debug `cat $fn` rm -f $fn if [ $rc -eq 0 ]; then unbind_tree ocf_log info "NFS server stopped" return $OCF_SUCCESS fi ocf_log err "Failed to stop NFS server" return $rc } nfsserver_validate () { ## # set_exec_mode will exit if nfs server is not installed ## set_exec_mode check_binary ${OCF_RESKEY_nfs_notify_cmd} if [ x = x"${OCF_RESKEY_nfs_ip}" ]; then ocf_log err "nfs_ip not set" exit $OCF_ERR_CONFIGURED fi if [ x = "x$OCF_RESKEY_nfs_shared_infodir" ]; then ocf_log err "nfs_shared_infodir not set" exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then if ! 
ocf_is_decimal "$OCF_RESKEY_nfs_smnotify_retry_time"; then ocf_log err "Invalid nfs_smnotify_retry_time [$OCF_RESKEY_nfs_smnotify_retry_time]" exit $OCF_ERR_CONFIGURED fi fi case ${OCF_RESKEY_nfs_notify_cmd##*/} in sm-notify|rpc.statd) ;; *) ocf_log err "Invalid nfs_notify_cmd [$OCF_RESKEY_nfs_notify_cmd]" exit $OCF_ERR_CONFIGURED ;; esac return $OCF_SUCCESS } if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then ocf_log err "THIS RA DO NOT SUPPORT CLONE MODE!" exit $OCF_ERR_CONFIGURED fi nfsserver_validate case $__OCF_ACTION in start) nfsserver_start ;; stop) nfsserver_stop ;; monitor) nfsserver_monitor ;; validate-all) exit $OCF_SUCCESS ;; *) nfsserver_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/pgsql b/heartbeat/pgsql index 8d9fc3fa1..fa9427245 100755 --- a/heartbeat/pgsql +++ b/heartbeat/pgsql @@ -1,1836 +1,1836 @@ #!/bin/sh # # Description: Manages a PostgreSQL Server as an OCF High-Availability # resource # # Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA # Florian Haas (florian@linbit.com) -- makeover # Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication # # Copyright: 2006-2012 Serge Dubrouski # and other Linux-HA contributors # License: GNU General Public License (GPL) # ############################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # # Get PostgreSQL Configuration parameter # get_pgsql_param() { local param_name param_name=$1 perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) { \$dir=\$1; \$dir =~ s/\s*\#.*//; \$dir =~ s/^'(\S*)'/\$1/; print \$dir;}" perl -ne "$perl_code" < $OCF_RESKEY_config } # Defaults OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl OCF_RESKEY_psql_default=/usr/bin/psql OCF_RESKEY_pgdata_default=/var/lib/pgsql/data OCF_RESKEY_pgdba_default=postgres OCF_RESKEY_pghost_default="" OCF_RESKEY_pgport_default=5432 OCF_RESKEY_start_opt_default="" OCF_RESKEY_pgdb_default=template1 OCF_RESKEY_logfile_default=/dev/null OCF_RESKEY_stop_escalate_default=30 OCF_RESKEY_monitor_user_default="" OCF_RESKEY_monitor_password_default="" OCF_RESKEY_monitor_sql_default="select now();" OCF_RESKEY_check_wal_receiver_default="false" # Defaults for replication OCF_RESKEY_rep_mode_default=none OCF_RESKEY_node_list_default="" OCF_RESKEY_restore_command_default="" OCF_RESKEY_archive_cleanup_command_default="" OCF_RESKEY_recovery_end_command_default="" OCF_RESKEY_master_ip_default="" OCF_RESKEY_repuser_default="postgres" OCF_RESKEY_primary_conninfo_opt_default="" OCF_RESKEY_restart_on_promote_default="false" OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp" OCF_RESKEY_xlog_check_count_default="3" OCF_RESKEY_crm_attr_timeout_default="5" OCF_RESKEY_stop_escalate_in_slave_default=30 : ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}} : ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}} : ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}} : ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}} : ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}} : ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf} : ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}} : ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}} : ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}} : ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}} : 
${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}} : ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}} : ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}} : ${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}} # for replication : ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}} : ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}} : ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}} : ${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}} : ${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}} : ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}} : ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}} : ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}} : ${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}} : ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}} : ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}} : ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}} : ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}} usage() { cat < 1.0 Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource. Manages a PostgreSQL database instance Path to pg_ctl command. pgctl Start options (-o start_opt in pg_ctl). "-i -p 5432" for example. start_opt Additional pg_ctl options (-w, -W etc..). ctl_opt Path to psql command. psql Path to PostgreSQL data directory. pgdata User that owns PostgreSQL. pgdba Hostname/IP address where PostgreSQL is listening pghost Port where PostgreSQL is listening pgport PostgreSQL user that pgsql RA will user for monitor operations. If it's not set pgdba user will be used. monitor_user Password for monitor user. monitor_password SQL script that will be used for monitor operations. monitor_sql Path to the PostgreSQL configuration file for the instance. 
Configuration file Database that will be used for monitoring. pgdb Path to PostgreSQL server log output file. logfile -Unix socket directory for PostgeSQL +Unix socket directory for PostgreSQL socketdir Number of shutdown retries (using -m fast) before resorting to -m immediate stop escalation Replication mode may be set to "async" or "sync" or "slave". They require PostgreSQL 9.1 or later. Once set, "async" and "sync" require node_list, master_ip, and -restore_command parameters,as well as configuring postgresql +restore_command parameters, as well as configuring PostgreSQL for replication (in postgresql.conf and pg_hba.conf). "slave" means that RA only makes recovery.conf before starting -to connect to Primary which is running somewhere. -It dosen't need Master/Slave setting. +to connect to primary which is running somewhere. +It doesn't need master/slave setting. It requires master_ip restore_command parameters. rep_mode All node names. Please separate each node name with a space. This is required for replication. node list restore_command for recovery.conf. This is required for replication. restore_command archive_cleanup_command for recovery.conf. This is used for replication and is optional. archive_cleanup_command recovery_end_command for recovery.conf. This is used for replication and is optional. recovery_end_command Master's floating IP address to be connected from hot standby. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. master ip User used to connect to the master server. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. repuser primary_conninfo options of recovery.conf except host, port, user and application_name. This is optional for replication. primary_conninfo_opt If this is true, RA deletes recovery.conf and restarts PostgreSQL on promote to keep Timeline ID. It probably makes fail-over slower. It's recommended to set on-fail of promote up as fence. 
This is optional for replication. restart_on_promote Path to temporary directory. This is optional for replication. tmpdir -Number of checking xlog on monitor before promote. +Number of checks of xlog on monitor before promote. This is optional for replication. xlog check count The timeout of crm_attribute forever update command. Default value is 5 seconds. This is optional for replication. The timeout of crm_attribute forever update command. Number of shutdown retries (using -m fast) before resorting to -m immediate -in Slave state. +in slave state. This is optional for replication. stop escalation_in_slave If this is true, RA checks wal_receiver process on monitor -and notify its status using "(resource name)-receiver-status" attribute. -It's useful for checking whether PostgreSQL(Hot Standby) connects to primary. +and notifies its status using "(resource name)-receiver-status" attribute. +It's useful for checking whether PostgreSQL (hot standby) connects to primary. The attribute shows status as "normal" or "ERROR". check_wal_receiver EOF } # # Run the given command in the Resource owner environment... # runasowner() { local quietrun="" local loglevel="-err" local var for var in 1 2 do case "$1" in "-q") quietrun="-q" shift 1;; "warn"|"err") loglevel="-$1" shift 1;; *) ;; esac done ocf_run $quietrun $loglevel su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*" } # # Shell escape # escape_string() { echo "$*" | sed -e "s/'/'\\\\''/g" } # # methods: What methods/operations do we support? # pgsql_methods() { cat </dev/null 2>&1" return $? 
fi # No PID file false } pgsql_wal_receiver_status() { local PID local receiver_parent_pids PID=`head -n 1 $PIDFILE` receiver_parent_pids=`ps -ef | tr -s " " | grep "[w]al receiver process" | cut -d " " -f 3` if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" -q return 0 fi attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" -q ocf_log warn "wal receiver process is not running" return 1 } # # pgsql_real_monitor # pgsql_real_monitor() { local loglevel local rc local output # Set the log level of the error message loglevel=${1:-err} if ! pgsql_status then ocf_log info "PostgreSQL is down" return $OCF_NOT_RUNNING fi if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then pgsql_wal_receiver_status fi if is_replication; then #Check replication state output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ -Atc \"${CHECK_MS_SQL}\""` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel return $OCF_ERR_GENERIC fi case "$output" in f) ocf_log debug "PostgreSQL is running as a primary." if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then return $OCF_RUNNING_MASTER fi ;; t) ocf_log debug "PostgreSQL is running as a hot standby." return $OCF_SUCCESS;; *) ocf_log err "$CHECK_MS_SQL output is $output" return $OCF_ERR_GENERIC;; esac fi OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"` runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \ -c '$OCF_RESKEY_monitor_sql'" rc=$? 
if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel return $OCF_ERR_GENERIC fi if is_replication; then return $OCF_RUNNING_MASTER fi return $OCF_SUCCESS } pgsql_replication_monitor() { local rc rc=$1 if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then return $rc fi # If I am Master if [ $rc -eq $OCF_RUNNING_MASTER ]; then change_data_status "$NODENAME" "LATEST" change_pgsql_status "$NODENAME" "PRI" control_slave_status || return $OCF_ERR_GENERIC return $rc fi # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor, # so I will get master node name using crm_mon -n crm_mon -n1 | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):]Master" if [ $? -ne 0 ] ; then # If I am Slave and Master is not exist ocf_log info "Master does not exist." change_pgsql_status "$NODENAME" "HS:alone" have_master_right if [ $? -eq 0 ]; then rm -f ${XLOG_NOTE_FILE}.* fi else output=`$CRM_ATTR_FOREVER -N "$NODENAME" \ -n "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$output" = "DISCONNECT" ]; then change_pgsql_status "$NODENAME" "HS:alone" fi fi return $rc } #pgsql_monitor: pgsql_real_monitor() wrapper for replication pgsql_monitor() { local rc pgsql_real_monitor rc=$? if ! is_replication; then return $rc else pgsql_replication_monitor $rc return $? fi } # pgsql_post_demote pgsql_post_demote() { DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE" if [ "$DEMOTE_NODE" != "$NODENAME" ]; then if ! echo $OCF_RESKEY_CRM_meta_notify_master_uname | tr '[A-Z]' '[a-z]' | grep $NODENAME; then show_master_baseline change_pgsql_status "$NODENAME" "HS:alone" fi fi return $OCF_SUCCESS } pgsql_pre_promote() { local master_baseline local my_master_baseline local cmp_location local number_of_nodes # If my data is newer than new master's one, I fail my resource. 
PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` number_of_nodes=`echo $NODE_LIST | wc -w` if [ $number_of_nodes -ge 3 -a \ "$OCF_RESKEY_rep_mode" = "sync" -a \ "$PROMOTE_NODE" != "$NODENAME" ]; then master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` if [ $? -eq 0 ]; then my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` # get older location cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\ sort | head -1` if [ "$cmp_location" != "$my_master_baseline" ]; then ocf_log err "My data is newer than new master's one. New master's location : $master_baseline" $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY return $OCF_ERR_GENERIC fi fi fi return $OCF_SUCCESS } pgsql_notify() { local type="${OCF_RESKEY_CRM_meta_notify_type}" local op="${OCF_RESKEY_CRM_meta_notify_operation}" local rc if ! is_replication; then return $OCF_SUCCESS fi ocf_log debug "notify: ${type} for ${op}" case $type in pre) case $op in promote) pgsql_pre_promote return $? ;; esac ;; post) case $op in promote) delete_xlog_location PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` if [ "$PROMOTE_NODE" != "$NODENAME" ]; then delete_master_baseline fi return $OCF_SUCCESS ;; demote) pgsql_post_demote return $? 
;;
                start|stop)
                    # First name in the notify_master_uname list, lower-cased
                    # the same way $NODENAME is.
                    MASTER_NODE=`echo $OCF_RESKEY_CRM_meta_notify_master_uname | \
                                 sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
                    # Compare the bare node name. A stray blank after $NODENAME
                    # inside the quotes made this test unsatisfiable, so the
                    # master never refreshed the standby attributes here.
                    if [ "$NODENAME" = "$MASTER_NODE" ]; then
                        control_slave_status
                    fi
                    return $OCF_SUCCESS
                    ;;
            esac
            ;;
    esac

    return $OCF_SUCCESS
}

# Refresh the replication attributes of every other node in $NODE_LIST.
#
# Runs $CHECK_REPLICATION_STATE_SQL through psql as $OCF_RESKEY_pgdba and, for
# each node, derives "state|sync_state" from the row whose first field equals
# the node name ("DISCONNECT" when no row matches). The data-status attribute,
# the master score and the pgsql-status attribute are then updated from that.
#
# Returns 1 when the psql query fails.
control_slave_status() {
    local rc
    local data_status
    local target
    local all_data_status
    local tmp_data_status
    local node_name
    local number_of_nodes

    all_data_status=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
                     $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
                     -Atc \"${CHECK_REPLICATION_STATE_SQL}\""`
    rc=$?

    if [ $rc -eq 0 ]; then
        if [ -n "$all_data_status" ]; then
            all_data_status=`echo $all_data_status | sed "s/\n/ /g"`
        fi
    else
        report_psql_error $rc warn
        return 1
    fi

    number_of_nodes=`echo $NODE_LIST | wc -w`
    for target in $NODE_LIST; do
        # The local node is not a standby of itself.
        if [ "$target" = "$NODENAME" ]; then
            continue
        fi

        # Default when the query returned no row for this node.
        data_status="DISCONNECT"
        if [ -n "$all_data_status" ]; then
            for tmp_data_status in $all_data_status; do
                # Each word is "name|state|sync_state".
                node_name=`echo $tmp_data_status | cut -d "|" -f 1`
                state=`echo $tmp_data_status | cut -d "|" -f 2`
                sync_state=`echo $tmp_data_status | cut -d "|" -f 3`
                ocf_log debug "node=$node_name, state=$state, sync_state=$sync_state"
                if [ "$node_name" = "$target" ];then
                    data_status="$state|$sync_state"
                    break
                fi
            done
        fi

        case "$data_status" in
            "STREAMING|SYNC")
                change_data_status "$target" "$data_status"
                change_master_score "$target" "$CAN_PROMOTE"
                change_pgsql_status "$target" "HS:sync"
                ;;
            "STREAMING|ASYNC")
                change_data_status "$target" "$data_status"
                if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
                    change_master_score "$target" "$CAN_NOT_PROMOTE"
                    if ! is_sync_mode "$target"; then
                        set_sync_mode "$target"
                    fi
                else
                    if [ $number_of_nodes -le 2 ]; then
                        change_master_score "$target" "$CAN_PROMOTE"
                    else
                        # I can't determine which slave's data is newest in async mode.
change_master_score "$target" "$CAN_NOT_PROMOTE" fi fi change_pgsql_status "$target" "HS:async" ;; "STREAMING|POTENTIAL") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" change_pgsql_status "$target" "HS:potential" ;; "DISCONNECT") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ] && \ is_sync_mode "$target"; then set_async_mode "$target" fi ;; *) change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ] && \ is_sync_mode "$target"; then set_async_mode "$target" fi change_pgsql_status "$target" "HS:connected" ;; esac done return 0 } have_master_right() { local old local new local output local data_status local node local mylocation local count local newestXlog local oldfile local newfile ocf_log debug "Checking if I have a master right." data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \ "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi else if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "STREAMING|ASYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi fi ocf_log info "My data status=$data_status." show_xlog_location if [ $? -ne 0 ]; then ocf_log err "Failed to show my xlog location." exit $OCF_ERR_GENERIC fi old=0 for count in `seq $OCF_RESKEY_xlog_check_count`; do if [ -f ${XLOG_NOTE_FILE}.$count ]; then old=$count continue fi break done new=`expr $old + 1` # get xlog locations of all nodes for node in ${NODE_LIST}; do output=`$CRM_ATTR_REBOOT -N "$node" -n \ "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null` if [ $? -ne 0 ]; then ocf_log warn "Can't get $node xlog location." 
continue else ocf_log info "$node xlog location : $output" echo "$node $output" >> ${XLOG_NOTE_FILE}.${new} if [ "$node" = "$NODENAME" ]; then mylocation=$output fi fi done oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null` newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null` if [ "$oldfile" != "$newfile" ]; then # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 return 1 fi if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \ head -1 | cut -d " " -f 2` if [ "$newestXlog" = "$mylocation" ]; then ocf_log info "I have a master right." $CRM_MASTER -v $PROMOTE_ME return 0 fi change_data_status "$NODENAME" "DISCONNECT" ocf_log info "I don't have correct master data." # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 fi return 1 } is_replication() { if [ "$OCF_RESKEY_rep_mode" != "none" -a "$OCF_RESKEY_rep_mode" != "slave" ]; then return 0 fi return 1 } get_my_location() { local rc local output local replay_loc local receive_loc local output1 local output2 local log1 local log2 local newer_location output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ -Atc \"${CHECK_XLOG_LOC_SQL}\""` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc warn ocf_log err "Can't get my xlog location." 
return 1 fi replay_loc=`echo $output | cut -d "|" -f 1` receive_loc=`echo $output | cut -d "|" -f 2` output1=`echo "$replay_loc" | cut -d "/" -f 1` output2=`echo "$replay_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` replay_loc="${log1}${log2}" output1=`echo "$receive_loc" | cut -d "/" -f 1` output2=`echo "$receive_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` receive_loc="${log1}${log2}" newer_location=`printf "$replay_loc\n$receive_loc" | sort -r | head -1` echo "$newer_location" return 0 } show_xlog_location() { local location location=`get_my_location` || return 1 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location" } delete_xlog_location() { $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -D } show_master_baseline() { local rc local location runasowner -q err "$OCF_RESKEY_psql $psql_options \ -U $OCF_RESKEY_pgdba -c 'CHECKPOINT'" rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc warn fi location=`get_my_location` ocf_log info "My master baseline : $location." $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location" } delete_master_baseline() { $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -D } set_async_mode_all() { [ "$OCF_RESKEY_rep_mode" = "sync" ] || return 0 ocf_log info "Set all nodes into async mode." runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\"" if [ $? -ne 0 ]; then ocf_log err "Can't set all nodes into async mode." return 1 fi return 0 } set_async_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then ocf_log info "Setup $1 into async mode." 
sync_node_in_conf=`echo $sync_node_in_conf | sed "s/$1//g" |\ sed "s/^,//g" | sed "s/,,/,/g" | sed "s/,$//g"` echo "synchronous_standby_names = '$sync_node_in_conf'" > "$REP_MODE_CONF" else ocf_log info "$1 is already in async mode." return 0 fi ocf_log info "All synced nodes : \"$sync_node_in_conf\"" reload_conf } set_sync_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then ocf_log info "Setup $1 into sync mode." echo "synchronous_standby_names = '$sync_node_in_conf,$1'" > "$REP_MODE_CONF" else ocf_log info "Setup $1 into sync mode." echo "synchronous_standby_names = '$1'" > "$REP_MODE_CONF" fi sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` ocf_log info "All synced nodes : \"$sync_node_in_conf\"" reload_conf } is_sync_mode() { cat $REP_MODE_CONF | grep -q -e "[,' ]$1[,' ]" } reload_conf() { # Invoke pg_ctl runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload" if [ $? -eq 0 ]; then ocf_log info "Reload configuration file." else ocf_log err "Can't reload configuration file." return 1 fi return 0 } user_recovery_conf() { # put archive_cleanup_command and recovery_end_command only when defined by user if [ -n "$OCF_RESKEY_archive_cleanup_command" ]; then echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'" fi if [ -n "$OCF_RESKEY_recovery_end_command" ]; then echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'" fi } make_recovery_conf() { runasowner "touch $RECOVERY_CONF" if [ $? -ne 0 ]; then ocf_log err "Can't create recovery.conf." return 1 fi cat > $RECOVERY_CONF <> $RECOVERY_CONF ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}" return 0 } # change pgsql-status. # arg1:node, arg2: value change_pgsql_status() { local output if ! 
is_node_online $1; then
        return 0
    fi

    output=`$CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null`
    if [ "$output" != "$2" ]; then
        # If slave's disk is broken, RA cannot read PID file
        # and misjudges the PostgreSQL as down while it is running.
        # It causes overwriting of pgsql-status by Master because replication is still connected.
        if [ "$output" = "STOP" -o "$output" = "UNKNOWN" ]; then
            if [ "$1" != "$NODENAME" ]; then
                ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited."
                return 0
            fi
        fi
        ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2."
        $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2"
        if [ $? -ne 0 ]; then
            ocf_log err "Can't change $PGSQL_STATUS_ATTR."
            return 1
        fi
    fi
    return 0
}

# change pgsql-data-status.
# arg1:node, arg2: value
#
# Does nothing (returns 0) when node_exist fails for the node. Otherwise it
# re-reads the attribute and retries the update until the stored value equals
# arg2. Returns 1 as soon as an update attempt reports failure.
change_data_status() {
    local output

    if ! node_exist $1; then
        return 0
    fi

    while :
    do
        output=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null`
        if [ "$output" != "$2" ]; then
            ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2."
            # The update is bounded by $OCF_RESKEY_crm_attr_timeout seconds.
            exec_func_with_timeout "$CRM_ATTR_FOREVER" "-N $1 -n \
                                   $PGSQL_DATA_STATUS_ATTR -v \"$2\"" \
                                   $OCF_RESKEY_crm_attr_timeout
            if [ $? -ne 0 ]; then
                ocf_log err "Can't change $PGSQL_DATA_STATUS_ATTR."
                return 1
            fi
        else
            break
        fi
    done
    return 0
}

# set master-score
# arg1:node, arg2: score, arg3: resource
#
# Updates the "master-<resource>" attribute of the node only when a score is
# already stored for it and that score differs from arg2.
# Returns 1 when the update command fails, 0 otherwise.
set_master_score() {
    local current_score

    current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null`
    if [ -n "$current_score" -a "$current_score" != "$2" ]; then
        ocf_log info "Changing $3 master score on $1 : $current_score->$2."
        # Write to the node passed as arg1, the same node the score was read
        # from, instead of relying on a caller's $target variable leaking into
        # this function.
        $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2"
        if [ $? -ne 0 ]; then
            ocf_log err "Can't change master score."
            return 1
        fi
    fi
    return 0
}

# change master-score
# arg1:node, arg2: score
change_master_score() {
    local instance

    if !
is_node_online $1; then return 0 fi if echo $OCF_RESOURCE_INSTANCE | grep -q ":"; then # If Pacemaker version is 1.0.x instance=0 while : do if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then break fi if [ "${RESOURCE_NAME}:${instance}" = "$OCF_RESOURCE_INSTANCE" ]; then instance=`expr $instance + 1` continue fi set_master_score $1 $2 "${RESOURCE_NAME}:${instance}" || return 1 instance=`expr $instance + 1` done else # If globally-unique=false and Pacemaker version is 1.1.8 or higher # Master/Slave resource has no instance number set_master_score $1 $2 ${RESOURCE_NAME} || return 1 fi return 0 } report_psql_error() { local rc local loglevel rc=$1 loglevel=${2:-err} ocf_log $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running" if [ $rc -eq 1 ]; then ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." elif [ $rc -eq 2 ]; then ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command." elif [ $rc -eq 3 ]; then ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." fi } # # timeout management function # arg1 : command # arg2 : command's args # arg3 : timeout(s) # exec_func_with_timeout() { local func_pid local count local rc $1 `eval echo $2` & func_pid=$! count=0 while kill -s 0 $func_pid >/dev/null 2>&1; do sleep 1 count=`expr $count + 1` if [ $count -ge $3 ]; then ocf_log debug "Execute $1 time out." kill -s 9 $func_pid >/dev/null 2>&1 return 0 fi done wait $func_pid } is_node_online() { crm_mon -1 -n | tr '[A-Z]' '[a-z]' | grep -e "^node $1 " -e "^node $1:" | grep -q -v "offline" } node_exist() { crm_mon -1 -n | tr '[A-Z]' '[a-z]' | grep -q "^node $1" } check_binary2() { if ! have_binary "$1"; then ocf_log err "Setup problem: couldn't find command: $1" return 1 fi return 0 } check_config() { local rc=0 if [ ! 
-f "$1" ]; then if ocf_is_probe; then ocf_log info "Configuration file is $1 not readable during probe." rc=1 else ocf_log err "Configuration file $1 doesn't exist" rc=2 fi fi return $rc } # Validate most critical parameters pgsql_validate_all() { local version local check_config_rc local rep_mode_string if ! check_binary2 "$OCF_RESKEY_pgctl" || ! check_binary2 "$OCF_RESKEY_psql"; then return $OCF_ERR_INSTALLED fi check_config "$OCF_RESKEY_config" check_config_rc=$? [ $check_config_rc -eq 2 ] && return $OCF_ERR_INSTALLED [ $check_config_rc -eq 0 ] && : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`} getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_log err "User $OCF_RESKEY_pgdba doesn't exist"; return $OCF_ERR_INSTALLED; fi if ocf_is_probe; then ocf_log info "Don't check $OCF_RESKEY_pgdata during probe" else if ! runasowner "test -w $OCF_RESKEY_pgdata"; then ocf_log err "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM; fi fi if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ] then ocf_log err "monitor password can't be empty" return $OCF_ERR_CONFIGURED fi if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ] then ocf_log err "monitor_user has to be set if monitor_password is set" return $OCF_ERR_CONFIGURED fi if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then version=`cat $OCF_RESKEY_pgdata/PG_VERSION` if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then ocf_log err "Replication mode needs PostgreSQL 9.1 or higher." return $OCF_ERR_INSTALLED fi if [ ! -n "$OCF_RESKEY_master_ip" ]; then ocf_log err "master_ip can't be empty." return $OCF_ERR_CONFIGURED fi fi if is_replication; then if ! ocf_is_ms; then ocf_log err "Replication(rep_mode=async or sync) requires Master/Slave configuration." return $OCF_ERR_CONFIGURED fi if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! 
"$OCF_RESKEY_rep_mode" = "async" ]; then ocf_log err "Invalid rep_mode : $OCF_RESKEY_rep_mode" return $OCF_ERR_CONFIGURED fi if [ ! -n "$NODE_LIST" ]; then ocf_log err "node_list can't be empty." return $OCF_ERR_CONFIGURED fi if [ $check_config_rc -eq 0 ]; then rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if ! grep -q "$rep_mode_string" $OCF_RESKEY_config; then ocf_log info "adding include directive into $OCF_RESKEY_config" echo "$rep_mode_string" >> $OCF_RESKEY_config fi else if grep -q "$rep_mode_string" $OCF_RESKEY_config; then ocf_log info "deleting include directive from $OCF_RESKEY_config" sed -i "/${rep_mode_string//\//\\/}/d" $OCF_RESKEY_config fi fi fi if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then ocf_log err "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM fi fi if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then if ocf_is_ms; then ocf_log err "Replication(rep_mode=slave) does not support Master/Slave configuration." return $OCF_ERR_CONFIGURED fi fi return $OCF_SUCCESS } # # Check if we need to create a log file # check_log_file() { if [ ! -f "$1" ] then touch $1 > /dev/null 2>&1 chown $OCF_RESKEY_pgdba:`getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4` $1 fi #Check if $OCF_RESKEY_pgdba can write to the log file if ! runasowner "test -w $1" then return 1 fi return 0 } # # Check socket directory # check_socket_dir() { if [ ! -d "$OCF_RESKEY_socketdir" ]; then if ! mkdir "$OCF_RESKEY_socketdir"; then ocf_log err "Can't create directory $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chown $OCF_RESKEY_pgdba:`getent passwd \ $OCF_RESKEY_pgdba | cut -d ":" -f 4` "$OCF_RESKEY_socketdir" then ocf_log err "Can't change ownership for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! 
chmod 2775 "$OCF_RESKEY_socketdir"; then ocf_log err "Can't change permissions for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi else if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then ocf_log err "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi rm $OCF_RESKEY_socketdir/test.$$ fi } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_GENERIC fi PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status" RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf NODENAME=`uname -n | tr '[A-Z]' '[a-z]'` if is_replication; then REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot" CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot" CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever" CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount" CAN_NOT_PROMOTE="-INFINITY" CAN_PROMOTE="100" PROMOTE_ME="1000" CHECK_MS_SQL="select pg_is_in_recovery()" CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()" CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication" PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status" PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status" PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc" PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline" NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'` fi case "$1" in methods) pgsql_methods exit $?;; meta-data) meta_data exit $OCF_SUCCESS;; esac pgsql_validate_all rc=$? 
[ "$1" = "validate-all" ] && exit $rc if [ $rc -ne 0 ] then case "$1" in stop) if is_replication; then change_pgsql_status "$NODENAME" "UNKNOWN" fi exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $OCF_NOT_RUNNING;; *) exit $rc;; esac fi US=`id -u -n` if [ $US != root -a $US != $OCF_RESKEY_pgdba ] then ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba" exit $OCF_ERR_GENERIC fi # make psql command options if [ -n "$OCF_RESKEY_monitor_user" ]; then PGUSER=$OCF_RESKEY_monitor_user; export PGUSER PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb" else psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb" fi if [ -n "$OCF_RESKEY_pghost" ]; then psql_options="$psql_options -h $OCF_RESKEY_pghost" else if [ -n "$OCF_RESKEY_socketdir" ]; then psql_options="$psql_options -h $OCF_RESKEY_socketdir" fi fi # What kind of method was invoked? case "$1" in status) if pgsql_status then ocf_log info "PostgreSQL is up" exit $OCF_SUCCESS else ocf_log info "PostgreSQL is down" exit $OCF_NOT_RUNNING fi;; monitor) pgsql_monitor exit $?;; start) pgsql_start exit $?;; promote) pgsql_promote exit $?;; demote) pgsql_demote exit $?;; notify) pgsql_notify exit $?;; stop) pgsql_stop exit $?;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/pound b/heartbeat/pound index 4fabb9317..d4ffa9fd3 100755 --- a/heartbeat/pound +++ b/heartbeat/pound @@ -1,321 +1,321 @@ #!/bin/sh # # # Pound # # Description: Manage pound instances as a HA resource # # Author: Taro Matsuzawa # # License: GNU General Public License (GPL) # # See usage() for more details # # OCF instance parameters: # OCF_RESKEY_pid # OCF_RESKEY_binary # OCF_RESKEY_ctl_binary # OCF_RESKEY_socket_path # OCF_RESKEY_config # OCF_RESKEY_name # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Set default parameter values # Set these two first, as other defaults depend on them OCF_RESKEY_name_default=${OCF_RESOURCE_INSTANCE} : ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} OCF_RESKEY_binary_default=pound OCF_RESKEY_ctl_binary_default=poundctl OCF_RESKEY_pid_default=/var/run/pound_${OCF_RESKEY_name}.pid OCF_RESKEY_socket_path_default=/var/lib/pound/pound.cfg : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_ctl_binary=${OCF_RESKEY_ctl_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_socket_path=${OCF_RESKEY_socket_path_default}} meta_data() { cat < 1.0 -The Pound Resource Agent can manage pound instances. +The Pound Resource Agent can manage Pound instances. Manage a Pound instance -The Pound configuration file that pound should manage, for example +The Pound configuration file that Pound should manage, for example "/etc/pound.cfg". Pound configuration file -Override the name of the instance that should be given to pound +Override the name of the instance that should be given to Pound (defaults to the resource identifier). Instance name Write the process's PID to the specified file. -The default will include the specified name, ie.: -"/var/run/pound_production.pid". Unlike what this help message shows. -It is most likely not necessary to change this parameter. +The default will include the specified name, i.e.: +"/var/run/pound_production.pid", unlike what this help message shows. +It is most likely not necessary to change this parameter. Pidfile -This is used to start pound server. +This is used to start Pound server. Normally use pound. -This is used to watch pound status via unix socket. +This is used to watch Pound status via Unix socket. Normally use poundctl. -Write the process's unix socket. 
-This parameter is same 'Control' parameter in configuration file, ie.: -Control "/var/lib/pound/pound.cfg" +Write the process's Unix socket. +This parameter is same 'Control' parameter in configuration file, i.e.: +Control "/var/lib/pound/pound.cfg". END } ####################################################################### pound_usage() { cat </dev/null 2>&1 } ############################################################################ # isalive_tomcat() { # As the server stops, the PID file disappears. To avoid race conditions, # we will have remembered the PID of a running instance on script entry. local pid=$rememberedPID # If there is a PID file, use that if [ -f $CATALINA_PID ]; then ocf_log debug "Reading pid from $CATALINA_PID" # race conditions on PID file being removed by stopping tomcat... pid=`head -n 1 $CATALINA_PID` fi if [ -n "$pid" ] && [ "$pid" -gt 0 ]; then # Retry message for restraint ocf_log debug "Sending noop signal to $pid" kill -s 0 $pid >/dev/null 2>&1 return $? fi # No PID file false } ############################################################################ # Check tomcat process and service availability monitor_tomcat() { isalive_tomcat || return $OCF_NOT_RUNNING isrunning_tomcat || return $OCF_ERR_GENERIC return $OCF_SUCCESS } ############################################################################ # Execute catalina.out log rotation rotate_catalina_out() { # Look for rotatelogs/rotatelogs2 if [ -x /usr/sbin/rotatelogs ]; then ROTATELOGS=/usr/sbin/rotatelogs elif [ -x /usr/sbin/rotatelogs2 ]; then ROTATELOGS=/usr/sbin/rotatelogs2 else ocf_log warn "rotatelogs command not found." 
return 1 fi # Clean up and set permissions on required files rm -rf "$CATALINA_HOME"/temp/* "$CATALINA_OUT" mkfifo -m700 "$CATALINA_OUT" chown --dereference "$RESOURCE_TOMCAT_USER" "$CATALINA_OUT" || true # -s is required because tomcat5.5's login shell is /bin/false su - -s /bin/sh $RESOURCE_TOMCAT_USER \ -c "$ROTATELOGS -l \"$CATALINA_HOME/logs/catalina_%F.log\" $CATALINA_ROTATETIME" \ < "$CATALINA_OUT" > /dev/null 2>&1 & } ############################################################################ # Tomcat Command tomcatCommand() { cat<<-END_TOMCAT_COMMAND export JAVA_HOME=${JAVA_HOME} export JAVA_OPTS="${JAVA_OPTS}" export CATALINA_HOME=${CATALINA_HOME} export CATALINA_BASE=${CATALINA_BASE} export CATALINA_OUT=${CATALINA_OUT} export CATALINA_PID=${CATALINA_PID} export CATALINA_OPTS="${CATALINA_OPTS}" export CATALINA_TMPDIR="${CATALINA_TMPDIR}" export JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}" export LOGGING_CONFIG="${LOGGING_CONFIG}" export LOGGING_MANAGER="${LOGGING_MANAGER}" $CATALINA_HOME/bin/catalina.sh $@ END_TOMCAT_COMMAND } attemptTomcatCommand() { if [ "$RESOURCE_TOMCAT_USER" = RUNASIS ]; then "$CATALINA_HOME/bin/catalina.sh" $@ >> "$TOMCAT_CONSOLE" 2>&1 else tomcatCommand $@ | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 fi } ############################################################################ # Start Tomcat start_tomcat() { cd "$CATALINA_HOME/bin" validate_all_tomcat || exit $? monitor_tomcat if [ $? = $OCF_SUCCESS ]; then return $OCF_SUCCESS fi # Remove $CATALINA_PID if it exists rm -f $CATALINA_PID #ocf_log debug "catalina.out rotation FLG = ${CATALINA_ROTATE_LOG}" if [ ${CATALINA_ROTATE_LOG} = "YES" ]; then rotate_catalina_out if [ $? = 0 ]; then ocf_log debug "Rotate catalina.out succeeded." else ocf_log warn "Rotate catalina.out failed. Starting tomcat without catalina.out rotation." 
fi fi echo "`date "+%Y/%m/%d %T"`: start ===========================" >> "$TOMCAT_CONSOLE" ocf_log debug "CATALINA_OPTS value = ${CATALINA_OPTS}" attemptTomcatCommand start ${TOMCAT_START_OPTS} & while true; do monitor_tomcat if [ $? = $OCF_SUCCESS ]; then break fi ocf_log debug "start_tomcat[$TOMCAT_NAME]: retry monitor_tomcat" sleep 3 done return $OCF_SUCCESS } ############################################################################ # Stop Tomcat stop_tomcat() { RA_TIMEOUT=$((OCF_RESKEY_CRM_meta_timeout/1000)) STOP_TIMEOUT=$((RA_TIMEOUT-5)) if [ -n "$MAX_STOP_TIME" ]; then if [ $MAX_STOP_TIME -gt $RA_TIMEOUT ]; then ocf_log warn "max_stop_timeout must be shorter than the timeout of stop operation." fi if [ $MAX_STOP_TIME -eq 0 ]; then STOP_TIMEOUT=$RA_TIMEOUT else STOP_TIMEOUT=$MAX_STOP_TIME fi fi cd "$CATALINA_HOME/bin" memorize_pid # This lets monitoring continue to work reliably echo "`date "+%Y/%m/%d %T"`: stop ###########################" >> "$TOMCAT_CONSOLE" attemptTomcatCommand stop $STOP_TIMEOUT -force lapse_sec=0 while isalive_tomcat; do sleep 1 lapse_sec=`expr $lapse_sec + 1` ocf_log debug "stop_tomcat[$TOMCAT_NAME]: stop failed, killing with SIGKILL ($lapse_sec)" kill -KILL $rememberedPID done if [ ${CATALINA_ROTATE_LOG} = "YES" ]; then rm -f "$CATALINA_PID" "${CATALINA_OUT}" else rm -f "$CATALINA_PID" fi return $OCF_SUCCESS } metadata_tomcat() { cat < 1.0 Resource script for Tomcat. It manages a Tomcat instance as a cluster resource. Manages a Tomcat servlet environment instance to Tomcat process on start. Used to ensure process is still running and must be unique. ]]> The name of the resource Log file, used during start and stop operations. Log file Time-out for stop operation. DEPRECATED Time-out for the stop operation. DEPRECATED Maximum number of times to retry stop operation before suspending and killing Tomcat. DEPRECATED. Does not retry. Max retry count for stop operation. DEPRECATED The user who starts Tomcat. 
The user who starts Tomcat URL for state confirmation. URL for state confirmation Number of seconds to wait during a stop before drastic measures (force kill) are used on the tomcat process. This number MUST be less than your cluster stop timeout for the resource. The default value is five seconds before the timeout value of stop operation. -When it is over this value, it stop a process in kill commands. +When it is over this value, it stops a process in kill commands. This parameter is only effective on Tomcat 6 or later. The max time it should take for proper shutdown. Home directory of Java. Home directory of Java Java JVM options used on start and stop. Java options parsed to JVM, used on start and stop. Home directory of Tomcat. Home directory of Tomcat Instance directory of Tomcat Instance directory of Tomcat, defaults to catalina_home Log file name of Tomcat Log file name of Tomcat, defaults to catalina_home/logs/catalina.out A PID file name for Tomcat. A PID file name for Tomcat Tomcat start options. Tomcat start options Catalina options, for the start operation only. Catalina options Temporary directory of Tomcat Temporary directory of Tomcat, defaults to none Rotate catalina.out flag. Rotate catalina.out flag catalina.out rotation interval (seconds). catalina.out rotation interval (seconds) Java_endorsed_dirs of tomcat -Java_endorsed_dirs of tomcat, defaults to none +Java_endorsed_dirs of Tomcat, defaults to none Logging_config of tomcat -Logging_config of tomcat, defaults to none +Logging_config of Tomcat, defaults to none Logging_manager of tomcat -Logging_manager of tomcat, defaults to none. +Logging_manager of Tomcat, defaults to none. END return $OCF_SUCCESS } validate_all_tomcat() { ocf_log info "validate_all_tomcat[$TOMCAT_NAME]" misconfigured=0 notinstalled=0 wrongpermissions=0 check_binary $WGET if [ -n "$MAX_STOP_TIME" ] && [ "$MAX_STOP_TIME" -lt 0 ]; then ocf_log err "max_stop_time must be set to a value greater than 0." 
misconfigured=1 fi if [[ "$RESOURCE_STATUSURL" =~ :[0-9][0-9]* ]]; then port=${RESOURCE_STATUSURL##*:} port=${port%%/*} ocf_log debug "Tomcat port is $port" ocf_log debug "grep port=\"$port\" $CATALINA_HOME/conf/server.xml" if [ "$port" -gt 0 ]; then grep "port=\"$port\"" $CATALINA_HOME/conf/server.xml > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log err "Your configured status URL specifies a port ($port), but the server does not have a connector listening to that port in $CATALINA_HOME/conf/server.xml" notinstalled=1 fi fi fi if [ $misconfigured -gt 0 ]; then return $OCF_ERR_CONFIGURED fi if [ $notinstalled -gt 0 ]; then return $OCF_ERR_INSTALLED fi if [ $wrongpermissions -gt 0 ]; then return $OCF_ERR_PERM fi return $OCF_SUCCESS } # As we stop tomcat, it removes it's own pid file...we still want to know what it was memorize_pid() { if [ -f $CATALINA_PID ]; then rememberedPID=$(cat $CATALINA_PID) fi } # ### tomcat RA environment variables # COMMAND=$1 TOMCAT_NAME="${OCF_RESKEY_tomcat_name-tomcat}" TOMCAT_CONSOLE="${OCF_RESKEY_script_log-/var/log/$TOMCAT_NAME.log}" RESOURCE_TOMCAT_USER="${OCF_RESKEY_tomcat_user-RUNASIS}" RESOURCE_STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}" JAVA_HOME="${OCF_RESKEY_java_home}" JAVA_OPTS="${OCF_RESKEY_java_opts}" CATALINA_HOME="${OCF_RESKEY_catalina_home}" CATALINA_BASE="${OCF_RESKEY_catalina_base-${OCF_RESKEY_catalina_home}}" CATALINA_OUT="${OCF_RESKEY_catalina_out-$CATALINA_HOME/logs/catalina.out}" CATALINA_PID="${OCF_RESKEY_catalina_pid-$CATALINA_HOME/logs/catalina.pid}" MAX_STOP_TIME="${OCF_RESKEY_max_stop_time}" TOMCAT_START_OPTS="${OCF_RESKEY_tomcat_start_opts}" CATALINA_OPTS="-Dname=$TOMCAT_NAME ${OCF_RESKEY_catalina_opts}" CATALINA_TMPDIR="${OCF_RESKEY_catalina_tmpdir}" CATALINA_ROTATE_LOG="${OCF_RESKEY_catalina_rotate_log-NO}" CATALINA_ROTATETIME="${OCF_RESKEY_catalina_rotatetime-86400}" JAVA_ENDORSED_DIRS="${OCF_RESKEY_java_endorsed_dirs}" LOGGING_CONFIG="${OCF_RESKEY_logging_config}" 
LOGGING_MANAGER="${OCF_RESKEY_logging_manager}" LSB_STATUS_STOPPED=3 if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case "$COMMAND" in meta-data) metadata_tomcat; exit $OCF_SUCCESS;; help|usage) usage; exit $OCF_SUCCESS;; esac if [ ! -d "$JAVA_HOME" -o ! -d "$CATALINA_HOME" -o ! -d "$CATALINA_BASE" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac ocf_log err "JAVA_HOME or CATALINA_HOME or CATALINA_BASE does not exist." exit $OCF_ERR_INSTALLED fi export JAVA_HOME JAVA_OPTS CATALINA_HOME CATALINA_BASE CATALINA_OUT CATALINA_PID CATALINA_OPTS CATALINA_TMPDIR JAVA_ENDORSED_DIRS LOGGING_CONFIG LOGGING_MANAGER JAVA=${JAVA_HOME}/bin/java if [ ! -x "$JAVA" ]; then case $COMMAND in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; esac ocf_log err "java command does not exist." exit $OCF_ERR_INSTALLED fi # # ------------------ # the main script # ------------------ # case "$COMMAND" in start) ocf_log debug "[$TOMCAT_NAME] Enter tomcat start" start_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat start $func_status" exit $func_status ;; stop) ocf_log debug "[$TOMCAT_NAME] Enter tomcat stop" stop_tomcat func_status=$? ocf_log debug "[$TOMCAT_NAME] Leave tomcat stop $func_status" exit $func_status ;; status) if monitor_tomcat; then echo tomcat instance $TOMCAT_NAME is running exit $OCF_SUCCESS else echo tomcat instance $TOMCAT_NAME is stopped exit $OCF_NOT_RUNNING fi exit $? ;; monitor) #ocf_log debug "[$TOMCAT_NAME] Enter tomcat monitor" monitor_tomcat func_status=$? #ocf_log debug "[$TOMCAT_NAME] Leave tomcat monitor $func_status" exit $func_status ;; meta-data) metadata_tomcat exit $? ;; validate-all) validate_all_tomcat exit $? 
;; usage|help) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/varnish b/heartbeat/varnish index 99e2092df..b9d7df3fe 100755 --- a/heartbeat/varnish +++ b/heartbeat/varnish @@ -1,414 +1,414 @@ #!/bin/sh # # # Varnish # # Description: Manage varnish instances as a HA resource # # Author: Léon Keijser # # License: GNU General Public License (GPL) # # See usage() for more details # # OCF instance parameters: # OCF_RESKEY_pid # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_config # OCF_RESKEY_name # OCF_RESKEY_listen_address # OCF_RESKEY_mgmt_address # OCF_RESKEY_ttl # OCF_RESKEY_varnish_user # OCF_RESKEY_varnish_group # OCF_RESKEY_backend_type # OCF_RESKEY_backend_size # OCF_RESKEY_backend_file # OCF_RESKEY_worker_threads # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Set default paramenter values # Set these two first, as other defaults depend on it OCF_RESKEY_name_default=${OCF_RESOURCE_INSTANCE} : ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} OCF_RESKEY_binary_default=varnishd OCF_RESKEY_client_binary_default=varnishadm OCF_RESKEY_pid_default=/var/run/varnishd_${OCF_RESKEY_name}.pid OCF_RESKEY_listen_address_default=0.0.0.0:80 OCF_RESKEY_ttl_default=600 OCF_RESKEY_varnish_user_default=varnish OCF_RESKEY_varnish_group_default=varnish OCF_RESKEY_backend_type_default=malloc OCF_RESKEY_backend_size_default=1G OCF_RESKEY_backend_file_default=/var/lib/varnish/${OCF_RESKEY_name}.bin OCF_RESKEY_worker_threads_default=100,3000,120 : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_listen_address=${OCF_RESKEY_listen_address_default}} : ${OCF_RESKEY_ttl=${OCF_RESKEY_ttl_default}} : 
${OCF_RESKEY_varnish_user=${OCF_RESKEY_varnish_user_default}} : ${OCF_RESKEY_varnish_group=${OCF_RESKEY_varnish_group_default}} : ${OCF_RESKEY_backend_type=${OCF_RESKEY_backend_type_default}} : ${OCF_RESKEY_backend_size=${OCF_RESKEY_backend_size_default}} : ${OCF_RESKEY_backend_file=${OCF_RESKEY_backend_file_default}} : ${OCF_RESKEY_worker_threads=${OCF_RESKEY_worker_threads_default}} meta_data() { cat < 1.0 The Varnish Resource Agent can manage several varnishd instances throughout the cluster. It does so by creating a unique PID file and requires a unique listen address and name for each instance. Manage a Varnish instance -The VCL configuration file that varnish should manage, for example +The VCL configuration file that Varnish should manage, for example "/etc/varnish/default.vcl". VCL file -Override the name of the instance that should be given to varnish +Override the name of the instance that should be given to Varnish (defaults to the resource identifier). Instance name Write the process's PID to the specified file. -The default will include the specified name, ie.: -"/var/run/varnish_production.pid". Unlike what this help message shows. -It is most likely not necessary to change this parameter. +The default will include the specified name, i.e.: +"/var/run/varnish_production.pid". Unlike what this help message shows, +it is most likely not necessary to change this parameter. Listen address Listen on this address:port, for example "192.168.1.1:80" Listen address Provide a management interface, for example "127.0.0.1:2222" Management interface -Specifies a hard minimum time to live for cached documents. +Specify a hard minimum time to live for cached documents. TTL -Specifies the name of an unprivileged user to which the +Specify the name of an unprivileged user to which the child process should switch before it starts accepting connections. 
Unprivileged user -Specifies the name of an unprivileged group to which +Specify the name of an unprivileged group to which the child process should switch before it starts accepting connections. Unprivileged group Use the specified storage backend. Valid options are 'malloc' for memory and 'file' for a file backend. Backend type Specify the size of the backend. For example "1G". Backend size Specify the backend filename if you use backend_type file. For example /var/lib/varnish/mybackend.bin Backend file Start at least min but no more than max worker threads with the specified idle timeout. Syntax: min[,max[,timeout]] For example: 100,3000,120 Worker threads -This is used to control varnish via a CLI. It's currently +This is used to control Varnish via a CLI. It's currently only used to check the status of the running child process. Varnish admin utility END } ####################################################################### varnish_usage() { cat < # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
# ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # # Defaults # OCF_RESKEY_binary_default="zabbix_server" OCF_RESKEY_pid_default="/var/run/zabbix-server/zabbix_server.pid" OCF_RESKEY_config_default="" : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} # sleep interval when waiting for threads cleanup sleepint=1 # # Functions # zabbixserver_meta_data() { cat < 0.0.1 -This is a zabbixserver Resource Agent for zabbix_server monitoring +This is a Zabbix server Resource Agent for zabbix_server monitoring daemon. See: http://www.zabbix.com/ Zabbix server resource agent Location of the zabbix_server binary. Zabbix server binary Path to zabbix_server pidfile. As it's created by daemon itself -it must be the same as specified in the zabbix configuration file +it must be the same as specified in the Zabbix configuration file with parameter 'PidFile='. Path to pidfile Path to zabbix_server configuration file. Assumed server default if not specified. Path to configuration file END } ####################################################################### zabbixserver_usage() { cat < /dev/null 1>&2 } # # start the agent # zabbixserver_start() { local rc # check the resource status zabbixserver_monitor rc=$? case "$rc" in $OCF_SUCCESS) ocf_log info "Resource is already running" return $OCF_SUCCESS ;; $OCF_NOT_RUNNING) ;; *) exit $OCF_ERR_GENERIC ;; esac # remove stale pidfile if it exists if [ -f $OCF_RESKEY_pid ]; then ocf_log info "Removing stale pidfile" rm $OCF_RESKEY_pid fi startserver if [ $? -ne 0 ]; then ocf_log err "Can't start Zabbix server" return $OCF_ERR_GENERIC fi # wait if it starts really while ! 
zabbixserver_monitor; do ocf_log debug "Resource has not started yet, waiting" sleep $sleepint done return $OCF_SUCCESS } # # stop the agent # zabbixserver_stop() { local pid local rc # check the resource status zabbixserver_monitor rc=$? case "$rc" in $OCF_SUCCESS) ;; $OCF_NOT_RUNNING) ocf_log info "Resource is already stopped" return $OCF_SUCCESS ;; *) exit $OCF_ERR_GENERIC ;; esac pid=`getpid $OCF_RESKEY_pid` if [ $? -ne 0 ]; then ocf_log err "Can't find process PID" return $OCF_ERR_GENERIC fi # kill the process ocf_run -q kill $pid if [ $? -ne 0 ]; then ocf_log err "Can't stop process (PID $pid)" return $OCF_ERR_GENERIC fi # Wait until the parent process terminates. # NOTE: The parent may be still waiting for its children. A regular monitor # function will not detect this condition because the pidfile may be # removed just now. while process_status $pid; do ocf_log debug "Waiting for process to terminate..." sleep $sleepint done # wait if it stops really while zabbixserver_monitor; do ocf_log debug "Resource has not stopped yet, waiting" sleep $sleepint done # remove stale pidfile if it exists if [ -f $OCF_RESKEY_pid ]; then ocf_log debug "Pidfile still exists, removing" rm $OCF_RESKEY_pid fi return $OCF_SUCCESS } # # resource monitor # zabbixserver_monitor() { local pid pid=`getpid $OCF_RESKEY_pid` if [ $? -eq 0 ]; then process_status $pid if [ $? -eq 0 ]; then ocf_log debug "Resource is running" return $OCF_SUCCESS fi fi ocf_log info "Resource is not running" return $OCF_NOT_RUNNING } # # validate configuration # zabbixserver_validate_all() { check_piddir || return $OCF_ERR_INSTALLED check_config $OCF_RESKEY_config || return $OCF_ERR_INSTALLED return $OCF_SUCCESS } # # main # OCF_REQUIRED_PARAMS="" OCF_REQUIRED_BINARIES="$OCF_RESKEY_binary" ocf_rarun $*