diff --git a/heartbeat/.ocf-binaries.in b/heartbeat/.ocf-binaries.in
index 777a69abc..3567cc23b 100644
--- a/heartbeat/.ocf-binaries.in
+++ b/heartbeat/.ocf-binaries.in
@@ -1,65 +1,74 @@
 # Make sure PATH contains all the usual suspects
 export PATH="$PATH:/sbin:/bin:/usr/sbin:/usr/bin"

 # Include /usr/ucb for finding whoami on Solaris
 export PATH="$PATH:/usr/ucb"

 # Binaries and binary options for use in Resource Agents
 : ${AWK:=@AWK@}
-: ${GREP:="@EGREP@"}
+: ${EGREP:="@EGREP@"}
 : ${IFCONFIG_A_OPT:="@IFCONFIG_A_OPT@"}
 : ${MAILCMD:=@MAILCMD@}
 : ${MKTEMP:=@MKTEMP@}
 : ${PING:=@PING@}
 : ${RPM:=@RPM@}
 : ${SH:=@SHELL@}
 : ${TEST:=@TEST@}
 : ${TESTPROG:=@TEST@}

 # Entries that should probably be removed
 : ${BASENAME:=basename}
 : ${BLOCKDEV:=blockdev}
+: ${CAT:=cat}
+: ${ECHO:=echo}
 : ${FSCK:=fsck}
 : ${FUSER:=fuser}
 : ${GETENT:=getent}
+: ${GREP:=grep}
 : ${IFCONFIG:=ifconfig}
 : ${IPTABLES:=iptables}
 : ${IP2UTIL:=ip}
 : ${MDADM:=mdadm}
 : ${MODPROBE:=modprobe}
 : ${MOUNT:=mount}
 : ${MSGFMT:=msgfmt}
 : ${NETSTAT:=netstat}
 : ${PERL:=perl}
 : ${PYTHON:=python}
 : ${RAIDSTART:=raidstart}
 : ${RAIDSTOP:=raidstop}
 : ${ROUTE:=route}
 : ${UMOUNT:=umount}
 : ${REBOOT:=reboot}
 : ${POWEROFF_CMD:=poweroff}
+: ${WGET:=wget}
 : ${WHOAMI:=whoami}
-: ${STRINGSCMD:=strings)}
+: ${STRINGSCMD:=strings}
 : ${SCP:=scp}
 : ${SSH:=ssh}
 : ${SWIG:=swig}
 : ${MKTEMP:=mktemp}
 : ${GZIP_PROG:=gzip}
 : ${TAR:=tar}
 : ${MD5:=md5}
 : ${DRBDADM:=drbdadm}
 : ${DRBDSETUP:=drbdsetup}

+# check_binary <name>
+#   Logs an error via ocf_log and exits with $OCF_ERR_INSTALLED unless
+#   `which <name>` resolves to an executable file.
 check_binary () {
     if [ ! -x "`which $1`" ] ; then
         ocf_log err "Setup problem: Couldn't find utility $1"
         exit $OCF_ERR_INSTALLED
     fi
 }

+# have_binary <name>
+#   Returns 0 if `which <name>` resolves to an executable file, 1 otherwise.
 have_binary () {
     if [ ! -x "`which $1`" ] ; then
-        return 0
+        return 1
     fi
-    return 1
+    return 0
 }
diff --git a/heartbeat/EvmsSCC b/heartbeat/EvmsSCC
index 7f250124e..72bc786e7 100644
--- a/heartbeat/EvmsSCC
+++ b/heartbeat/EvmsSCC
@@ -1,198 +1,191 @@
 #!/bin/sh
 #
 # Support: linux-ha@lists.linux-ha.org
 # License: GNU General Public License (GPL)
 #
 # EvmsSCC
 # Description: Runs evms_activate in a heartbeat cluster to activate a
 # EVMS shared cluster container in the cluster.
# Original Author: Jo De Baer (jdebaer@novell.com) # Original Release: 06 Nov 2006 # # usage: ./EvmsSCC {start|stop|status|monitor|meta-data} # # The goal of this resource agent is to provoke the creation of device file # in /dev/emvs which correspond to EVMS2 volumes that reside in a EVMS2 shared # cluster container. As such it should be run as a clone resource in the # cluster. Logic inside the resource agent will make sure that "evms_activate" # is run on only one node in the cluster, both at cluster startup time as well # as when a node joins the cluster. # # Typically, resources that need to mount EVMS2 volumes should run after this # resource agent has finished it's run. As such those resources should be made # "dependent" on this resource agent by the cluster administrator. An example # of resources that should depend on this resource agent are Filesystem resource # agent that mount OCFS2 volumes that reside on EVMS2 volumes in a shared # EVMS2 cluster container. # # For this resource agent to do it's job correctly, evmsd must be running on # the node where the agent is started. Usually evmsd is started by the cluster # software via a respawn statement in /etc/ha.d/ha.cf. If you encounter timing # issues where evmsd is not yet started but where the cluster already starts # the EvmsSCC clone, then you should comment out the evmsd respawn statement # in /etc/ha.d/ha.cf and start evmsd on each node in the cluster via a separate # clone resource agent. The EvmsSCC resource agent cloneset should then be made # dependent to this evmsd cloneset. This will guarantee that emvsd is running # before EvmsSCC is started, on each node in the cluster. # ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ####################################################################### # Utilities used by this script -CUT=/usr/bin/cut -EVMSACTIVATE=/sbin/evms_activate - -check_util () { - if [ ! 
-x "$1" ] ; then - ocf_log err "Setup problem: Couldn't find utility $1" - exit $OCF_ERR_GENERIC - fi -} +CUT=cut +EVMSACTIVATE=evms_activate usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|meta-data} EOT } meta_data() { cat < 1.0 Resource script for EVMS shared cluster container. It runs evms_activate on one node in the cluster. EVMS SCC resource agent END } EvmsSCC_status() { # At the moment we don't support monitoring EVMS activations. We just return "not running" to cope with the pre-start monitor call. return $OCF_NOT_RUNNING } EvmsSCC_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" case "$n_type" in pre) case "$n_op" in start) ocf_log debug "EvmsSCC: Notify: Starting node(s): $n_start." EvmsSCC_start_notify_common ;; esac ;; esac return $OCF_SUCCESS } EvmsSCC_start() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" ocf_log debug "EvmsSCC: Start: starting node(s): $n_start." EvmsSCC_start_notify_common return $OCF_SUCCESS } EvmsSCC_stop() { return $OCF_SUCCESS } EvmsSCC_start_notify_common() { local n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)} ocf_log debug "EvmsSCC: Start_Notify: I am node $n_myself." n_active="$n_active $n_start" case " $n_active " in *" $n_myself "*) ;; *) ocf_log err "EvmsSCC: $n_myself (local) not on active list!" 
return $OCF_ERR_GENERIC ;; esac #pick the first node from the starting list #when the cluster boots this will be one of the many booting nodes #when a node later joins the cluster, this will be the joining node local n_first=$(echo $n_start | cut -d ' ' -f 1) ocf_log debug "EvmsSCC: Start_Notify: First node in starting list is $n_first." if [ "$n_myself" = "$n_first" ] ; then ocf_log debug "EvmsSCC: Start_Notify: I am running evms_activate." evms_activate fi return $OCF_SUCCESS } # Check the arguments passed to this script if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi OP=$1 case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac -check_util $CUT -check_util $EVMSACTIVATE +check_binary $CUT +check_binary $EVMSACTIVATE case $OP in start) EvmsSCC_start ;; notify) EvmsSCC_notify ;; stop) EvmsSCC_stop ;; status|monitor) EvmsSCC_status ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 index e291a0016..5b5fab831 100644 --- a/heartbeat/IPaddr2 +++ b/heartbeat/IPaddr2 @@ -1,806 +1,798 @@ #!/bin/sh # # $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $ # # OCF Resource Agent compliant IPaddr2 script. # # Based on work by Tuomo Soini, ported to the OCF RA API by Lars # Marowsky-Brée. Implements Cluster Alias IP functionality too. # # Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff # # # Copyright (c) 2003 Tuomo Soini # Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # TODO: # - There ought to be an ocf_run_cmd function which does all logging, # timeout handling etc for us # - Make this the standard IP address agent on Linux; the other # platforms simply should ignore the additional parameters OR can use # the legacy heartbeat resource script... # - Check LVS <-> clusterip incompatibilities. # # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_iflabel # OCF_RESKEY_mac # OCF_RESKEY_clusterip_hash # OCF_RESKEY_arp_interval # OCF_RESKEY_arp_count # OCF_RESKEY_arp_bg # OCF_RESKEY_arp_mac # # OCF_RESKEY_CRM_meta_clone # OCF_RESKEY_CRM_meta_clone_max ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs SENDARP=$HA_BIN/send_arp FINDIF=$HA_BIN/findif VLDIR=$HA_RSCTMP/IPaddr SENDARPPIDDIR=$HA_RSCTMP/send_arp CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip} ####################################################################### meta_data() { cat < 1.0 This Linux-specific resource manages IP alias IP addresses. It can add an IP alias, or remove one. In addition, it can implement Cluster Alias IP functionality if invoked as a clone resource. Manages virtual IPv4 addresses The IPv4 address to be configured in dotted quad notation, for example "192.168.1.1". 
IPv4 address The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. Network interface The netmask for the interface in CIDR format (e.g., 24 and not 255.255.255.0) If unspecified, the script will also try to determine this from the routing table. CIDR netmask Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address You can specify an additional label for your IP address here. This label is appended to your interface name. If a label is specified in nic name, this parameter has no effect. Interface label Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Enable support for LVS DR Set the interface MAC address explicitly. Currently only used in case of the Cluster IP Alias. Leave empty to chose automatically. Cluster IP MAC address Specify the hashing algorithm used for the Cluster IP functionality. Cluster IP hashing function Specify the interval between unsolicited ARP packets in milliseconds. ARP packet interval in ms Number of unsolicited ARP packets to send. ARP packet count Whether or not to send the arp packets in the background. ARP from background MAC address to send the ARP packets too. You really shouldn't be touching this. ARP MAC END exit $OCF_SUCCESS } ip_init() { if [ X`uname -s` != "XLinux" ]; then ocf_log err "IPaddr2 only supported Linux." exit $OCF_ERR_INSTALLED fi if case $__OCF_ACTION in start|stop) ocf_is_root;; *) true;; esac then : YAY! else ocf_log err "You must be root for $__OCF_ACTION operation." exit $OCF_ERR_PERM fi - if [ ! -x "$IP2UTIL" ]; then - ocf_log err "$IP2UTIL not found." 
- exit $OCF_ERR_INSTALLED - fi - BASEIP="$OCF_RESKEY_ip" BRDCAST="$OCF_RESKEY_broadcast" NIC="$OCF_RESKEY_nic" # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. Don't remove this aliasing code! if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi NETMASK="$OCF_RESKEY_cidr_netmask" IFLABEL="$OCF_RESKEY_iflabel" IF_MAC="$OCF_RESKEY_mac" LVS_SUPPORT=0 if [ x"${OCF_RESKEY_lvs_support}" = x"true" \ -o x"${OCF_RESKEY_lvs_support}" = x"on" \ -o x"${OCF_RESKEY_lvs_support}" = x"1" ]; then LVS_SUPPORT=1 fi IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1} IP_INC_NO=$((OCF_RESKEY_CRM_meta_clone+1)) IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}" if [ $LVS_SUPPORT -gt 0 ] && [ $IP_INC_GLOBAL -gt 1 ]; then ocf_log err "LVS and load sharing do not go together well" exit OCF_ERR_ARGS fi ARP_INTERVAL_MS=${OCF_RESKEY_arp_interval:-200} ARP_REPEAT=${OCF_RESKEY_arp_count:-5} ARP_BACKGROUND=${OCF_RESKEY_arp_bg:-yes} ARP_NETMASK=${OCF_RESKEY_arp_mac:-ffffffffffff} if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then : else ocf_log err "Invalid OCF_RESKEY_incarnations_max_global [$IP_INC_GLOBAL], should be positive integer" exit $OCF_ERR_ARGS fi # Validation is performed in ip_validate()... # # $FINDIF now takes its parameters from the environment # if NICINFO=`$FINDIF -C` then NICINFO=`echo $NICINFO | sed -e 's/netmask\ //;s/broadcast\ //'` NIC=`echo "$NICINFO" | cut -d" " -f1` NETMASK=`echo "$NICINFO" | cut -d" " -f2` BRDCAST=`echo "$NICINFO" | cut -d" " -f3` else ocf_log err "[$FINDIF -C] failed" exit $OCF_ERR_ARGS fi SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP" case $NIC in *:*) IFLABEL=$NIC NIC=`echo $NIC | sed 's/:.*//'` ;; *) if [ -n "$IFLABEL" ]; then IFLABEL=${NIC}:${IFLABEL} fi ;; esac IP_CIP= if [ "$IP_INC_GLOBAL" -gt 1 ]; then - if [ ! 
-x "$IPTABLES" ]; then - ocf_log err "Cluster Alias IP mode selected, but iptables not configured" - exit $OCF_ERR_INSTALLED - fi + check_binary $IPTABLES IP_CIP="yes" if [ -z "$IF_MAC" ]; then # Choose a MAC # 1. Concatenate some input together # 2. This doesn't need to be a cryptographically # secure hash. # 3. Drop everything after the first 6 octets (12 chars) # 4. Delimit the octets with ':' # 5. Make sure the first octet is odd, # so the result is a multicast MAC IF_MAC=`echo $BASEIP $NETMASK $BRDCAST | \ md5sum | \ sed -e 's#\(............\).*#\1#' \ -e 's#..#&:#g; s#:$##' \ -e 's#^\(.\)[02468aAcCeE]#\11#'` fi IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$BASEIP" fi } # # Find out which interface serves the given IP address # The argument is an IP address, and its output # is an interface name (e.g., "eth0"). # find_interface() { # # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces # local iface=`$IP2UTIL -o -f inet addr show | grep "\ $BASEIP/" \ | cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$'` echo $iface return 0 } # # Delete an interface # delete_interface () { ipaddr="$1" iface="$2" netmask="$3" CMD="$IP2UTIL -f inet addr delete $ipaddr/$netmask dev $iface" ocf_log info "$CMD" $CMD if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi CMD="$IP2UTIL -o -f inet addr show $iface" ocf_log info "$CMD" ADDR=`$CMD` if [ $? -ne 0 -o ! -z "$ADDR" ]; then return $? fi CMD="$IP2UTIL link set $iface down" ocf_log info "$CMD" $CMD return $? } # # Add an interface # add_interface () { ipaddr="$1" netmask="$2" broadcast="$3" iface="$4" label="$5" CMD="$IP2UTIL -f inet addr add $ipaddr/$netmask brd $broadcast dev $iface" if [ ! -z "$label" ]; then CMD="$CMD label $label" fi ocf_log info "$CMD" $CMD if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi CMD="$IP2UTIL link set $iface up" ocf_log info "$CMD" $CMD return $? } # # Delete a route # delete_route () { prefix="$1" iface="$2" CMD="$IP2UTIL route delete $prefix dev $iface" ocf_log info "$CMD" $CMD return $? 
} # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # # TODO: This is very ugly and should be controlled by an additional # instance parameter. Or even: multi-state, with the IP only being # "active" on the master!? # remove_conflicting_loopback() { ipaddr="$1" netmask="$2" broadcast="$3" ifname="$4" ocf_log info "Removing conflicting loopback $ifname." if [ -d "$VLDIR/" ] || mkdir -p "$VLDIR/"; then : Directory $VLDIR now exists else ocf_log err "Could not create \"$VLDIR/\" conflicting" \ " loopback $ifname cannot be restored." fi if echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." fi delete_interface "$ipaddr" "$ifname" "$netmask" # Forcibly remove the route (if it exists) to the loopback. delete_route "$ipaddr" "$ifname" } # # On Linux systems the (hidden) loopback interface may # need to be restored if it has been taken down previously # by remove_conflicting_loopback() # restore_loopback() { ipaddr="$1" if [ -s "$VLDIR/$ipaddr" ]; then ifinfo=`cat "$VLDIR/$ipaddr"` ocf_log info "Restoring loopback IP Address " \ "$ifinfo." 
add_interface $ifinfo rm -f "$VLDIR/$ipaddr" fi } # # Run send_arp to note peers about new mac address # run_send_arp() { ARGS="-i $ARP_INTERVAL_MS -r $ARP_REPEAT -p $SENDARPPIDFILE $NIC $BASEIP auto not_used not_used" if [ $IP_CIP="yes" ] ; then MY_MAC=`echo ${IF_MAC} | sed -e 's/://'` ARGS="-i $ARP_INTERVAL_MS -r $ARP_REPEAT -p $SENDARPPIDFILE $NIC $BASEIP $MY_MAC not_used not_used" fi ocf_log info "$SENDARP $ARGS" case $ARP_BACKGROUND in yes) ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" &) >&2 ;; *) $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" ;; esac } # Do we already serve this IP address? # # returns: # ok = served (for CIP: + hash bucket) # partial = served and no hash bucket (CIP only) # no = nothing # ip_served() { cur_nic="`find_interface $BASEIP`" if [ -z "$cur_nic" ]; then echo "no" return 0 fi if [ -z "$IP_CIP" ]; then case $cur_nic in lo*) if [ "$LVS_SUPPORT" = "1" ]; then echo "no" return 0 fi ;; esac echo "ok" return 0 fi # Special handling for the CIP: if grep -q "^${IP_INC_NO},\|,${IP_INC_NO},\|,${IP_INC_NO}$\|^${IP_INC_NO}$" $IP_CIP_FILE ; then echo "ok" return 0 else echo "partial" return 0 fi exit $OCF_ERR_GENERIC } ####################################################################### ip_usage() { cat <$IP_CIP_FILE fi if [ "$ip_status" = "no" ]; then if [ "$LVS_SUPPORT" = "1" ]; then case `find_interface $BASEIP` in lo*) remove_conflicting_loopback $BASEIP 32 255.255.255.255 lo ;; esac fi add_interface $BASEIP $NETMASK $BRDCAST $NIC $IFLABEL if [ $? -ne 0 ]; then ocf_log err "$CMD failed." exit $OCF_ERR_GENERIC fi fi case $NIC in lo*) : no need to run send_arp on loopback ;; *) run_send_arp ;; esac exit $OCF_SUCCESS } ip_stop() { ip_init local ip_del_if="yes" if [ -n "$IP_CIP" ]; then # Cluster IPs need special processing when the last bucket # is removed from the node... 
take a lock to make sure only one # process executes that code ocf_take_lock $CIP_lockfile ocf_release_lock_on_exit $CIP_lockfile fi if [ -f "$SENDARPPIDFILE" ] ; then kill `cat "$SENDARPPIDFILE"` if [ $? -ne 0 ]; then ocf_log warn "Could not kill previously running send_arp for $BASEIP" else ocf_log info "killed previously running send_arp for $BASEIP" rm -f "$SENDARPPIDFILE" fi fi local ip_status=`ip_served` if [ $ip_status = "no" ]; then : Requested interface not in use exit $OCF_SUCCESS fi if [ -n "$IP_CIP" ]; then if [ $ip_status = "partial" ]; then exit $OCF_SUCCESS fi echo "-$IP_INC_NO" >$IP_CIP_FILE if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then ocf_log info $BASEIP, $IP_CIP_HASH for ((i=1; i<=$IP_INC_GLOBAL; i++)) ; do ocf_log info $i $IPTABLES -D INPUT -d $BASEIP -i $NIC -j CLUSTERIP \ --new \ --clustermac $IF_MAC \ --total-nodes $IP_INC_GLOBAL \ --local-node $i \ --hashmode $IP_CIP_HASH done else ip_del_if="no" fi fi if [ "$ip_del_if" = "yes" ]; then delete_interface $BASEIP $NIC $NETMASK if [ $? -ne 0 ]; then exit $OCF_ERR_GENERIC fi if [ "$LVS_SUPPORT" = 1 ]; then restore_loopback "$BASEIP" fi fi exit $OCF_SUCCESS } ip_monitor() { ip_init # TODO: Implement more elaborate monitoring like checking for # interface health maybe via a daemon like FailSafe etc... local ip_status=`ip_served` case $ip_status in ok) return $OCF_SUCCESS ;; partial|no) exit $OCF_NOT_RUNNING ;; *) # Errors on this interface? return $OCF_ERR_GENERIC ;; esac } ip_validate() { check_binary $IP2UTIL check_binary $IPTABLES check_binary $MODPROBE ip_init # $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init, # do not bother here. 
if ocf_is_decimal "$ARP_INTERVAL_MS" && [ $ARP_INTERVAL_MS -gt 0 ]; then : else ocf_log err "Invalid OCF_RESKEY_arp_interval [$ARP_INTERVAL_MS]" exit $OCF_ERR_ARGS fi if ocf_is_decimal "$ARP_REPEAT" && [ $ARP_REPEAT -gt 0 ]; then : else ocf_log err "Invalid OCF_RESKEY_arp_count [$ARP_REPEAT]" exit $OCF_ERR_ARGS fi if [ -n "$IP_CIP" ]; then local valid=1 case $IP_CIP_HASH in sourceip|sourceip-sourceport|sourceip-sourceport-destport) ;; *) ocf_log err "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]" exit $OCF_ERR_ARGS ;; esac if [ "$LVS_SUPPORT" = 1 ]; then ecf_log err "LVS and load sharing not advised to try" exit $OCF_ERR_ARGS fi case $IF_MAC in [0-9a-zA-Z][1379bBdDfF][^0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][^0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][^0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][^0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][^0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]) ;; *) valid=0 ;; esac if [ $valid -eq 0 ]; then ocf_log err "Invalid IF_MAC [$IF_MAC]" exit $OCF_ERR_ARGS fi fi exit $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data ;; start) ip_start ;; stop) ip_stop ;; status) ip_init ip_status=`ip_served` if [ $ip_status = "ok" ]; then echo "running" exit $OCF_SUCCESS else echo "stopped" exit $OCF_NOT_RUNNING fi ;; monitor) ip_monitor ;; validate-all) ip_validate ;; usage|help) ip_usage exit $OCF_SUCCESS ;; *) ip_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac diff --git a/heartbeat/ManageRAID b/heartbeat/ManageRAID index 9f40ae677..3fdfcbf23 100644 --- a/heartbeat/ManageRAID +++ b/heartbeat/ManageRAID @@ -1,402 +1,383 @@ #!/bin/sh # # Name ManageRAID # Author Matthias Dahl, m.dahl@designassembly.de # License GPL version 2 # # (c) 2006 The Design Assembly GmbH. # # # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING # # This resource agent is most likely function complete but not error free. Please # consider it BETA quality for the moment until it has proven itself stable... # # USE AT YOUR OWN RISK. 
# # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING # # # partly based on/inspired by original Heartbeat2 OCF resource agents # # Description # # Manages starting, mounting, unmounting, stopping and monitoring of RAID devices # which are preconfigured in /etc/conf.d/HB-ManageRAID. # # # Created 11. Sep 2006 # Updated 18. Sep 2006 # # rev. 1.00.2 # # Changelog # # 18/Sep/06 1.00.1 more cleanup # 12/Sep/06 1.00.1 add more functionality # add sanity check for config parameters # general cleanup all over the place # 11/Sep/06 1.00.0 it's alive... muahaha... ALIVE... :-) # # # TODO # # - check if at least one disk out of PREFIX_LOCALDISKS is still active # in RAID otherwise consider RAID broken and stop it. # # The reason behind this: consider a RAID-1 which contains iSCSI devices # shared over Ethernet which get dynamically added/removed to/from the RAID. # Once all local disks have failed and only those iSCSI disks remain, the RAID # should really stop to prevent bad performance and possible data loss. # ### . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ### # required utilities -MDADM=/sbin/mdadm -MOUNT=/bin/mount -UMOUNT=/bin/umount -GREP=/bin/grep -CAT=/bin/cat -TEST=/usr/bin/test -ECHO=/bin/echo # required files/devices RAID_MDSTAT=/proc/mdstat -# -# check_util() -# -# taken from Raid1 Heartbeat2 OCF resource agent -check_util () -{ - if [[ ! -x $1 ]]; then - ocf_log err "setup problem: utility $1 required." - exit $OCF_ERR_INSTALLED - fi -} - # # check_file() # check_file () { if [[ ! -e $1 ]]; then ocf_log err "setup problem: file $1 does not exist." exit $OCF_ERR_GENERIC fi } # # usage() # usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } # # meta_data() # meta_data() { cat < 1.00.2 Manages starting, stopping and monitoring of RAID devices which are preconfigured in /etc/conf.d/HB-ManageRAID. Manages RAID devices Name (case sensitive) of RAID to manage. 
(preconfigured in /etc/conf.d/HB-ManageRAID) RAID name END } # # start_raid() # start_raid() { declare -i retcode status_raid retcode=$? if [[ $retcode == $OCF_SUCCESS ]]; then return $OCF_SUCCESS elif [[ $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi for ldev in ${RAID_LOCALDISKS[@]}; do if [[ ! -b $ldev ]]; then ocf_log err "$ldev is not a (local) block device." return $OCF_ERR_ARGS fi done $MDADM -A $RAID_DEVPATH -a yes -u ${!RAID_UUID} ${RAID_LOCALDISKS[@]} &> /dev/null if [[ $? != 0 ]]; then ocf_log err "starting ${!RAID_DEV} with ${RAID_LOCALDISKS[@]} failed." return $OCF_ERR_GENERIC fi $MOUNT -o ${!RAID_MOUNTOPTIONS} $RAID_DEVPATH ${!RAID_MOUNTPOINT} &> /dev/null if [[ $? != 0 ]]; then $MDADM -S $RAID_DEVPATH &> /dev/null if [[ $? != 0 ]]; then ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed as well as stopping the RAID itself." else ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed. RAID stopped again." fi return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # stop_raid() # stop_raid() { status_raid if [[ $? == $OCF_NOT_RUNNING ]]; then return $OCF_SUCCESS fi $UMOUNT ${!RAID_MOUNTPOINT} &> /dev/null if [[ $? != 0 ]]; then ocf_log err "unmounting ${!RAID_MOUNTPOINT} failed. not stopping ${!RAID_DEV}!" return $OCF_ERR_GENERIC fi $MDADM -S $RAID_DEVPATH &> /dev/null if [[ $? != 0 ]]; then ocf_log err "stopping RAID ${!RAID_DEV} failed." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # status_raid() # status_raid() { declare -i retcode_raidcheck declare -i retcode_uuidcheck $CAT $RAID_MDSTAT | $GREP -e "${!RAID_DEV}[\ ]*:[\ ]*active" &> /dev/null if [[ $? != 0 ]]; then return $OCF_NOT_RUNNING fi if [[ ! -e $RAID_DEVPATH ]]; then return $OCF_ERR_GENERIC fi $MDADM --detail -t $RAID_DEVPATH &> /dev/null retcode_raidcheck=$? $MDADM --detail -t $RAID_DEVPATH | $GREP -qEe "^[\ ]*UUID[\ ]*:[\ ]*${!RAID_UUID}" &> /dev/null retcode_uuidcheck=$? 
if [[ $retcode_raidcheck > 3 ]]; then ocf_log err "mdadm returned error code $retcode_raidcheck while checking ${!RAID_DEV}." return $OCF_ERR_GENERIC elif [[ $retcode_raidcheck == 3 ]]; then ocf_log err "${!RAID_DEV} has failed." return $OCF_ERR_GENERIC elif [[ $retcode_raidcheck < 3 && $retcode_uuidcheck != 0 ]]; then ocf_log err "active RAID ${!RAID_DEV} and configured UUID (!$RAID_UUID) do not match." return $OCF_ERR_GENERIC fi $MOUNT | $GREP -e "$RAID_DEVPATH on ${!RAID_MOUNTPOINT}" &> /dev/null if [[ $? != 0 ]]; then ocf_log err "${!RAID_DEV} seems to be no longer mounted at ${!RAID_MOUNTPOINT}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # validate_all_raid() # validate_all_raid() { # # since all parameters are checked every time ManageRAID is # invoked, there not much more to check... # # status_raid should cover the rest. # declare -i retcode status_ve retcode=$? if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac ## required configuration # [ -f /etc/conf.d/HB-ManageRAID ] || { ocf_log err "/etc/conf.d/HB-ManageRAID missing" exit $OCF_ERR_INSTALLED } . /etc/conf.d/HB-ManageRAID # ## # # check relevant environment variables for sanity and security # declare -i retcode_test declare -i retcode_grep $TEST -z "$OCF_RESKEY_raidname" retcode_test=$? $ECHO "$OCF_RESKEY_raidname" | $GREP -qEe "^[[:alnum:]\_]+$" retcode_grep=$? if [[ $retcode_test != 1 || $retcode_grep != 0 ]]; then ocf_log err "OCF_RESKEY_raidname not set or invalid." exit $OCF_ERR_ARGS fi RAID_UUID=${OCF_RESKEY_raidname}_UUID $ECHO ${!RAID_UUID} | $GREP -qEe "^[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_UUID is invalid." 
exit $OCF_ERR_ARGS fi RAID_DEV=${OCF_RESKEY_raidname}_DEV $ECHO ${!RAID_DEV} | $GREP -qEe "^md[0-9]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_DEV is invalid." exit $OCF_ERR_ARGS fi RAID_DEVPATH=/dev/${!RAID_DEV/md/md\/} RAID_MOUNTPOINT=${OCF_RESKEY_raidname}_MOUNTPOINT $ECHO ${!RAID_MOUNTPOINT} | $GREP -qEe "^[[:alnum:]\/\_\"\ ]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_MOUNTPOINT is invalid." exit $OCF_ERR_ARGS fi RAID_MOUNTOPTIONS=${OCF_RESKEY_raidname}_MOUNTOPTIONS $ECHO ${!RAID_MOUNTOPTIONS} | $GREP -qEe "^[[:alpha:]\,]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_MOUNTOPTIONS is invalid." exit $OCF_ERR_ARGS fi RAID_LOCALDISKS=${OCF_RESKEY_raidname}_LOCALDISKS[@] RAID_LOCALDISKS=( "${!RAID_LOCALDISKS}" ) if [[ ${#RAID_LOCALDISKS[@]} < 1 ]]; then ocf_log err "you have to specify at least one local disk." exit $OCF_ERR_ARGS fi # # check that all relevant utilities are available # -check_util $MDADM -check_util $MOUNT -check_util $UMOUNT -check_util $GREP -check_util $CAT -check_util $TEST -check_util $ECHO +check_binary $MDADM +check_binary $MOUNT +check_binary $UMOUNT +check_binary $GREP +check_binary $CAT +check_binary $TEST +check_binary $ECHO # # check that all relevant devices are available # check_file $RAID_MDSTAT # # finally... let's see what we are ordered to do :-) # case "$1" in start) start_raid ;; stop) stop_raid ;; status|monitor) status_raid ;; validate-all) validate_all_raid ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/ManageVE b/heartbeat/ManageVE index 8a9e4ce4c..0ff2da452 100644 --- a/heartbeat/ManageVE +++ b/heartbeat/ManageVE @@ -1,301 +1,288 @@ #!/bin/sh # # Name ManageVE # Author Matthias Dahl, m.dahl@designassembly.de # License GPL version 2 # # (c) 2006 The Design Assembly GmbH. # # # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING # # This resource agent is most likely function complete but not error free. 
Please # consider it BETA quality for the moment until it has proven itself stable... # # USE AT YOUR OWN RISK. # # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING # # # partly based on/inspired by original Heartbeat2 OCF resource agents # # Description # # This OCF complaint resource agent manages OpenVZ VEs and thus requires # a proper OpenVZ installation including a recent vzctl util. # # # Created 07. Sep 2006 # Updated 18. Sep 2006 # # rev. 1.00.3 # # Changelog # # 12/Sep/06 1.00.3 more cleanup # 12/Sep/06 1.00.2 fixed some logic in start_ve # general cleanup all over the place # 11/Sep/06 1.00.1 fixed some typos # 07/Sep/06 1.00.0 it's alive... muahaha... ALIVE... :-) # ### . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ### # required utilities VZCTL=/usr/sbin/vzctl -AWK=/usr/bin/awk - -# -# check_util() -# -# taken from Raid1 Heartbeat2 OCF resource agent -check_util () -{ - if [[ ! -x "$1" ]]; then - ocf_log err "setup problem: Couldn't find utility $1" - exit $OCF_ERR_GENERIC - fi -} # # usage() # # taken from Raid1 Heartbeat2 OCF resource agent usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } # # meta_data() # meta_data() { cat < 1.00.3 This OCF complaint resource agent manages OpenVZ VEs and thus requires a proper OpenVZ installation including a recent vzctl util. OpenVZ VE resource agent OpenVZ ID of virtual environment (see output of vzlist -a for all assigned IDs) OpenVZ ID of VE END } # # start_ve() # # ATTENTION: The following code relies on vzctl's exit codes, especially: # # 0 : success # 32 : VE already running # # In case any of those exit codes change, this function will need fixing. # start_ve() { declare -i retcode status_ve retcode=$? if [[ $retcode == $OCF_SUCCESS ]]; then return $OCF_SUCCESS elif [[ $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi $VZCTL start $VEID >& /dev/null retcode=$? 
if [[ $retcode != 0 && $retcode != 32 ]]; then ocf_log err "vzctl start $VEID returned: $retcode" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # stop_ve() # # ATTENTION: The following code relies on vzctl's exit codes, especially: # # 0 : success # # In case any of those exit codes change, this function will need fixing. # stop_ve() { declare -i retcode $VZCTL stop $VEID >& /dev/null retcode=$? if [[ $retcode != 0 ]]; then ocf_log err "vzctl stop $VEID returned: $retcode" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # status_ve() # # ATTENTION: The following code relies on vzctl's status output. The fifth # column is interpreted as the VE status (either up or down). # # In case the output format should change, this function will need fixing. # status_ve() { declare -i retcode veexists=`$VZCTL status $VEID 2>/dev/null | $AWK '{print $3}'` vestatus=`$VZCTL status $VEID 2>/dev/null | $AWK '{print $5}'` retcode=$? if [[ $retcode != 0 ]]; then ocf_log err "vzctl status $VEID returned: $retcode" return $OCF_ERR_GENERIC fi if [[ $veexists != "exist" ]]; then ocf_log err "vzctl status $VEID returned: $VEID does not exist." return $OCF_ERR_INSTALLED fi case "$vestatus" in running) return $OCF_SUCCESS ;; down) return $OCF_NOT_RUNNING ;; *) ocf_log err "vzctl status $VEID, wrong output format. (5th column: $vestatus)" return $OCF_ERR_GENERIC ;; esac } # # validate_all_ve() # # ATTENTION: The following code relies on vzctl's status output. The fifth # column is interpreted as the VE status (either up or down). # # In case the output format should change, this function will need fixing. # validate_all_ve() { declare -i retcode # VEID should be a valid VE `status_ve` retcode=$? 
if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi return $OCF_SUCCESS } if [[ $# != 1 ]]; then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac # # check relevant environment variables for sanity and security # # empty string? `test -z "$OCF_RESKEY_veid"` declare -i veidtest1=$? # really a number? `echo "$OCF_RESKEY_veid" | egrep -q '^[[:digit:]]+$'` if [[ $veidtest1 != 1 || $? != 0 ]]; then ocf_log err "OCF_RESKEY_veid not set or not a number." exit $OCF_ERR_ARGS fi declare -i VEID=$OCF_RESKEY_veid # # check that all relevant utilities are available # -check_util $VZCTL -check_util $AWK +check_binary $VZCTL +check_binary $AWK # # finally... let's see what we are ordered to do :-) # case "$1" in start) start_ve ;; stop) stop_ve ;; status|monitor) status_ve ;; validate-all) validate_all_ve ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/Raid1 b/heartbeat/Raid1 index 24f10c403..db640d3e8 100644 --- a/heartbeat/Raid1 +++ b/heartbeat/Raid1 @@ -1,399 +1,392 @@ #!/bin/sh # # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # Raid1 # Description: Manages a software Raid1 device on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # RAID patches: http://people.redhat.com/mingo/raid-patches/ # Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3 # Sympathetic Ear: mailto:linux-raid@vger.kernel.org # # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} # # OCF parameters are as below: # OCF_RESKEY_raidconf # (name of MD configuration file. e.g. 
/etc/raidtab or /etc/mdadm.conf) # OCF_RESKEY_raiddev # (of the form /dev/md* the block device to use) # # in /etc/ha.d/haresources, use a line such as: # nodea 10.0.0.170 Raid1::/etc/raidtab.md0::/dev/md0 Filesystem::/dev/md0::/data1::ext2 # (for systems with raidtools) # or # nodea 10.0.0.170 Raid1::/etc/mdadm.conf::/dev/md0 Filesystem::/dev/md0::/data1::ext2 # (for systems with mdadm) # # The "start" arg starts up the raid device # The "stop" arg stops it. NOTE: all filesystems must be unmounted # and no processes should be accessing the device. # The "status" arg just prints out whether the device is running or not # # # DISCLAIMER: Use at your own risk! # # Besides all of the usual legalese that accompanies free software, # I will warn you that I do not yet use this kind of setup (software RAID # over shared storage) in production, and I have reservations about doing so. # # The linux md driver/scsi drivers under Raid 0.90 and kernel version 2.2 # do not behave well when a drive is in the process of going bad. # The kernel slows down, but doesn't completely crash. This is about the # worst possible thing that could happen in an un-attended HA type # environment. (Once the system is rebooted, the sofware raid stuff works # like a champ.) # My other reservation has to do with the interation of RAID recovery with # journaling filesystems and other parts of the kernel. Subscribe to # linux-raid@vger.kernel.org for other opinions and possible solutions. # # -EZA 25 Oct 2000 # # SETUP: # # You might need to pass the command line parameter: raid=noautodetect # in an HA environment so that the kernel doesn't automatically start # up your raid partitions when you boot the node. This means that it isn't # going to work to use RAID for the system disks and the shared disks. # # 0) partition the disks to use for RAID. Use normal Linux partition # types, not the RAID autodetect type for your partitions. # 1) Create /etc/raidtab.md? 
on both systems (see example file below) # or for systems with mdadm tools create /etc/mdadm.conf (see example below) # 2) Initialize your raid partition with # /sbin/mkraid --configfile /etc/raidtab.md? /dev/md? # or create mirror raid with the following command # mdadm --create /dev/md? -l 1 -n 2 /dev/sdb? /dev/sdb? # 3) Format your filesystem # mke2fs /dev/md0 # for ext2fs... a journaling filesystem would be nice # 3) Create the mount point on both systems. # DO NOT add your raid filesystem to /etc/fstab # 4) copy this script (to /etc/rc.d/init.d if you wish) and edit it to # reflect your desired settings. # 5) Modify the heartbeat 'haresources' (for non-crm heartbeat) or 'cib.xml' (for crm heartbeat) setup file # 6) unmount the filesystem and stop the raid device with 'raidstop' or 'mdadm -S' # 7) fire up heartbeat! # # # EXAMPLE config file /etc/raidtab.md0 # This file must exist on both machines! # # raiddev /dev/md0 # raid-level 1 # nr-raid-disks 2 # chunk-size 64k # persistent-superblock 1 # #nr-spare-disks 0 # device /dev/sda1 # raid-disk 0 # device /dev/sdb1 # raid-disk 1 # # EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf) # # DEVICE /dev/sdb1 /dev/sdb2 # ARRAY /dev/md0 level=raid1 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799 ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ####################################################################### -check_util () { - if [ ! -x "$1" ] ; then - ocf_log err "setup problem: Couldn't find utility $1" - exit $OCF_ERR_GENERIC - fi -} - usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } meta_data() { cat < 1.0 Resource script for RAID1. It manages a software Raid1 device on a shared storage medium. RAID1 resource agent The RAID configuration file. e.g. /etc/raidtab or /etc/mdadm.conf. RAID config file The block device to use. 
block device END } # # START: Start up the RAID device # raid1_start() { # See if the md device is already mounted. $MOUNT | grep -e "^$MDDEV\>" >/dev/null if [ $? -ne 1 ] ; then ocf_log err "Device $MDDEV is already mounted!" return $OCF_ERR_GENERIC fi if [ "running" = `raid1_status` ]; then # We are already online, do not bother return $OCF_SUCCESS fi # Insert SCSI module $MODPROBE scsi_hostadapter if [ $? -ne 0 ] ; then ocf_log warn "Couldn't insert SCSI module." fi # Insert raid personality module $MODPROBE raid1 if [ $? -ne 0 ] ; then # It is not fatal, chance is that we have raid1 builtin... ocf_log warn "Couldn't insert RAID1 module" fi grep -q "^Personalities.*\[raid1\]" /proc/mdstat 2>/dev/null if [ $? -ne 0 ] ; then ocf_log err "We don't have RAID1 support! Exiting" return $OCF_ERR_GENERIC fi if [ $HAVE_RAIDTOOLS = "true" ]; then # Run raidstart to start up the RAID array $RAIDSTART --configfile $RAIDCONF $MDDEV else # Run mdadm $MDADM --assemble $MDDEV --config=$RAIDCONF fi if [ "running" = `raid1_status` ]; then return $OCF_SUCCESS else ocf_log err "Couldn't start RAID for $MDDEV" return $OCF_ERR_GENERIC fi } # # STOP: stop the RAID device # raid1_stop() { # See if the MD device is online if [ "stopped" = `raid1_status` ]; then return $OCF_SUCCESS fi # See if the MD device is mounted $MOUNT | grep -e "^$MDDEV\>" >/dev/null if [ $? -ne 1 ] ; then # Kill all processes open on filesystem $FUSER -m -k $MDDEV # the return from fuser doesn't tell us much #if [ $? -ne 0 ] ; then # ocf_log "err" "Couldn't kill processes on $MOUNTPOINT" # return 1; #fi # Unmount the filesystem $UMOUNT $MDDEV $MOUNT | grep -e "^$MDDEV\>" >/dev/null if [ $? -ne 1 ] ; then ocf_log err "filesystem for $MDDEV still mounted" return $OCF_ERR_GENERIC fi fi # Turn off raid if [ $HAVE_RAIDTOOLS = "true" ]; then $RAIDSTOP --configfile $RAIDCONF $MDDEV else $MDADM --stop $MDDEV --config=$RAIDCONF fi if [ $? 
-ne 0 ] ; then ocf_log err "Couldn't stop RAID for $MDDEV" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # STATUS: is the raid device online or offline? # raid1_status() { # See if the MD device is online grep -e "^$MD[ \t:]" /proc/mdstat >/dev/null if [ $? -ne 0 ] ; then echo "stopped" return $OCF_NOT_RUNNING else echo "running" return $OCF_SUCCESS fi } raid1_validate_all() { # Utilities used by this script check_binary $MODPROBE check_binary $FSCK check_binary $FUSER check_binary $MOUNT check_binary $UMOUNT if [ $HAVE_RAIDTOOLS = "true" ]; then # $MDDEV should be an md device lsraid -a $MDDEV 2>&1 | grep -q -i "is not an md device" if [ $? -eq 0 ]; then ocf_log err "$MDDEV is not an md device!" exit $OCF_ERR_ARGS fi COMMENT="\(#.*\)" grep -q "^[[:space:]]*raiddev[[:space:]]\+$MDDEV[[:space:]]*$COMMENT\?$" $RAIDCONF 2>/dev/null if [ $? -ne 0 ]; then ocf_log err "Raid device $MDDEV does not appear in $RAIDCONF" exit $OCF_ERR_GENERIC fi else error=`$MDADM --query $MDDEV 2>&1` if [ $? -ne 0 ]; then ocf_log err "$error" exit $OCF_ERR_GENERIC fi echo $error | grep -q -i "^$MDDEV[ \t:].*is not an md array" if [ $? -eq 0 ]; then ocf_log err "$MDDEV is not an md array!" exit $OCF_ERR_ARGS fi fi return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac # # Check the necessary enviroment virable's setting # RAIDCONF=$OCF_RESKEY_raidconf MDDEV=$OCF_RESKEY_raiddev if [ -z "$RAIDCONF" ] ; then ocf_log err "Please set OCF_RESKEY_raidconf!" exit $OCF_ERR_ARGS fi if [ ! -r "$RAIDCONF" ] ; then ocf_log err "Configuration file [$RAIDCONF] does not exist, or can not be opend!" exit $OCF_ERR_ARGS fi if [ -z "$MDDEV" ] ; then ocf_log err "Please set OCF_RESKEY_raiddev to the Raid device you want to control!" exit $OCF_ERR_ARGS fi if [ ! -b "$MDDEV" ] ; then ocf_log err "$MDDEV is not a block device!" 
exit $OCF_ERR_ARGS fi # strip off the /dev/ prefix to get the name of the MD device MD=`echo $MDDEV | sed -e 's/\/dev\///'` HAVE_RAIDTOOLS=false if have_binary $RAIDSTART then check_binary $RAIDSTOP HAVE_RAIDTOOLS=true else check_binary $MDADM fi # At this stage, # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, # otherwise we have raidtools (raidstart and raidstop) # Look for how we are called case "$1" in start) raid1_start ;; stop) raid1_stop ;; status|monitor) raid1_status ;; validate-all) raid1_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/WAS b/heartbeat/WAS index b3d6fa6ea..d0f31334b 100644 --- a/heartbeat/WAS +++ b/heartbeat/WAS @@ -1,572 +1,571 @@ #!/bin/sh # # # WAS # # Description: Manages a Websphere Application Server as an HA resource # # # Author: Alan Robertson # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_config # (WAS-configuration file, used for the single server edition of WAS) # OCF_RESKEY_port # (WAS--port-number, used for the advanced edition of WAS) ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ####################################################################### WASDIR=/opt/WebSphere/AppServer if [ ! -d $WASDIR ] then WASDIR=/usr/WebSphere/AppServer fi STARTTIME=300 # 5 minutes -WGET=/usr/bin/wget DEFAULT_WASPORTS="9080" # # WASBIN=$WASDIR/bin DEFAULT=$WASDIR/config/server-cfg.xml # # Print usage message # usage() { methods=`WAS_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-! 
usage: $0 ($methods) For the single server edition of WAS, you have to set the following enviroment virable: OCF_RESKEY_config (WAS-configuration file) For the advanced edition of WAS, you have to set the following enviroment virable: OCF_RESKEY_port (WAS--port-number) $0 manages a Websphere Application Server (WAS) as an HA resource The 'start' operation starts WAS. The 'stop' operation stops WAS. The 'status' operation reports whether WAS is running The 'monitor' operation reports whether the WAS seems to be working (httpd also needs to be working for this case) The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_config or OCF_RESKEY_port) is valid The 'methods' operation reports on the methods $0 supports This is known to work with the Single Server edition of Websphere, and is believed to work with the Advanced edition too. Since the Advanced Edition has no configuration file (it's in a the database) you need to give a port number instead of a configuration file for this config parameter. The default configuration file for the single server edition is: $DEFAULT The default snoop-port for the advanced edition is: $DEFAULT_WASPORTS The start and stop operations must be run as root. The status operation will report a pid of "-" for the WAS root process using unless it is run as root. If you don't have xmllint on your system, parsing of WAS configuration files is very primitive. In this case, the port specification we need from the XML config file has to be on the same line as the first part of the tag. We run servlet/snoop on the first transport port listed in the config file for the "monitor" operation. ! } meta_data() { cat < 1.0 Resource script for WAS. It manages a Websphere Application Server (WAS) as an HA resource. WAS resource agent The WAS-configuration file. configration file The WAS-(snoop)-port-number. port END } # # Reformat the XML document in a sort of canonical form # if we can. 
If we don't have xmllint, we just cat it out # and hope for the best ;-) # xmlcat() { if [ "X$XMLcat" = X ] then XMLcat=`which xmllint 2>/dev/null` if [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] then XMLcat=cat else XMLcat="$XMLcat --recover --format" fi fi for j in "$@" do ${XMLcat} "$j" done } # #This is a bit skanky, but it works anyway... # # # # # # It's not really skanky if we can find xmllint on the system, because it # reformats tags so they are all on one line, which is all we we need... # # # Get the numbers of the ports WAS should be listening on... # # If we don't have xmllint around, then the applicationserver and the # port= specification have to be on the same line in the XML config file. # GetWASPorts() { case $1 in [0-9]*) echo "$1" | tr ',' '\012';; *) xmlcat $1 | grep -i 'transports.*applicationserver:HTTPTransport' | grep port= | sed -e 's%.*port= *"* *%%' \ -e 's%[^0-9][^0-9]*.*$%%' # Delete up to port=, throw away optional quote and optional # white space. # Throw away everything after the first non-digit. # This should leave us the port number all by itself... esac } # # We assume that the first port listed in the # is the one we should run servlet/snoop on. # GetWASSnoopPort() { GetWASPorts "$@" | head -n1 } # # Return information on the processname/id for the WAS ports # # pid/java is the expected output. Several lines, one per port... 
#
#
# WASPortInfo PORT...
#	Print the "pid/program" column of `netstat -ltnp` for every listening
#	TCP socket bound to one of the given ports, one line per socket.
#	Prints nothing when no port is given.
#
WASPortInfo() {
  # Deliberately unquoted: callers may pass a whitespace separated list.
  set -- $*
  # An empty list used to produce the pattern "() .*LISTEN", which matches
  # every listening socket on the host and made a stopped WAS look alive.
  [ $# -gt 0 ] || return 0
  pat=$(echo "$*" | tr ' ' '|')
  # The leading ':' anchors on the address/port separator so that port 9080
  # no longer matches a socket listening on 19080.
  netstat -ltnp 2>/dev/null \
    | grep -E -i ":($pat) .*LISTEN" \
    | sed 's%.*LISTEN *%%'
}

#
#	Return the number of WAS ports which are open
#
CheckWASPortsInUse() {
  count=$(WASPortInfo "$@" | wc -l)
  # Unquoted on purpose: strips the padding some wc implementations emit.
  echo $count
}

#
#	Return the pid(s) of the processes that have WAS ports open
#
WASPIDs() {
  WASPortInfo "$@" | sort -u | cut -f1 -d/
}

#
#	The version of ps that returns all processes and their (long) args
#	It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
  ps axww
}

#
#	The total set of WAS processes (single server only)
#	$1: configuration file name looked for in the java command lines
#
WAS_procs() {
  ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1
}

#
# methods: What methods/operations do we support?
#	"monitor" is only offered when the wget binary is available.
#
WAS_methods() {
  printf '%s\n' start stop status methods validate-all meta-data usage
  # NOTE(review): assumes have_binary (sourced from .ocf-binaries) succeeds
  # when the binary is found -- confirm against the helper's return codes.
  if have_binary "$WGET"; then
    echo monitor
  fi
}

#
#	Return WAS status (silently)
#	$1: configuration file or port list
#	Succeeds when at least one WAS port is being listened on.
#
WAS_status() {
  WASPorts=$(GetWASPorts "$1")
  PortsInUse=$(CheckWASPortsInUse $WASPorts)
  [ "$PortsInUse" != 0 ]
}

#
#	Report on WAS status to stdout...
#	$1: configuration file or port list
#	Returns $OCF_NOT_RUNNING when no port is open, $OCF_SUCCESS otherwise.
#
WAS_report_status() {
  WASPorts=$(GetWASPorts "$1")
  PortCount=$(echo $WASPorts | wc -w)
  PortCount=$(echo $PortCount)	# strip wc padding
  PortsInUse=$(CheckWASPortsInUse $WASPorts)
  case $PortsInUse in
    0)
      ocf_log debug "WAS: server $1 is stopped."
      return $OCF_NOT_RUNNING
      ;;
    *)
      pids=$(WASPIDs $WASPorts)
      if [ "$PortsInUse" -ge "$PortCount" ]; then
        ocf_log debug "WAS: server $1 is running (pid" $pids "et al)."
      else
        ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports."
      fi
      return $OCF_SUCCESS
      ;;
  esac
}

#
#	Monitor WAS - does it really seem to be working?
#
#	For this we invoke the snoop applet via wget.
#
#	This is actually faster than WAS_status above...
#
WAS_monitor() {
  tmpfile=$(maketempfile)
  # Without a file name "-O" would swallow the URL as its argument.
  if [ -z "$tmpfile" ]; then
    ocf_log "err" "WAS: $1: could not create temporary file"
    return $OCF_ERR_GENERIC
  fi
  SnoopPort=$(GetWASSnoopPort "$1")
  output=$($WGET -nv -O"$tmpfile" "http://localhost:$SnoopPort/servlet/snoop" 2>&1)
  rc=$?
  if [ $rc -eq 0 ]; then
    # The snoop servlet echoes the request headers back to us.
    if grep -i 'user-agent.*Wget' "$tmpfile" >/dev/null; then
      : OK
    else
      ocf_log "err" "WAS: $1: no user-agent from snoop application"
      rc=$OCF_ERR_GENERIC
    fi
  else
    ocf_log "err" "WAS: $1: wget failure: $output"
    rc=$OCF_ERR_GENERIC
  fi
  rm -f "$tmpfile"
  return $rc
}

#
#	Start WAS instance
#	$1: configuration file or port list
#
WAS_start() {
  # Launch arguments accepted by the start script (values omitted):
  #	-configFile -nodeName -serverName -oltEnabled -oltHost -oltPort
  #	-debugEnabled -jdwpPort -debugSource -serverTrace -serverTraceFile
  #	-script -platform -noExecute -help
  if [ -x "$WASBIN/startServer.sh" ]; then
    cmd="$WASBIN/startServer.sh -configFile $1"
  else
    cmd="$WASBIN/startupServer.sh"
  fi

  # $cmd is intentionally unquoted: it carries the command and its arguments.
  if ! ocf_run $cmd; then
    return $OCF_ERR_GENERIC
  fi
  if WAS_wait_4_start $STARTTIME "$@"; then
    return $OCF_SUCCESS
  fi
  ocf_log "err" "WAS server $1 did not start correctly"
  return $OCF_ERR_GENERIC
}

#
#	Wait for WAS to actually start up.
#
#	It seems to take between 30 and 60 seconds for it to
#	start up on a trivial WAS instance.
#
#	$1: maximum number of one-second polls; remaining args go to WAS_status
#
WAS_wait_4_start() {
  max=$1
  retries=0
  shift
  while [ "$retries" -lt "$max" ]; do
    if WAS_status "$@"; then
      return $OCF_SUCCESS
    fi
    sleep 1
    retries=$((retries + 1))
  done
  WAS_status "$@"
}

#
#	Shut down WAS
#	$1: configuration file or port list
#
WAS_stop() {
  # They don't return good return codes...
  # And, they seem to allow anyone to stop WAS (!)
  if [ -x "$WASBIN/stopServer.sh" ]; then
    ocf_run "$WASBIN/stopServer.sh" -configFile $1
  else
    WASPorts=$(GetWASPorts "$1")
    # netstat shows "-" instead of a pid when not run as root, and the list
    # may be empty; only hand real pids to kill.
    pids=$(WASPIDs $WASPorts | grep -E '^[0-9]+$')
    if [ -n "$pids" ]; then
      kill $pids
    fi
  fi
  if WAS_status "$1"; then
    ocf_log "err" "WAS: $1 did not stop correctly"
    return $OCF_ERR_GENERIC
  fi
  return $OCF_SUCCESS
}

#
#	Check if the port is valid (a decimal number greater than zero)
#
CheckPort() {
  ocf_is_decimal "$1" && [ "$1" -gt 0 ]
}

#
#	Validate the instance parameter held in the global $arg.
#	Exits with $OCF_ERR_ARGS / $OCF_ERR_CONFIGURED on invalid input,
#	returns $OCF_SUCCESS otherwise.
#
WAS_validate_all() {
  local WASPorts port have_valid_port

  if [ -x "$WASBIN/startServer.sh" ]; then
    # $arg should be config file
    if [ ! -f "$arg" ]; then
      ocf_log err "Configuration file [$arg] does not exist"
      exit $OCF_ERR_ARGS
    fi

    # $arg should specify a valid port number at the very least
    WASPorts=$(GetWASPorts "$arg")
    if [ -z "$WASPorts" ]; then
      ocf_log err "No port number specified in configuration file [$arg]"
      exit $OCF_ERR_CONFIGURED
    fi

    have_valid_port=false
    # Unquoted so that every port found is examined individually.
    for port in $WASPorts; do
      if CheckPort "$port"; then
        have_valid_port=true
        break
      fi
    done
    if [ "false" = "$have_valid_port" ]; then
      ocf_log err "No valid port number specified in configuration file [$arg]"
      exit $OCF_ERR_CONFIGURED
    fi
  elif [ -x "$WASBIN/startupServer.sh" ]; then
    # $arg should be a port number, or a comma separated list of them.
    # The error must be raised when the check FAILS (it used to be inverted).
    have_valid_port=false
    for port in $(echo "$arg" | tr ',' ' '); do
      if CheckPort "$port"; then
        have_valid_port=true
      else
        have_valid_port=false
        break
      fi
    done
    if [ "false" = "$have_valid_port" ]; then
      ocf_log err "Port number is required but [$arg] is not valid port number"
      exit $OCF_ERR_ARGS
    fi
  else
    # Do not know how to validate_all
    ocf_log warn "Do not know how to validate-all, assuming validation OK"
  fi
  return $OCF_SUCCESS
}

#
#	'main' starts here...
#

if [ $# -ne 1 ]; then
  usage
  exit $OCF_ERR_ARGS
fi

#
# Supply default configuration parameter(s)
#

if [ -z "$OCF_RESKEY_config" ] && [ -z "$OCF_RESKEY_port" ]; then
  if [ -f "$DEFAULT" ]; then
    arg=$DEFAULT
  else
    arg=$DEFAULT_WASPORTS
  fi
elif [ -n "$OCF_RESKEY_config" ]; then
  arg=$OCF_RESKEY_config
else
  arg=$OCF_RESKEY_port
fi

if [ ! -f "$arg" ]; then
  case $arg in
    [0-9]*)
      ;;	# ignore port numbers...
    *)
      ocf_log "err" "WAS configuration file $arg does not exist!"
      usage
      exit $OCF_ERR_ARGS
      ;;
  esac
fi

# What kind of method was invoked?
case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; start) WAS_start $arg exit $?;; stop) WAS_stop $arg exit $?;; status) WAS_report_status $arg exit $?;; monitor) WAS_monitor $arg exit $?;; validate-all) WAS_validate_all $arg exit $?;; methods) WAS_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/WAS6 b/heartbeat/WAS6 index d6023903b..8076f7c09 100644 --- a/heartbeat/WAS6 +++ b/heartbeat/WAS6 @@ -1,546 +1,545 @@ #!/bin/sh # WAS6 # # Description: Manages a Websphere Application Server as an HA resource # # # Author: Ru Xiang Min # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2006 International Business Machines China, Ltd., Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/IBM/WebSphere/AppServer/profiles/default/config/cells/Node01Cell/nodes/Node01/serverindex.xml # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_profile # (WAS profile name, used for the single server edition of WAS6) ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs ####################################################################### WAS_DIR=/opt/IBM/WebSphere/AppServer if [ ! -d $WAS_DIR ] then WAS_DIR=/usr/IBM/WebSphere/AppServer fi STARTTIME=300 # 5 minutes -WGET=/usr/bin/wget DEFAULT_WASPORTS="9080" # # WAS_BIN=$WAS_DIR/bin DEFAULT=default # # Print usage message # usage() { methods=`WAS_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) For the single server edition of WAS6, you have to set the following enviroment virable: OCF_RESKEY_profile (WAS profile name) $0 manages a Websphere Application Server 6(WAS6) as an HA resource The 'start' operation starts WAS6. The 'stop' operation stops WAS6. 
The 'status' operation reports whether WAS6 is running The 'monitor' operation reports whether the WAS6 seems to be working (httpd also needs to be working for this case) The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_profileName ) is valid The 'methods' operation reports on the methods $0 supports This is known to work with the Single Server edition of Websphere. The default profile name for the single server edition is: $DEFAULT The start and stop operations must be run as root. The status operation will report a pid of "-" for the WAS root process using unless it is run as root. If you don't have xmllint on your system, parsing of WAS configuration files is very primitive. We run servlet/snoop on the seventh transport port listed in the config file for the "monitor" operation. ! } meta_data() { cat < 1.0 Resource script for WAS6. It manages a Websphere Application Server (WAS6) as an HA resource. WAS6 resource agent The WAS profile name. profile name END } # # Reformat the XML document in a sort of canonical form # if we can. If we don't have xmllint, we just cat it out # and hope for the best ;-) # xmlcat() { if [ "X$XMLcat" = X ] then XMLcat=`which xmllint 2>/dev/null` if [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] then XMLcat=cat else XMLcat="$XMLcat --recover --format" fi fi for j in "$@" do ${XMLcat} "$j" done } # #This is a bit skanky, but it works anyway... # # It's not really skanky if we can find xmllint on the system, because it # reformats tags so they are all on one line, which is all we we need... # # # Get the numbers of the ports WAS should be listening on... # # If we don't have xmllint around, then the applicationserver and the # port= specification have to be on the same line in the XML config file. 
#
# GetWASPorts ARG
#	Print the ports WAS should be listening on, one per line.
#	A numeric ARG is taken as a comma separated port list; anything else
#	makes us read the port= attributes from the profile's serverindex.xml.
#
GetWASPorts() {
  case $1 in
    [0-9]*)
      echo "$1" | tr ',' '\012'
      ;;
    *)
      # Delete up to port=, throw away optional quote and optional
      # white space.
      # Throw away everything after the first non-digit.
      # This should leave us the port number all by itself...
      xmlcat "${WAS_DIR}/profiles/${WAS_PROFILE_NAME}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml" \
        | grep port= \
        | sed -e 's%.*port= *"* *%%' -e 's%[^0-9][^0-9]*.*$%%'
      ;;
  esac
}

#
#	We assume that the seventh port listed in the serverindex.xml
#	is the one we should run servlet/snoop on.
#
GetWASSnoopPort() {
  GetWASPorts "$@" | sed -n '7p'
}

#
# WASPortInfo PORT...
#	Return information on the processname/id for the WAS ports
#
#	pid/java is the expected output.  Several lines, one per port...
#	Prints nothing when no port is given.
#
WASPortInfo() {
  # Deliberately unquoted: callers may pass a whitespace separated list.
  set -- $*
  # An empty list used to produce the pattern "() .*LISTEN", which matches
  # every listening socket on the host and made a stopped WAS look alive.
  [ $# -gt 0 ] || return 0
  pat=$(echo "$*" | tr ' ' '|')
  # The leading ':' anchors on the address/port separator so that port 9080
  # no longer matches a socket listening on 19080.
  netstat -ltnp 2>/dev/null \
    | grep -E -i ":($pat) .*LISTEN" \
    | sed 's%.*LISTEN *%%'
}

#
#	Return the number of WAS ports which are open
#
CheckWASPortsInUse() {
  count=$(WASPortInfo "$@" | wc -l)
  # Unquoted on purpose: strips the padding some wc implementations emit.
  echo $count
}

#
#	Return the pid(s) of the processes that have WAS ports open
#
WASPIDs() {
  WASPortInfo "$@" | sort -u | cut -f1 -d/
}

#
#	The version of ps that returns all processes and their (long) args
#	It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
  ps axww
}

#
#	The total set of WAS processes (single server only)
#	$1: configuration name looked for in the java command lines
#
WAS_procs() {
  ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1
}

#
# methods: What methods/operations do we support?
#	"monitor" is only offered when the wget binary is available.
#
WAS_methods() {
  printf '%s\n' start stop status methods validate-all meta-data usage
  # NOTE(review): assumes have_binary (sourced from .ocf-binaries) succeeds
  # when the binary is found -- confirm against the helper's return codes.
  if have_binary "$WGET"; then
    # No leading whitespace, like every other method name printed above.
    echo monitor
  fi
}

#
#	Return WAS status (silently)
#	$1: profile name or port list
#	Succeeds when at least one WAS port is being listened on.
#
WAS_status() {
  WASPorts=$(GetWASPorts "$1")
  PortsInUse=$(CheckWASPortsInUse $WASPorts)
  [ "$PortsInUse" != 0 ]
}

#
#	Report on WAS status to stdout...
# WAS_report_status() { WASPorts=`GetWASPorts $1` PortCount=`echo $WASPorts | wc -w` PortCount=`echo $PortCount` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) ocf_log debug "WAS: server $1 is stopped."; return $OCF_NOT_RUNNING;; *) pids=`WASPIDs $WASPorts` if [ $PortsInUse -ge $PortCount ] then ocf_log debug "WAS: server $1 is running (pid" $pids "et al)." else ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports." fi return $OCF_SUCCESS;; esac } # # Monitor WAS - does it really seem to be working? # # For this we invoke the snoop applet via wget. # # This is actually faster than WAS_status above... # WAS_monitor() { tmpfile=`maketempfile` SnoopPort=`GetWASSnoopPort $1` output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/snoop 2>&1` rc=$? if [ $rc -eq 0 ] then if grep -i 'user-agent.*Wget' $tmpfile >/dev/null then : OK else ocf_log "err" "WAS: $1: no user-agent from snoop application" rc=$OCF_ERR_GENERIC fi else ocf_log "err" "WAS: $1: wget failure: $output" rc=$OCF_ERR_GENERIC fi rm -fr $tmpfile return $rc } # # Start WAS instance # WAS_start() { # Launch Arguments: # -nowait # -quiet # -logfile # -replacelog # -trace # -script [