diff --git a/heartbeat/IPaddr b/heartbeat/IPaddr index e7db6740f..581df956a 100644 --- a/heartbeat/IPaddr +++ b/heartbeat/IPaddr @@ -1,834 +1,836 @@ #!/bin/sh # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # This script manages IP alias IP addresses # # It can add an IP alias, or remove one. # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds an IP alias. # # Surprisingly, the "stop" arg removes one. :-) # # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_lvs_support ( e.g. true, on, 1 ) # OCF_RESKEY_ARP_INTERVAL_MS # OCF_RESKEY_ARP_REPEAT # OCF_RESKEY_ARP_BACKGROUND (e.g. yes ) # OCF_RESKEY_ARP_NETMASK # OCF_RESKEY_local_start_script # OCF_RESKEY_local_stop_script # ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs HA_HBCONF_DIR=${HA_DIR} SENDARP=$HA_BIN/send_arp FINDIF=$HA_BIN/findif VLDIR=$HA_RSCTMP/IPaddr SENDARPPIDDIR=$HA_RSCTMP/send_arp SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ####################################################################### # Prevent ifconfig localization issues unset LC_ALL; export LC_ALL unset LANGUAGE; export LANGUAGE LC_ALL=C; export LC_ALL LC_MESSAGES=C; export LC_MESSAGES . $HA_HBCONF_DIR/shellfuncs SYSTYPE="`uname -s`" case "$SYSTYPE" in SunOS) # `uname -r` = 5.9 -> SYSVERSION = 9 SYSVERSION="`uname -r | cut -d. -f 2`" ;; Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac meta_data() { cat < 1.0 This script manages IP alias IP addresses It can add an IP alias, or remove one. Manages virtual IPv4 addresses The IPv4 address to be configured in dotted quad notation, for example "192.168.1.1". 
IPv4 address The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. Network interface The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). If unspecified, the script will also try to determine this from the routing table. Netmask Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address You can specify an additional label for your IP address here. Interface label Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Enable support for LVS DR Script called when the IP is released Script called when the IP is released Script called when the IP is added Script called when the IP is added milliseconds between ARPs milliseconds between gratuitous ARPs How many gratuitous ARPs to send out when bringing up a new address repeat count run in background (no longer any reason to do this) run in background netmask for ARP - in nonstandard hexadecimal format. netmask for ARP END exit $OCF_SUCCESS } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # lvs_remove_conflicting_loopback() { ipaddr="$1" ifname="$2" ocf_log info "Removing conflicting loopback $ifname." if echo $ifname > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." 
#
#	On Linux systems the (hidden) loopback interface may
#	need to be restored if it has been taken down previously
#	by lvs_remove_conflicting_loopback()
#
#	$1: the IP address whose saved loopback alias should be restored.
#
#	Reads the interface name saved in "$VLDIR/<ip>", asks $FINDIF for the
#	netmask/broadcast of that address, re-adds the alias through
#	add_interface and finally removes the state file.
#	Does nothing (status 0) when no non-empty state file exists.
#	Exits the script with $OCF_ERR_GENERIC when $FINDIF fails.
#
lvs_restore_loopback() {
  ipaddr="$1"

  # Nothing was saved for this address, so there is nothing to restore.
  if [ ! -s "$VLDIR/$ipaddr" ]; then
    return 0
  fi

  ifname=`cat "$VLDIR/$ipaddr"`
  ocf_log info "Restoring loopback IP Address $ipaddr on $ifname."

  # The parameters are handed to findif through its environment, hence
  # the assignment prefixes and the eval.
  CMD="OCF_RESKEY_cidr_netmask=32 OCF_RESKEY_ip=$1 OCF_RESKEY_nic=$ifname $FINDIF"
  NICINFO=`eval $CMD`
  # Capture the status immediately: the error message below reports it,
  # and any later command would overwrite $?.
  rc=$?
  if [ $rc -ne 0 ]; then
    echo "ERROR: $CMD failed (rc=$rc)"
    exit $OCF_ERR_GENERIC
  fi

  # findif output is split on single spaces here.
  netmask_text=`echo "$NICINFO" | cut -f2 -d " "`
  broadcast=`echo "$NICINFO" | cut -f3 -d " "`

  # Deliberately unquoted: add_interface insists on exactly five arguments
  # and rejects the call itself when findif produced no netmask/broadcast.
  add_interface "$ipaddr" "$ifname" "$ifname" $netmask_text $broadcast
  rm -f "$VLDIR/$ipaddr"
}
#
#	Find out which interface serves the given IP address (BSD flavour).
#
#	$1: the IP address to look for.  When omitted, the value already
#	    present in the global "ipaddr" is used (the historic behaviour).
#
#	Prints the name of the last interface header line containing "UP,"
#	found at or within 20 lines before the matching "inet" line of the
#	$IFCONFIG output; prints nothing when the address is not configured.
#	Returns $OCF_ERR_GENERIC without output when no address is known.
#
find_interface_bsd() {
  ipaddr="${1:-$ipaddr}"

  # An empty pattern would match every line and report an unrelated NIC.
  if [ -z "$ipaddr" ]; then
    return $OCF_ERR_GENERIC
  fi

  # Turn every "." into "[.]" so that the dots of the address are matched
  # literally instead of as regex wildcards.
  ip_pattern=`echo "$ipaddr" | sed 's/[.]/[.]/g'`

  # Anchor the address on both sides: "inet " in front, and a character
  # that cannot continue an address (or end of line) behind it, so that
  # 10.0.0.1 does not match 10.0.0.10 or 110.0.0.1.
  $IFCONFIG |
    grep -E -B20 "inet ${ip_pattern}([^0-9.]|\$)" |
    grep "UP," |
    tail -n 1 |
    cut -d ":" -f 1
}
#
#	Take down / remove the interface (alias) serving an IP address.
#
#	$1: interface name (e.g. "eth0:0")
#	$2: IP address (only used by the Darwin/BSD variants)
#
#	For compatibility with callers that pass both values as one
#	space-separated word ("ifname ipaddr"), a single argument containing
#	a space is split into interface name and address.
#
#	Logs the command via ocf_log, runs it and returns its exit status.
#
delete_interface () {
  ifname="$1"
  ipaddr="$2"

  # Legacy single-word form: without this split the address ends up in
  # the middle of the ifconfig command line and $ipaddr stays empty.
  if [ -z "$ipaddr" ]; then
    case "$ifname" in
      *" "*)
        ipaddr=`echo "$ifname" | sed 's%^[^ ]* *%%'`
        ifname=`echo "$ifname" | sed 's% .*%%'`
        ;;
    esac
  fi

  case "$SYSTYPE" in
    SunOS)
      if [ "$SYSVERSION" -ge 8 ] ; then
        CMD="$IFCONFIG $ifname unplumb"
      else
        CMD="$IFCONFIG $ifname 0 down"
      fi;;
    # Must stay ahead of *BSD: SYSTYPE is "DarwinBSD" on Darwin.
    Darwin*)
      CMD="$IFCONFIG $ifname $ipaddr delete";;
    *BSD)
      CMD="$IFCONFIG $ifname inet $ipaddr delete";;
    *)
      CMD="$IFCONFIG $ifname down";;
  esac

  ocf_log info "$CMD"
  $CMD
  return $?
}
#
#	Remove the IP alias for the requested IP address...
#
#	Uses OCF_RESKEY_ip and OCF_RESKEY_lvs_support.
#	Kills a still-running send_arp (found through its pid file), then
#	removes the host route and the interface serving the address.
#	Returns $OCF_SUCCESS when the address is not configured (or, with LVS
#	support, only sits on a loopback device); otherwise returns the exit
#	status of delete_interface.
#
ip_stop() {
  SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
  NIC=`find_interface "$OCF_RESKEY_ip"`

  if [ -f "$SENDARPPIDFILE" ]; then
    cat "$SENDARPPIDFILE" | xargs kill
    rm -f "$SENDARPPIDFILE"
  fi

  if [ -z "$NIC" ]; then
    : Requested interface not in use
    return $OCF_SUCCESS
  fi

  if [ "${OCF_RESKEY_lvs_support}" = 1 ]; then
    case $NIC in
      lo*)
        : Requested interface is on loopback
        return $OCF_SUCCESS;;
    esac
  fi

  delete_route "$OCF_RESKEY_ip"
  # Interface and address are two separate arguments: delete_interface
  # reads them as $1 and $2.
  delete_interface "$NIC" "$OCF_RESKEY_ip"
  rc=$?

  if [ "${OCF_RESKEY_lvs_support}" = 1 ]; then
    lvs_restore_loopback "$OCF_RESKEY_ip"
  fi

  # remove lock file...
  rm -f "$VLDIR/$NIC"

  if [ $rc != 0 ]; then
    ocf_log warn "IP Address $OCF_RESKEY_ip NOT released"
  fi
  return $rc
}
= $OCF_SUCCESS ]; then # Nothing to do, the IP is already active return $OCF_SUCCESS; fi NIC_unique=`find_free_interface $OCF_RESKEY_nic` if [ -n "$NIC_unique" ]; then : OK got interface [$NIC_unique] for $OCF_RESKEY_ip else return $OCF_ERR_GENERIC fi # This logic is mostly to support LVS (If I understand it correctly) if [ ${OCF_RESKEY_lvs_support} = 1 ]; then NIC_current=`find_interface $OCF_RESKEY_ip` case $NIC_unique in lo*) if [ x"$NIC_unique" = x"$NIC_current" ]; then # Its already "running" and not moving, nothing to do. ocf_log err "Could not find a non-loopback device to move $OCF_RESKEY_ip to" return $OCF_ERR_GENERIC fi;; *) lvs_remove_conflicting_loopback "$OCF_RESKEY_ip" "$NIC_current";; esac fi add_interface "$OCF_RESKEY_ip" "$OCF_RESKEY_nic" "$NIC_unique" \ "$OCF_RESKEY_cidr_netmask" "$OCF_RESKEY_broadcast" rc=$? if [ $rc != 0 ]; then ocf_log err "Could not add $OCF_RESKEY_ip to $OCF_RESKEY_nic: $rc" return $rc fi # The address is active, now notify others about it using sendarp if [ "$SYSTYPE" = "DarwinBSD" -a "$NIC_unique" = "lo0" ]; then # Darwin can't send ARPs on loopback devices SENDARP="" fi if [ x$SENDARP != x ]; then TARGET_INTERFACE=`echo $NIC_unique | sed 's%:.*%%'` SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" ARGS="-i $OCF_RESKEY_ARP_INTERVAL_MS -r $OCF_RESKEY_ARP_REPEAT" ARGS="$ARGS -p $SENDARPPIDFILE $TARGET_INTERFACE $OCF_RESKEY_ip" ARGS="$ARGS auto $OCF_RESKEY_ip $OCF_RESKEY_ARP_NETMASK" ocf_log debug "Sending Gratuitous Arp for $OCF_RESKEY_ip on $NIC_unique [$TARGET_INTERFACE]" case $OCF_RESKEY_ARP_BACKGROUND in yes) ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?" & ) >&2 ;; *) $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?";; esac fi ip_status_internal return $? 
#
#	Determine if this IP address is really being served, or not.
#	Note that we must distinguish if *we're* serving it locally...
#
#	Returns the result of ip_status_internal when that is non-zero or
#	when the check level is 0 (an unset OCF_CHECK_LEVEL counts as 0).
#	Otherwise pings the address up to ten times and returns
#	$OCF_SUCCESS on the first answer, $OCF_ERR_GENERIC if none arrives.
#
ip_monitor() {
  ip_status_internal
  rc=$?

  # Quoted and defaulted: with OCF_CHECK_LEVEL unset the bare
  # "[ $OCF_CHECK_LEVEL = 0 ... ]" is a syntax error for test(1), which
  # silently sent a plain monitor into the ping check below.
  if [ "${OCF_CHECK_LEVEL:-0}" = 0 ] || [ "$rc" != 0 ]; then
    return $rc
  fi

  ocf_log info "Checking IP stack"

  PINGARGS="`pingargs $OCF_RESKEY_ip`"
  for j in 1 2 3 4 5 6 7 8 9 10; do
    # $PINGARGS is intentionally unquoted: it holds several options.
    if $PING $PINGARGS >/dev/null 2>&1 ; then
      return $OCF_SUCCESS
    fi
  done
  return $OCF_ERR_GENERIC
}
return $OCF_ERR_GENERIC fi if is_positive_integer $OCF_RESKEY_ARP_INTERVAL_MS then ocf_log err "Invalid parameter value: ARP_INTERVAL_MS [$OCF_RESKEY_ARP_INTERVAL_MS]" return $OCF_ERR_ARGS fi if is_positive_integer $OCF_RESKEY_ARP_REPEAT then ocf_log err "Invalid parameter value: ARP_REPEAT [$OCF_RESKEY_ARP_REPEAT]" return $OCF_ERR_ARGS fi : ${OCF_RESKEY_lvs_support=0} if [ "$SYSTYPE" = "Linux" -o "$SYSTYPE" = "SunOS" ]; then : else if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then ocf_log err "$SYSTYPE does not support LVS" return $OCF_ERR_GENERIC fi fi case $OCF_RESKEY_ip in "") ocf_log err "Required parameter OCF_RESKEY_ip is missing" return $OCF_ERR_CONFIGURED;; [0-9]*.[0-9]*.[0-9]*.*[0-9]) : OK;; *) ocf_log err "Parameter OCF_RESKEY_ip [$OCF_RESKEY_ip] not an IP address" return $OCF_ERR_CONFIGURED;; esac # Unconditionally do this? case $OCF_RESKEY_nic in *:*) OCF_RESKEY_nic=`echo $OCF_RESKEY_nic | sed 's/:.*//'` ;; esac NICINFO=`$FINDIF` rc=$? if [ $rc != 0 ]; then ocf_log err "$FINDIF failed [rc=$rc]." 
# Exactly one argument (the action) is required.  usage() only returns
# its argument as status, so the script has to exit here itself;
# otherwise it would go on and dispatch on "$1" regardless.
if [ $# -ne 1 ]; then
  usage $OCF_ERR_ARGS
  exit $OCF_ERR_ARGS
fi

: ${OCF_RESKEY_lvs_support=0}
# Normalize the value of lvs_support: "true", "on", "yes" and "1" all
# mean enabled (1), anything else means disabled (0).
case "${OCF_RESKEY_lvs_support}" in
  true|on|yes|1)	OCF_RESKEY_lvs_support=1;;
  *)			OCF_RESKEY_lvs_support=0;;
esac

# Note: We had a version out there for a while which used
# netmask instead of cidr_netmask. So, don't remove this aliasing code!
if [ -n "$OCF_RESKEY_netmask" ] && [ -z "$OCF_RESKEY_cidr_netmask" ]
then
  OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask
  export OCF_RESKEY_cidr_netmask
fi

# Dispatch the requested action; the exit status of the script is the
# status of the selected handler.
case $1 in
  meta-data)	meta_data;;
  start)	ip_validate_all && ip_start;;
  stop)		ip_stop;;
  status)	ip_status;;
  monitor)	ip_monitor;;
  validate-all)	ip_validate_all;;
  usage)	usage $OCF_SUCCESS;;
  *)		usage $OCF_ERR_UNIMPLEMENTED;;
esac

exit $?
# Check & initialize the OCFS2 specific variables.
#
# Reads:  OP, OCF_RESKEY_CRM_meta_clone, OCF_RESKEY_ocfs2_cluster
# Sets:   O2CB_CLUSTER - the upper-cased OCF_RESKEY_ocfs2_cluster when
#         that parameter is given, otherwise the single cluster name
#         reported by "o2cb_ctl -I -t cluster -o".
# Exits with $OCF_ERR_GENERIC when the resource is not run as a clone
# (for every operation but "stop"), or when auto-detection finds no
# cluster or more than one.
o2cb_init() {
	# Quoted on purpose: with an empty OP the unquoted test is a
	# syntax error and the clone check would be skipped silently.
	if [ "$OP" != "stop" ]; then
		if [ -z "$OCF_RESKEY_CRM_meta_clone" ]; then
			ocf_log err "o2cb must be run as a clone."
			exit $OCF_ERR_GENERIC
		fi
	fi

	if [ -n "$OCF_RESKEY_ocfs2_cluster" ]; then
		O2CB_CLUSTER=$(echo $OCF_RESKEY_ocfs2_cluster | tr '[a-z]' '[A-Z]')
		return 0
	fi

	# Keep the first colon-separated field of every line that does not
	# start with "#".
	O2CB_CLUSTER=$(o2cb_ctl -I -t cluster -o | sed -ne '/^[^#]/{ s/\([^:]*\):.*$/\1/; p; }')

	# Count the words found; this only replaces the positional
	# parameters of this function.
	set -- $O2CB_CLUSTER
	local n="$#"
	if [ $n -gt 1 ]; then
		ocf_log err "$O2CB_CLUSTER: several clusters found."
		exit $OCF_ERR_GENERIC
	fi
	if [ $n -eq 0 ]; then
		# This is fatal for the operation, so log it as an error.
		ocf_log err "$O2CB_CLUSTER: no clusters found."
		exit $OCF_ERR_GENERIC
	fi
}
# Handle clone notifications.
#
# Reads the OCF_RESKEY_CRM_meta_notify_{type,operation,active_uname,
# start_uname} variables plus O2CB_MYSELF, O2CB_CONF, RCO2CB and
# OCF_RESKEY_port; O2CB_CLUSTER is set up through o2cb_init.
#
# pre-start:  resets this node's "o2cb-<cluster>-lock" status attribute
#             to "unset".
# post-start: the first node of the merged active+start list acts as
#             leader: it adds every node that is not yet configured
#             locally to the o2cb configuration and then publishes the
#             md5sum of $O2CB_CONF in its lock attribute.  All other
#             nodes wait for that attribute and copy the leader's
#             configuration via scp when its hash differs from their own.
#             Finally "$RCO2CB online" is run.
# Anything else is ignored.  Returns $OCF_SUCCESS.
o2cb_notify() {
	o2cb_init

	local n_type="$OCF_RESKEY_CRM_meta_notify_type"
	local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
	local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname"
	local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname"

	if [ "$n_type" = "pre" ] && [ "$n_op" = "start" ]; then
		crm_attribute -! -t status -U "${O2CB_MYSELF}" \
			-n "o2cb-${O2CB_CLUSTER}-lock" -v unset >/dev/null 2>&1
	fi

	# We only have to do something for post-start - "someone" just
	# came online and needs to be integrated into the cluster.
	if [ "$n_type" != "post" ] || [ "$n_op" != "start" ]; then
		return $OCF_SUCCESS
	fi

	# Duplicate removal - start can contain nodes already on the
	# active list, confusing the script later on.  Names are compared
	# as whole words, so "node1" never eats part of "node10".
	local n_new=""
	local candidate
	for candidate in $n_start; do
		case " $n_active " in
			*" $candidate "*) ;;
			*) n_new="$n_new $candidate" ;;
		esac
	done

	# Merge pruned lists again; this will be the same order on all
	# nodes thanks to the PE. The first node thus will be one which
	# is already active (if any), so the newly starting node(s) will
	# get the configuration from an existing member, to prevent
	# fluctuations.
	n_active="$n_active $n_new"

	local lock_mykey=$(md5sum "$O2CB_CONF" | cut -f 1 -d ' ')
	local n_first=$(echo $n_active | cut -d ' ' -f 1)
	local lock_cib="unset"

	if [ "$n_first" != "$O2CB_MYSELF" ]; then
		ocf_log info "$O2CB_CLUSTER: Waiting for leader ($n_first) to complete."

		# Poll the leader's lock attribute.  "unset" means the leader
		# is still busy; an empty answer means the query itself gave
		# nothing back, which must not be mistaken for completion.
		while :; do
			lock_cib="$(crm_attribute -t status -n "o2cb-${O2CB_CLUSTER}-lock" -U "$n_first" -G -Q 2>/dev/null)"
			case "$lock_cib" in
				""|unset) sleep 3 ;;
				*) break ;;
			esac
		done
		ocf_log info "$O2CB_CLUSTER: Leader has completed."

		if [ "$lock_cib" != "$lock_mykey" ]; then
			ocf_log info "$O2CB_CLUSTER: Retrieving updated config from leader ($lock_cib != $lock_mykey)."
			if ! scp "$n_first:$O2CB_CONF" "$O2CB_CONF"; then
				# Still carry on, as before, but do not hide the failure.
				ocf_log err "$O2CB_CLUSTER: Could not retrieve $O2CB_CONF from $n_first."
			fi
		else
			ocf_log info "$O2CB_CLUSTER: cluster config unchanged."
		fi
	else
		ocf_log info "$O2CB_CLUSTER: Leading cluster re-configuration."

		local n n_ip n_slot max_slot

		# The highest slot number in use so far, on-disk.  A
		# non-numeric result (e.g. only the "slot" column heading,
		# or nothing at all) means no slot is in use yet.
		max_slot=$(o2cb_ctl -I -o -t node | cut -d ':' -f 3 | sort -n | tail -n 1)
		case "$max_slot" in
			""|*[!0-9]*) max_slot=0 ;;
		esac

		for n in $n_active ; do
			if o2cb_ctl -I -t node -n "$n" -o >/dev/null 2>&1 ; then
				ocf_log info "$O2CB_CLUSTER: $n already configured locally."
				# If it's already configured locally, it's
				# already accounted for in max_slot.
				# TODO: If o2cb_ctl -H eventually works we
				# might need to reconfigure the IP
				# address still.
				continue
			fi

			n_ip=$(crm_attribute -t status -n "o2cb-${O2CB_CLUSTER}-ip" -U "$n" -G -Q 2>/dev/null)
			if [ -z "$n_ip" ]; then
				# This should actually be impossible. It means
				# that start on that node has failed, and it'll
				# be stopped any second now anyway
				ocf_log err "$O2CB_CLUSTER: No IP was found for $n, skipping!"
				continue
			fi
			ocf_log info "$O2CB_CLUSTER: Using IP $n_ip for new node $n"

			n_slot=$((max_slot + 1))
			max_slot=$n_slot
			ocf_log info "$O2CB_CLUSTER: Assigned slot $n_slot to $n"

			# TODO: Handle the case where o2cb is already
			# active and the node needs to be added
			# "online" using -i.
			# This also will need to be handled on the
			# slaves, which don't know which nodes were
			# added.
			o2cb_ctl -C -n "$n" -t node \
				-a "number=$n_slot" -a "ip_address=$n_ip" \
				-a "ip_port=$OCF_RESKEY_port" -a "cluster=$O2CB_CLUSTER"
			ocf_log info "$O2CB_CLUSTER: $n added to o2cb configuration."
		done

		lock_cib=$(md5sum "$O2CB_CONF" | cut -d ' ' -f 1)
		# Mark our run as completed - this will unlock the other
		# nodes as well.
		ocf_log info "$O2CB_CLUSTER: Completed - config hash now: $lock_cib"
		crm_attribute -! -t status -U "${O2CB_MYSELF}" \
			-n "o2cb-${O2CB_CLUSTER}-lock" -v "$lock_cib" >/dev/null 2>&1
	fi

	# it'd be very nice if there was an o2cb reload.
	$RCO2CB online

	return $OCF_SUCCESS
}