diff --git a/doc/Makefile.am b/doc/Makefile.am index 249b76cf6..c119e9143 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,157 +1,158 @@ # # doc: Linux-HA resource agents # # Copyright (C) 2009 Florian Haas # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(doc_DATA) $(REFENTRY_STYLESHEET) \ mkappendix.sh ralist.sh CLEANFILES = $(man_MANS) $(xmlfiles) metadata-*.xml doc_DATA = README.webapps STYLESHEET_PREFIX ?= http://docbook.sourceforge.net/release/xsl/current MANPAGES_STYLESHEET ?= $(STYLESHEET_PREFIX)/manpages/docbook.xsl HTML_STYLESHEET ?= $(STYLESHEET_PREFIX)/xhtml/docbook.xsl FO_STYLESHEET ?= $(STYLESHEET_PREFIX)/fo/docbook.xsl REFENTRY_STYLESHEET ?= ra2refentry.xsl XSLTPROC_OPTIONS ?= --xinclude XSLTPROC_MANPAGES_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) radir = $(top_srcdir)/heartbeat # OCF_ROOT=. is necessary due to a sanity check in ocf-shellfuncs # (which tests whether $OCF_ROOT points to a directory metadata-%.xml: $(radir)/% OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ metadata-IPv6addr.xml: ../heartbeat/IPv6addr OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ # Please note: we can't name the man pages # ocf:heartbeat:. Believe me, I've tried. 
It looks like it # works, but then it doesn't. While make can deal correctly with # colons in target names (when properly escaped), it royally messes up # when it is deals with _dependencies_ that contain colons. See Bug # 12126 on savannah.gnu.org. But, maybe it gets fixed soon, it was # first reported in 1995 and added to Savannah in in 2005... if BUILD_DOC man_MANS = ocf_heartbeat_AoEtarget.7 \ ocf_heartbeat_AudibleAlarm.7 \ ocf_heartbeat_ClusterMon.7 \ ocf_heartbeat_CTDB.7 \ ocf_heartbeat_Delay.7 \ ocf_heartbeat_Dummy.7 \ ocf_heartbeat_EvmsSCC.7 \ ocf_heartbeat_Evmsd.7 \ ocf_heartbeat_Filesystem.7 \ ocf_heartbeat_ICP.7 \ ocf_heartbeat_IPaddr.7 \ ocf_heartbeat_IPaddr2.7 \ ocf_heartbeat_IPsrcaddr.7 \ ocf_heartbeat_LVM.7 \ ocf_heartbeat_LinuxSCSI.7 \ ocf_heartbeat_MailTo.7 \ ocf_heartbeat_ManageRAID.7 \ ocf_heartbeat_ManageVE.7 \ ocf_heartbeat_Pure-FTPd.7 \ ocf_heartbeat_Raid1.7 \ ocf_heartbeat_Route.7 \ ocf_heartbeat_SAPDatabase.7 \ ocf_heartbeat_SAPInstance.7 \ ocf_heartbeat_SendArp.7 \ ocf_heartbeat_ServeRAID.7 \ ocf_heartbeat_SphinxSearchDaemon.7 \ ocf_heartbeat_Squid.7 \ ocf_heartbeat_Stateful.7 \ ocf_heartbeat_SysInfo.7 \ ocf_heartbeat_VIPArip.7 \ ocf_heartbeat_VirtualDomain.7 \ ocf_heartbeat_WAS.7 \ ocf_heartbeat_WAS6.7 \ ocf_heartbeat_WinPopup.7 \ ocf_heartbeat_Xen.7 \ ocf_heartbeat_Xinetd.7 \ ocf_heartbeat_anything.7 \ ocf_heartbeat_apache.7 \ ocf_heartbeat_conntrackd.7 \ ocf_heartbeat_db2.7 \ ocf_heartbeat_drbd.7 \ ocf_heartbeat_eDir88.7 \ + ocf_heartbeat_ethmonitor.7 \ ocf_heartbeat_exportfs.7 \ ocf_heartbeat_fio.7 \ ocf_heartbeat_iSCSILogicalUnit.7 \ ocf_heartbeat_iSCSITarget.7 \ ocf_heartbeat_ids.7 \ ocf_heartbeat_iscsi.7 \ ocf_heartbeat_jboss.7 \ ocf_heartbeat_lxc.7 \ ocf_heartbeat_mysql.7 \ ocf_heartbeat_mysql-proxy.7 \ ocf_heartbeat_nfsserver.7 \ ocf_heartbeat_nginx.7 \ ocf_heartbeat_oracle.7 \ ocf_heartbeat_oralsnr.7 \ ocf_heartbeat_pgsql.7 \ ocf_heartbeat_pingd.7 \ ocf_heartbeat_portblock.7 \ ocf_heartbeat_postfix.7 \ ocf_heartbeat_proftpd.7 \ 
ocf_heartbeat_rsyncd.7 \ ocf_heartbeat_scsi2reservation.7 \ ocf_heartbeat_sfex.7 \ ocf_heartbeat_symlink.7 \ ocf_heartbeat_syslog-ng.7 \ ocf_heartbeat_tomcat.7 \ ocf_heartbeat_vmware.7 if USE_IPV6ADDR man_MANS += ocf_heartbeat_IPv6addr.7 endif xmlfiles = $(man_MANS:.7=.xml) %.1 %.5 %.7 %.8: %.xml $(XSLTPROC) \ $(XSLTPROC_MANPAGES_OPTIONS) \ $(MANPAGES_STYLESHEET) $< ocf_heartbeat_%.xml: metadata-%.xml $(srcdir)/$(REFENTRY_STYLESHEET) $(XSLTPROC) --novalid \ --stringparam package $(PACKAGE_NAME) \ --stringparam version $(VERSION) \ --output $@ \ $(srcdir)/$(REFENTRY_STYLESHEET) $< ocf_resource_agents.xml: $(xmlfiles) mkappendix.sh ./mkappendix.sh $(xmlfiles) > $@ %.html: %.xml $(XSLTPROC) \ $(XSLTPROC_HTML_OPTIONS) \ --output $@ \ $(HTML_STYLESHEET) $< xml: ocf_resource_agents.xml endif diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am index 0d995f645..e47caff88 100644 --- a/heartbeat/Makefile.am +++ b/heartbeat/Makefile.am @@ -1,120 +1,121 @@ # Makefile.am for OCF RAs # # Author: Sun Jing Dong # Copyright (C) 2004 IBM # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(ocf_SCRIPTS) $(ocfcommon_DATA) \ $(common_DATA) $(hb_DATA) $(dtd_DATA) \ README INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/linux-ha ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat dtddir = $(datadir)/$(PACKAGE_NAME) dtd_DATA = ra-api-1.dtd if USE_IPV6ADDR ocf_PROGRAMS = IPv6addr else ocf_PROGRAMS = endif IPv6addr_SOURCES = IPv6addr.c IPv6addr_LDADD = -lplumb $(LIBNETLIBS) ocf_SCRIPTS = ClusterMon \ CTDB \ Dummy \ IPaddr \ IPaddr2 \ drbd \ anything \ AoEtarget \ apache \ nginx \ AudibleAlarm \ conntrackd \ db2 \ Delay \ eDir88 \ EvmsSCC \ Evmsd \ + ethmonitor \ exportfs \ Filesystem \ fio \ ids \ iscsi \ ICP \ IPsrcaddr \ iSCSITarget \ iSCSILogicalUnit \ jboss \ LinuxSCSI \ LVM \ lxc \ MailTo \ ManageRAID \ ManageVE \ mysql \ mysql-proxy \ nfsserver \ oracle \ oralsnr \ pingd \ portblock \ postfix \ pgsql \ proftpd \ Pure-FTPd \ Raid1 \ Route \ rsyncd \ SAPDatabase \ SAPInstance \ SendArp \ ServeRAID \ SphinxSearchDaemon \ Squid \ Stateful \ SysInfo \ scsi2reservation \ sfex \ symlink \ syslog-ng \ tomcat \ VIPArip \ VirtualDomain \ vmware \ WAS \ WAS6 \ WinPopup \ Xen \ Xinetd ocfcommondir = $(OCF_LIB_DIR_PREFIX)/heartbeat ocfcommon_DATA = ocf-shellfuncs \ ocf-binaries \ ocf-directories \ ocf-returncodes # Legacy locations hbdir = $(sysconfdir)/ha.d hb_DATA = shellfuncs diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor new file mode 100755 index 000000000..f772910d7 --- /dev/null +++ b/heartbeat/ethmonitor @@ -0,0 +1,454 @@ +#!/bin/sh +# +# OCF Resource Agent compliant script. +# Monitor the vitality of a local network interface. +# +# Based on the work by Robert Euhus and Lars Marowsky-Brée. +# +# Transfered from Ipaddr2 into ethmonitor by Alexander Krauth +# +# Copyright (c) 2011 Robert Euhus, Alexander Krauth, Lars Marowsky-Brée +# All Rights Reserved. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# OCF parameters are as below +# +# OCF_RESKEY_interface +# OCF_RESKEY_multiplier +# OCF_RESKEY_name +# OCF_RESKEY_repeat_count +# OCF_RESKEY_repeat_interval +# OCF_RESKEY_pktcnt_timeout +# OCF_RESKEY_arping_count +# OCF_RESKEY_arping_timeout +# OCF_RESKEY_arping_cache_entries +# +# TODO: Check against IPv6 +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +meta_data() { + cat < + + +1.2 + + +Monitor the vitality of a local network interface. + +You may setup this RA as a clone resource to monitor the network interfaces on different nodes, with the same interface name. +This is not related to the IP adress or the network on which a interface is configured.
+You may use this RA to move resources away from a node, which has a faulty interface or prevent moving resources to such a node. +This gives you independend control of the resources, without involving cluster intercommunication. But it requires your nodes to have more than one network interface. + +The resource configuration requires a monitor operation, because the monitor does the main part of the work. +In addition to the resource configuration, you need to configure some location contraints, based on a CIB attribute value. +The name of the attribute value is configured in the 'name' option of this RA. + +Example constraint configuration: +location loc_connected_node my_resource_grp \ + rule $id="rule_loc_connected_node" -INF: ethmonitor eq 0 + +The ethmonitor works in 3 different modes to test the interface vitality. +1. call ip to see if the link status is up (if link is down -> error) +2. call ip an watch the RX counter (if packages come around in a certain time -> success) +3. call arping to check wether any of the IPs found in the lokal ARP cache answers an ARP REQUEST (one answer -> success) +4. return error + +Monitors network interfaces + + + + +The name of the network interface which should be monitored (e.g. eth0). + +Network interface name + + + + + +The name of the CIB attribute to set. This is the name to be used in the constraints. Defaults to "ethmonitor-". + +Attribute name + + + + + +Multiplier for the value of the CIB attriobute specified in parameter name. + +Multiplier for result variable + + + + + +Specify how often the interface will be monitored, before the status is set to failed. You need to set the timeout of the monitoring operation to at least repeat_count * repeat_interval + +Monitor repeat count + + + + + +Specify how long to wait in seconds between the repeat_counts. + +Monitor repeat interval in seconds + + + + + +Timeout for the RX packet counter. Stop listening for packet counter changes after the given number of seconds. 
+ +packet counter timeout + + + + + +Number of ARP REQUEST packets to send for every IP. +Usually one ARP REQUEST (arping) is send + +Number of arpings per IP + + + + + +Time in seconds to wait for ARP REQUESTs (all packets of arping_count). +This is to limit the time for arp requests, to be able to send requests to more than one node, without running in the monitor operation timeout. + +Timeout for arpings per IP + + + + + +Maximum number of IPs from ARP cache list to check for ARP REQUEST (arping) answers. Newest entries are tried first. + +Number of ARP cache entries to try + + + + + + + + + + + + + +END
+
+    exit $OCF_SUCCESS
+}
+
+# is_interface NAME
+# Return true, if the interface exists
+# (NAME is looked up in the one-line-per-address output of "ip -o -f inet addr show")
+is_interface() {
+    #
+    # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces
+    #
+    local iface=`$IP2UTIL -o -f inet addr show | grep " $1 " \
+        | cut -d ' ' -f2 | sort -u | grep -v '^ipsec[0-9][0-9]*$'`
+    [ "$iface" != "" ]
+}
+
+if_init() {
+    # Validate all OCF_RESKEY_* parameters and apply their defaults; sets NIC, ATTRNAME, REP_COUNT and REP_INTERVAL_S.
+    # Exits with OCF_ERR_CONFIGURED on any invalid value, otherwise returns OCF_SUCCESS.
+    if [ X"$OCF_RESKEY_interface" = "X" ]; then
+        ocf_log err "Interface name (the interface parameter) is mandatory"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    NIC="$OCF_RESKEY_interface"
+
+    if is_interface "$NIC"
+    then
+        case "$NIC" in
+        *:*) ocf_log err "Do not specify a virtual interface : $OCF_RESKEY_interface"
+            exit $OCF_ERR_CONFIGURED;;
+        *) ;;
+        esac
+    else
+        case $__OCF_ACTION in
+        validate-all) ocf_log err "Interface $NIC does not exist"
+            exit $OCF_ERR_CONFIGURED;;
+        *) ocf_log warn "Interface $NIC does not exist"
+            ## It might be a bond interface which is temporarily not available, therefore we want to continue here
+            ;;
+        esac
+    fi
+
+    : ${OCF_RESKEY_multiplier:="1"}
+    if ! ocf_is_decimal "$OCF_RESKEY_multiplier"; then
+        ocf_log err "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    ATTRNAME=${OCF_RESKEY_name:-"ethmonitor-$NIC"}
+    # repeat_count must be a decimal number >= 1; the two checks are separate commands joined with "||"
+    REP_COUNT=${OCF_RESKEY_repeat_count:-5}
+    if ! ocf_is_decimal "$REP_COUNT" || [ "$REP_COUNT" -lt 1 ]; then
+        ocf_log err "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]"
+        exit $OCF_ERR_CONFIGURED
+    fi
+    REP_INTERVAL_S=${OCF_RESKEY_repeat_interval:-10}
+    if ! ocf_is_decimal "$REP_INTERVAL_S"; then
+        ocf_log err "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]"
+        exit $OCF_ERR_CONFIGURED
+    fi
+    : ${OCF_RESKEY_pktcnt_timeout:="5"}
+    if ! ocf_is_decimal "$OCF_RESKEY_pktcnt_timeout"; then
+        ocf_log err "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]"
+        exit $OCF_ERR_CONFIGURED
+    fi
+    : ${OCF_RESKEY_arping_count:="1"}
+    if ! ocf_is_decimal "$OCF_RESKEY_arping_count"; then
+        ocf_log err "Invalid OCF_RESKEY_arping_count [$OCF_RESKEY_arping_count]"
+        exit $OCF_ERR_CONFIGURED
+    fi
+    : ${OCF_RESKEY_arping_timeout:="1"}
+    if ! ocf_is_decimal "$OCF_RESKEY_arping_timeout"; then
+        ocf_log err "Invalid OCF_RESKEY_arping_timeout [$OCF_RESKEY_arping_timeout]"
+        exit $OCF_ERR_CONFIGURED
+    fi
+    : ${OCF_RESKEY_arping_cache_entries:="5"}
+    if ! ocf_is_decimal "$OCF_RESKEY_arping_cache_entries"; then
+        ocf_log err "Invalid OCF_RESKEY_arping_cache_entries [$OCF_RESKEY_arping_cache_entries]"
+        exit $OCF_ERR_CONFIGURED
+    fi
+    return $OCF_SUCCESS
+}
+
+# get the link status on $NIC
+# asks ip about running (up) interfaces, returns the number of matching interface names that are up
+get_link_status () {
+    $IP2UTIL -o link show up dev "$NIC" | grep -c "$NIC"
+}
+
+# returns the number of received rx packets on $NIC
+get_rx_packets () {
+    ocf_log debug "$IP2UTIL -o -s link show dev $NIC"
+    $IP2UTIL -o -s link show dev "$NIC" \
+        | sed 's/.* RX: [^0-9]*[0-9]* *\([0-9]*\) .*/\1/'
+    # the first number after RX: is the # of bytes ,
+    # the second is the # of packets received
+}
+
+# watch for packet counter changes for max.
OCF_RESKEY_pktcnt_timeout seconds
+# returns immediately with return code 0 if any packets were received
+# otherwise 1 is returned (the RX counter is polled every 0.1 s, 10 polls per second of timeout)
+watch_pkt_counter () {
+    local RX_PACKETS_NEW
+    local RX_PACKETS_OLD
+    RX_PACKETS_OLD="`get_rx_packets`"
+    for n in `seq $(( $OCF_RESKEY_pktcnt_timeout * 10 ))`; do
+        sleep 0.1
+        RX_PACKETS_NEW="`get_rx_packets`"
+        ocf_log debug "RX_PACKETS_OLD: $RX_PACKETS_OLD RX_PACKETS_NEW: $RX_PACKETS_NEW"
+        if [ "$RX_PACKETS_OLD" -ne "$RX_PACKETS_NEW" ]; then
+            ocf_log debug "we received some packets."
+            return 0
+        fi
+    done
+    return 1
+}
+
+# returns list of cached ARP entries for $NIC
+# sorted by age ("last confirmed"), numerically ascending; only the first space-separated column of each entry is printed
+# max. OCF_RESKEY_arping_cache_entries entries
+get_arp_list () {
+    $IP2UTIL -s neighbour show dev $NIC \
+        | sort -t/ -k2,2n | cut -d' ' -f1 \
+        | head -n $OCF_RESKEY_arping_cache_entries
+    # the "used" entries in `ip -s neighbour show` are:
+    # "last used"/"last confirmed"/"last updated"
+}
+
+# arping the IP given as argument $1 on $NIC
+# until OCF_RESKEY_arping_count answers are received (bounded by -w OCF_RESKEY_arping_timeout; presumably seconds - confirm against the installed arping)
+do_arping () {
+    # TODO: add the source IP
+    # TODO: check for different arping versions out there
+    arping -q -c $OCF_RESKEY_arping_count -w $OCF_RESKEY_arping_timeout -I $NIC $1
+    # return with the exit code of the arping command
+    return $?
+}
+
+# NOTE(review): if_check as written never reads OCF_RESKEY_check_level and always runs its checks in a fixed order; the level table below looks like a leftover design sketch - confirm
+# Check the interface depending on the level given as parameter: $OCF_RESKEY_check_level
+#
+# 09: check for nonempty ARP cache
+# 10: watch for packet counter changes
+#
+# 19: check arping_ip_list
+# 20: check arping ARP cache entries
+#
+# 30: watch for packet counter changes in promiscuous mode
+#
+# If unsuccessful in levels 18 and above,
+# the tests for higher check levels are run.
+# +if_check () { + # always check link status first + link_status="`get_link_status`" + ocf_log debug "link_status: $link_status (1=up, 0=down)" + [ $link_status -eq 0 ] && return $OCF_NOT_RUNNING + + # watch for packet counter changes + ocf_log debug "watch for packet counter changes" + watch_pkt_counter && return $OCF_SUCCESS + + # check arping ARP cache entries + ocf_log debug "check arping ARP cache entries" + for ip in `get_arp_list`; do + do_arping $ip && return $OCF_SUCCESS + done + + # watch for packet counter changes in promiscios mode +# ocf_log debug "watch for packet counter changes in promiscios mode" + # be sure switch off promiscios mode in any case + # TODO: check first, wether promisc is already on and leave it untouched. +# trap "$IP2UTIL link set dev $NIC promisc off; exit" INT TERM EXIT +# $IP2UTIL link set dev $NIC promisc on +# watch_pkt_counter && return $OCF_SUCCESS +# $IP2UTIL link set dev $NIC promisc off +# trap - INT TERM EXIT + + # looks like it's not working (for whatever reason) + return $OCF_NOT_RUNNING +} + +####################################################################### + +if_usage() { + cat < /dev/null` + sleep $sleep_time 2> /dev/null + runs=$(($runs + 1)) + fi + + if [ $mon_rc -eq $OCF_SUCCESS -a $runs -ne 0 ]; then + ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error" + fi + done + + ocf_log debug "Monitoring return code: $mon_rc" + if [ $mon_rc -eq $OCF_SUCCESS ]; then + set_cib_value 1 + attr_rc=$? + else + ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed." + set_cib_value 0 + attr_rc=$? + fi + + ## The resource should not fail, if the interface is down. It should fail, if the update of the CIB variable has errors. + ## To react on the interface failure you must use constraints based on the CIB variable value, not on the resource itself. 
+    exit $attr_rc
+}
+# if_validate: run check_binary for $IP2UTIL and arping, then check the parameters via if_init
+if_validate() {
+    check_binary $IP2UTIL
+    check_binary arping
+    if_init
+}
+# meta-data and usage/help are handled here, before if_validate runs, so they do not need a configured interface
+case $__OCF_ACTION in
+meta-data) meta_data
+    ;;
+usage|help) if_usage
+    exit $OCF_SUCCESS
+    ;;
+esac
+
+if_validate
+# every action below is dispatched only after if_validate has returned
+case $__OCF_ACTION in
+start) ha_pseudo_resource $OCF_RESOURCE_INSTANCE start
+    exit $?
+    ;;
+stop) attrd_updater -D -n $ATTRNAME    # ATTRNAME was set by if_init; -D presumably deletes the attribute - confirm against attrd_updater
+    ha_pseudo_resource $OCF_RESOURCE_INSTANCE stop
+    exit $?
+    ;;
+monitor|status) if_monitor
+    exit $?
+    ;;
+validate-all) exit $?    # no command runs in between, so $? is presumably still the status of the if_validate call above
+    ;;
+*) if_usage
+    exit $OCF_ERR_UNIMPLEMENTED
+    ;;
+esac