diff --git a/.gitignore b/.gitignore
index 7bdd5cfd8..59df506be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,80 +1,102 @@
 *.swp
 Makefile.in
 aclocal.m4
 autoconf
 autoheader
 autom4te.cache
 automake
 autoscan.log
 compile
 configure
 configure.scan
 config.guess
 config.log
 config.sub
 config.status
 Makefile
 depcomp
 install-sh
 libtoolize
 ltmain.sh
 libtool
 make/stamp-h1
 m4
 make/clusterautoconfig.h*
 missing
 *.pc
 .deps
 .libs
 *.o
 *.la
 *.lo
 *.loT
 rgmanager/src/resources/fs.sh
 rgmanager/src/resources/oracledb.sh
 rgmanager/src/resources/utils/config-utils.sh
 resource-agents-*
 .version
 
+# generated by ./autogen.sh && ./configure
+heartbeat/ocf-binaries
+heartbeat/ocf-directories
+heartbeat/ocf-shellfuncs
+heartbeat/shellfuncs
+include/agent_config.h
+include/config.h
+include/config.h.in
+include/stamp-h1
+include/stamp-h2
+ldirectord/OCF/ldirectord
+ldirectord/init.d/ldirectord
+ldirectord/init.d/ldirectord.debian
+ldirectord/init.d/ldirectord.debian.default
+ldirectord/ldirectord
+ldirectord/systemd/ldirectord.service
+tools/ocf-tester
+tools/ocft/README
+tools/ocft/README.zh_CN
+tools/ocft/caselib
+tools/ocft/ocft
+
 *.cache
 *.upgrade.xml
 py-compile
 ylwrap
 
 # BEAM Entries
 *.beam
 parser-messages
 MISC_ERRORS
 cscope.files
 cscope.out
 patches
 updates
 logs
 
 # OS and Editor Artifacts
 .DS_Store
 .bomb
 *.rej
 *.bz2
 *.gz
 *.xz
 *.sed
 *.diff
 *.patch
 *.gres
 *~
 
 # Misc
 HTML
 TAGS
 GPATH
 GRTAGS
 GSYMS
 GTAGS
 .gres.*
 *.orig
 .gdb_history
 *~ 
 \#*
 .changes
 pacemaker.tar.gz
diff --git a/.travis.yml b/.travis.yml
index 79f775399..e6943fadd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,8 +1,13 @@
 language: bash
-install:
-  - ./ci/install.sh
+sudo: false
+
+addons:
+  apt:
+    sources:
+      - debian-sid
+    packages:
+      - shellcheck
 script:
   - ./ci/build.sh
 notifications:
   email: false
-sudo: required
diff --git a/ChangeLog b/ChangeLog
index 38053273a..371157420 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,736 +1,834 @@
+* Wed Feb  3 2016 resource-agents contributors
+- stable release 3.9.7
+- ldirectord: fix unset failcount error
+- iscsi: add portal check to open_iscsi_get_session_id()
+- galera: use mysql's --tc-heuristic-recover if crash recovery is needed
+- nfsserver: fix monitor for systemd
+
+* Wed Jan 20 2016 resource-agents contributors
+- release candidate 3.9.7 rc1
+- nfsserver.sh: add hostname attribute for NFS export (required for NFSv4+Kerberos support)
+- oradg.sh: new RA for Oracle Data Guard
+- ocf_shellfuncs: suppress bash specific trace_ra log on dash
+- sg_persist: remove uncalled for ocf_run calls
+- multiple RA: replace error log messages with calls to ocf_exit_reason
+- nfsserver: only do redhat specific stuff on redhat
+- exportfs: don't increment fsid for single directory
+- Filesystem: add tmpfs support
+- netfs.sh: move defaults to metadata
+- nfsserver: /var/lock/subsys is non-standard, check for it first
+- nagios: new RA
+- docker: check for errors in the container name
+- mysql: fix grep failure on MySQL 5.6 or higher when checking read_only variable
+- VirtualDomain: new attributes migration_speed and migration_downtime
+- fs: remove not-working tmpfs support
+- vm.sh: add migrate_options parameter
+- nfsserver: Use rpc-statd.service for NFS locking in EXEC_MODE=3 (bsc#955114)
+- nfsserver: Add EXEC_MODE for systemd without nfs-lock.service (bsc#955114)
+- IPaddr2: Add IPv6 DAD collision detection
+- Filesystem: add overlay as supported filesystem
+- ldirectord: dns_check and fallbackcommand enhancements
+- IPaddr2: fix potential syntax error on if-then-else
+- SAPDatabase: add Oracle 12 to list of supported databases (bsc#953991)
+- mysql-common.sh: fix issue where "removing old PID file" wasn't logged
+- mysql-common.sh: when mysql has been stopped, mysql stop returns success
+- mysql.sh: wait up to startup_wait seconds before failing if mysqld startup is slow
+- orainstance.sh: fix 90s wait/killing of databases containing the name of the database being killed, and added cleanup code to kill remaining listener process
+- ip.sh: Use DAD to check for IPv6 address collision
+- iSCSITarget: fix to only create one IQN and add portals to it
+- galera: document the bootstrap flow
+- galera: start joining nodes during 'monitor' to allow long-running SST
+- galera: add support for MYSQL_HOST and MYSQL_PORT from /etc/sysconfig/clustercheck
+- redis: fix password parser
+- pgsql: fix exec_sql errors like "unknown variable select pg_ " in dash
+- pgsql: fix get_my_location() sql regression
+- docker: fix image variable name
+- pgsql: Fix return code override in pgsql_real_start()
+- slapd: add "maxfiles" parameter to set max number of open files (for ulimit -n)
+- redis: use required client password when set
+- send_arp: fix for infiniband, re-merge from upstream iputils arping
+- CTDB: Preserve smb.conf permissions (bsc#935253)
+- lxc: fix emergency stop functionality on 1.0
+- tomcat: use runuser instead of su for SELinux enforcing mode
+- pgsql: use runuser instead of su command for SELinux enforcing mode
+- docker: image name check fixes
+- iSCSITarget: properly create portals for lio-t implementation
+- iSCSILogicalUnit: when deleting a LUN or initiator fails with lio-t, proceed with warning
+- iSCSILogicalUnit: return OCF_NOT_RUNNING on monitor if backing path does not exist
+- iSCSILogicalUnit: add check for leftover target/core entries for lio-t
+- pgsql: delete old replication slot when creating a new slot.
+- Filesystem: support RozoFS
+- orainstance.sh: interpret listener stop results correctly
+- dhcpd: use correct default chroot for RHEL based systems
+- LVM: allow vgck failures if partial_activation is true
+- redis: avoid 0 byte dump.rdb start failures
+- docker: fix container_exist test
+- redis: fixed start operation if replication sync takes > 20 seconds
+- ethmonitor: add link_status_only option for skipping RX counter and arping tests
+- clvm: fix issue with only first option of daemon_options being used
+- IPsrcaddr: return correct error code during stop when misconfigured
+- clvm: activate_vgs option for enable/disable of automatic vg activation
+- galera: properly redetect bootstrap after demote
+- galera: clear last known sequence number any time promote is even attempted
+- asterisk: fix return code
+- galera: retrieve last sequence number without using read-only mode
+- redis: add wait_last_known_master option
+- redis: only connect to active master instances
+- redis: do not attempt to demote if redis is dead
+- redis: reliable shutdown.
+- pgsql: add support for replication slots
+- redis: set executable bit to be able to create docs (make rpm)
+- rabbitmq-cluster: fix rmq_join_list() to only return online nodes
+- rabbitmq-cluster: new RA
+- Filesystem: support overlayfs
+- sg_persist: use default binary setting in meta-data
+- dnsupdate: use nsupdate_opts parameter
+- nfsserver: merge options into existing /etc/sysconfig/nfs
+- portblock: portno param can be a string like 137,138
+- portblock: replace ancient heartbeat config with crm configure
+- portblock: clarify TCP RST vs ICMP port unreachable
+- VirtualDomain: enforce C locale in force_stop
+- redis: retry on unknown error when starting
+- redis: remove stop timeout and add placeholder master during election period
+- CTDB: Change default socket location to CTDB's expected default.
+- multiple RA: make sure that the pidfile directory exist
+- multiple RA: create state-directory writable by the application
+- orainstance.sh: Handle ORA-* error messages
+- redis: new RA
+
 * Thu Jan 29 2015 resource-agents contributors
 - stable release 3.9.6
 - VirtualDomain: add migrate_options parameter
 - VirtualDomain: enforce C locale in status
 - ocf-shellfuncs: add printenv to RA trace
 - nginx: allow different URLs for level 10 and 20 monitor
 
 * Tue Jan 20 2015 resource-agents contributors
 - release candidate 3.9.6 rc1
 - VirtualDomain: add sync_config_on_stop to sync the config to
   other nodes
 - mysql.sh: Allow MySQL to run as user other than mysql
 - fs-lib.sh: Fix missed detection of write failure
 - iscsi: run iscsi discovery only when necessary
 - VirtualDomain: save the config before virsh undefine
   (bnc#891915)
 - sg_persist: new RA
 - ldirectord: Add Install section for systemd unit file.
 - kamilio: new RA
 - findif.sh: Use the most specific matching route
 - LVM: volgrpname is unique (meta-data update)
 - ldirectord: Get correct user for sending email (bnc#910497)
 - iSCSILogicalUnit: do not set write_back when creating
   backstore/block
 - portblock: Feature: reset_tcp_on_unblock_stop
 - nfsserver: prevent error messages on platforms without rpcuser
 - Xen: fix regression with xm and quoting (lf#2671)
 - lxc: re-add LXCpre1.0 logic
 - lxc: fix for missing lxc-ps command
 - Route: some unique attributes are not unique (meta-data)
 - IPaddr2 findif: accept dotted quad netmask
 - VirtualDomain: For Xen, prefer xl to xen-list (boo#901453)
 - Xen: Use xl list $domain return code in status check
   (boo#901453)
 - pgsql: PostgreSQL 9.3 compatibility for unix_socket_directories
 - lvm_by_vg.sh: Only strip tag on stop if we are owner
 - ocf-shellfuncs: set HA_LOGD depending on HA_use_logd
 - mysql: avoid use of check_binary in common validation function.
 - mysql: report error when validation fails during monitor yet
   pid is still active
 - docker: new RA
 - mysql: do not report success on 'stop' if validation fails
 - anything: fix output redirection
 - mysql: fix unexpected operation error that caused by MySQL
   client timeout.
 - Middle: anything: Prevent stop failure, even if the job takes
   time to stop.
 - doc: Add pcs to man page example section
 - multiple RA: exit reason support
 - Build: add --compat-habindir option for HA_BIN backward
   compatibility
 - iSCSITarget: monitor operation must pass before start is
   considered complete
 - iSCSILogicalUnit: monitor operation must pass before start is
   considered complete
 - iSCSILogicalUnit:  Fixes the check for pre-existing LUN ACLs
 - iSCSITarget: fixes syntax error caused by targetcli update
 - Filesystem: when loading kernel modules wait for filesystem to
   initialize
 - ethmonitor: add infiniband status monitoring support
 - Xinetd: refine exit codes when xinetd is stopped
 - Xen: Properly quote domain name in call to create
 - Xen: Use xenstore-ls to get status if available
 - Xen: Replace use of xm with xl (bnc#882548)
 - oracle: fix setting monitor user profile for other languages
   (nls)
 - apache: Revised fix for init script reference on SUSE
   (bnc#884674)
 - VirtualDomain: Try xenstore-ls if no emulator is set
   (bnc#885292)
 - galera: new RA
 - VirtualDomain: Add support for qemu-dm as emulator (bnc#885292)
 - CTDB: add ctdb_rundir parameter and create on startup
 - VirtualDomain: new parameter save_config_on_stop to enable
   saving running VM's configuration
 - fs-lib.sh: Force kill processes with access to shared libraries
   on mount point
 - iSCSILogicalUnit: add targetcli support
 - Filesystem: Add force_unmount option
 - nfsserver: regenerate statd state file before starting nfs
 - nfsnotify: new RA
 - nfsserver: introducing nfs_no_notify option for suppressing
   reboot notifications
 - nfsserver: Keep statd directories synced with ha backup
 - oracle: reset MONUSR password if expired
 - oracle: try as sysdba if monitor as MONUSR fails
 - oracle: Make monitoring user configurable (bnc#850589)
 - oralistener.sh: Do not fail during stop if listener has already
   stopped
 - nfsserver: Add options for explicitly setting nfs daemon ports
 - vsftpd: new RA
 - fs-lib.sh: Fix usage of findmnt to work for filesystems
   containing bind mounts
 - exportfs: allow multiple exports
 - ldirectord: Update .spec file with systemd support (bnc#863250)
 - ldirectord: Add systemd unit file (bnc#863250)
 - nfsserver: Do not require shared info directory when cloned
 - nfsserver: Allow dynamically setting rpc.nfsd options.
 - iscsi: iscsi status fails with open-iscsi with support for
   flash (bnc#878039)
 - Med: rgmanager/fs: typo preventing passing some mount opts
 - nfsserver.sh: add an optional option for the rpc.statd
   listening port
 - Feature: addition of bind-mount resource agent for rgmanager
 - exportfs: Do not fail during monitor and stop operations if dir
   has trailing slash
 - nfsserver: Do not fail monitor if lock services are enabled
 - nfsserver: let systemd service files handle lock notifications
 - vm.sh: 'no_kill' option for preventing auto destruction of vm
   after timeout period
 - vm.sh: Monitor kvm resources without requiring libvirtd to be
   up
 - nfsserver: preserve statd directory permissions during
   sm-notify or else lock recovery fails
 - kamailio: new RA
 - nfsserver.sh: On stop, kill sm-notify so /var/lib/nfs/statd can
   be unmounted
 - db2: Report OCF_ERR_GENERIC instead of OCF_NOT_RUNNING when
   instance is not completely up
 - named.sh: Fix issues running named daemon as a non-root user
 - db2.sh: rgmanager wrapper for the heartbeat db2 agent
 - CTDB: do not fail monitor operation when ctdb socket does not
   exist
 - exportfs: set unlock_on_stop_default=1 (bnc#864263)
 - dnsupdate: new RA
 - clvm: new RA
 - mysql: handle $secs_behind = NULL
 - dhcpd: Added a restart-function
 - high: conntrackd: allow probe to return OCF_RUNNING_MASTER
 - VirtualDomain: check process table for qemu-system-* too, not
   just qemu-kvm
 - VirtualDomain: avoid running "virsh uri" if hypervisor is set
 - apache.sh: Add ability to set custom httpd binary
 - db2: Allow db2 agent to work without crm_master binary
 - VirtualDomain: Attempt to determine vm status even when libvirt
   is unavailable
 - VirtualDomain: Fixes parsing domain name from xml file.
 - fs-lib.sh: Fixes failure to unmount local fs when process runs
   with cwd inside fs mount
 - ldirectord: Fix sockaddr_in6 redefined error
 - Pure-FTPd: Create pid directory if needed
 - VirtualDomain: support more virsh domstate output formats
 - varnish: Added support for ulimit -l and ulimit -n
 - lxc: support up-to-date lxc-ps versions
 - tomcat: Override default tomcat config with resource options
 - nfsserver: nfsserver not starting due to missing etab file
 - tomcat: Avoid unnecessary force kill of tomcat on stop
 - tomcat: Avoid race condition in reading pid file on stop
 - slapd: find the correct default slapd config in fedora
 - tomcat: Detect start script location
 - pgsql: Support for non-standard port and library locations
 - fix netfs unmount/self_fence integration
 - Med: oracledb.sh: Remove quotes around listener name
 - IPaddr2/findif.sh: Do a sanity check only on start and
   validate-all
 - ldirectord: Disable HTTPS SSL certificate hostname checking
 - tomcat: Monitor rotatelogs process and restart when it is
   stopped
 - jboss: Monitor rotatelogs process and restart when it is
   stopped
 - mysql: Fix for the issue of detecting an unconfigured slave
   with empty master_host since setting empty master_host is not
   allowed with 5.5
 - IPaddr2/findif.sh: Robust parameter checking for 'nic'
 - VirtualDomain: Ensure it is possible to manage a libvirt domain
   defined outside of VirtualDomain
 - VirtualDomain: Fix ability to use default libvirt hypervisor
   without explicitly setting agent attribute
 - tomcat-6.sh: Fixes setting TOMCAT_USER correctly
 - tomcat-s.sh: Do not fail on stop if config validation fails.
 - fs-lib.sh: Removes usage of fuser -kvm from fs-lib.sh based
   agents.
 - ldirectord: Use an alarm for LDAP check to ensure it times out
 - exportfs: stop with no directory should succeed
 - Xen: retry domain lookup in repeating monitor and stop
 - Fixes lvm metadata corruption caused when activating by lv
   using tags.
 - ldirector: fix using service name instead of port number
   (bnc#836759)
 - Filesystem: remove SLES10 compatibility code
 - Raid1: disallow md raid arrays as clone resources to avoid data
   corruption
 - apache: put back config file existence test
 - VirtualDomain: Do not attempt graceful shutdown if force_stop
   is enabled
 - Med: oracledb.sh: Fix process name grep in exit_idle
 - VirtualDomain: use virsh create instead of define to start it
   non-persistent
 - eDir88: multiple IP support
 - apache: better handling of not installed apache
 - jboss: add jboss_base_dir parameter to support multi-instances
   on JBoss 6
 - jboss: stop the JBoss daemon by sending a signal on JBoss 6
 - jboss: add run_command parameter to change the start up script
 - jboss: add jboss_version parameter to support JBoss 6
 - tomcat: use root as the default for tomcat_user instead of
   RUNASIS mode
 - tomcat: multiple tomcat instances based on CATALINE_BASE
 - pound: add parameter maxfiles to set ulimit
 - apache: remove unnecessary and imperfect checks from
   validate_all (bnc#827927)
 - pgsql: set only one node into sync mode when using 3 nodes or
   higher
 - Med: oracledb.sh: Set RESTART_RETRIES back to 0
 - Raid1: stop arrays even with block device file missing
   (bnc#821861)
 - oracle/oralsnr: use /bin/sh in sudo calls (bnc#825517)
 - apache: Properly check meta_timeout variable during graceful
   stop
 - Raid1: set MDADM_NO_UDEV appropriately if udev is not used
 - Raid1: wait for udevd to settle (bnc#821861)
 - iSCSILogicalUnit: add tgt specific parameters bstype, bsoflags,
   device_type
 - apache: Attempt graceful stop before -TERM signal
 - LVM: Warn user if initrd is older than lvm.conf when using
   exclusive activation with tags
 - LVM: Exclusive activation without clvmd using filtering with
   tags.
 - named: Attempt to autogen /etc/rndc.key using rndc-confgen tool
 - mysql: really use log setting (bnc#823095)
 - mysql: test properly for failed process start (bnc#823095)
 - Med: Don't preserve SELinux context when copying files to
   /var/lib/nfs/sm
 - Med: Cleanup oracledb.sh rgmanager agent and add support for
   Oracle 11g
 - Med: Cleanup oralistener.sh and add support for Oracle 11g
 - Med: Cleanup orainstance.sh and add support for Oracle 11g
 - fs-lib.sh: Faster filesystem start/stop through use of
   'findmnt' command for 'is_mounted' function
 - named.sh: Addition of update-source option.
 - LVM: Retry exclusive activation after deactivating vg
   cluster-wide
 - LVM: Retry deactivating vg allowing udev to settle
 - ip.sh: Fixes usage of ipv6 addresses with uppercase lettering
 - LVM: Verify setup on start plus verify exclusive activation is
   possible.
 - Route: add IPv6 support
 - build: Place resource state information in /var/run/... by
   default
 - DRBD: remove deprecated drbd agent.
 - fs-lib.sh: Always honor self_fence option when
   force_unmount="on"
 - postgres-8: Shutdown postgres with SIGINT before forcing
   SIGQUIT
 - tools: send_arp.libnet: reuse ARP packets (debian#701914)
 - ethmonitor: correctly detect when the network is unplugged.
 - Raid1: do not test for device existence in the stop operation
   (bnc#821861)
 - mysql: Attempt to auto-detect mysql binary default location.
 - VirtualDomain: Support saving and restoring virtual machine
   snapshot state files
 - nfsserver: Cleanup shared nfs dir mount
 - nfsserver: Enable file locking daemon when systemd is being
   used
 - nfsserver: Maintain SELinux permissions on failover for nfs v3
   lock state
 - nfsserver: Improve support for v3 file lock recovery
 - nfsserver: Add systemd unit-file support when init scripts are
   not present
 - VirtualDomain: Properly detect defined lxc domains
 
 * Thu Feb  7 2013 Linux-HA contributors
 - stable release 3.9.5
 - IPaddr2: support nic:iflabel format in nic parameter
 - VirtualDomain: allow for custom migrateport
 * Wed Jan 30 2013 Linux-HA contributors
 - release candidate 3.9.5 rc1
 - ocf-shellfuncs: RA tracing
 - IPaddr2: make sure that some ARP send program runs
 - pgsql: add check_wal_receiver parameter
 - pgsql: support starting as Hot Standby
 - nfsserver: improve rpc.statd support
 - nfsserver: add option -n for rpc.statd (bnc#794479)
 - nfsserver: make the retry time for sm-notify configurable
 - nfsserver: make sm-notify running in the foreground configurable
 - exportfs: handle '<world>' exportfs embelishment for '*' (bnc#791690)
 - jboss: set JAVA_OPTS correctly
 - pound: use correct default for control-binary
 - pound: No error when stopping pound and no pound is running
 - Squid: support systems using IPv6
 - Xinetd: do not fail in stop if the daemon is not running
 - Xinetd: improve finding Xinetd process (thanks to Vadym Chepkov)
 - SendArp: add background parameter
 - SendArp: fix monitor
 - ocft: print the actual case names when testing
 - ocft: make only the updated test-case file
 - ocft: add "incremental" mode (ocft test -i)
 - include a copy of LGPL license file
 
 * Tue Nov 22 2012 Linux-HA contributors
 - stable release 3.9.4
 - IPaddr2: fix waiting the completion of IPv6 address allocation
 - zabbixserver: RA for zabbix servers management
 * Tue Nov 13 2012 Linux-HA contributors
 - release candidate 3.9.4 rc1
 - ocf-rarun: add the RA driver
 - IPaddr2: use better test for infiniband (bnc#783353)
 - IPaddr2: replace the findif binary by findif.sh
 - IPaddr2: add IPv6 support
 - IPaddr2: really send arps in background if requested
 - IPaddr2: use send_arp instead of ipoibarping if not available
 - Filesystem: include ceph in the list of non-blockdev filesystems
 - Raid1: stop processes using raiddev
 - Raid1: manage multiple arrays
 - Raid1: discover block size for dd (bnc#781137)
 - exportfs: cleanup exportfs cache on stop (bnc#770210)
 - iscsi: don't fail on stop of the iscsi server fails
 - iscsi: use iscsiadm -m session -r in stop
 - iscsi: do discovery in start only
 - iscsi: check session status in monitor
 - iscsi: add try_recovery parameter
 - oracle: ignore password expiry warning in monitor (bnc#770250)
 - oracle: connect as a regular user in monitor
 - pgsql: add option recovery_end_command
 - Xen: repair node_ip_attribute use
 - SAPDatabase: improvement of cleanup of saphostctrl process
 - SAPInstance: monitor-master not advertised (bnc#782482)
 - SAPInstance: don't wait for timeout if the stop attempt failed
 - SAPInstance: failes on sapcontrol message Unauthorized (bnc#782486)
 - mysql-proxy: copy in-depth monitoring action from the mysql resource agent
 - mysql-proxy: add test_table, test_user and test_passwd parameters
 - mysql-proxy: implement "parameters" parameter
 - mysql-proxy: add the 'plugins' parameter
 - mysql-proxy: version specific checks 
 - mysql-proxy: admin plugin auto loading
 - mysql-proxy: perform SELECT for OCF_CHECK_LEVEL 20 only
 - mysql-proxy: create pid/socket directories if needed
 - conntrackd: resync from other hosts in start
 - slapd: Gracefully handle config check during probe
 - tomcat: Correction of the time-out level of the stop processing.
 - tomcat: Correction of the process alive monitoring.
 - ldirectord: Added READDQUIESCENT parameter 
 - sfex: fix sfex_init for 64-bit big endian platforms
 - SendArp: exit with the right code when not properly configured
 - Squid: fix getting PIDs of squid processes (lf#2653)
 - portblock: use end-of-word instead of space at the end of the line
 - named: use pgrep instead of searching in ps(1) output
 - named: fix monitor if named_rootdir is set to /
 - VIPArip: fix exit codes
 - VIPArip: make start idempotent
 - tools: add test-findif.sh
 - tools: add send_ua binary for IPv6 support in IPaddr2
 - ocft: new test for Raid1
 
 * Fri May 25 2012 Linux-HA contributors
 - stable release 3.9.3
 - dhcpd: new RA to manage ISC DHCP servers
 - Filesystem: add nfs4 to the list of well known types
 - IPaddr2: fix regression introduce in d93b5fd, nic=lo always
   "stopped"
 - iSCSILogicalUnit: correctly match for target IQN and backing
   device name (iet and tgt)
 - jboss: implememnt rotating of console log
 - mysql: improve handling of reset slave
 - oracle, oralsnr: get rid of eval
 - slapd: pass bind_dn correctly to ldapsearch
 * Wed May 16 2012 Linux-HA contributors
 - release candidate 3.9.3 rc1
 - asterisk: new resource agent
 - named: new RA to manage bind servers
 - pound: new RA for Pound HTTP/HTTPS reverse-proxy and
   load-balancer
 - rsyslog: new RA to manage rsyslog servers
 - slapd: new RA to manage OpenLDAP servers
 - varnish: new resource agent
 - apache: add support for IPv6 in monitor
 - apache: create /var/run/apache2 if it doesn't exist
 - apache: fix sysconfig includes & enable status for default SUSE
   conf
 - conntrackd: test for socket existence in monitor instead of
   process grep
 - conntrackd: rename parameter "conntrackd" to "binary"
 - CTDB: Add smb_fileid_algorithm parameter (bnc#696978)
 - CTDB: Improve monitor op (check output of ctdb status,
   bnc#712192)
 - CTDB: Set ctdb_start_as_disabled=no by default (bnc#712410,
   required by samba 3.6)
 - exportfs: allow expanding the fsid parameter to produce correct
   exportfs options
 - exportfs: don't grow /var/lib/nfs/rmtab indefinitely
 - exportfs: fix monitor action for special characters and common
   suffixes
 - Filesystem: add support for glusterfs (lf#2620)
 - Filesystem: add tmpfs to the list of supported filesystems
 - Filesystem: allow to force cloning for local mounts
 - Filesystem: don't use direct dd option in monitor depth 20 for
   non-blockdevice fs
 - Filesystem: fix determining if the device is a block device
 - Filesystem: improve read/write checks for CHECK_LEVEL 10, 20
 - Filesystem: repair the fast_stop parameter use (its value was
   always false)
 - Filesystem: support ceph
 - Filesystem: remove a status file only when OCF_CHECK_LEVEL is
   set to 20
 - IPaddr: add back the local_start/stop_script code
 - IPaddr: remove colon at the end of the interface name
 - IPv6addr: always use the provided nic and cidr_netmask when
   specified
 - IPv6addr: handle a link-local address properly in send_ua
 - iscsi: do not rely on iscsid.startup being set correctly
   (bnc#751783)
 - iscsi: proceed if iscsid is not running if iscsid.startup is
   present in iscsid.conf
 - iSCSILogicalUnit: fix default for scsi_sn
 - iSCSITarget: treat an empty "implementation" parameter
   specially
 - jboss: add the java_opts parameter for java options
 - ldirectord: precedence error with perl v5.8.8 in IPv6 code
 - LVM: drop vgck(8) from monitor
 - LVM: force dmevent monitoring for clones
 - LVM: use ls instead of vgdisplay in status
 - lxc: fix LXC_status to work with lxc-0.7.5 or later
 - mysql: improve replication support
 - mysql: check mysql status more thoroughly before stopping
 - mysql: fix validation return codes
 - mysql: support 5.5 slave status message format
 - nfsserver: Support of multiple IP addresses (bnc#684143)
 - nfsserver: don't run sm-notify in foreground (bnc#759616)
 - ocf-shellfuncs: fix loglevel variable scope in ha_log
 - ocft: new tests for named, IPv6addr, oracle, Xinetd
 - ocft: several improvements
 - oracle: improve managing IPC objects
 - oracle: improve matching instance specific files and processes
 - pgsql: support for replication
 - postfix: multiple fixes
 - Raid1: support for multiple MD arrays, as specified in raidconf
 - SAPDatabase: add support for Sybase ASE and SAP HANA database
 - SAPDatabase: correcting the unique values of RAs parameters
 - SAPDatabase: replace method for checking responsiveness of
   saphostexec
 - SAPDatabase: version 2.00 make use of saphostagent
 - SAPInstance : correcting the unique values of RAs parameters
 - slapd: always set the exit code correctly in monitor
 - tomcat: remove pidfile before start, it may prevent some tomcat
   releases from starting
 - VirtualDomain: add a functionality that modifies utilization of
   resource automatically
 - VirtualDomain: if the configuration file is missing on stop
   exit with success
 - VirtualDomain: honor virsh "in shutdown" state
 - Xen: add support for HVM ACPI graceful shutdown
 - Xen: wait in migrate_from for the migration to finish instead
   of bailing out immediately
 - Tools: findif: Use most specific matching route (bnc#740738)
 - Tools: send_arp.libnet: fix for big endian platforms
   (bnc#721334)
 - doc: add the RA developer's guide
 
 * Wed Jun 29 2011 Dejan Muhamedagic <dejan@suse.de> and others
 - stable release 3.9.2
 - ethermon: new resource agent
 - iscsi: fix regression in 3.9.1 for open-iscsi version 2.0-872 (lf#2562)
 - pgsql: fix regression in 3.9.1 in directories on probes
 - VirtualDomain: if there's no config exit on stop with success
 - doc: add sfex_init(8) man page
 * Wed Jun 15 2011 Dejan Muhamedagic <dejan@suse.de> and others
 - stable release 3.9.1
 - ocf-tester: tolerate OCF_ERR_INSTALLED on probes and missing binaries
 - pgsql: improve configuration check and probe handling
 * Wed Jun 01 2011 Dejan Muhamedagic <dejan@suse.de> and others
 - release candidate 3.9.1 rc1
 - first release since establishing joined repository with RHCS agents
 - build: new spec file and autoconf to support both agents' sets
 - build: use ./configure --with-ras-set=linux-ha to configure for
   heartbeat RA set
 - build: create compatibility symlinks in autofoo not in spec
 - build: GNUmakefile removed
 - lxc: new RA to manage lxc linux containers
 - symlink: new RA to manage symbolic links
 - db2: new implementation with master/slave mode
 - oracle: improve oracle process list test (bnc#673027)
 - exportfs: backup and restore rmtab to ensure smooth client
   failover on node failures
 - CTDB: Allow stop to succeed when using pkill on ctdbd (bnc#695829)
 - mysql: --skip-slave-start option is default now
 - mysql: set connect timeout to 10 seconds rather than 1 second
 - mysql: keep replication state (prevents data loss on master reset)
 - mysql: don't rely on state information from pacemaker, but
   check if the instance is in the read-only mode
 - mysql: if test parameters are all set, assume OCF_CHECK_LEVEL=10
 - mysql: support for master/slave for more than two nodes
 - mysql: don't wait for replication to finish, when not replicating
 - mysql: store replication state in separate attributes for each master
 - VirtualDomain: correctly create migration URI when target is an FQDN
 - VirtualDomain: properly wait until domain_name is non-empty
 - ldirectord: add a support of "netmask" directive for IPv6
 - ldirectord: fix fwmark behavior for IPv6
 - ldirectord: ignore children in Net::DNS
 - iscsi: add support for open-iscsi version 2.0-872 (lf#2562)
 - postfix: issue error if 'postfix abort' failed
 - postfix: improve exit codes on installation problems
 - postfix: use monitor to test if postfix works after the start action
 - ocft: fix make command for compatibility with mawk/Debian (lf#2600)
 - ocft: test case for pgsql
 - ocft: test case for postfix
 - ocft: test case for iscsi
 - doc: improve man pages output
 - doc: add examples for master/slave resource agents
 
 * Wed Feb 16 2011 Dejan Muhamedagic <dejan@suse.de> and others
 - stable release 1.0.4
 - ocft: testcases for db2, LVM, and Filesystem
 * Fri Feb 11 2011 Dejan Muhamedagic <dejan@suse.de> and others
 - release candidate 1.0.4
 - add GPLv3 license file (bnc#655700)
 - ocf-shellfuncs: allow ocf_run to return the actual exit code
 - ocf-shellfuncs: handle properly syslog facility set to none (bnc#621818)
 - ocf-shellfuncs: correctly identify root by id only (bnc#602312)
 - RA: add OCF_ROOT/lib/heartbeat directory (development)
 - RA: set the HA_RSCTMP directory to /var/run/resource-agents (lf#2378)
 - build: install jboss
 - conntrackd: new RA
 - exportfs: new RA
 - nginx: new RA
 - fio: new RA for IO load simulation
 - Filesystem: allow cloning of some filesystems as read-only (lf#2440)
 - Filesystem: add fast_stop parameter (lf#2402)
 - Filesystem: Clarify metadata and improve non-clone warning
 - Filesystem: new run_fsck parameter
 - LVM: add partial_activation parameter (lf#2490)
 - IPaddr2: fix reference to Infiniband arping binary (bnc#668447)
 - IPaddr2: optionally flush kernel routing table on interface stop
 - IPaddr2: exit with the right code when not properly configured
 - IPaddr2: exit early and with the right code if the ip parameter is not set
 - IPaddr2: unique_clone_address should work without CIP (lf#2442)
 - IPaddr: return the correct code if interface delete failed
 - IPv6addr: allow link-local addresses in case the interface name is provided
 - IPv6addr: interface index in /proc/net/if_inet6 may be longer than 2
   chars (lf#2462)
 - IPsrcaddr: exit with the right code when not properly configured
 - IPsrcaddr: add the cidr_netmask parameter
 - Tools: findif: differentiate between error conditions
 - nfsserver: fix the default string for the notification parameter
 - nfsserver: don't use -v in the notify cmd with rpc.statd
 - iSCSITarget: fix race for target IDs when using IET (lf#2432)
 - iSCSITarget: follow changed IET access policy
 - Raid1: Support attempting to re-add mirrors on deep monitor action
   (bnc#619121)
 - Raid1: Fix graceful stop code path
 - Raid1: Handle stop for failed arrays properly (bnc#618775)
 - sfex: output log messages also to stderr in sfex_init
 - sfex: add the sfex_stat command
 - sfex: wait in the start and stop actions until sfex_daemon starts/exits
 - Xen: implement stop of a migrating domain (bnc#656227)
 - Xen: check the allow_mem_management boolean properly (bnc#637525)
 - Xen: Always run destroy in stop sequence.
 - Xen: use xen-list command for status check if available (bnc#628735)
 - Xen: use xen-destroy for stop, if available.
 - Xen: Allow node configurable attribute to specify which IP to 
   use for live migration (bnc#628735)
 - VirtualDomain: fix spurious stop failures
 - VirtualDomain: don't timeout in stop before escalating to "forced stop"
 - ManageVE: add migration capability
 - MailTo: don't check if user exists for email address (might be an
   alias or remote)
 - CTDB: Remove hard-coded timeout on start op
 - CTDB: Don't manage Samba and Winbind by default
 - CTDB: Deprecate (and make optional) smb_private_dir param (bnc#623788)
 - tomcat: Ensure name of tomcat resource is only used on start operation
   and expose JAVA_OPTS variable for use
 - tomcat: Fix to ensure default OCF_RESKEY_xx values are observed
 - tomcat: Add CATALINA_BASE parameter, defaults to CATALINA_HOME,
   permits multiple tomcat instances
 - tomcat: Use Tomcat stop TIMEOUT -force to improve stop
 - Dummy: migrate_from/to: correct OCF_RESKEY_CRM_meta_migrate_xxx
   variable names
 - Dummy: make method reload work
 - anything: add the workdir parameter
 - mysql: clone and master-slave functionality
 - mysql: add replication monitoring
 - mysql: check for write permissions after creating pid and socket directory
 - mysql: make client binary path configurable
 - pgsql: cd to pgdata before running commands (fixes permission error)
 - pgsql: add optional username, password, and sqlcode parameters for monitor
 - pgsql: add new "config" parameter
 - pgsql: properly implement pghost parameter
 - pgsql: socketdir parameter to manage non-default UNIX socket directories
 - oracle: reduce output from sqlplus to the last line for queries (bnc#567815)
 - db2: Replace call to db2_local_ps with db2nps
 - db2: guard against a hanging db2stop by spawning this into the
   background. Use db2_kill after grace period.
 - db2: add multi partition support
 - db2: improve behaviour on probes
 - db2: support for v9.x instances (bnc#608952)
 - SAPDatabase,SAPInstance: improve LD_LIBRARY_PATH processing (bnc#640026)
 - SAPInstance: prevent premature expansion of [:upper:] [:lower:] when
   producing sidadm uid
 - SAPInstance: Moved testing of SAP profile directory and START profile
   to a later stage (only when needed), for more robustness
 - SAPInstance: fix return codes in probes
 - SAPInstance: New parameter: SHUTDOWN_METHOD
 - SAPInstance: ensure enqueue failover in monitor_clone on process
   failure
 - SAPInstance: don't rely on op target rc when monitoring clones (lf#2371)
 - SAPDatabase: prevent premature expansion of [:upper:] and [:lower:]
   when producing sidadm/orasid/db2sid uids
 - SAPdatabase: Changed Oracle recovery method from "recover automatic
   database" to "end backup"
 - SAPDatabase: Adapt process search pattern for DB/2 9.5
 - SAPDatabase: start listener only if database processes are found
 - SAPDatabase: avoid continuous output to syslog in monitor with SAP
   7.20 and J2EE_ONLY=1
 - ldirectord: http: connect to server instead of protocol (Debian#594958)
 - ldirectord: add implicit support for submission RFC4409
 - ldirectord: example configuration for a submission virtual service
 - ldirectord: Shutdown write-side of client connection after writing has
   finished
 - ldirectord: port number mismatch of imaps and pops
 - ldirectord: Oracle compatibility
 - ldirectord: don't exit on timeout in HTTP/HTTPS check
 - ldirectord: allow underscore in service name
 - ldirectord: use $1 instead of \1 in pattern replace (bnc#605086)
 - Tools: ocf-tester: Extend to cover initial probe (monitor_0) test.
 - Tools: ocf-tester: set and export some common meta variables (lf#2524)
 - Tools: ocf-tester: meta-data also should never be affected by missing
   binaries.
 - Tools: ocf-tester: show output from the agent in case of error
 
 * Tue Apr 13 2010 Dejan Muhamedagic <dejan@suse.de> and others
 - stable release 1.0.3
 - meta-data: improve timeouts in most resource agents (reduce the number of warnings by the shell)
 - RA: log messages to stderr if attached to a terminal
 - ocf-shellfuncs: tests to check for clone/ms resources
 - ocf-shellfuncs: don't output to stderr if using syslog (prevents double logging from the RA and lrmd)
 - make sure that OCF_RESKEY_CRM_meta_interval is always defined (lf#2284)
 - ocft: new RA test suite
 - VirtualDomain: bail out early if config file can't be read
-  during probe (nbc#593988)
+  during probe (bnc#593988)
 - VirtualDomain: spin on define until we definitely have a domain name
 - VirtualDomain: fix incorrect use of __OCF_ACTION (the stop operation may timeout otherwise)
 - Filesystem: prefer /proc/mounts to /etc/mtab for non-bind mounts (lf#2388)
 - IPaddr2: don't bring the interface down on stop (otherwise IPv6 addresses may be removed)
 - oracle/oralsnr: improve exit codes if the environment isn't valid
 - oracle/oralsnr: improve logging
 - Route: don't assume that OCF_RESKEY_CRM_meta_clone_node_max is set to a number (lf#2375)
 - Route: add route table parameter (lf#2335)
 - sfex: don't use pid file (lf#2363,bnc#585416)
 - SFEX daemon: fix logging
 - ldirectord: fix the configfile default (bnc#589457)
 - drbd: fix metadata (bnc#588684)
 - IPsrcaddr: modify the interface route (lf#2367)
 - ldirectord: Allow multiple email addresses (lf#2168)
 - vmware: fix set_environment() invocation (lf#2342)
 - vmware: updated to version 0.2
 - apache: return the right exit code from monitor (bnc#578628)
 - iSCSILogicalUnit: fix monitor for STGT
 
 * Mon Feb 01 2010 Dejan Muhamedagic <dejan@suse.de> and others
 - stable release 1.0.2
 - EvmsSCC, Evmsd, LinuxSCSI, drbd, pingd: marked as deprecated (lf#2244)
 - CTDB: new resource agent for clustered samba
 - postfix: new resource agent
 - proftpd: new resource agent
 - AoEtarget: new resource agent to export ATA-over-Ethernet (AoE) targets
 - Squid: new resource agent
 - VirtualDomain: new resource agent (manage virtual domains using libvirt/virsh)
 - anything: new resource agent for arbitrary daemons
 - mysql-proxy: new resource agent
 - iSCSITarget/iSCSILogicalUnit: two new resource agents
 - portblock: fast reconnect/tickle ACK (new feature)
 - IPv6addr: new nic and cidr_netmask parameters
 - mysql-proxy: log_level and keepalive parameters
 - Filesystem: implement deep monitor operation
 - apache: monitor operation of depth 10 for web applications (lf#2234)
 - SAPDatabase + SAPInstance: New versions from SAP
 - CTDB: auto-generate cluster-specific part of smb.conf (lf#2308)
 - ClusterMon: don't fail in stop if the process is missing (bnc#569957)
 - Filesystem: allow configuring smbfs mounts as clones
 - IPaddr2: CLUSTERIP/iptables rule not always inserted on failed monitor (lf#2281)
 - IPaddr2: behave if the interface is down (lf#2147)
 - IPaddr2: check binaries when it makes sense
 - IPaddr2: fix invalid default value for OCF_RESKEY_clusterip_hash (bnc#553753)
 - IPaddr2: include netmask in search for the right interface
 - IPaddr2: remove all colons from the mac address before passing it to send_arp (lf#2165)
 - IPsrcaddr: replace 0/0 with proper ip prefix
 - IPv6addr: recognize network masks properly
 - IPv6addr: supply checksum for ICMPv6 packets
 - IPv6addr: ifdef out the ip offset hack for libnet v1.1.4 (lf#2034)
 - IPv6addr: supply checksum for ICMPv6 packets
 - LVM: Make monitor operation quiet in logs (bnc#546353)
 - MailTo: Provide a default for MAILCMD (bnc#534803, bnc#556366)
 - MailTo: allow multiple word subject line
 - Raid1: improve monitor function (bnc#546551)
 - Route: improve validate (lf#2232)
 - Squid: make the regexp match more precisely output of netstat
 - VIParip: Pathname needed to be configurable (lf#1331)
 - VirtualDomain: avoid needlessly invoking "virsh define"
 - VirtualDomain: destroy domain shortly before timeout expiry
 - VirtualDomain: fix forceful stop (lf#2283)
 - VirtualDomain: loop on status if libvirtd is unreachable
 - Xen: Remove instance_attribute "allow_migrate" (bnc#539968)
 - apache: make sure that proxies are not used for monitor
 - iSCSILogicalUnit: add support for SCSI ID, SCSI SN, Vendor ID, and Product ID
 - iSCSILogicalUnit: add support for per-LU parameters
 - iSCSILogicalUnit: set default for SCSI SN, truncate SCSI ID default to 24 bytes
 - iSCSILogicalUnit: use a 16-byte default SCSI ID
 - iSCSITarget, iSCSILogicalUnit: add support for tgt
 - iSCSITarget: reintroduce "tid" parameter
 - iSCSITarget, iSCSILogicalUnit: identify targets by IQN, not by tid
 - iSCSITarget, iSCSILogicalUnit: support LIO
 - iSCSITarget: add support for CHAP authentication
 - iSCSITarget: add support for restricting target access
 - iSCSITarget: be more persistent deleting targets on stop
 - include ldirectord (formerly known as heartbeat-ldirectord)
 - iscsi: replace wrong variable reference (bnc#499291)
 - jboss: Added JBoss support
 - ldirectord: fix setting defaults for configfile and ldirectord (lf#2328)
 - ldirectord: fix various bugs in OCF RA (lf#1949)
 - mysql: escalate stop to KILL if regular shutdown doesn't work
 - mysql: handle monitor and stop properly on invalid environment
 - nfsserver: use default values (lf#2321)
 - nfsserver: validate should not check if nfs_shared_infodir exists (lf#2219)
 - nfsserver: use check_binary properly in validate (lf#2211)
 - nfsserver: exit properly in nfsserver_validate (lf#2173)
 - oracle/oralsnr: export variables properly
 - oracle: drop spurious output from sqlplus
 - pgsql: remove the previous backup_label if it exists
 - portblock: add per-IP filtering capability
 - portblock: fix invalid exit codes on monitor
 - postfix: fix double stop
 - scsi2reservation: fix wrong logic in check for scsi_reserve
 - vmware: make meta-data work and several cleanups (lf#2212)
 - shellfuncs: make the mktemp wrappers work
 - ocf-shellfuncs: add mercurial repository version information
 - ocf-shellfuncs: add ocf_is_probe function
 - doc: add resource agents' man pages including examples
 
 * Thu Oct 23 2008 Lars Marowsky-Bree <lmb@suse.de> and MANY others
 - beta release 2.99.2
 - LVM: stop correctly in case vol group does not exist
 
 * Tue Sep 23 2008 Lars Marowsky-Bree <lmb@suse.de> and MANY others
 - beta release 2.99.1
 
 * Tue Aug 19 2008 Andrew Beekhof <abeekhof@suse.de> and MANY others
 - beta release 2.99.0
diff --git a/ci/build.sh b/ci/build.sh
index 798bd39ee..4c26ab9ce 100755
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -1,46 +1,74 @@
 #!/usr/bin/env bash
-set -eo pipefail
+set -o pipefail
 [[ "${DEBUG:-}" ]] && set -x
 
 declare -i failed
 failed=0
 
+# Codes passed to "shellcheck -e" (suppressed) via ignored_errors below:
+# SC1090: Can't follow non-constant source. Use a directive to specify location.
+# SC2039: In POSIX sh, 'local' is undefined.
+# SC2154: var is referenced but not assigned.
+# Not suppressed, still reported: SC2046 (word splitting), SC2086 (globbing and word splitting).
+ignored_errors="SC1090,SC2039,SC2154"
+
 success() {
-  printf "\r\033[2K  [ \033[00;32mOK\033[0m ] Checking %s...\n" "$1"
+	printf "\r\033[2K  [ \033[00;32mOK\033[0m ] Checking %s...\n" "$1"
+}
+
+warn() {
+	printf "\r\033[2K  [\033[0;33mWARNING\033[0m] Checking %s...\n" "$1"
 }
 
 fail() {
 	printf "\r\033[2K  [\033[0;31mFAIL\033[0m] Checking %s...\n" "$1"
 	failed=1
 }
 
 check() {
-  local script="$1"
-  shellcheck "$script" || fail "$script"
-  success "$script"
+	# Lint one script as POSIX sh and print its findings with an OK/WARNING/FAIL banner.
+	local script="$1" out
+
+	if out="$(shellcheck -s sh -f gcc -x -e "$ignored_errors" "$script" 2>&1)"; then
+		success "$script"
+	# gcc format is "file:line:col: severity: message"; match only the severity field
+	elif grep -q ': error: ' <<<"$out"; then
+		fail "$script"
+	else
+		warn "$script"
+	fi
+	echo "$out"
 }
 
 find_prunes() {
-  local prunes="! -path './.git/*'"
-  if [ -f .gitmodules ]; then
-    while read module; do
-      prunes="$prunes ! -path './$module/*'"
-    done < <(grep path .gitmodules | awk '{print $3}')
-  fi
-  echo "$prunes"
+	local prunes="! -path './.git/*'"
+	if [ -f .gitmodules ]; then
+		while read -r module; do
+			prunes="$prunes ! -path './$module/*'"
+		done < <(grep path .gitmodules | awk '{print $3}')
+	fi
+	echo "$prunes"
 }
 
 find_cmd() {
-  echo "find . -type f -and \( -perm +111 -or -name '*.sh' \) $(find_prunes)"
+	echo "find heartbeat -type f -and \( -perm /111 -or -name '*.sh' \) $(find_prunes)"
 }
 
 check_all_executables() {
-  echo "Checking executables and .sh files..."
-  eval "$(find_cmd)" | while read script; do
-    head=$(head -n1 "$script")
-    check "$script"
-  done
-  exit $failed
+	echo "Checking executables and .sh files..."
+	while read -r script; do
+		head=$(head -n1 "$script")
+		[[ "$head" =~ .*ruby.* ]] && continue
+		[[ "$head" =~ .*zsh.* ]] && continue
+		[[ "$head" =~ ^#compdef.* ]] && continue
+		[[ "$head" =~ ^.*\.c ]] && continue
+		[[ "$head" =~ ^ldirectord.in ]] && continue
+		check "$script"
+	done < <(eval "$(find_cmd)")
+	exit $failed
 }
 
+./autogen.sh || exit 1	# "set -e" is off, so abort explicitly when a build step fails
+./configure || exit 1
+make || exit 1
 check_all_executables
diff --git a/ci/install.sh b/ci/install.sh
deleted file mode 100755
index c66b56c59..000000000
--- a/ci/install.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-set -eo pipefail
-
-main() {
-  local filename="shellcheck_0.3.7-1_amd64.deb"
-  wget "http://ftp.debian.org/debian/pool/main/s/shellcheck/$filename"
-  sudo dpkg -i "$filename"
-}
-
-main
diff --git a/doc/dev-guides/ra-dev-guide.txt b/doc/dev-guides/ra-dev-guide.asc
similarity index 100%
rename from doc/dev-guides/ra-dev-guide.txt
rename to doc/dev-guides/ra-dev-guide.asc
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 5e2889513..43a3f70c8 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -1,174 +1,176 @@
 #
 # doc: Linux-HA resource agents
 #
 # Copyright (C) 2009 Florian Haas
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 # 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 # 
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 #
 
 
 MAINTAINERCLEANFILES    = Makefile.in
 
 EXTRA_DIST 		= $(doc_DATA) $(REFENTRY_STYLESHEET) \
 			  mkappendix.sh ralist.sh
 
 CLEANFILES              = $(man_MANS) $(xmlfiles) metadata-*.xml
 
 STYLESHEET_PREFIX 	?= http://docbook.sourceforge.net/release/xsl/current
 MANPAGES_STYLESHEET 	?= $(STYLESHEET_PREFIX)/manpages/docbook.xsl
 HTML_STYLESHEET 	?= $(STYLESHEET_PREFIX)/xhtml/docbook.xsl
 FO_STYLESHEET 		?= $(STYLESHEET_PREFIX)/fo/docbook.xsl
 REFENTRY_STYLESHEET	?= ra2refentry.xsl
 
 XSLTPROC_OPTIONS 	?= --xinclude
 XSLTPROC_MANPAGES_OPTIONS ?= $(XSLTPROC_OPTIONS)
 XSLTPROC_HTML_OPTIONS 	?= $(XSLTPROC_OPTIONS)
 XSLTPROC_FO_OPTIONS 	?= $(XSLTPROC_OPTIONS)
 
 radir			= $(top_srcdir)/heartbeat
 
 # OCF_ROOT=. is necessary due to a sanity check in ocf-shellfuncs
 # (which tests whether $OCF_ROOT points to a directory
 metadata-%.xml: $(radir)/%
 	OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@
 
 metadata-IPv6addr.xml: ../../heartbeat/IPv6addr
 	OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@
 
 # Please note: we can't name the man pages
 # ocf:heartbeat:<name>. Believe me, I've tried. It looks like it
 # works, but then it doesn't. While make can deal correctly with
 # colons in target names (when properly escaped), it royally messes up
 # when it is deals with _dependencies_ that contain colons. See Bug
 # 12126 on savannah.gnu.org. But, maybe it gets fixed soon, it was
 # first reported in 1995 and added to Savannah in in 2005...
 if BUILD_DOC
 man_MANS	       = ocf_heartbeat_AoEtarget.7 \
                           ocf_heartbeat_AudibleAlarm.7 \
                           ocf_heartbeat_ClusterMon.7 \
                           ocf_heartbeat_CTDB.7 \
                           ocf_heartbeat_Delay.7 \
                           ocf_heartbeat_Dummy.7 \
                           ocf_heartbeat_EvmsSCC.7 \
                           ocf_heartbeat_Evmsd.7 \
                           ocf_heartbeat_Filesystem.7 \
                           ocf_heartbeat_ICP.7 \
                           ocf_heartbeat_IPaddr.7 \
                           ocf_heartbeat_IPaddr2.7 \
                           ocf_heartbeat_IPsrcaddr.7 \
                           ocf_heartbeat_LVM.7 \
                           ocf_heartbeat_LinuxSCSI.7 \
                           ocf_heartbeat_MailTo.7 \
                           ocf_heartbeat_ManageRAID.7 \
                           ocf_heartbeat_ManageVE.7 \
                           ocf_heartbeat_Pure-FTPd.7 \
                           ocf_heartbeat_Raid1.7 \
                           ocf_heartbeat_Route.7 \
                           ocf_heartbeat_SAPDatabase.7 \
                           ocf_heartbeat_SAPInstance.7 \
                           ocf_heartbeat_SendArp.7 \
                           ocf_heartbeat_ServeRAID.7 \
                           ocf_heartbeat_SphinxSearchDaemon.7 \
                           ocf_heartbeat_Squid.7 \
                           ocf_heartbeat_Stateful.7 \
                           ocf_heartbeat_SysInfo.7 \
                           ocf_heartbeat_VIPArip.7 \
                           ocf_heartbeat_VirtualDomain.7 \
                           ocf_heartbeat_WAS.7 \
                           ocf_heartbeat_WAS6.7 \
                           ocf_heartbeat_WinPopup.7 \
                           ocf_heartbeat_Xen.7 \
                           ocf_heartbeat_Xinetd.7 \
                           ocf_heartbeat_anything.7 \
                           ocf_heartbeat_apache.7 \
                           ocf_heartbeat_asterisk.7 \
                           ocf_heartbeat_clvm.7 \
                           ocf_heartbeat_conntrackd.7 \
                           ocf_heartbeat_db2.7 \
                           ocf_heartbeat_dhcpd.7 \
                           ocf_heartbeat_docker.7 \
                           ocf_heartbeat_dnsupdate.7 \
                           ocf_heartbeat_eDir88.7 \
                           ocf_heartbeat_ethmonitor.7 \
                           ocf_heartbeat_exportfs.7 \
                           ocf_heartbeat_fio.7 \
                           ocf_heartbeat_galera.7 \
+                          ocf_heartbeat_garbd.7 \
                           ocf_heartbeat_iSCSILogicalUnit.7 \
                           ocf_heartbeat_iSCSITarget.7 \
                           ocf_heartbeat_iface-bridge.7 \
                           ocf_heartbeat_iface-vlan.7 \
                           ocf_heartbeat_ids.7 \
                           ocf_heartbeat_iscsi.7 \
                           ocf_heartbeat_jboss.7 \
                           ocf_heartbeat_kamailio.7 \
                           ocf_heartbeat_lxc.7 \
                           ocf_heartbeat_mysql.7 \
                           ocf_heartbeat_mysql-proxy.7 \
+                          ocf_heartbeat_nagios.7 \
                           ocf_heartbeat_named.7 \
                           ocf_heartbeat_nfsnotify.7 \
                           ocf_heartbeat_nfsserver.7 \
                           ocf_heartbeat_nginx.7 \
                           ocf_heartbeat_oracle.7 \
                           ocf_heartbeat_oralsnr.7 \
                           ocf_heartbeat_pgsql.7 \
                           ocf_heartbeat_pingd.7 \
                           ocf_heartbeat_portblock.7 \
                           ocf_heartbeat_postfix.7 \
                           ocf_heartbeat_pound.7 \
                           ocf_heartbeat_proftpd.7 \
                           ocf_heartbeat_rabbitmq-cluster.7 \
                           ocf_heartbeat_redis.7 \
                           ocf_heartbeat_rsyncd.7 \
                           ocf_heartbeat_rsyslog.7 \
                           ocf_heartbeat_scsi2reservation.7 \
                           ocf_heartbeat_sfex.7 \
                           ocf_heartbeat_slapd.7 \
                           ocf_heartbeat_sg_persist.7 \
                           ocf_heartbeat_symlink.7 \
                           ocf_heartbeat_syslog-ng.7 \
                           ocf_heartbeat_tomcat.7 \
                           ocf_heartbeat_varnish.7 \
                           ocf_heartbeat_vmware.7 \
                           ocf_heartbeat_zabbixserver.7
 
 if USE_IPV6ADDR_AGENT
 man_MANS           	+= ocf_heartbeat_IPv6addr.7
 endif
 
 xmlfiles		= $(man_MANS:.7=.xml)
 
 %.1 %.5 %.7 %.8: %.xml
 	$(XSLTPROC) \
 	$(XSLTPROC_MANPAGES_OPTIONS) \
 	$(MANPAGES_STYLESHEET) $<
 
 ocf_heartbeat_%.xml: metadata-%.xml $(srcdir)/$(REFENTRY_STYLESHEET)
 	$(XSLTPROC) --novalid \
 	--stringparam package $(PACKAGE_NAME) \
 	--stringparam version $(VERSION) \
 	--output $@ \
 	$(srcdir)/$(REFENTRY_STYLESHEET) $<
 
 ocf_resource_agents.xml: $(xmlfiles) mkappendix.sh
 	./mkappendix.sh $(xmlfiles) > $@
 
 %.html: %.xml
 	$(XSLTPROC) \
 	$(XSLTPROC_HTML_OPTIONS) \
 	--output $@ \
 	$(HTML_STYLESHEET) $<
 
 xml: ocf_resource_agents.xml
 endif
diff --git a/doc/man/mkappendix.sh b/doc/man/mkappendix.sh
index 8f8a6220c..8f3ed3d27 100755
--- a/doc/man/mkappendix.sh
+++ b/doc/man/mkappendix.sh
@@ -1,18 +1,18 @@
 #!/bin/sh
 
 cat <<EOF
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN" "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
 <appendix id="ap-ra-man-pages">
   <title>Resource agent manual pages</title>
 EOF
 
-for manpage in `printf "%s\n" $@ | sort -f`; do
+for manpage in $(printf "%s\n" "$@" | sort -f); do
     cat <<EOF
   <xi:include href="./$manpage" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 EOF
 done
 
 cat <<EOF
 </appendix>
 EOF
diff --git a/doc/man/ralist.sh b/doc/man/ralist.sh
index ef8f528a6..31444b6e2 100755
--- a/doc/man/ralist.sh
+++ b/doc/man/ralist.sh
@@ -1,9 +1,9 @@
 #!/bin/sh
 
 RADIR=$1
 PREFIX=$2
 SUFFIX=$3
 
-for f in `find $RADIR -type f -executable`; do
-    echo ${PREFIX}`basename $f`${SUFFIX}
+find "$RADIR" -type f -executable | while read -r file; do
+    echo "${PREFIX}$(basename "$file")${SUFFIX}"
 done
diff --git a/heartbeat/Delay b/heartbeat/Delay
index 9cfa939d6..f9d303bf8 100755
--- a/heartbeat/Delay
+++ b/heartbeat/Delay
@@ -1,223 +1,223 @@
 #!/bin/sh
 #
 #
 # Support:      linux-ha@lists.linux-ha.org
 # License:      GNU General Public License (GPL)
 #
 #	This script is a test resource for introducing delay.
 #
 #	usage: $0  {start|stop|status|monitor|meta-data}
 #
 #	  OCF parameters are as below:
 #		OCF_RESKEY_startdelay
 #		OCF_RESKEY_stopdelay
 #		OCF_RESKEY_mondelay
 #
 #	
-#		OCF_RESKEY_startdelay defaults to 30 (seconds)
+#		OCF_RESKEY_startdelay defaults to 20 (seconds)
 #		OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay
 #		OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay
 #
 #
 #	This is really a test resource script.
 #
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 #######################################################################
 
 usage() {
   cat <<-!
 	usage: $0 {start|stop|status|monitor|meta-data|validate-all}
 	!
 }
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="Delay">
 <version>1.0</version>
 
 <longdesc lang="en">
 This script is a test resource for introducing delay.
 </longdesc>
 <shortdesc lang="en">Waits for a defined timespan</shortdesc>
 
 <parameters>
 
 <parameter name="startdelay" unique="0" required="0">
 <longdesc lang="en">
 How long in seconds to delay on start operation.
 </longdesc>
 <shortdesc lang="en">Start delay</shortdesc>
-<content type="integer" default="30" />
+<content type="integer" default="20" />
 </parameter>
 
 <parameter name="stopdelay" unique="0"  required="0">
 <longdesc lang="en">
 How long in seconds to delay on stop operation.
 Defaults to "startdelay" if unspecified.
 </longdesc>
 <shortdesc lang="en">Stop delay</shortdesc>
-<content type="integer" default="30" />
+<content type="integer" default="20" />
 </parameter>
 
 <parameter name="mondelay" unique="0"  required="0">
 <longdesc lang="en">
 How long in seconds to delay on monitor operation.
 Defaults to "startdelay" if unspecified.
 </longdesc>
 <shortdesc lang="en">Monitor delay</shortdesc>
-<content type="integer" default="30" />
+<content type="integer" default="20" />
 </parameter>
 </parameters>
 
 <actions>
 <action name="start" timeout="30" />
 <action name="stop" timeout="30" />
 <action name="status" depth="0" timeout="30" interval="10" />
 <action name="monitor" depth="0" timeout="30" interval="10" />
 <action name="meta-data" timeout="5" />
 <action name="validate-all" timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 Delay_stat() {
 	ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} monitor
 }
 
 Delay_Status() {
   if
     Delay_stat
   then
     ocf_log info "Delay is running OK"
     return $OCF_SUCCESS
   else
     ocf_log info "Delay is stopped"
     return $OCF_NOT_RUNNING
   fi
 }
 
 Delay_Monitor() {
   Delay_Validate_All -q
   sleep $OCF_RESKEY_mondelay
   Delay_Status
 }
 
 Delay_Start() {
   if
     Delay_stat
   then
     ocf_log info "Delay already running."
     return $OCF_SUCCESS
   else
     Delay_Validate_All -q
 	ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start
     rc=$?
     sleep $OCF_RESKEY_startdelay
     if
       [ $rc -ne 0 ]
     then
       return $OCF_ERR_PERM
     fi
     return $OCF_SUCCESS
   fi
 }
 
 Delay_Stop() {
   if
     Delay_stat
   then
     Delay_Validate_All -q
 	ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop
     rc=$?
     sleep $OCF_RESKEY_stopdelay
     if
       [ $rc -ne 0 ]
     then
       return $OCF_ERR_PERM
     fi
     return $OCF_SUCCESS
   else
     ocf_log info "Delay already stopped."
     return $OCF_SUCCESS
   fi
 }
 
 # Check if all the arguments are valid numbers, a string is considered valid if:
 # 1. It does not contain any character but digits and period ".";
 # 2. The period "." does not occur more than once
 
 Are_Valid_Numbers() {
   for i in "$@"; do
 	echo $i |grep -v [^0-9.] |grep -q -v [.].*[.]
 	if test $? -ne 0; then
 	  return $OCF_ERR_ARGS
 	fi
   done
   return $OCF_SUCCESS
 }
 
 Delay_Validate_All() {
 # Be quiet when specified -q option _and_ validation succeded
   getopts "q" option
 
   if test $option = "q"; then
 	quiet=yes
   else
 	quiet=no
   fi
   shift $(($OPTIND -1))
 
   if Are_Valid_Numbers $OCF_RESKEY_startdelay $OCF_RESKEY_stopdelay \
 	$OCF_RESKEY_mondelay; then
     if test $quiet = "no"; then
 	echo "Validate OK"
     fi
 # _Return_ on validation success
 	return $OCF_SUCCESS
   else
 	ocf_exit_reason "Some of the instance parameters are invalid"
 # _Exit_ on validation failure
 	exit $OCF_ERR_ARGS
   fi
 }
 
 if [ $# -ne 1 ]; then
   usage
   exit $OCF_ERR_ARGS
 fi
 
-: ${OCF_RESKEY_startdelay=30}
+: ${OCF_RESKEY_startdelay=20}
 : ${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay}
 : ${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay}
 
 case $1 in
   meta-data)		meta_data
 			exit $OCF_SUCCESS
 			;;
   start)		Delay_Start
 			;;
   stop)			Delay_Stop
 			;;
   monitor)		Delay_Monitor
 			;;
   status)		Delay_Status
 			;;
   validate-all)		Delay_Validate_All
 			;;
   usage)		usage
 			exit $OCF_SUCCESS
 			;;
   *)			usage
 			exit $OCF_ERR_ARGS
 			;;
 esac
 exit $?
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index e59414896..96673f90d 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -1,883 +1,883 @@
 #!/bin/sh
 #
 # Support:      linux-ha@lists.linux-ha.org
 # License:      GNU General Public License (GPL)
 # 
 # Filesystem
 #      Description: Manages a Filesystem on a shared storage medium.
 #  Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
 # Original Release: 25 Oct 2000
 #
 # usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data}
 #
 #	OCF parameters are as below:
 #		OCF_RESKEY_device
 #		OCF_RESKEY_directory
 #		OCF_RESKEY_fstype
 #		OCF_RESKEY_options
 #		OCF_RESKEY_statusfile_prefix
 #		OCF_RESKEY_run_fsck
 #		OCF_RESKEY_fast_stop
 #		OCF_RESKEY_force_clones
 #
 #OCF_RESKEY_device    : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0
 #                       Or a -U or -L option for mount, or an NFS mount specification
 #OCF_RESKEY_directory : the mount point for the filesystem
 #OCF_RESKEY_fstype    : optional name of the filesystem type. e.g. ext2
 #OCF_RESKEY_options   : options to be given to the mount command via -o
 #OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring
 #OCF_RESKEY_run_fsck  : fsck execution mode: auto(default)/force/no
 #OCF_RESKEY_fast_stop : fast stop: yes(default)/no
 #OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts
 #                         for each brick in a glusterfs setup
 #
 #
 # This assumes you want to manage a filesystem on a shared (SCSI) bus,
 # on a replicated device (such as DRBD), or a network filesystem (such
 # as NFS or Samba).
 #
 # Do not put this filesystem in /etc/fstab.  This script manages all of
 # that for you.
 #
 # NOTE: If 2 or more nodes mount the same file system read-write, and
 #       that file system is not designed for that specific purpose
 #       (such as GFS or OCFS2), and is not a network file system like
 #       NFS or Samba, then the filesystem is going to become
 #       corrupted.
 #
 #	As a result, you should use this together with the stonith
 #	option and redundant, independent communications paths.
 #
 #	If you don't do this, don't blame us when you scramble your
 #	disk.
 
 #######################################################################
 # Initialization:
 
 # Use $OCF_ROOT/lib/heartbeat as the helper directory unless
 # OCF_FUNCTIONS_DIR is already set, then source ocf-shellfuncs from it.
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # Default location (relative to the mount point) of the status file
 DFLT_STATUSDIR=".Filesystem_status/"
 
 # Operating system name, consulted by several methods
 HOSTOS=$(uname)
 
 # Status file path: <directory>/<prefix><instance>[_<clone>]_<nodename>
 # Without an explicit prefix the file goes into a separate directory.
 prefix=${OCF_RESKEY_statusfile_prefix:-$DFLT_STATUSDIR}
 suffix="${OCF_RESOURCE_INSTANCE}"
 if [ "$OCF_RESKEY_CRM_meta_clone" ]; then
 	suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone"
 fi
 suffix="${suffix}_$(uname -n)"
 STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix
 
 #######################################################################
 
 # Print the one-line usage summary to stdout.
 usage() {
 	printf 'usage: %s {start|stop|status|monitor|validate-all|meta-data}\n' "$0"
 }
 
 # Print the resource agent metadata (XML) to stdout. The here-document is
 # unquoted, so $DFLT_STATUSDIR is expanded into the output.
 # force_unmount is declared as a string because besides "true"/"false" it
 # also accepts the value "safe".
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="Filesystem">
 <version>1.1</version>
 
 <longdesc lang="en">
 Resource script for Filesystem. It manages a Filesystem on a
 shared storage medium. 
 
 The standard monitor operation of depth 0 (also known as probe)
 checks if the filesystem is mounted. If you want deeper tests,
 set OCF_CHECK_LEVEL to one of the following values:
 
 10: read first 16 blocks of the device (raw read)
 
 This doesn't exercise the filesystem at all, but the device on
 which the filesystem lives. This is noop for non-block devices
 such as NFS, SMBFS, or bind mounts.
 
 20: test if a status file can be written and read
 
 The status file must be writable by root. This is not always the
 case with an NFS mount, as NFS exports usually have the
 "root_squash" option set. In such a setup, you must either use
 read-only monitoring (depth=10), export with "no_root_squash" on
 your NFS server, or grant world write permissions on the
 directory where the status file is to be placed.
 </longdesc>
 <shortdesc lang="en">Manages filesystem mounts</shortdesc>
 
 <parameters>
 <parameter name="device" required="1">
 <longdesc lang="en">
 The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.
 </longdesc>
 <shortdesc lang="en">block device</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="directory" required="1">
 <longdesc lang="en">
 The mount point for the filesystem.
 </longdesc>
 <shortdesc lang="en">mount point</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="fstype" required="1">
 <longdesc lang="en">
 The type of filesystem to be mounted.
 </longdesc>
 <shortdesc lang="en">filesystem type</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="options">
 <longdesc lang="en">
 Any extra options to be given as -o options to mount.
 
 For bind mounts, add "bind" here and set fstype to "none".
 We will do the right thing for options such as "bind,ro".
 </longdesc>
 <shortdesc lang="en">options</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="statusfile_prefix">
 <longdesc lang="en">
 The prefix to be used for a status file for resource monitoring
 with depth 20. If you don't specify this parameter, all status
 files will be created in a separate directory.
 </longdesc>
 <shortdesc lang="en">status file prefix</shortdesc>
 <content type="string" default="$DFLT_STATUSDIR" />
 </parameter>
 
 <parameter name="run_fsck">
 <longdesc lang="en">
 Specify how to decide whether to run fsck or not.
 
 "auto"  : decide to run fsck depending on the fstype(default)
 "force" : always run fsck regardless of the fstype
 "no"    : do not run fsck ever.
 </longdesc>
 <shortdesc lang="en">run_fsck</shortdesc>
 <content type="string" default="auto" />
 </parameter>
 
 <parameter name="fast_stop">
 <longdesc lang="en">
 Normally, we expect no users of the filesystem and the stop
 operation to finish quickly. If you cannot control the filesystem
 users easily and want to prevent the stop action from failing,
 then set this parameter to "no" and add an appropriate timeout
 for the stop operation.
 </longdesc>
 <shortdesc lang="en">fast stop</shortdesc>
 <content type="boolean" default="yes" />
 </parameter>
 
 <parameter name="force_clones">
 <longdesc lang="en">
 The use of a clone setup for local filesystems is forbidden
 by default. For special setups like glusterfs, cloning a mount
 of a local device with a filesystem like ext4 or xfs independently
 on several nodes is a valid use case.
 
 Only set this to "true" if you know what you are doing!
 </longdesc>
 <shortdesc lang="en">allow running as a clone, regardless of filesystem type</shortdesc>
 <content type="boolean" default="false" />
 </parameter>
 
 <parameter name="force_unmount">
 <longdesc lang="en">
 This option allows specifying how to handle processes that are
 currently accessing the mount directory.
 
 "true"  : Default value, kill processes accessing mount point
 "safe"  : Kill processes accessing mount point using methods that
           avoid functions that could potentially block during process
           detection 
 "false" : Do not kill any processes.
 
 The 'safe' option uses shell logic to walk the /proc directory
 for pids using the mount point while the default option uses the
 fuser cli tool. fuser is known to perform operations that can potentially
 block if unresponsive nfs mounts are in use on the system.
 </longdesc>
 <shortdesc lang="en">Kill processes before unmount</shortdesc>
 <content type="string" default="true" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start" timeout="60" />
 <action name="stop" timeout="60" />
 <action name="notify" timeout="60" />
 <action name="monitor" depth="0" timeout="40" interval="20" />
 <action name="validate-all" timeout="5" />
 <action name="meta-data" timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 #
 #	Flush the kernel buffers of the device given as $1.
 #	Meant to be called after unmounting and before mounting. It may
 #	not be necessary in 2.4 and later kernels, but it shouldn't hurt
 #	anything either...
 #
 #	Does nothing (and succeeds) when $BLOCKDEV is not available or the
 #	resource is not a block device; otherwise returns blockdev's status.
 #
 flushbufs() {
 	have_binary $BLOCKDEV || return 0
 	[ "$blockdevice" = "yes" ] || return 0
 	$BLOCKDEV --flushbufs $1
 }
 
 # Take advantage of /etc/mtab if present, use portable mount command
 # otherwise. Normalize format to "dev mountpoint fstype".
 # Succeeds when $options contains the whole word "bind".
 is_bind_mount() {
 	echo "$options" | grep -qw bind 2>/dev/null
 }
 # Print the current mounts, one per line, as "dev mountpoint fstype".
 # /proc/mounts is used when it exists and this is not a bind mount, then a
 # readable /etc/mtab, and finally the output of $MOUNT.
 list_mounts() {
 	local mount_table=""
 	if [ -e "/proc/mounts" ] && ! is_bind_mount; then
 		mount_table=/proc/mounts
 	elif [ -f "/etc/mtab" ] && [ -r "/etc/mtab" ]; then
 		mount_table=/etc/mtab
 	fi
 	if [ -z "$mount_table" ]; then
 		# "mount" output: fields 1, 3 and 5 are taken
 		$MOUNT | cut -d' ' -f1,3,5
 		return
 	fi
 	cut -d' ' -f1,2,3 < $mount_table
 }
 
 # If the resource is not yet known to be a block device, look up the
 # device currently mounted on $MOUNTPOINT, store it in DEVICE and set
 # blockdevice=yes when it is a block special file. The listed network
 # and virtual filesystem types are left untouched.
 determine_blockdevice() {
 	[ $blockdevice = "yes" ] && return
 
 	case "$FSTYPE" in
 	nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|none)
 		return 0 ;;
 	esac
 
 	# Get the current real device name, if possible.
 	# (specified devname could be -L or -U...)
 	DEVICE=$(list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1)
 	[ -b "$DEVICE" ] && blockdevice=yes
 	return 0
 }
 
 # Print the mount points located below the path given as $1 (the path
 # itself excluded), in reverse sort order.
 list_submounts() {
 	local parent=$1
 	list_mounts | grep " $parent/" | cut -d' ' -f2 | sort -r
 }
 
 # kernels < 2.6.26 can't handle bind remounts
 #
 # Only acts when $options contains the word "ro": in that case a warning
 # is logged if "uname -r" reports a 2.6.x release with x below 26.
 bind_kernel_check() {
 	echo "$options" | grep -w ro >/dev/null 2>&1 ||
 		return
 	# awk exits with 1 for 2.6.x releases whose third component
 	# (stripped of any non-numeric tail) is lower than 26
 	uname -r | awk -F. '
 	$1==2 && $2==6 {
 		sub("[^0-9].*","",$3);
 		if ($3<26)
 			exit(1);
 	}'
 	[ $? -ne 0 ] &&
 		ocf_log warn "kernel `uname -r` cannot handle read only bind mounts"
 }
 # For bind mounts that carry options besides "bind", remount $MOUNTPOINT
 # with "bind" replaced by "remount" in the options. Returns the status of
 # that remount; in every other case it succeeds without doing anything.
 bind_mount() {
 	if ! is_bind_mount || [ "$options" = "-o bind" ]; then
 		# nothing to remount, make sure to return OK
 		return 0
 	fi
 	bind_kernel_check
 	bind_opts=$(echo $options | sed 's/bind/remount/')
 	$MOUNT $bind_opts $MOUNTPOINT
 }
 
 # Succeeds when $OCF_RESKEY_options contains $1 as a whole word.
 is_option() {
 	echo $OCF_RESKEY_options | grep -qw "$1" 2>/dev/null
 }
 
 # Decide from $OCF_RESKEY_run_fsck (force / no / auto) whether fsck should
 # run. In "auto" mode (also used for an empty value) the answer depends on
 # $FSTYPE. An unknown value is reported, replaced by "auto" and then
 # handled as such.
 is_fsck_needed() {
 	case $OCF_RESKEY_run_fsck in
 	force)
 		return 0 ;;
 	no)
 		return 1 ;;
 	""|auto)
 		;;
 	*)
 		ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'"
 		OCF_RESKEY_run_fsck="auto"
 		;;
 	esac
 
 	# auto: the listed filesystem types are not checked
 	case $FSTYPE in
 	ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs)
 		return 1 ;;
 	esac
 	return 0
 }
 
 # Check that the kernel supports $FSTYPE, loading the module through
 # $MODPROBE when the type is not yet listed in /proc/filesystems.
 # Returns $OCF_SUCCESS when supported or when the check does not apply
 # (OpenBSD, empty FSTYPE, FSTYPE "none"), $OCF_ERR_INSTALLED otherwise.
 fstype_supported()
 {
 	local support="$FSTYPE"
 
 	# Only OpenBSD skips the check; the comparison used to be inverted,
 	# which disabled the check on every other system.
 	if [ "X${HOSTOS}" = "XOpenBSD" ];then
 		# skip checking /proc/filesystems for obsd
 		return $OCF_SUCCESS
 	fi
 
 	if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then
 		: No FSTYPE specified, rely on the system has the right file-system support already 
 		return $OCF_SUCCESS
 	fi
 
 	# support fuse-filesystems (e.g. GlusterFS)
 	case $FSTYPE in
 		fuse.*|glusterfs|rozofs) support="fuse";;
 	esac
 
 	grep -w "$support"'$' /proc/filesystems >/dev/null
 	if [ $? -eq 0 ]; then
 		# found the fs type
 		return $OCF_SUCCESS
 	fi
 
 	# if here, we should attempt to load the module and then
 	# check if the filesystem support exists again.
 	$MODPROBE $support >/dev/null
 	if [ $? -ne 0 ]; then
 		ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module"
 		return $OCF_ERR_INSTALLED
 	fi
 
 	# It is possible for the module to load and not be completely
 	# initialized before we check /proc/filesystems again. Give this a
 	# few tries before giving up entirely.
 	for try in $(seq 5); do
 		grep -w "$support"'$' /proc/filesystems >/dev/null
 		if [ $? -eq 0 ] ; then
 			# yes. found the filesystem after doing the modprobe
 			return $OCF_SUCCESS
 		fi
 		ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again"
 		sleep 1
 	done
 
 	ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems"
 	return $OCF_ERR_INSTALLED
 }
 
 
 #
 # START: Start up the filesystem
 #
 # Succeeds right away when Filesystem_status reports the mount point as
 # mounted. Otherwise checks filesystem support, optionally runs fsck on
 # block devices, creates the mount point if needed and mounts.
 # Note the mix of "exit" (missing support, device or directory) and
 # "return" (fsck or mount failure) on the error paths.
 #
 Filesystem_start()
 {
 	# See if the device is already mounted.
 	if Filesystem_status >/dev/null 2>&1 ; then
 		ocf_log info "Filesystem $MOUNTPOINT is already mounted."
 		return $OCF_SUCCESS
 	fi
 
 	fstype_supported || exit $OCF_ERR_INSTALLED
 
 	# Check the filesystem & auto repair.  
 	# NOTE: Some filesystem types don't need this step...  Please modify
 	#       accordingly
 
 	if [ $blockdevice = "yes" ]; then
 		if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then
 			ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
 			exit $OCF_ERR_INSTALLED
 		fi
 
 		if is_fsck_needed; then
 			ocf_log info  "Starting filesystem check on $DEVICE"
 			if [ -z "$FSTYPE" ]; then
 				$FSCK -p $DEVICE
 			else
 				$FSCK -t $FSTYPE -p $DEVICE
 			fi
 
 			# NOTE: if any errors at all are detected, it returns non-zero
 			# if the error is >= 4 then there is a big problem
 			if [ $? -ge 4 ]; then
 				ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE"
 				return $OCF_ERR_GENERIC
 			fi
 		fi
 	fi
 
 	# Create the mount point when it does not exist yet
 	[ -d "$MOUNTPOINT" ] ||
 		ocf_run mkdir -p $MOUNTPOINT
 	if [ ! -d "$MOUNTPOINT" ] ; then
 		ocf_exit_reason "Couldn't find directory  [$MOUNTPOINT] to use as a mount point"
 		exit $OCF_ERR_INSTALLED
 	fi
 
 	flushbufs $DEVICE
 	# Mount the filesystem.
 	# The status of the selected branch is examined right after "esac".
 	case "$FSTYPE" in
 		none) $MOUNT $options $DEVICE $MOUNTPOINT &&
 			bind_mount
 			;;
 		"") $MOUNT $options $DEVICE $MOUNTPOINT ;;
 		*) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;;
 	esac
 
 	if [ $? -ne 0 ]; then
-		ocf_exit_reason "Couldn't mount filesystem $DEVICE on $MOUNTPOINT"
+		ocf_exit_reason "Couldn't mount device [$DEVICE] as $MOUNTPOINT"
 		return $OCF_ERR_GENERIC
 	fi
 	return $OCF_SUCCESS
 }
 # end of Filesystem_start
 
 # Print the ids of the processes using the directory given as $1.
 # With force_unmount true, fstat (OpenBSD) or $FUSER is asked; with
 # force_unmount "safe", /proc is searched for symlinks and memory maps
 # pointing into the directory. Any other setting prints nothing.
 get_pids()
 {
 	local dir=$1
 	local procs
 	local mmap_procs
 
 	if ocf_is_true  "$FORCE_UNMOUNT"; then
 		if [ "X${HOSTOS}" = "XOpenBSD" ];then
 			fstat | grep "$dir" | awk '{print $3}'
 		else
 			$FUSER -m "$dir" 2>/dev/null
 		fi
 	elif [ "$FORCE_UNMOUNT" = "safe" ]; then
 		procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}')
 		# processes may vanish while being scanned, hence the 2>/dev/null
 		mmap_procs=$(grep " ${dir}" /proc/[0-9]*/maps 2>/dev/null | awk -F/ '{print $3}')
 		# the data is passed as arguments, never as the format string
 		printf '%s\n%s' "${procs}" "${mmap_procs}" | sort | uniq
 	fi
 }
 
 # Send signal $2 to every process that get_pids reports for directory $1,
 # logging each one. Logs a note when there is nothing to signal.
 signal_processes() {
 	local dir=$1
 	local sig=$2
 	local victims victim
 	# fuser returns a non-zero return code if none of the
 	# specified files is accessed or in case of a fatal
 	# error.
 	victims=$(get_pids "$dir")
 	if [ -n "$victims" ]; then
 		for victim in $victims; do
 			ocf_log info "sending signal $sig to: $(ps -f $victim | tail -1)"
 			kill -s $sig $victim
 		done
 		return
 	fi
 	ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'"
 }
 # Run $UMOUNT on the mount point given as $1 and judge the outcome by
 # whether it still shows up in list_mounts afterwards.
 try_umount() {
 	local target=$1
 	$UMOUNT $umount_force $target
 	if list_mounts | grep -q " $target " >/dev/null 2>&1; then
 		return $OCF_ERR_GENERIC
 	fi
 	ocf_log info "unmounted $target successfully"
 	return $OCF_SUCCESS
 }
 # Unmount $1, retrying once per second. $2 is the time budget: half of it
 # is spent signalling the users of the mount point with TERM, the other
 # half with KILL. Returns $OCF_SUCCESS as soon as an unmount succeeds,
 # $OCF_ERR_GENERIC when all attempts are used up.
 fs_stop() {
 	local mnt=$1 budget=$2 sig attempts
 	for sig in TERM KILL; do
 		attempts=$((budget/2)) # try half time with TERM
 		while [ $attempts -gt 0 ]; do
 			if try_umount $mnt; then
 				return $OCF_SUCCESS
 			fi
 			ocf_exit_reason "Couldn't unmount $mnt; trying cleanup with $sig"
 			signal_processes $mnt $sig
 			attempts=$((attempts-1))
 			sleep 1
 		done
 	done
 	return $OCF_ERR_GENERIC
 }
 
 #
 # STOP: Unmount the filesystem
 #
 # Unmounts everything mounted below $MOUNTPOINT and then $MOUNTPOINT
 # itself, and finally flushes the device buffers. Returns $OCF_SUCCESS
 # when nothing was mounted, otherwise the result of the last unmount.
 #
 Filesystem_stop()
 {
 	# See if the device is currently mounted
 	Filesystem_status >/dev/null 2>&1
 	if [ $? -eq $OCF_NOT_RUNNING ]; then
 		# Already unmounted, wonderful.
 		rc=$OCF_SUCCESS
 	else
 		# Wipe the status file, but continue with a warning if
 		# removal fails -- the file system might be read only
 		if [ $OCF_CHECK_LEVEL -eq 20 ] && ! rm -f ${STATUSFILE}; then
 			ocf_log warn "Failed to remove status file ${STATUSFILE}."
 		fi
 
 		# Determine the real blockdevice this is mounted on (if
 		# possible) prior to unmounting.
 		determine_blockdevice
 
 		# For networked filesystems, there's merit in trying -f:
 		case "$FSTYPE" in
 		nfs4|nfs|cifs|smbfs) umount_force="-f" ;;
 		esac
 
 		# Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
 		local timeout
 		for SUB in $(list_submounts $MOUNTPOINT) $MOUNTPOINT; do
 			ocf_log info "Trying to unmount $SUB"
 			if ! ocf_is_true "$FAST_STOP"; then
 				# operation timeout is in milliseconds
 				timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"}
 				timeout=$((timeout/1000))
 			else
 				timeout=6
 			fi
 			fs_stop $SUB $timeout
 			rc=$?
 			[ $rc -eq $OCF_SUCCESS ] ||
 				ocf_exit_reason "Couldn't unmount $SUB, giving up!"
 		done
 	fi
 
 	flushbufs $DEVICE
 
 	return $rc
 }
 # end of Filesystem_stop
 
 #
 # STATUS: is the filesystem mounted or not?
 #
 # Returns $OCF_SUCCESS when $MOUNTPOINT appears in list_mounts and
 # $OCF_NOT_RUNNING otherwise. The result is logged only for the
 # "status" operation. Sets the global variables rc and msg.
 #
 Filesystem_status()
 {
 	rc=$OCF_NOT_RUNNING
 	msg="$MOUNTPOINT is unmounted (stopped)"
 	if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then
 		rc=$OCF_SUCCESS
 		msg="$MOUNTPOINT is mounted (running)"
 	fi
 
 	# Special case "monitor" to check whether the UUID cached and
 	# on-disk still match?
 	if [ "$OP" = "status" ]; then
 		ocf_log info "$msg"
 	fi
 
 	return $rc
 }
 # end of Filesystem_status
 
 
 # Note: the read/write tests below will stall in case the
 # underlying block device (or in the case of a NAS mount, the
 # NAS server) has gone away. In that case, if I/O does not
 # return to normal in time, the operation hits its timeout
 # and it is up to the CRM to initiate appropriate recovery
 # actions (such as fencing the node).
 #
 # MONITOR 10: read the device
 #
 # Reads one 4k block from $DEVICE with dd in direct I/O mode.
 # A no-op (success) when the resource is not a block device.
 #
 Filesystem_monitor_10()
 {
 	if [ "$blockdevice" = "no" ] ; then
 		ocf_log warn "$DEVICE is not a block device, monitor 10 is noop"
 		return $OCF_SUCCESS
 	fi
 	dd_opts="iflag=direct bs=4k count=1"
 	# keep dd's stderr for the log, discard the data that was read
 	if err_output=$(dd if=$DEVICE $dd_opts 2>&1 >/dev/null); then
 		return $OCF_SUCCESS
 	fi
 	ocf_exit_reason "Failed to read device $DEVICE"
 	ocf_log err "dd said: $err_output"
 	return $OCF_ERR_GENERIC
 }
 #
 # MONITOR 20: write and read a status file
 #
 # Writes $OCF_RESOURCE_INSTANCE into $STATUSFILE with dd (creating the
 # directory when needed), then checks that the file exists and can be
 # read back.
 #
 Filesystem_monitor_20()
 {
 	case "$blockdevice" in
 	no)
 		# O_DIRECT not supported on cifs/smbfs
 		dd_opts="oflag=sync bs=4k conv=fsync,sync"
 		;;
 	*)
 		# Writing to the device in O_DIRECT mode is imperative
 		# to bypass caches.
 		dd_opts="oflag=direct,sync bs=4k conv=fsync,sync"
 		;;
 	esac
 	status_dir=$(dirname $STATUSFILE)
 	[ -d "$status_dir" ] || mkdir -p "$status_dir"
 	if ! err_output=$(echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1); then
 		ocf_exit_reason "Failed to write status file ${STATUSFILE}"
 		ocf_log err "dd said: $err_output"
 		return $OCF_ERR_GENERIC
 	fi
 	if ! test -f ${STATUSFILE}; then
 		ocf_exit_reason "Cannot stat the status file ${STATUSFILE}"
 		return $OCF_ERR_GENERIC
 	fi
 	if ! cat ${STATUSFILE} > /dev/null; then
 		ocf_exit_reason "Cannot read the status file ${STATUSFILE}"
 		return $OCF_ERR_GENERIC
 	fi
 	return $OCF_SUCCESS
 }
 # MONITOR: report the mount status and, when the filesystem is mounted and
 # $OCF_CHECK_LEVEL is above 0, run the deeper check for that level
 # (10 or 20). Any other level yields $OCF_ERR_CONFIGURED.
 Filesystem_monitor()
 {
 	Filesystem_status
 	rc=$?
 	[ $rc -eq $OCF_SUCCESS ] || return $rc
 
 	if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then
 		case "$OCF_CHECK_LEVEL" in
 		10)
 			Filesystem_monitor_10
 			rc=$?
 			;;
 		20)
 			Filesystem_monitor_20
 			rc=$?
 			;;
 		*)
 			ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"
 			rc=$OCF_ERR_CONFIGURED
 			;;
 		esac
 	fi
 	return $rc
 }
 # end of Filesystem_monitor
 
 
 #
 #	VALIDATE_ALL: Are the instance parameters valid?
 #	FIXME!!  The only part that's useful is the return code.
 #	This code always returns $OCF_SUCCESS (!)
 #
 #	Problems found (missing mount point, apparently unsupported
 #	filesystem type) are only logged. check_binary is called for dd
 #	when $OCF_CHECK_LEVEL is above 0.
 #
 Filesystem_validate_all()
 {
 	# Quoted on purpose: an empty $MOUNTPOINT must stay a (null)
 	# operand of the test instead of disappearing from it.
 	if [ -n "$MOUNTPOINT" -a ! -d "$MOUNTPOINT" ]; then
 		ocf_log warn "Mountpoint $MOUNTPOINT does not exist"
 	fi
 
 	# Check if the $FSTYPE is workable
 	# NOTE: Without inserting the $FSTYPE module, this step may be imprecise
 	# TODO: This is Linux specific crap.
 	if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then
 		cut -f2 /proc/filesystems |grep -q "^$FSTYPE\$"
 		if [ $? -ne 0 ]; then
 			modpath=/lib/modules/$(uname -r)
 			moddep=$modpath/modules.dep
 			# Do we have $FSTYPE in modules.dep?
 			cut -d' ' -f1 $moddep |grep -q "^$modpath.*$FSTYPE\.k\?o:$"
 			if [ $? -ne 0 ]; then
 				ocf_log info "It seems we do not have $FSTYPE support"
 			fi
 		fi
 	fi
 
 	# If we are supposed to do monitoring with status files, then
 	# we need a utility to write in O_DIRECT mode.
 	if [ $OCF_CHECK_LEVEL -gt 0 ]; then
 		check_binary dd
 		# Note: really old coreutils version do not support
 		# the "oflag" option for dd. We don't check for that
 		# here. In case dd does not support oflag, monitor is
 		# bound to fail, with dd spewing an error message to
 		# the logs. On such systems, we must do without status
 		# file monitoring.
 	fi
 
 	#TODO: How to check the $options ?
 	return $OCF_SUCCESS
 }
 
 #
 # set the blockdevice variable to "no" or "yes"
 #
 # It stays "no" for the listed network/virtual filesystem types, for
 # loop mounts, for a $DEVICE that starts with "-" and for a $DEVICE
 # that is a directory. /dev/null counts as a block device.
 #
 set_blockdevice_var() {
 	blockdevice=no
 
 	# these are definitely not block devices
 	case $FSTYPE in
 	nfs4|nfs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs) return;;
 	esac
 
 	# test the function's status directly rather than executing its
 	# (empty) output through a command substitution
 	if is_option "loop"; then
 		return
 	fi
 
 	case $DEVICE in
 	-*) # Oh... An option to mount instead...  Typically -U or -L
 		;;
 	/dev/null) # Special case for BSC
 		blockdevice=yes
 		;;
 	*)
 		# a missing device is only worth a warning outside of "start"
 		if [ ! -b "$DEVICE"  -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then
 			ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
 		fi
 		if [ ! -d "$DEVICE" ]; then
 			blockdevice=yes
 		fi
 		;;
 	esac
 }
 
 # Exactly one argument, the operation, is expected
 [ $# -eq 1 ] || {
 	usage
 	exit $OCF_ERR_ARGS
 }
 
 # Take over the OCF_RESKEY_ environment variables...
 # force_unmount defaults to "yes" when unset or empty
 FORCE_UNMOUNT=${OCF_RESKEY_force_unmount:-yes}
 
 DEVICE=$OCF_RESKEY_device
 FSTYPE=$OCF_RESKEY_fstype
 # $options is only set when mount options were configured
 if [ -n "$OCF_RESKEY_options" ]; then
 	options="-o $OCF_RESKEY_options"
 fi
 FAST_STOP=${OCF_RESKEY_fast_stop:="yes"}
 
 OP=$1
 
 # meta-data and usage do not require instance parameters
 if [ "$OP" = "meta-data" ]; then
 	meta_data
 	exit $OCF_SUCCESS
 elif [ "$OP" = "usage" ]; then
 	usage
 	exit $OCF_SUCCESS
 fi
 
 # Everything else needs a device
 if [ -z "$DEVICE" ]; then
 	ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed"
 	exit $OCF_ERR_CONFIGURED
 fi
 
 set_blockdevice_var
 
 # Normalize instance parameters:
 
 # It is possible that OCF_RESKEY_directory has one or even multiple trailing "/".
 # But the output of `mount` and /proc/mounts do not.
 if [ -n "$OCF_RESKEY_directory" ]; then
 	MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's|/*$||')
 	[ -n "$MOUNTPOINT" ] || MOUNTPOINT=/
 	# At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
 	# TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
 	# kill the whole system. Is that a good idea?
 elif [ X$OP = "Xstart" -o $blockdevice = "no" ]; then
 	# a missing directory is fatal for start and for non-block devices
 	ocf_exit_reason "Please specify the directory"
 	exit $OCF_ERR_CONFIGURED
 fi
 
 # Check to make sure the utilities are found
 # (modprobe and fuser are not required on OpenBSD)
 if [ "X${HOSTOS}" != "XOpenBSD" ];then
 	check_binary $MODPROBE
 	check_binary $FUSER
 fi
 check_binary $FSCK
 check_binary $MOUNT
 check_binary $UMOUNT
 
 # monitor runs frequently, so it is not announced in the log
 if [ "$OP" != "monitor" ]; then
 	ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT"
 fi
 
 # Operations handled here exit with the status of their handler; any
 # other operation continues below.
 handler=
 case $OP in
 	status)       handler=Filesystem_status ;;
 	monitor)      handler=Filesystem_monitor ;;
 	validate-all) handler=Filesystem_validate_all ;;
 	stop)         handler=Filesystem_stop ;;
 esac
 if [ -n "$handler" ]; then
 	$handler
 	exit $?
 fi
 
 # Classify how safe it is to run this filesystem as a clone:
 #   0 = not allowed, 1 = considered safe, 2 = allowed with a warning
 # A read-only mount starts out as 2; the filesystem type may override it.
 CLUSTERSAFE=0
 is_option "ro" &&
 	CLUSTERSAFE=2
 
 case $FSTYPE in
 nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs)
 	CLUSTERSAFE=1 # this is kind of safe too
 	;;
 # add here CLUSTERSAFE=0 for all filesystems which are not
 # cluster aware and which, even if when mounted read-only,
 # could still modify parts of it such as journal/metadata
 ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs)
 	if ocf_is_true "$OCF_RESKEY_force_clones"; then
 		CLUSTERSAFE=2
 	else
 		CLUSTERSAFE=0 # these are not allowed
 	fi
 	;;
 esac
 
 # The classification only matters when ocf_is_clone succeeds: level 0
 # refuses to continue, level 2 merely warns.
 if ocf_is_clone; then
 	case $CLUSTERSAFE in
 	0)
 		ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
 		ocf_log err "DO NOT RUN IT AS A CLONE!"
 		ocf_log err "Politely refusing to proceed to avoid data corruption."
 		exit $OCF_ERR_CONFIGURED
 		;;
 	2)
 		ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!"
 		if ocf_is_true "$OCF_RESKEY_force_clones"; then
 			ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so."
 		else
 			ocf_log warn "But we'll let it run because it is mounted read-only."
 			ocf_log warn "Please make sure that it's meta data is read-only too!"
 		fi
 		;;
 	esac
 fi
 
 # "start" is the only operation handled here; anything else is
 # reported as unimplemented.
 case $OP in
 	start) Filesystem_start
 		;;
 	*) usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 	esac
 exit $?
 
 
diff --git a/heartbeat/LVM b/heartbeat/LVM
index 5d202c6f2..49ebce85c 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -1,713 +1,713 @@
 #!/bin/sh
 #
 # 
 # LVM
 #
 # Description:	Manages an LVM volume as an HA resource
 #
 #
 # Author:	Alan Robertson
 # Support:	linux-ha@lists.linux-ha.org
 # License:	GNU General Public License (GPL)
 # Copyright:	(C) 2002 - 2005 International Business Machines, Inc.
 #
 #	This code significantly inspired by the LVM resource
 #	in FailSafe by Lars Marowsky-Bree
 #
 #
 # An example usage in /etc/ha.d/haresources: 
 #			 node1	10.0.0.170 ServeRAID::1::1 LVM::myvolname
 #
 # See usage() function below for more details...
 #
 #		OCF parameters are as below:
 #		OCF_RESKEY_volgrpname
 #		
 #######################################################################
 # Initialization:
 
 # Use $OCF_ROOT/lib/heartbeat as the helper directory unless
 # OCF_FUNCTIONS_DIR is already set, then source ocf-shellfuncs from it.
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 #######################################################################
 
 
 # Print the usage text; the supported methods come from LVM_methods and
 # are joined with "|".
 usage() {
 	methods=$(echo $(LVM_methods) | tr ' ' '|')
 	cat <<EOF
 	usage: $0 $methods
 
 	$0 manages an	Linux Volume Manager volume (LVM) as an HA resource
 
 	The 'start' operation brings the given volume online
 	The 'stop' operation takes the given volume offline
 	The 'status' operation reports whether the volume is available
 	The 'monitor' operation reports whether the volume seems present
 	The 'validate-all' operation checks whether the OCF parameters are valid
 	The 'meta-data' operation show meta data 
 	The 'methods' operation reports on the methods $0 supports
 
 EOF
 }
 
 # default for "tag"
 OUR_TAG="pacemaker"
 
 # Print the resource agent metadata (XML) to stdout. The here-document is
 # unquoted, so $OUR_TAG is expanded into the output.
 meta_data() {
 	cat <<EOF
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="LVM">
 <version>1.0</version>
 
 <longdesc lang="en">
 Resource script for LVM. It manages an Linux Volume Manager volume (LVM) 
 as an HA resource. 
 </longdesc>
 <shortdesc lang="en">Controls the availability of an LVM Volume Group</shortdesc>
 
 <parameters>
 <parameter name="volgrpname" unique="1" required="1">
 <longdesc lang="en">
 The name of volume group.
 </longdesc>
 <shortdesc lang="en">Volume group name</shortdesc>
 <content type="string" default="" />
 </parameter>
 <parameter name="exclusive" unique="0" required="0">
 <longdesc lang="en">
 If set, the volume group will be activated exclusively.  This option works one of
 two ways.  If the volume group has the cluster attribute set, then the volume group
 will be activated exclusively using clvmd across the cluster.  If the cluster attribute
 is not set, the volume group will be activated exclusively using a tag and the volume_list 
 filter. When the tag option is in use, the volume_list in lvm.conf must be initialized. This 
 can be as simple as setting 'volume_list = []' depending on your setup.
 </longdesc>
 <shortdesc lang="en">Exclusive activation</shortdesc>
 <content type="boolean" default="false" />
 </parameter>
 
 <parameter name="tag" unique="0" required="0">
 <longdesc lang="en">
 If "exclusive" is set on a non clustered volume group, this overrides the tag to be used.
 </longdesc>
 <shortdesc lang="en">Exclusive activation tag</shortdesc>
 <content type="string" default="$OUR_TAG" />
 </parameter>
 
 <parameter name="partial_activation" unique="0" required="0">
 <longdesc lang="en">
 If set, the volume group will be activated partially even with some
 physical volumes missing. It helps to set to true when using mirrored
 logical volumes.
 </longdesc>
 <shortdesc lang="en">Activate VG partially when missing PVs</shortdesc>
 <content type="string" default="false" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start" timeout="30" />
 <action name="stop" timeout="30" />
 <action name="status" timeout="30" />
 <action name="monitor" depth="0" timeout="30" interval="10" />
 <action name="methods" timeout="5" />
 <action name="meta-data" timeout="5" />
 <action name="validate-all" timeout="5" />
 </actions>
 </resource-agent>
 EOF
 }
 
 #
 # methods: What methods/operations do we support?
 #
 # Prints one method name per line.
 #
 LVM_methods() {
 	printf '\t%s\n' \
 		start \
 		stop \
 		status \
 		monitor \
 		methods \
 		validate-all \
 		meta-data \
 		usage
 }
 
 ##
 # Prints the activation mode of the volume group
 #
 # 0 = normal (non-exclusive) local activation
 # 1 = tagged-exclusive activation
 # 2 = clvm-exclusive activation
 #
 # The mode is worked out only while VG_MODE is still empty.
 ##
 VG_MODE=
 get_vg_mode()
 {
 	if [ -z "$VG_MODE" ]; then
 		VG_MODE=0
 		if ocf_is_true "$OCF_RESKEY_exclusive"; then
 			# a "c" as sixth character of the VG attributes
 			# selects mode 2
 			case $(vgs -o attr --noheadings $OCF_RESKEY_volgrpname | tr -d ' ') in
 			?????c*)
 				VG_MODE=2 ;;
 			*)
 				VG_MODE=1 ;;
 			esac
 		fi
 	fi
 
 	echo "$VG_MODE"
 }
 
 ##
 # Verify tags setup
 #
 # Returns $OCF_SUCCESS when lvm.conf is usable for tag based exclusive
 # activation, $OCF_ERR_GENERIC otherwise.
 ##
 verify_tags_environment()
 {
 	##
 	# The volume_list must be initialized to something in order to
 	# guarantee our tag will be filtered on startup
 	##
 	lvm dumpconfig activation/volume_list || {
 		ocf_log err  "LVM: Improper setup detected"
 		ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
 		return $OCF_ERR_GENERIC
 	}
 
 	##
 	# Our tag must _NOT_ be in the volume_list.  This agent
 	# overrides the volume_list during activation using the
 	# special tag reserved for cluster activation
 	##
 	lvm dumpconfig activation/volume_list | grep -e "\"@$OUR_TAG\"" -e "\"${OCF_RESKEY_volgrpname}\""
 	if [ $? -eq 0 ]; then
 		ocf_log err "LVM:  Improper setup detected"
 		ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"$OUR_TAG\", or volume group, $OCF_RESKEY_volgrpname"
 		return $OCF_ERR_GENERIC
 	fi
 
 	return $OCF_SUCCESS
 }
 
 # Warn when no image below /boot is newer than /etc/lvm/lvm.conf.
 # Nothing is checked if /boot holds no *.img file for the running kernel.
 check_initrd_warning()
 {
 	# First check to see if there is an initrd img we can safely
 	# compare timestamps against.  If not, don't even bother with
 	# this check.  This is known to work in rhel/fedora distros
 	#
 	# The wildcards stay outside the quotes so that the shell expands
 	# them; fully quoted, ls looked for a file literally named "*...*.img".
 	ls /boot/*"$(uname -r)"*.img > /dev/null 2>&1
 	if [ $? -ne 0 ]; then
 		return
 	fi
 
 	##
 	# Now check to see if the initrd has been updated.
 	# If not, the machine could boot and activate the VG outside
 	# the control of pacemaker
 	##
 	# The pattern is quoted here so that find receives it unexpanded.
 	if [ "$(find /boot -name '*.img' -newer /etc/lvm/lvm.conf)" = "" ]; then
 		ocf_log warn "LVM:  Improper setup detected"
 		ocf_log warn "* initrd image needs to be newer than lvm.conf"
 
 		# While dangerous if not done the first time, there are many
 		# cases where we don't simply want to fail here.  Instead,
 		# keep warning until the user remakes the initrd - or has
 		# it done for them by upgrading the kernel.
 		#
 		# initrd can be updated using this command.
 		# dracut -H -f /boot/initramfs-$(uname -r).img $(uname -r)
 		#
 	fi
 }
 
 ##
 # does this vg have our tag
 #
 # Returns 0 when the tags of the VG equal $OUR_TAG, 1 when the VG has
 # no tag at all and 2 when something else is set.
 ##
 check_tags()
 {
 	local owner=$(vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' ')
 
 	case "$owner" in
 	"")
 		# No-one owns this VG yet
 		return 1 ;;
 	"$OUR_TAG")
 		# yep, this is ours
 		return 0 ;;
 	esac
 
 	# some other tag is set on this vg
 	return 2
 }
 
 # Remove every tag from the volume group. Returns $OCF_SUCCESS when no
 # tag is left afterwards, $OCF_ERR_GENERIC otherwise.
 strip_tags()
 {
 	local i
 
 	# vgs prints the tags comma separated; turn them into words
 	for i in $(vgs --noheadings -o tags $OCF_RESKEY_volgrpname | sed s/","/" "/g); do
 		ocf_log info "Stripping tag, $i"
 
 		# LVM version 2.02.98 allows changing tags if PARTIAL
 		vgchange --deltag $i $OCF_RESKEY_volgrpname
 	done
 
 	# The substitution is quoted so the test always sees exactly one
 	# operand, whether the tag list is empty or not.
 	if [ -n "$(vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' ')" ]; then
 		ocf_exit_reason "Failed to remove ownership tags from $OCF_RESKEY_volgrpname"
 		return $OCF_ERR_GENERIC
 	fi
 
 	return $OCF_SUCCESS
 }
 
 ##
 # Make sure the volume group carries our ownership tag, $OUR_TAG.
 # Foreign tags are stripped first.  Returns $OCF_SUCCESS when the tag is
 # (already) set, $OCF_ERR_GENERIC when stripping or tagging fails.
 ##
 set_tags()
 {
 	local tag_state
 
 	check_tags
 	tag_state=$?
 
 	if [ $tag_state -eq 0 ]; then
 		# we already own it.
 		return $OCF_SUCCESS
 	fi
 
 	if [ $tag_state -eq 2 ]; then
 		# other tags are set, strip them before setting
 		strip_tags || return $OCF_ERR_GENERIC
 	fi
 
 	if ! vgchange --addtag $OUR_TAG $OCF_RESKEY_volgrpname; then
 		ocf_exit_reason "Failed to add ownership tag to $OCF_RESKEY_volgrpname"
 		return $OCF_ERR_GENERIC
 	fi
 
 	ocf_log info "New tag \"$OUR_TAG\" added to $OCF_RESKEY_volgrpname"
 	return $OCF_SUCCESS
 }
 
 #
 #	Report whether volume group $1 is active.
 #
 #	$1  volume group name
 #	$2  optional.  When non-empty a one-line summary is also echoed to
 #	    stdout and the "not available" message is logged at debug level.
 #	    When empty, that message is logged at err level, lowered to warn
 #	    for probes and to info while the running action is "stop".
 #
 #	Returns 0 when /dev/$1 is a non-empty directory (and, in tag mode,
 #	the tag checks pass), $OCF_NOT_RUNNING when it is not, and
 #	$OCF_ERR_GENERIC when one of the tag mode checks fails.
 #
 LVM_status() {
 	local rc=1
 	loglevel="debug"
 
 	# Set the log level of the error message
 	if [ "X${2}" = "X" ]; then
 		loglevel="err"
 		if ocf_is_probe; then
 			loglevel="warn"
 		else
 			# quoted so the test stays valid even if OP_METHOD is empty
 			if [ "${OP_METHOD}" = "stop" ]; then
 				loglevel="info"
 			fi
 		fi
 	fi
 
 	# The VG counts as active only when its /dev directory has entries.
 	if [ -d "/dev/$1" ]; then
 		test "`cd "/dev/$1" && ls`" != ""
 		rc=$?
 		if [ $rc -ne 0 ]; then
 			ocf_exit_reason "VG $1 with no logical volumes is not supported by this RA!"
 		fi
 	fi
 
 	if [ $rc -ne 0 ]; then
 		ocf_log $loglevel "LVM Volume $1 is not available (stopped)"
 		rc=$OCF_NOT_RUNNING
 	else
 		case $(get_vg_mode) in
 		1) # exclusive with tagging.
 			# If vg is running, make sure the correct tag is present. Otherwise we
 			# can not guarantee exclusive activation.
 			if ! check_tags; then
 				ocf_exit_reason "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\""
 				rc=$OCF_ERR_GENERIC
 			fi
 
 			# make sure the environment for tags activation is still valid
 			if ! verify_tags_environment; then
 				rc=$OCF_ERR_GENERIC
 			fi
 			# let the user know if their initrd is older than lvm.conf.
 			check_initrd_warning
 			;;
 		*)
 			: ;;
 		esac
 	fi
 
 	if [ "X${2}" = "X" ]; then
 		# status call return
 		return $rc
 	fi
 
 	# Report on LVM volume status to stdout...
 	if [ $rc -eq 0 ]; then
 		echo "Volume $1 is available (running)"
 	else
 		echo "Volume $1 is not available (stopped)"
 	fi
 	return $rc
 }
 
 ##
 # Print the vgchange options used to activate the volume group.
 #
 # The activation flag depends on get_vg_mode (0: -aly, 1: -ay with a
 # volume_list override restricted to @$OUR_TAG, 2: -aey).  --partial is
 # appended when OCF_RESKEY_partial_activation is true and --monitor y
 # when the resource is a clone.  Callers split the printed string into
 # words themselves.
 ##
 get_activate_options()
 {
 	local options="-a"
 
 	case $(get_vg_mode) in
 	0) options="${options}ly";;
 	1) options="${options}y --config activation{volume_list=[\"@${OUR_TAG}\"]}";;
 	2) options="${options}ey";;
 	esac
 
 	if ocf_is_true "$OCF_RESKEY_partial_activation" ; then
 		options="${options} --partial"
 	fi
 
 	# for clones (clustered volume groups), we'll also have to force
 	# monitoring, even if disabled in lvm.conf.
 	if ocf_is_clone; then
 		options="$options --monitor y"
 	fi
 
 	# Quoted on purpose: the value contains [...] and {}, which must not be
 	# subject to pathname expansion; printf also never treats the leading
 	# "-a..." as one of its own options.
 	printf '%s\n' "$options"
 }
 
 ##
 # Attempt to deactivate vg cluster wide and then start the vg exclusively
 #
 # Returns $OCF_ERR_GENERIC when an LV matches the "open" attribute pattern
 # below or cannot be deactivated; otherwise returns the status of the final
 # "ocf_run vgchange" activation.
 ##
 retry_exclusive_start()
 {
-	local vgchange_options=$(get_activate_options)
+	local vgchange_options="$(get_activate_options)"
 
 	# Deactivate each LV in the group one by one cluster wide
 	# The lvs output is loaded into the positional parameters as
 	# (name, attr) pairs: $1 is the LV name, $2 its attribute string.
 	set -- $(lvs -o name,attr --noheadings $OCF_RESKEY_volgrpname 2> /dev/null)
 	while [ $# -ge 2 ]; do
 		case $2 in
 		# 5th attribute character "a" and 6th "o" (presumably "active" and
 		# "open" - confirm against lvs(8)).
 		????ao*)
 			# open LVs cannot be deactivated.
 			return $OCF_ERR_GENERIC;;
 		*)
 			if ! lvchange -an $OCF_RESKEY_volgrpname/$1; then
 				ocf_exit_reason "Unable to perform required deactivation of $OCF_RESKEY_volgrpname/$1 before starting"
 				return $OCF_ERR_GENERIC
 			fi
 			;;
 		esac
 		# move on to the next (name, attr) pair
 		shift 2
 	done
 
 	# $vgchange_options is left unquoted so it splits into separate arguments.
 	ocf_run vgchange $vgchange_options $OCF_RESKEY_volgrpname
 }
 
 #
 #	Enable LVM volume
 #
 #	$1  volume group name
 #
 #	Returns $OCF_SUCCESS when LVM_status reports the group as active after
 #	activation, $OCF_ERR_GENERIC when tagging or activation fails, and
 #	$OCF_NOT_RUNNING when vgchange succeeded but the group is not active.
 #
 LVM_start() {
-	local vgchange_options=$(get_activate_options)
+	local vgchange_options="$(get_activate_options)"
 	local vg=$1
 	local clvmd=0
 
 	# TODO: This MUST run vgimport as well
 	ocf_log info "Activating volume group $vg"
 	# Only for LVM major version 1 is the VG name passed to vgscan.
 	if [ "$LVM_MAJOR" -eq "1" ]; then
 		ocf_run vgscan $vg
 	else
 		ocf_run vgscan
 	fi
 
 	case $(get_vg_mode) in
 	2)
 		# mode 2: remember that a failed activation may be retried below
 		clvmd=1
 		;;
 	1)
 		# mode 1: take ownership of the VG via its tag before activating
 		if ! set_tags; then
 			return $OCF_ERR_GENERIC
 		fi
 		;;
 	*)
 		: ;;
 	esac
 
 	# $vgchange_options is left unquoted so it splits into separate arguments.
 	if ! ocf_run vgchange $vgchange_options $vg; then
 		# Without clvmd there is no retry path.
 		if [ $clvmd -eq 0 ]; then
 			return $OCF_ERR_GENERIC
 		fi
 
 		# Failure to exclusively activate cluster vg.:
 		# This could be caused by a remotely active LV, Attempt
 		# to disable volume group cluster wide and try again.
 		# Allow for some settling
 		sleep 5
 		if ! retry_exclusive_start; then
 			return $OCF_ERR_GENERIC
 		fi
 	fi
 
 	# Called with one argument only, so LVM_status does not echo anything.
 	if LVM_status $vg; then
 		: OK Volume $vg activated just fine!
 		return $OCF_SUCCESS 
 	else
 		ocf_exit_reason "LVM: $vg did not activate correctly"
 		return $OCF_NOT_RUNNING
 	fi
 }
 
 #
 #	Disable the LVM volume
 #
 #	$1  volume group name
 #
 #	Deactivation is attempted up to 10 times.  Returns $OCF_SUCCESS when
 #	the volume group does not exist or was deactivated (and, in tag mode,
 #	its tags were stripped), otherwise $OCF_ERR_GENERIC or the status of
 #	strip_tags.
 #
 LVM_stop() {
 	local res=$OCF_ERR_GENERIC
 	local vgchange_options="-aln"
 	local vg=$1
 	# keep the loop counter out of the global namespace
 	local i
 
 	# A volume group that cannot be found is treated as already stopped.
 	if ! vgs $vg > /dev/null 2>&1; then
 		ocf_log info "Volume group $vg not found"
 		return $OCF_SUCCESS
 	fi
 
 	ocf_log info "Deactivating volume group $vg"
 
 	case $(get_vg_mode) in
 		1) vgchange_options="-an" ;;
 	esac
 
 	for i in $(seq 10)
 	do
 		ocf_run vgchange $vgchange_options $vg
 		res=$?
 		# Even if vgchange succeeded, trust only what LVM_status reports.
 		if LVM_status $vg; then
 			ocf_exit_reason "LVM: $vg did not stop correctly"
 			res=1
 		fi
 
 		if [ $res -eq 0 ]; then
 			break
 		fi
 
 		res=$OCF_ERR_GENERIC
 		ocf_log warn "$vg still Active"
 		ocf_log info "Retry deactivating volume group $vg"
 		sleep 1
 		which udevadm > /dev/null 2>&1 && udevadm settle --timeout=5
 	done
 
 	# In tag mode, give up ownership once the VG is really down.
 	case $(get_vg_mode) in
 	1)
 		if [ $res -eq 0 ]; then
 			strip_tags
 			res=$?
 		fi
 		;;
 	esac
 
 	return $res
 }
 
 #
 #	Check whether the OCF instance parameters are valid
 #
 #	Reads $VOLUME, $OCF_RESKEY_volgrpname, $OCF_RESKEY_partial_activation,
 #	$OCF_RESKEY_exclusive and $LVM_MAJOR.  Returns $OCF_SUCCESS when all
 #	checks pass; on a failed check it calls "exit" directly (with
 #	$OCF_ERR_GENERIC or $OCF_ERR_CONFIGURED) instead of returning.
 #
 LVM_validate_all() {
 	check_binary $AWK
 
 	##
 	# lvmetad is a daemon that caches lvm metadata to improve the
 	# performance of LVM commands. This daemon should never be used when
 	# volume groups exist that are being managed by the cluster. The lvmetad
 	# daemon introduces a response lag, where certain LVM commands look like
 	# they have completed (like vg activation) when in fact the command
 	# is still in progress by the lvmetad.  This can cause reliability issues
 	# when managing volume groups in the cluster.  For Example, if you have a
 	# volume group that is a dependency for another application, it is possible
 	# the cluster will think the volume group is activated and attempt to start
 	# the application before volume group is really accessible... lvmetad is bad.
 	##
 	lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1
 	if [ $? -eq 0 ]; then
 		# for now warn users that lvmetad is enabled and that they should disable it. In the
 		# future we may want to consider refusing to start, or killing the lvmetad daemon.
 		ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process"
 	fi
 	
 	##
 	# Off-the-shelf tests...
 	##
 	VGOUT=`vgck ${VOLUME} 2>&1`
 	if [ $? -ne 0 ]; then
 		# Inconsistency might be due to missing physical volumes, which doesn't
 		# automatically mean we should fail.  If partial_activation=true then
 		# we should let start try to handle it, or if no PVs are listed as
 		# "unknown device" then another node may have marked a device missing
 		# where we have access to all of them and can start without issue.
 		if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then
 			if vgs -o pv_name --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'unknown device' > /dev/null 2>&1; then
 				if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
 					# We are missing devices and cannot activate partially
 					ocf_exit_reason "Volume group [$VOLUME] has devices missing.  Consider partial_activation=true to attempt to activate partially"
 					exit $OCF_ERR_GENERIC
 				else
 					# We are missing devices but are allowed to activate partially.
 					# Assume that caused the vgck failure and carry on
 					ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled.  Proceeding with requested action."
 				fi
 			fi
 			# else the vg is partial but all devices are accounted for, so another
 			# node must have marked the device missing.  Proceed.
 		else
 			# vgck failure was for something other than missing devices
 			ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
 			exit $OCF_ERR_GENERIC
 		fi
 	fi
 
 	##
 	# Does the Volume Group exist?
 	##
 	if [ "$LVM_MAJOR" = "1" ]; then
 		VGOUT=`vgdisplay ${VOLUME} 2>&1`
 	else
 		VGOUT=`vgdisplay -v ${VOLUME} 2>&1`
 	fi
 	# $? here is the status of whichever vgdisplay assignment ran above.
 	if [ $? -ne 0 ]; then
 		ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
 		exit $OCF_ERR_GENERIC
 	fi
 
 	##
 	# If exclusive activation is not enabled, then
 	# further checking of proper setup is not necessary
 	##
 	if ! ocf_is_true "$OCF_RESKEY_exclusive"; then
 		return $OCF_SUCCESS;
 	fi
 
 	##
 	# Having cloned lvm resources with exclusive vg activation makes no sense at all.
 	##
 	if ocf_is_clone; then
 		ocf_exit_reason "cloned lvm resources can not be activated exclusively"
 		exit $OCF_ERR_CONFIGURED
 	fi
 
 	##
 	# Make sure the cluster attribute is set and clvmd is up when exclusive
 	# activation is enabled. Otherwise we can't exclusively activate the volume group.
 	##
 	case $(get_vg_mode) in
 	1)  # exclusive activation using tags
 		if ! verify_tags_environment; then
 			exit $OCF_ERR_GENERIC
 		fi
 		;;
 	2)  # exclusive activation with clvmd
 		##
 		# verify is clvmd running
 		##
 		if ! ps -C clvmd > /dev/null 2>&1; then
 			ocf_exit_reason "$OCF_RESKEY_volgrpname has the cluster attribute set, but 'clvmd' is not running"
 			exit $OCF_ERR_GENERIC
 		fi
 		;;
 	*)
 		: ;;
 	esac
 
 	return $OCF_SUCCESS
 }
 
 #
 #	'main' starts here...
 #
 
 # Exactly one argument, the action name, is accepted.
 if
 	[ $# -ne 1 ]
 then
 	usage
 	exit $OCF_ERR_ARGS 
 fi
 
 # Actions that need neither a volume group name nor the LVM tools.
 case $1 in
 	meta-data)	meta_data
 		exit $OCF_SUCCESS;;
 
 	methods)	LVM_methods
 		exit $?;;
 
 	usage)	usage
 		exit $OCF_SUCCESS;;
 	*)		;;
 esac
 
 # Every remaining action operates on a volume group.
 if 
 	[ -z "$OCF_RESKEY_volgrpname" ]
 then
 	ocf_exit_reason "You must identify the volume group name!"
 	exit $OCF_ERR_CONFIGURED 
 fi
 
 # Get the LVM version number, for this to work we assume(thanks to panjiam):
 # 
 # LVM1 outputs like this
 #
 #	# vgchange --version
 #	vgchange: Logical Volume Manager 1.0.3
 #	Heinz Mauelshagen, Sistina Software	19/02/2002 (IOP 10)
 #
 # LVM2 and higher versions output in this format
 #
 #	# vgchange --version
 #	LVM version:		 2.00.15 (2004-04-19)
 #	Library version: 1.00.09-ioctl (2004-03-31)
 #	Driver version:	4.1.0
 
 LVM_VERSION=`vgchange --version 2>&1 | \
 	$AWK '/Logical Volume Manager/ {print $5"\n"; exit; }
 			 /LVM version:/ {printf $3"\n"; exit;}'`
 # rc is the status of the pipeline, i.e. of $AWK, not of vgchange; a
 # failing vgchange is caught by the empty LVM_VERSION test below.
 rc=$?
 
 if
 	( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] )
 then
 	ocf_exit_reason "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?"
 	exit $OCF_ERR_INSTALLED
 fi
 # Keep only the part before the first dot, e.g. "2" for "2.00.15".
 LVM_MAJOR="${LVM_VERSION%%.*}"
 
 VOLUME=$OCF_RESKEY_volgrpname
 OP_METHOD=$1
 
 # An explicitly configured tag overrides the current value of OUR_TAG.
 if [ -n "$OCF_RESKEY_tag" ]; then
 	OUR_TAG=$OCF_RESKEY_tag
 fi
 
 # What kind of method was invoked?
 case "$1" in
 
 	start)
 		LVM_validate_all
 		LVM_start $VOLUME
 		exit $?;;
 
 	stop)	LVM_stop $VOLUME
 		exit $?;;
 
 	# "status" passes a second argument, so LVM_status also echoes a summary.
 	status)	LVM_status $VOLUME $1
 		exit $?;;
 
 	monitor)	LVM_status $VOLUME
 		exit $?;;
 
 	# No explicit exit here: execution continues past the case statement.
 	validate-all)	LVM_validate_all
 		;;
 
 	*)		usage
 		exit $OCF_ERR_UNIMPLEMENTED;;
 esac
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index b70c10404..df0e3b8b6 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -1,160 +1,161 @@
 # Makefile.am for OCF RAs
 #
 # Author: Sun Jing Dong
 # Copyright (C) 2004 IBM
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 # 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 # 
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 #
 MAINTAINERCLEANFILES = Makefile.in
 
 EXTRA_DIST		= $(ocf_SCRIPTS) $(ocfcommon_DATA) \
 			  $(common_DATA) $(hb_DATA) $(dtd_DATA) \
 			  README
 
 AM_CPPFLAGS		= -I$(top_srcdir)/include -I$(top_srcdir)/linux-ha
 
 halibdir		= $(libexecdir)/heartbeat
 
 ocfdir		        = $(OCF_RA_DIR_PREFIX)/heartbeat
 
 dtddir			= $(datadir)/$(PACKAGE_NAME)
 dtd_DATA		= ra-api-1.dtd
 
 # The IPv6addr agent binary is only listed when USE_IPV6ADDR_AGENT is set.
 if USE_IPV6ADDR_AGENT
 ocf_PROGRAMS           = IPv6addr
 else
 ocf_PROGRAMS           =
 endif
 
 # The send_ua helper is only listed when IPV6ADDR_COMPATIBLE is set.
 if IPV6ADDR_COMPATIBLE
 halib_PROGRAMS         = send_ua
 else
 halib_PROGRAMS         =
 endif
 
 IPv6addr_SOURCES        = IPv6addr.c IPv6addr_utils.c
 send_ua_SOURCES         = send_ua.c IPv6addr_utils.c
 
 IPv6addr_LDADD          = -lplumb $(LIBNETLIBS)
 send_ua_LDADD           = $(LIBNETLIBS)
 
 ocf_SCRIPTS	     =  ClusterMon		\
 			CTDB			\
 			Dummy			\
 		        IPaddr			\
 		        IPaddr2			\
 			anything		\
 			AoEtarget		\
 			apache			\
 			asterisk		\
 			nginx			\
 			AudibleAlarm		\
 			clvm		\
 			conntrackd		\
 			db2			\
 			dhcpd		\
 			Delay			\
 			dnsupdate		\
 			docker			\
 			eDir88			\
 			EvmsSCC			\
 			Evmsd			\
 			ethmonitor		\
 			exportfs		\
 			Filesystem		\
 			fio			\
 			galera			\
+			garbd			\
 			ids			\
 			iscsi			\
 			ICP			\
 			IPsrcaddr		\
 			iSCSITarget		\
 			iSCSILogicalUnit	\
 			iface-bridge		\
 			iface-vlan		\
 			jboss			\
 			kamailio		\
 			LinuxSCSI		\
 			LVM			\
 			lxc			\
 			MailTo			\
 			ManageRAID		\
 			ManageVE		\
 			mysql			\
 			mysql-proxy		\
 			nagios			\
 			named			\
 			nfsnotify		\
 			nfsserver		\
 			oracle			\
 			oralsnr			\
 			pingd			\
 			portblock		\
 			postfix			\
 			pound			\
 			pgsql			\
 			proftpd			\
 			Pure-FTPd		\
 			rabbitmq-cluster	\
 			Raid1			\
 			redis			\
 			Route			\
 			rsyncd			\
 			rsyslog			\
 			SAPDatabase		\
 			SAPInstance		\
 			SendArp			\
 			ServeRAID		\
 			slapd			\
 			SphinxSearchDaemon	\
 			Squid			\
 			Stateful		\
 			SysInfo			\
 			scsi2reservation	\
 			sfex			\
                         sg_persist              \
 			symlink			\
 			syslog-ng		\
 			tomcat			\
 			VIPArip			\
 			VirtualDomain		\
 			varnish			\
 			vmware			\
 			WAS			\
 			WAS6			\
 			WinPopup		\
 			Xen			\
 			Xinetd			\
 			zabbixserver
 
 ocfcommondir		= $(OCF_LIB_DIR_PREFIX)/heartbeat
 ocfcommon_DATA		= ocf-shellfuncs 	\
 			  ocf-binaries	 	\
 			  ocf-directories 	\
 			  ocf-returncodes 	\
  			  ocf-rarun		\
  			  ocf-distro		\
 			  apache-conf.sh 	\
 			  http-mon.sh    	\
 			  sapdb-nosha.sh	\
 			  sapdb.sh		\
 			  ora-common.sh		\
 			  mysql-common.sh		\
 			  nfsserver-redhat.sh		\
 			  findif.sh
 
 # Legacy locations
 hbdir			= $(sysconfdir)/ha.d
 hb_DATA			= shellfuncs
diff --git a/heartbeat/README.galera b/heartbeat/README.galera
index 56390e60b..dd45618e3 100644
--- a/heartbeat/README.galera
+++ b/heartbeat/README.galera
@@ -1,132 +1,148 @@
 Notes regarding the Galera resource agent
 ---
 
 In the resource agent, the action of bootstrapping a Galera cluster is
 implemented into a series of small steps, by using:
 
   * Two CIB attributes `last-committed` and `bootstrap` to elect a
     bootstrap node that will restart the cluster.
 
   * One CIB attribute `sync-needed` that will identify that joining
     nodes are in the process of synchronizing their local database
     via SST.
 
   * A Master/Slave pacemaker resource which helps splitting the boot
     into steps, up to a point where a galera node is available.
 
   * the recurring monitor action to coordinate switch from one
     state to another.
 
 How boot works
 ====
 
 There are two things to know to understand how the resource agent
 restarts a Galera cluster.
 
 ### Bootstrap the cluster with the right node
 
-When synced, the nodes of a galera clusters have in common a last seqno,
+When synced, the nodes of a galera cluster have in common a last seqno,
 which identifies the last transaction considered successful by a
 majority of nodes in the cluster (think quorum).
 
 To restart a cluster, the resource agent must ensure that it will
 bootstrap the cluster from a node which is up-to-date, i.e. which has
 the highest seqno of all nodes.
 
 As a result, if the resource agent cannot retrieve the seqno on all
 nodes, it won't be able to safely identify a bootstrap node, and
 will simply refuse to start the galera cluster.
 
 ### synchronizing nodes can be a long operation
 
 Starting a bootstrap node is relatively fast, so it's performed
 during the "promote" operation, which is a one-off, time-bounded
 operation.
 
 Subsequent nodes will need to synchronize via SST, which consists
 in "pushing" an entire Galera DB from one node to another.
 
 There is no perfect time-out, as time spent during synchronization
 depends on the size of the DB. Thus, joiner nodes are started during
 the "monitor" operation, which is a recurring operation that can
 better track the progress of the SST.
 
 
 State flow
 ====
 
 General idea for starting Galera:
 
   * Before starting the Galera cluster each node needs to go in Slave
     state so that the agent records its last seqno into the CIB.
     __ This uses attribute last-committed __
 
   * When all nodes are in Slave state, the agent can safely determine the
     last seqno and elect a bootstrap node (`detect_first_master()`).
     __ This uses attribute bootstrap __
 
   * The agent then sets the score of the elected bootstrap node to
     Master so that pacemaker promotes it and starts the first Galera
     server.
 
   * Once the first Master is running, the agent can start joiner
     nodes during the "monitor" operation, and starts monitoring
     their SST sync.
     __ This uses attribute sync-needed __
 
   * Only when SST is over on joiner nodes, the agent promotes them
     to Master. At this point, the entire Galera cluster is up.
 
 
 Attribute usage and liveness
 ====
 
 Here is how attributes are created on a per-node basis. If you
 modify the resource agent make sure those properties still hold.
 
 ### last-committed
 
 It is just a temporary hint for the resource agent to help
 elect a bootstrap node. Once the bootstrap attribute is set on one
 of the nodes, we can get rid of last-committed.
 
  - Used   : during Slave state to compare seqno
  - Created: before entering Slave state:
               . at startup in `galera_start()`
               . or when a Galera node is stopped in `galera_demote()`
  - Deleted: just before node starts in `galera_start_local_node()`;
             cleaned-up during `galera_demote()` and `galera_stop()`
 
 We delete last-committed before starting Galera, to avoid race
 conditions that could arise due to discrepancies between the CIB and
 Galera.
 
 ### bootstrap
 
 Attribute set on the node that is elected to bootstrap Galera.
 
 - Used   : during promotion in `galera_start_local_node()`
 - Created: at startup once all nodes have `last-committed`;
            or during monitor if all nodes have failed
 - Deleted: in `galera_start_local_node()`, just after the bootstrap
            node started and is ready;
            cleaned-up during `galera_demote()` and `galera_stop()`
 
 There cannot be more than one bootstrap node at any time, otherwise
 the Galera cluster would stop replicating properly.
 
 ### sync-needed
 
 While this attribute is set on a node, the Galera node is in JOIN
 state, i.e. SST is in progress and the node cannot serve queries.
 
 The resource agent relies on the underlying SST method to monitor
 the progress of the SST. For instance, with `wsrep_sst_rsync`,
 timeout would be reported by rsync, the Galera node would go in
 Non-primary state, which would make `galera_monitor()` fail.
 
 - Used   : during recurring slave monitor in `check_sync_status()`
 - Created: in `galera_start_local_node()`, just after the joiner
            node started and entered the Galera cluster
 - Deleted: during recurring slave monitor in `check_sync_status()`
            as soon as the Galera code reports to be SYNC-ed.
+
+### no-grastate
+
+If a galera node was unexpectedly killed in the middle of a replication,
+InnoDB can retain the equivalent of an XA transaction in prepared state
+in its redo log. If so, mysqld cannot recover state (nor last seqno)
+automatically, and a special recovery heuristic has to be used to
+unblock the node.
+
+This transient attribute is used to keep track of forced recoveries to
+prevent bootstrapping a cluster from a recovered node when possible.
+
+- Used   : during `detect_first_master()` to elect the bootstrap node
+- Created: in `detect_last_commit()` if the node has a pending XA
+           transaction to recover in the redo log
+- Deleted: when a node is promoted to Master.
diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase
index de7959fee..641bd4086 100755
--- a/heartbeat/SAPDatabase
+++ b/heartbeat/SAPDatabase
@@ -1,331 +1,341 @@
 #!/bin/sh
 #
 # SAPDatabase
 #
 # Description:	Manages any type of SAP supported database instance
 #               as a High-Availability OCF compliant resource.
 #
 # Author:       Alexander Krauth, October 2006
 # Support:      linux@sap.com
 # License:      GNU General Public License (GPL)
 # Copyright:    (c) 2006, 2007, 2010, 2012 Alexander Krauth
 #
 # An example usage: 
 #      See usage() function below for more details...
 #
 # OCF instance parameters:
 #       OCF_RESKEY_SID
 #       OCF_RESKEY_DIR_EXECUTABLE      (optional, well known directories will be searched by default)
 #       OCF_RESKEY_DBTYPE              (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB)
 #       OCF_RESKEY_DBINSTANCE          (optional, Database instance name, if not equal to SID)
+#       OCF_RESKEY_DBOSUSER            (optional, the Linux user that owns the database processes on operating system level)
 #       OCF_RESKEY_STRICT_MONITORING   (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck)
 #       OCF_RESKEY_AUTOMATIC_RECOVER   (optional, automatic startup recovery, default is false)
 #       OCF_RESKEY_MONITOR_SERVICES    (optional, default is to monitor all database services)
 #       OCF_RESKEY_PRE_START_USEREXIT  (optional, lists a script which can be executed before the resource is started)
 #       OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started)
 #       OCF_RESKEY_PRE_STOP_USEREXIT   (optional, lists a script which can be executed before the resource is stopped)
 #       OCF_RESKEY_POST_STOP_USEREXIT  (optional, lists a script which can be executed after the resource is stopped)
 #     Deprecated parameters:
 #       OCF_RESKEY_NETSERVICENAME
 #       OCF_RESKEY_DBJ2EE_ONLY
 #       OCF_RESKEY_JAVA_HOME
 #       OCF_RESKEY_DIR_BOOTSTRAP
 #       OCF_RESKEY_DIR_SECSTORE
 #       OCF_RESKEY_DB_JARS
 #
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 #######################################################################
 
 SH=/bin/sh
 
 #
 # usage: print a short help text listing the supported operations.
 #        The list is taken from sapdatabase_methods and joined with '|'.
 #
 usage() {
   methods=`sapdatabase_methods`
   methods=`echo $methods | tr ' ' '|'`
   # Every here-document line must start with a tab: "<<-" strips leading
   # tabs only, so space-indented lines would be printed misaligned.
   cat <<-!
 	usage: $0 ($methods)
 
 	$0 manages a SAP database of any type as an HA resource.
 	Currently Oracle, MaxDB, DB/2 UDB, Sybase ASE and SAP HANA Database are supported.
 	ABAP databases as well as JAVA only databases are supported.
 
 	The 'start' operation starts the instance.
 	The 'stop' operation stops the instance.
 	The 'status' operation reports whether the instance is running
 	The 'monitor' operation reports whether the instance seems to be working
 	The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!)
 	The 'validate-all' operation reports whether the parameters are valid
 	The 'methods' operation reports on the methods $0 supports
 
 	!
 }
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="SAPDatabase">
-<version>2.06</version>
+<version>2.14</version>
 
 <shortdesc lang="en">Manages a SAP database instance as an HA resource.</shortdesc>
 <longdesc lang="en">
 Resource script for SAP databases. It manages a SAP database of any type as an HA resource.
 
 The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB.
 The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure.
 The resource agent supports the following databases:
 - Oracle 10.2, 11.2 and 12
 - DB/2 UDB for Windows and Unix 9.x
 - SAP-DB / MaxDB 7.x
 - Sybase ASE 15.7
 - SAP HANA Database since 1.00 - with SAP node 1625203 (http://sdn.sap.com)
 
 In fact this resource agent does not run any database commands directly. It uses the SAP standard process SAPHostAgent to control the database.
 The SAPHostAgent must be installed on each cluster node locally. It will not work, if you try to run the SAPHostAgent also as a HA resource.
 Please follow SAP note 1031096 for the installation of SAPHostAgent.
 The required minimum version of SAPHostAgent is:
 Release: 7.20
 Patch Number: 90
 or compile time after: Dec 17 2011
 </longdesc>
 <parameters>
  <parameter name="SID" unique="1" required="1">
   <longdesc lang="en">The unique database system identifier. e.g. P01</longdesc>
   <shortdesc lang="en">Database system ID</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="DIR_EXECUTABLE" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find saphostexec and saphostctrl.
 Usually you can leave this empty. Then the default: /usr/sap/hostctrl/exe is used.
   </longdesc>
   <shortdesc lang="en">path of saphostexec and saphostctrl</shortdesc>
   <content type="string" default="/usr/sap/hostctrl/exe" />
  </parameter>
  <parameter name="DBTYPE" unique="0" required="1">
   <longdesc lang="en">The name of the database vendor you use. Set either: ADA, DB6, ORA, SYB, HDB</longdesc>
   <shortdesc lang="en">database vendor</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="DBINSTANCE" unique="1" required="0">
   <longdesc lang="en">Must be used for special database implementations, when database instance name is not equal to the SID (e.g. Oracle DataGuard)</longdesc>
   <shortdesc lang="en">Database instance name, if not equal to SID</shortdesc>
   <content type="string" default="" />
  </parameter>
+ <parameter name="DBOSUSER" unique="1" required="0">
+  <longdesc lang="en">The parameter can be set, if the database processes on operating system level are not executed with the default user of the used database type. Defaults: ADA=taken from /etc/opt/sdb, DB6=db2SID, ORA=oraSID and oracle, SYB=sybSID, HDB=SIDadm</longdesc>
+  <shortdesc lang="en">the Linux user that owns the database processes on operating system level</shortdesc>
+  <content type="string" default="" />
+ </parameter>
  <parameter name="NETSERVICENAME" unique="0" required="0">
   <longdesc lang="en">Deprecated - do not use anymore. This parameter will be deleted in one of the next releases.</longdesc>
   <shortdesc lang="en">deprecated - do not use anymore</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="DBJ2EE_ONLY" unique="0" required="0">
   <longdesc lang="en">Deprecated - do not use anymore. This parameter will be deleted in one of the next releases.</longdesc>
   <shortdesc lang="en">deprecated - do not use anymore</shortdesc>
   <content type="boolean" default=""/>
  </parameter>
  <parameter name="JAVA_HOME" unique="0" required="0">
   <longdesc lang="en">Deprecated - do not use anymore. This parameter will be deleted in one of the next releases.</longdesc>
   <shortdesc lang="en">deprecated - do not use anymore</shortdesc>
   <content type="string" default=""/>
  </parameter>
  <parameter name="STRICT_MONITORING" unique="0" required="0">
   <longdesc lang="en">This controls how the resource agent monitors the database. If set to true, it will use 'saphostctrl -function GetDatabaseStatus' to test the database state. If set to false, only operating system processes are monitored.</longdesc>
   <shortdesc lang="en">Activates application level monitoring</shortdesc>
   <content type="boolean" default="false"/>
  </parameter>
  <parameter name="AUTOMATIC_RECOVER" unique="0" required="0">
   <longdesc lang="en">If you set this to true, 'saphostctrl -function StartDatabase' will always be called with the '-force' option.</longdesc>
   <shortdesc lang="en">Enable or disable automatic startup recovery</shortdesc>
   <content type="boolean" default="false"/>
  </parameter>
  <parameter name="MONITOR_SERVICES" unique="0" required="0">
   <longdesc lang="en">Defines which services are monitored by the SAPDatabase resource agent, if STRICT_MONITORING is set to true. Service names must correspond with the output of the 'saphostctrl -function GetDatabaseStatus' command.</longdesc>
   <shortdesc lang="en">Database services to monitor</shortdesc>
   <content type="string" default="Instance|Database|Listener"/>
  </parameter>
  <parameter name="DIR_BOOTSTRAP" unique="0" required="0">
   <longdesc lang="en">Deprecated - do not use anymore. This parameter will be deleted in one of the next releases.</longdesc>
   <shortdesc lang="en">deprecated - do not use anymore</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="DIR_SECSTORE" unique="0" required="0">
   <longdesc lang="en">Deprecated - do not use anymore. This parameter will be deleted in one of the next releases.</longdesc>
   <shortdesc lang="en">deprecated - do not use anymore</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="DB_JARS" unique="0" required="0">
   <longdesc lang="en">Deprecated - do not use anymore. This parameter will be deleted in one of the next releases.</longdesc>
   <shortdesc lang="en">deprecated - do not use anymore</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="PRE_START_USEREXIT" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets started.</longdesc>
   <shortdesc lang="en">path to a pre-start script</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="POST_START_USEREXIT" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got started.</longdesc>
   <shortdesc lang="en">path to a post-start script</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="PRE_STOP_USEREXIT" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets stopped.</longdesc>
   <shortdesc lang="en">path to a pre-start script</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="POST_STOP_USEREXIT" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got stopped.</longdesc>
   <shortdesc lang="en">path to a post-stop script</shortdesc>
   <content type="string" default="" />
  </parameter>
 </parameters>
 
 <actions>
 <action name="start" timeout="1800" />
 <action name="stop" timeout="1800" />
 <action name="status" timeout="60" />
 <action name="monitor" depth="0" timeout="60" interval="120" />
 <action name="validate-all" timeout="5" />
 <action name="meta-data" timeout="5" />
 <action name="methods" timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 
 #
 # methods: list every operation this agent accepts, one name per line
 #
 sapdatabase_methods() {
   printf '%s\n' \
     start \
     stop \
     status \
     monitor \
     recover \
     validate-all \
     methods \
     meta-data \
     usage
 }
 
 
 #
 # sapuserexit : Hook for site-specific extensions. Many SAP landscapes need
 #               additional processes or tools which a generic resource agent
 #               cannot know about. Instead of writing a separate agent for
 #               each of them, a customer script can be configured and is
 #               invoked here.
 #
 #               $1 - name of the user exit attribute (used in log messages)
 #               $2 - path of the script or program to run; empty means
 #                    that no user exit is configured
 #
 #               The script's output is discarded and its return code is only
 #               logged; the function always returns $OCF_SUCCESS.
 #
 sapuserexit() {
   NAME="$1"
   VALUE="$2"
 
   # no user exit configured
   [ -n "$VALUE" ] || return $OCF_SUCCESS
 
   if ! have_binary "$VALUE"
   then
     ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable"
     return $OCF_SUCCESS
   fi
 
   ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}"
   "$VALUE" >/dev/null 2>&1
   # $? is expanded before ocf_log runs, so it still holds the script's status
   ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?"
   return $OCF_SUCCESS
 }
 
 
 #
 # saphostctrl_installed : Set the paths of the SAP Host Agent programs and
 #                         report whether saphostctrl and saphostexec are
 #                         both usable. DIR_EXECUTABLE falls back to
 #                         /usr/sap/hostctrl/exe when the parameter is unset.
 #
 saphostctrl_installed() {
   OCF_RESKEY_DIR_EXECUTABLE_default="/usr/sap/hostctrl/exe"
   # apply the default only when the parameter is unset (an empty value is kept)
   if [ -z "${OCF_RESKEY_DIR_EXECUTABLE+set}" ]
   then
     OCF_RESKEY_DIR_EXECUTABLE="${OCF_RESKEY_DIR_EXECUTABLE_default}"
   fi
 
   SAPHOSTOSCOL="${OCF_RESKEY_DIR_EXECUTABLE}/saposcol"
   SAPHOSTSRV="${OCF_RESKEY_DIR_EXECUTABLE}/sapstartsrv"
   SAPHOSTEXEC="${OCF_RESKEY_DIR_EXECUTABLE}/saphostexec"
   SAPHOSTCTRL="${OCF_RESKEY_DIR_EXECUTABLE}/saphostctrl"
 
   # the result is the status of the first failing check, or of the last one
   have_binary $SAPHOSTCTRL || return $?
   have_binary $SAPHOSTEXEC
 }
 
 
 #
 #	'main' starts here...
 #
 
 # Exactly one argument - the name of the action to perform - is expected.
 if
   ( [ $# -ne 1 ] )
 then
   usage
   exit $OCF_ERR_ARGS
 fi
 
 # These operations don't require OCF instance parameters to be set
 case "$1" in
   meta-data)	meta_data
 		exit $OCF_SUCCESS;;
 
   usage) 	usage
 		exit $OCF_SUCCESS;;
 
   methods)	sapdatabase_methods
 		exit $?;;
 
   *);;
 esac
 
 # Every remaining action must be run as root.
 if  ! ocf_is_root 
 then
   ocf_log err "$0 must be run as root"
   exit $OCF_ERR_PERM
 fi
 
 # mandatory parameter check
 if  [ -z "$OCF_RESKEY_SID" ]; then
   ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!"
   exit $OCF_ERR_ARGS
 fi
 # SID is a plain copy of the parameter value
 SID=`echo "$OCF_RESKEY_SID"`
 
 if [ -z "$OCF_RESKEY_DBTYPE" ]; then
   ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ADA,DB6,ORA,SYB,HDB)!"
   exit $OCF_ERR_ARGS
 fi
 # DBTYPE is the parameter value converted to upper case
 DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]'`
 
 
 # source functions and initialize global variables
 # sapdb.sh is loaded when saphostctrl_installed succeeds, otherwise
 # sapdb-nosha.sh; the latter path rejects the DBOSUSER parameter.
 if saphostctrl_installed; then
                     . ${OCF_FUNCTIONS_DIR}/sapdb.sh
 else
+                    if [ -n "${OCF_RESKEY_DBOSUSER}" ]; then
+                      ocf_exit_reason "Usage of parameter OCF_RESKEY_DBOSUSER is not possible without having SAP Host-Agent installed"
+                      exit $OCF_ERR_ARGS
+                    fi
                     . ${OCF_FUNCTIONS_DIR}/sapdb-nosha.sh
 fi
 sapdatabase_init
 
 
 # we always want to fall to the faster status method in case of a probe by the cluster  
 ACTION=$1
 if ocf_is_probe 
 then
   ACTION=status
 fi
 
 # What kind of method was invoked?
 # monitor additionally receives the STRICT_MONITORING parameter; any other
 # action name prints the supported methods and exits with
 # $OCF_ERR_UNIMPLEMENTED.
 case "$ACTION" in
 
   start|stop|status|recover)   sapdatabase_$ACTION
                                exit $?;;
   monitor)                     sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING
                                exit $?;;
   validate-all)                sapdatabase_validate
                                exit $?;;
   *)		                   sapdatabase_methods
                                exit $OCF_ERR_UNIMPLEMENTED;;
 esac
diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance
index da394f5a1..be2ff3054 100755
--- a/heartbeat/SAPInstance
+++ b/heartbeat/SAPInstance
@@ -1,942 +1,943 @@
 #!/bin/sh
 #
 # SAPInstance
 #
 # Description:	Manages a single SAP Instance as a High-Availability
 #		resource. One SAP Instance is defined by one 
 #               SAP Instance-Profile. start/stop handles all services
 #               of the START-Profile, status and monitor care only
 #               about essential services.
 #
 # Author:       Alexander Krauth, June 2006
 # Support:      linux@sap.com
 # License:      GNU General Public License (GPL)
 # Copyright:    (c) 2006-2008 Alexander Krauth
 #
 # An example usage: 
 #      See usage() function below for more details...
 #
 # OCF instance parameters:
 #	OCF_RESKEY_InstanceName
 #	OCF_RESKEY_DIR_EXECUTABLE   (optional, well known directories will be searched by default)
 #	OCF_RESKEY_DIR_PROFILE      (optional, well known directories will be searched by default)
 #	OCF_RESKEY_START_PROFILE    (optional, well known directories will be searched by default)
 #	OCF_RESKEY_START_WAITTIME   (optional, to solve timing problems during J2EE-Addin start)
 #	OCF_RESKEY_AUTOMATIC_RECOVER    (optional, automatic startup recovery using cleanipc, default is false)
 #       OCF_RESKEY_MONITOR_SERVICES     (optional, default is to monitor critical services only)
 #       OCF_RESKEY_SHUTDOWN_METHOD      (optional, defaults to NORMAL, KILL: terminate the SAP instance with OS commands - faster, at your own risk)
 #       OCF_RESKEY_ERS_InstanceName     (optional, InstanceName of the ERS instance in a Master/Slave configuration)
 #       OCF_RESKEY_ERS_START_PROFILE    (optional, START_PROFILE of the ERS instance in a Master/Slave configuration)
 #	OCF_RESKEY_PRE_START_USEREXIT	(optional, lists a script which can be executed before the resource is started)
 #	OCF_RESKEY_POST_START_USEREXIT	(optional, lists a script which can be executed after the resource is started)
 #	OCF_RESKEY_PRE_STOP_USEREXIT	(optional, lists a script which can be executed before the resource is stopped)
 #	OCF_RESKEY_POST_STOP_USEREXIT	(optional, lists a script which can be executed after the resource is stopped)
 #
 #  TODO: - Option to shutdown sapstartsrv for non-active instances -> that means: do probes only with OS tools (sapinstance_status)
 #        - Option for better standalone enqueue server monitoring, using ensmon (test enque-deque)
 #        - Option for cleanup abandoned enqueue replication tables
 #
 #######################################################################
 # Initialization:
 
 # Load the OCF shell function library. OCF_FUNCTIONS_DIR is only set to
 # ${OCF_ROOT}/lib/heartbeat when it is not already set.
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 #######################################################################
 
 # Path of the shell interpreter
 SH=/bin/sh
 
 #
 # sapinstance_usage : print the synopsis and a one-line description of each operation
 #
 sapinstance_usage() {
   # sapinstance_methods prints one name per line; join the names with '|'
   methods=$(echo $(sapinstance_methods) | tr ' ' '|')
   cat <<-!
 	usage: $0 ($methods)
 
 	$0 manages a SAP Instance as an HA resource.
 
 	The 'start' operation starts the instance or the ERS instance in a Master/Slave configuration
 	The 'stop' operation stops the instance
 	The 'status' operation reports whether the instance is running
 	The 'monitor' operation reports whether the instance seems to be working
         The 'promote' operation starts the primary instance in a Master/Slave configuration
         The 'demote' operation stops the primary instance and starts the ERS instance
         The 'notify' operation always returns SUCCESS
 	The 'validate-all' operation reports whether the parameters are valid
 	The 'methods' operation reports on the methods $0 supports
 
 	!
 }
 
 #
 # sapinstance_meta_data : print the OCF resource agent meta-data (XML) describing
 #                         the agent, its parameters with their defaults and the
 #                         supported actions with their timeouts
 #
 sapinstance_meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="SAPInstance">
 <version>2.14</version>
 
 <shortdesc lang="en">Manages a SAP instance as an HA resource.</shortdesc>
 <longdesc lang="en">
 Usually a SAP system consists of one database and at least one or more SAP instances (sometimes called application servers). One SAP Instance is defined by having exactly one instance profile. The instance profiles can usually be found in the directory /sapmnt/SID/profile. Each instance must be configured as it's own resource in the cluster configuration.
 The resource agent supports the following SAP versions:
-- SAP WebAS ABAP Release 6.20 - 7.30
-- SAP WebAS Java Release 6.40 - 7.30
-- SAP WebAS ABAP + Java Add-In Release 6.20 - 7.30 (Java is not monitored by the cluster in that case)
+- SAP WebAS ABAP Release 6.20 - 7.40
+- SAP WebAS Java Release 6.40 - 7.40
+- SAP WebAS ABAP + Java Add-In Release 6.20 - 7.40 (Java is not monitored by the cluster in that case)
 When using a SAP Kernel 6.40 please check and implement the actions from the section "Manual postprocessing" from SAP note 995116 (http://sdn.sap.com).
+Other versions may also work with this agent, but have not been verified.
 
 All operations of the SAPInstance resource agent are done by using the startup framework called SAP Management Console or sapstartsrv that was introduced with SAP kernel release 6.40. Find more information about the SAP Management Console in SAP note 1014480. Using this framework defines a clear interface for the Heartbeat cluster, how it sees the SAP system. The options for monitoring the SAP system are also much better than other methods like just watching the ps command for running processes or doing some pings to the application. sapstartsrv uses SOAP messages to request the status of running SAP processes. Therefore it can actually ask a process itself what it's status is, independent from other problems that might exist at the same time.
 
 sapstartsrv knows 4 status colours:
 - GREEN   = everything is fine
 - YELLOW  = something is wrong, but the service is still working
 - RED     = the service does not work
 - GRAY    = the service has not been started
 
 The SAPInstance resource agent will interpret GREEN and YELLOW as OK. That means that minor problems will not be reported to the Heartbeat cluster. This prevents the cluster from doing an unwanted failover.
 The statuses RED and GRAY are reported as NOT_RUNNING to the cluster. Depending on the status the cluster expects from the resource, it will do a restart, failover or just nothing.
 </longdesc>
 <parameters>
  <parameter name="InstanceName" unique="1" required="1">
   <longdesc lang="en">The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci. Usually this is the name of the SAP instance profile.</longdesc>
   <shortdesc lang="en">Instance name: SID_INSTANCE_VIR-HOSTNAME</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="DIR_EXECUTABLE" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find sapstartsrv and sapcontrol. Specify this parameter, if you have changed the SAP kernel directory location after the default SAP installation.</longdesc>
   <shortdesc lang="en">Path of sapstartsrv and sapcontrol</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="DIR_PROFILE" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find the SAP START profile. Specify this parameter, if you have changed the SAP profile directory location after the default SAP installation.</longdesc>
   <shortdesc lang="en">Path of start profile</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="START_PROFILE" unique="1" required="0">
   <longdesc lang="en">The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than.</longdesc>
   <shortdesc lang="en">Start profile name</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="START_WAITTIME" unique="0" required="0">
   <longdesc lang="en">After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start ishandled as SUCCESS. This is useful to resolve timing problems with e.g. the J2EE-Addin instance.Usually the resource agent waits until all services are started and the SAP Management Console reports a GREEN status. A double stack installation (ABAP + Java AddIn) consists of an ABAP dispatcher and aJAVA instance. Normally the start of the JAVA instance takes much longer than the start of the ABAP instance. For a JAVA Instance you may need to configure a much higher timeout for the start operation of the resource in Heartbeat. The disadvantage here is, that the discovery of a failed start by the cluster takes longer. Somebody might say: For me it is important, that the ABAP instance is up and running. A failure of the JAVA instance shall not cause a failover of the SAP instance.
 Actually the SAP MC reports a YELLOW status, if the JAVA instance of a double stack system fails. From the resource agent point of view YELLOW means:everything is OK. Setting START_WAITTIME to a lower value determines the resource agent to check the status of the instance during a start operation after that time. As it would wait normally for a GREEN status, now it reports SUCCESS to the cluster in case of a YELLOW status already after the specified time.
 
 That is only useful for double stack systems.
   </longdesc>
   <shortdesc lang="en">Check the successful start after that time (do not wait for J2EE-Addin)</shortdesc>
   <content type="string" default="3600" />
  </parameter>
  <parameter name="AUTOMATIC_RECOVER" unique="0" required="0">
   <longdesc lang="en">The SAPInstance resource agent tries to recover a failed start attempt automaticaly one time. This is done by killing runing instance processes, removing the kill.sap file and executing cleanipc. Sometimes a crashed SAP instance leaves some processes and/or shared memory segments behind. Setting this option to true will try to remove those leftovers during a start operation. That is to reduce manual work for the administrator.</longdesc>
   <shortdesc lang="en">Enable or disable automatic startup recovery</shortdesc>
   <content type="boolean" default="false"/>
  </parameter>
  <parameter name="MONITOR_SERVICES" unique="0" required="0">
   <longdesc lang="en">Within a SAP instance there can be several services. Usually you will find the defined services in the START profile of the related instance (Attention: with SAP Release 7.10 the START profile content was moved to the instance profile). Not all of those services are worth to monitor by the cluster. For example you properly do not like to failover your SAP instance, if the central syslog collector daemon fails.
 Those services are monitored within the SAPInstance resource agent:
 
 - disp+work
 - msg_server
 - enserver
 - enrepserver
 - jcontrol
 - jstart
 
 That names match the strings used in the output of the command 'sapcontrol -nr [Instance-Nr] -function GetProcessList'.
 The default should fit most cases where you want to manage a SAP Instance from the cluster. You may change this with this parameter, if you like to monitor more/less or other services that sapstartsrv supports.
 You may specify multiple services seperated by a | (pipe) sign in this parameter: disp+work|msg_server|enserver
   </longdesc>
   <shortdesc lang="en">Services to monitor</shortdesc>
   <content type="string" default="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart"/>
  </parameter>
   <parameter name="SHUTDOWN_METHOD" unique="0" required="0">
   <longdesc lang="en">Usual a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the gracefull stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !!</longdesc>
   <shortdesc lang="en">Shutdown graceful or kill a SAP instance by terminating the processes. (normal|KILL)</shortdesc>
   <content type="string" default="normal"/>
  </parameter>
  <parameter name="ERS_InstanceName" unique="1" required="0">
   <longdesc lang="en">Only used in a Master/Slave resource configuration:
 The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile.
 The enqueue replication instance must be installed, before you want to configure a master-slave cluster recource.
 
 The master-slave configuration in the cluster must use this properties:
 clone_max = 2
 clone_node_max = 1
 master_node_max = 1
 master_max = 1
   </longdesc>
   <shortdesc lang="en">Enqueue replication instance name: SID_INSTANCE_VIR-HOSTNAME</shortdesc>
   <content type="string" default=""/>
  </parameter>
  <parameter name="ERS_START_PROFILE" unique="1" required="0">
   <longdesc lang="en">Only used in a Master/Slave resource configuration:
 The parameter ERS_InstanceName must also be set in this configuration.
 The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than.
   </longdesc>
   <shortdesc lang="en">Enqueue replication start profile name</shortdesc>
   <content type="string" default=""/>
  </parameter>
  <parameter name="PRE_START_USEREXIT" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets started.</longdesc>
   <shortdesc lang="en">Path to a pre-start script</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="POST_START_USEREXIT" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got started.</longdesc>
   <shortdesc lang="en">Path to a post-start script</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="PRE_STOP_USEREXIT" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets stopped.</longdesc>
   <shortdesc lang="en">Path to a pre-stop script</shortdesc>
   <content type="string" default="" />
  </parameter>
  <parameter name="POST_STOP_USEREXIT" unique="0" required="0">
   <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got stopped.</longdesc>
   <shortdesc lang="en">Path to a post-stop script</shortdesc>
   <content type="string" default="" />
  </parameter>
 </parameters>
 
 <actions>
 <action name="start" timeout="180" />
 <action name="stop" timeout="240" />
 <action name="status" timeout="60" />
 <action name="monitor" depth="0" timeout="60" interval="120" />
 <action name="monitor" depth="0" timeout="60" interval="121" role="Slave" />
 <action name="monitor" depth="0" timeout="60" interval="119" role="Master" />
 <action name="promote" timeout="320" />
 <action name="demote" timeout="320" />
 <action name="validate-all" timeout="5" />
 <action name="meta-data" timeout="5" />
 <action name="methods" timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 
 #
 # methods: list every operation this agent accepts, one name per line
 #          (promote, demote and notify are printed with eight leading
 #          blanks, exactly as before)
 #
 sapinstance_methods() {
   printf '%s\n' \
     start \
     stop \
     status \
     monitor \
     '        promote' \
     '        demote' \
     '        notify' \
     validate-all \
     methods \
     meta-data \
     usage
 }
 
 
 
 #
 # is_clone : find out if we are configured to run in a Master/Slave configuration
 #
 #            NOTE: the return value is inverted with respect to the name:
 #              0 - clone_max is empty or not greater than zero (not cloned)
 #              1 - valid Master/Slave configuration
 #            A cloned but misconfigured resource terminates the agent with
 #            $OCF_ERR_CONFIGURED (wrong clone options) or $OCF_ERR_ARGS
 #            (ERS_InstanceName missing).
 #
 is_clone() {
   # not cloned at all
   [ -n "$OCF_RESKEY_CRM_meta_clone_max" ] || return 0
   [ "$OCF_RESKEY_CRM_meta_clone_max" -gt 0 ] || return 0
 
   # cloned: only the one supported layout is accepted
   if [ "$OCF_RESKEY_CRM_meta_clone_max" -ne 2 ] \
      || [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] \
      || [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] \
      || [ "$OCF_RESKEY_CRM_meta_master_max" -ne 1 ]
   then
     ocf_log err "Clone options misconfigured. (expect: clone_max=2,clone_node_max=1,master_node_max=1,master_max=1)"
     exit $OCF_ERR_CONFIGURED
   fi
 
   [ -n "$OCF_RESKEY_ERS_InstanceName" ] || {
     ocf_log err "In a Master/Slave configuration the ERS_InstanceName parameter is mandatory."
     exit $OCF_ERR_ARGS
   }
 
   return 1
 }
 
 
 #
 # abnormal_end : essential things are missing, but in the nature of a SAP installation - which can be very different
 #                from customer to customer - we cannot always handle this as an error.
 #                This would be the case, if the software is installed on shared disks and not visible
 #                to all cluster nodes at all times.
 #
 #                $1 - error message to log if the situation really is an error
 #
 #                Never returns: during a probe it exits with the result of sapinstance_status,
 #                for the stop action it runs cleanup_instance and exits with $OCF_SUCCESS,
 #                otherwise it logs $1 and exits with $OCF_ERR_CONFIGURED.
 #
 abnormal_end() {
   # quoted: the message consists of several words and some /bin/sh
   # implementations word-split unquoted arguments of 'local'
   local err_msg="$1"
 
   ocf_is_probe && {
     sapinstance_status
     exit $?
   }
 
   if [ "$ACTION" = "stop" ]
   then
     cleanup_instance
     exit $OCF_SUCCESS
   fi
 
   # hand the message over as one argument, without word splitting or globbing
   ocf_log err "$err_msg"
   exit $OCF_ERR_CONFIGURED
 }
 
 #
 # sapinstance_init : Derive the global settings of one instance from its full instance
 #                    name and fill in defaults for optional parameters which are not set.
 #
 #                    $1 - full instance name, SID_InstanceName_VirtualHostname
 #
 #                    Sets SID, InstanceName, InstanceNr, SAPVIRHOST, sidadm, DIR_PROFILE,
 #                    SAPSTARTPROFILE and LD_LIBRARY_PATH, and - if a usable pair of
 #                    sapstartsrv/sapcontrol is found - DIR_EXECUTABLE, SAPSTARTSRV and
 #                    SAPCONTROL. Calls abnormal_end when DIR_EXECUTABLE is empty afterwards.
 #
 sapinstance_init() {
 
   local myInstanceName="$1"
   local exedir
 
   # split SID_InstanceName_VirtualHostname into its parts
   SID=$(echo "$myInstanceName" | cut -d_ -f1)
   InstanceName=$(echo "$myInstanceName" | cut -d_ -f2)
   SAPVIRHOST=$(echo "$myInstanceName" | cut -d_ -f3)
   # the instance number is made of the two trailing digits of the instance name
   InstanceNr=$(echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/')
 
   # optional OCF parameters, we try to guess which directories are correct
   if [ -n "$OCF_RESKEY_DIR_EXECUTABLE" ]
   then
     if have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" && have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol"
     then
       DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE"
       SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv"
       SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol"
     fi
   else
     # first match wins: the instance specific directory, then the system wide one
     for exedir in "/usr/sap/$SID/$InstanceName/exe" "/usr/sap/$SID/SYS/exe/run"
     do
       if have_binary $exedir/sapstartsrv && have_binary $exedir/sapcontrol
       then
         DIR_EXECUTABLE="$exedir"
         SAPSTARTSRV="$exedir/sapstartsrv"
         SAPCONTROL="$exedir/sapcontrol"
         break
       fi
     done
   fi
 
   # set before abnormal_end may run, as its cleanup path uses $sidadm
   sidadm="$(echo $SID | tr '[:upper:]' '[:lower:]')adm"
 
   if [ -z "$DIR_EXECUTABLE" ]
   then
     abnormal_end "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!"
   fi
 
   DIR_PROFILE="${OCF_RESKEY_DIR_PROFILE:-/usr/sap/$SID/SYS/profile}"
 
   # any name other than the configured InstanceName uses the ERS start profile
   if [ "$myInstanceName" = "$OCF_RESKEY_InstanceName" ]
   then
     currentSTART_PROFILE=$OCF_RESKEY_START_PROFILE
   else
     currentSTART_PROFILE=$OCF_RESKEY_ERS_START_PROFILE
   fi
 
   SAPSTARTPROFILE="${currentSTART_PROFILE:-$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}}"
 
   [ -n "$OCF_RESKEY_START_WAITTIME" ] || export OCF_RESKEY_START_WAITTIME=3600
 
   [ -n "$OCF_RESKEY_MONITOR_SERVICES" ] || export OCF_RESKEY_MONITOR_SERVICES="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart"
 
   # as root user we need the library path to the SAP kernel to be able to call sapcontrol
   if [ $(echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>") -eq 0 ]; then
     export LD_LIBRARY_PATH="$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH"
   fi
 
   return $OCF_SUCCESS
 }
 
 
 #
 # check_sapstartsrv : Before using sapcontrol we make sure that the sapstartsrv is running for the correct instance.
 #                     We cannot use sapinit and the /usr/sap/sapservices file in case of an enquerep instance,
 #                     because then we have two instances with the same instance number.
 #
 #                     sapstartsrv is (re)started when its unix domain socket is missing, when sapcontrol
 #                     cannot query it, when it serves a different instance name or when the
 #                     'AccessCheck Start' call fails.
 #
 #                     Returns $OCF_SUCCESS if no restart was needed or the restart worked, otherwise
 #                     $OCF_ERR_GENERIC ($OCF_NOT_RUNNING during a probe).
 #
 check_sapstartsrv() {
   local restart=0
   local runninginst=""
   local chkrc=$OCF_SUCCESS
   local output=""
 
   # the socket file is the first indicator for a running sapstartsrv
   if [ ! -S /tmp/.sapstream5${InstanceNr}13 ]; then
     ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName (no UDS), it will be started now"
     restart=1
   else
     # ask the running service which instance it belongs to
     output=`$SAPCONTROL -nr $InstanceNr -function ParameterValue INSTANCE_NAME -format script`
     if [ $? -eq 0 ]
     then
       # the instance name is the third blank separated field of the line starting with '0 : '
       runninginst=`echo "$output" | grep '^0 : ' | cut -d' ' -f3`
       if [ "$runninginst" != "$InstanceName" ]
       then 
         ocf_log warn "sapstartsrv is running for instance $runninginst, that service will be killed"
         restart=1
       else
         output=`$SAPCONTROL -nr $InstanceNr -function AccessCheck Start`
         if [ $? -ne 0 ]; then
           ocf_log warn "FAILED : sapcontrol -nr $InstanceNr -function AccessCheck Start (`ls -ld1 /tmp/.sapstream5${InstanceNr}13`)"
           ocf_log warn "sapstartsrv will be restarted to try to solve this situation, otherwise please check sapstsartsrv setup (SAP Note 927637)"
           restart=1
         fi
       fi
     else
       ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName, it will be started now"
       restart=1
     fi
   fi
 
   # without a detected name, the pkill/pgrep patterns below use our own instance name
   if [ -z "$runninginst" ]; then runninginst=$InstanceName; fi
 
   if [ $restart -eq 1 ]
   then
 
     # NOTE(review): this overwrites DIR_PROFILE with the default path even if the
     # DIR_PROFILE parameter was set - confirm that this is intended
     if [ -d /usr/sap/$SID/SYS/profile/ ]
     then
       DIR_PROFILE="/usr/sap/$SID/SYS/profile"
     else
       abnormal_end "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!"
     fi
 
     [ ! -r $SAPSTARTPROFILE ] && abnormal_end "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!"
 
     # kill every process whose command line matches sapstartsrv.*<instance name>
     pkill -9 -f "sapstartsrv.*$runninginst"
 
     # removing the unix domain socket files as they might have wrong permissions
     # or ownership - they will be recreated by sapstartsrv during next start
     rm -f /tmp/.sapstream5${InstanceNr}13
     rm -f /tmp/.sapstream5${InstanceNr}14
 
     $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm
 
     # now make sure the daemon has been started and is able to respond
     # (poll once per second while sapcontrol returns 1 and a matching process exists)
     local srvrc=1
     while [ $srvrc -eq 1 -a `pgrep -f "sapstartsrv.*$runninginst" | wc -l` -gt 0 ]
     do
       sleep 1
       $SAPCONTROL -nr $InstanceNr -function GetProcessList > /dev/null 2>&1
       srvrc=$?
     done
 
     # any return code other than 1 is taken as a responding service
     if [ $srvrc -ne 1 ]
     then
       ocf_log info "sapstartsrv for instance $SID-$InstanceName was restarted !"
       chkrc=$OCF_SUCCESS
     else
       ocf_log error "sapstartsrv for instance $SID-$InstanceName could not be started!"
       chkrc=$OCF_ERR_GENERIC
       ocf_is_probe && chkrc=$OCF_NOT_RUNNING
     fi
   fi
 
   return $chkrc
 }
 
 
 #
 # sapuserexit : Hook for site-specific extensions. Many SAP landscapes need
 #               additional processes or tools which a generic resource agent
 #               cannot know about. Instead of writing a separate agent for
 #               each of them, a customer script can be configured and is
 #               invoked here.
 #
 #               $1 - name of the user exit attribute (used in log messages)
 #               $2 - path of the script or program to run; empty means
 #                    that no user exit is configured
 #
 #               The script's output is discarded and its return code is only
 #               logged; the function always returns 0.
 #
 sapuserexit() {
   local NAME="$1"
   local VALUE="$2"
 
   # no user exit configured
   [ -n "$VALUE" ] || return 0
 
   if ! have_binary "$VALUE"
   then
     ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable"
     return 0
   fi
 
   ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}"
   "$VALUE" >/dev/null 2>&1
   # $? is expanded before ocf_log runs, so it still holds the script's status
   ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?"
   return 0
 }
 
 
 #
 # cleanup_instance : remove resources (processes, shared memory and leftover
 #                    status files) of a crashed instance. Always returns 0.
 #
 cleanup_instance() {
   local leftover
 
   pkill -9 -f -U $sidadm $InstanceName
   ocf_log info "Terminated instance using 'pkill -9 -f -U $sidadm $InstanceName'"
 
   # cleanipc has to be called as user sidadm if the system has 'vmcj/enable = ON' set,
   # otherwise the SHM segments in /dev/shm/SAP_ES2* cannot be removed
   su - $sidadm -c "cleanipc $InstanceNr remove"
   ocf_log info "Tried to remove shared memory resources using 'cleanipc $InstanceNr remove' as user $sidadm"
 
   # files below the instance directory which are deleted as part of the cleanup
   for leftover in work/kill.sap work/shutdown.sap data/rslgcpid data/rslgspid
   do
     ocf_run rm -fv /usr/sap/$SID/$InstanceName/$leftover
   done
 
   return 0
 }
 
 #
 # sapinstance_start : Start the SAP instance
 #
 #                     Runs the PRE_START user exit, makes sure sapstartsrv is available, issues the
 #                     Start call and waits for the instance with WaitforStarted. With
 #                     AUTOMATIC_RECOVER enabled, a failed first attempt is followed by
 #                     cleanup_instance and one more attempt.
 #
 #                     Returns $OCF_SUCCESS if the instance is considered started, $OCF_ERR_GENERIC
 #                     if sapstartsrv or the Start call failed, $OCF_NOT_RUNNING otherwise.
 #
 sapinstance_start() {
 
   sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT"
 
   local rc=$OCF_NOT_RUNNING
   local output=""
   local loopcount=0
 
   # at most two start attempts; setting loopcount=2 below ends the loop early
   while [ $loopcount -lt 2 ]
   do
     loopcount=$(($loopcount + 1))
 
     check_sapstartsrv
     rc=$?
     if [ $rc -eq $OCF_SUCCESS ]; then
       output=`$SAPCONTROL -nr $InstanceNr -function Start`
       rc=$?
       ocf_log info "Starting SAP Instance $SID-$InstanceName: $output"
     fi
 
     # either check_sapstartsrv or the Start call failed
     if [ $rc -ne 0 ]
     then
       ocf_log err "SAP Instance $SID-$InstanceName start failed."
       return $OCF_ERR_GENERIC
     fi
 
     # startrc: >0 keep waiting, 0 started, -1 this attempt failed
     local startrc=1
     while [ $startrc -gt 0 ]
     do
       local waittime_start=`date +%s`
       output=`$SAPCONTROL -nr $InstanceNr -function WaitforStarted $OCF_RESKEY_START_WAITTIME 10`
       startrc=$?
       local waittime_stop=`date +%s`
 
       if [ $startrc -ne 0 ]
       then
         # WaitforStarted failed: tell a timeout (START_WAITTIME used up) from an earlier failure
         if [ $(($waittime_stop - $waittime_start)) -ge $OCF_RESKEY_START_WAITTIME ]
         then
           # timeout: a successful monitor is good enough; otherwise startrc stays
           # greater than zero and WaitforStarted is called again
           sapinstance_monitor NOLOG
           if [ $? -eq $OCF_SUCCESS ]
           then
             output="START_WAITTIME ($OCF_RESKEY_START_WAITTIME) has elapsed, but instance monitor returned SUCCESS. Instance considered running."
             startrc=0; loopcount=2
           fi
         else
           # failed before the wait time elapsed: recover once if allowed, otherwise give up
           if [ $loopcount -eq 1 ] && ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER
           then
             ocf_log warn "SAP Instance $SID-$InstanceName start failed: $output"
             ocf_log warn "Try to recover $SID-$InstanceName"
             cleanup_instance
           else
             loopcount=2
           fi
           # leave the wait loop while keeping the failure state
           startrc=-1
         fi
       else
         loopcount=2
       fi
     done
   done
 
   # the POST_START user exit only runs after a successful start
   if [ $startrc -eq 0 ]
   then
     ocf_log info "SAP Instance $SID-$InstanceName started: $output"
     rc=$OCF_SUCCESS
     sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT"
   else
     ocf_log err "SAP Instance $SID-$InstanceName start failed: $output"
     rc=$OCF_NOT_RUNNING
   fi
 
   return $rc
 }
 
 
 #
 # sapinstance_recover: Clean up the leftovers of a failed instance, then start it again.
 #                      The result is the return code of sapinstance_start.
 #
 sapinstance_recover() {
   cleanup_instance
   # last command: its status becomes the function's return code
   sapinstance_start
 }
 
 
 #
 # sapinstance_stop: Stop the SAP instance
 #
 #                   Runs the PRE_STOP user exit first. With SHUTDOWN_METHOD=KILL the instance
 #                   is removed with cleanup_instance and $OCF_SUCCESS is returned right away
 #                   (the POST_STOP user exit is not run on that path). Otherwise the instance
 #                   is stopped through sapcontrol and the POST_STOP user exit runs afterwards.
 #
 #                   Returns $OCF_SUCCESS if the instance stopped, $OCF_ERR_GENERIC otherwise.
 #
 sapinstance_stop() {
   local output=""
   local rc
 
   sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"
 
   case "$OCF_RESKEY_SHUTDOWN_METHOD" in
     KILL)
       ocf_log info "Stopping SAP Instance $SID-$InstanceName with shutdown method KILL!"
       cleanup_instance
       return $OCF_SUCCESS
       ;;
   esac
 
   check_sapstartsrv
   rc=$?
   if [ $rc -eq $OCF_SUCCESS ]; then
     output=$($SAPCONTROL -nr $InstanceNr -function Stop)
     rc=$?
     ocf_log info "Stopping SAP Instance $SID-$InstanceName: $output"
   fi
 
   if [ $rc -ne 0 ]
   then
     # sapstartsrv is not available or the Stop call itself failed
     ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
     rc=$OCF_ERR_GENERIC
   elif output=$($SAPCONTROL -nr $InstanceNr -function WaitforStopped 3600 1)
   then
     ocf_log info "SAP Instance $SID-$InstanceName stopped: $output"
     rc=$OCF_SUCCESS
   else
     ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
     rc=$OCF_ERR_GENERIC
   fi
 
   sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"
 
   return $rc
 }
 
 
 #
 # sapinstance_monitor: Can the given SAP instance do anything useful?
 #
 sapinstance_monitor() {
   local MONLOG=$1
   local rc
 
   check_sapstartsrv
   rc=$?
 
   if [ $rc -eq $OCF_SUCCESS ]
   then
     local count=0
     local SERVNO
     local output
 
     output=`$SAPCONTROL -nr $InstanceNr -function GetProcessList -format script`
 
     # we have to parse the output, because the returncode doesn't tell anything about the instance status
     for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u`
     do
       local COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3`
       local SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3`
       local STATE=0
       local SEARCH
 
       case $COLOR in
         GREEN|YELLOW)       STATE=$OCF_SUCCESS;;
         *)                  STATE=$OCF_NOT_RUNNING;;
       esac 
 
       SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'`
       if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ]
       then
           if [ $STATE -eq $OCF_NOT_RUNNING ]
           then
             [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !"
             rc=$STATE
           fi
           count=1
       fi
     done
 
     if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ]
     then
       if ocf_is_probe
       then
         rc=$OCF_NOT_RUNNING
       else
         [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!"
         rc=$OCF_ERR_GENERIC
       fi
     fi
   fi
  
   return $rc
 }
 
 
 #
 # sapinstance_status: Lightweight check of SAP instance only with OS tools
 #
 sapinstance_status() {
   local pid
   local pids
 
   [ ! -f "/usr/sap/$SID/$InstanceName/work/kill.sap" ] && return $OCF_NOT_RUNNING
   pids=`grep '^kill -[0-9]' /usr/sap/$SID/$InstanceName/work/kill.sap | awk '{print $3}'`
   for pid in $pids
   do
     [ `pgrep -f -U $sidadm $InstanceName | grep -c $pid` -gt 0 ] && return $OCF_SUCCESS
   done
   return $OCF_NOT_RUNNING
 }
 
 
 #
 # sapinstance_validate: Check the symantic of the input parameters 
 #
 sapinstance_validate() {
   local rc=$OCF_SUCCESS
   if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ]
   then
     ocf_log err "Parsing instance profile name: '$SID' is not a valid system ID!"
     rc=$OCF_ERR_ARGS
   fi
 
   if [ `echo "$InstanceName" | grep -c '^[A-Z].*[0-9][0-9]$'` -ne 1 ]
   then
     ocf_log err "Parsing instance profile name: '$InstanceName' is not a valid instance name!"
     rc=$OCF_ERR_ARGS
   fi
 
   if [ `echo "$InstanceNr" | grep -c '^[0-9][0-9]$'` -ne 1 ]
   then
     ocf_log err "Parsing instance profile name: '$InstanceNr' is not a valid instance number!"
     rc=$OCF_ERR_ARGS
   fi
 
   if [ `echo "$SAPVIRHOST" | grep -c '^[A-Za-z][A-Za-z0-9_-]*$'` -ne 1 ]
   then
     ocf_log err "Parsing instance profile name: '$SAPVIRHOST' is not a valid hostname!"
     rc=$OCF_ERR_ARGS
   fi
 
   return $rc
 }
 
 
 #
 # sapinstance_start_clone
 #
 sapinstance_start_clone() {
   sapinstance_init $OCF_RESKEY_ERS_InstanceName
   ${HA_SBIN_DIR}/crm_master -v 50 -l reboot
   sapinstance_start
   return $?
 }
 
 
 #
 # sapinstance_stop_clone
 #
 sapinstance_stop_clone() {
   sapinstance_init $OCF_RESKEY_ERS_InstanceName
   ${HA_SBIN_DIR}/crm_master -v 0 -l reboot
   sapinstance_stop
   return $?
 }
 
 
 #
 # sapinstance_monitor_clone
 #
 sapinstance_monitor_clone() {
   # first check with the status function (OS tools) if there could be something like a SAP instance running
   # as we do not know here, if we are in master or slave state we do not want to start our monitoring
   # agents (sapstartsrv) on the wrong host
   local rc
 
   sapinstance_init $OCF_RESKEY_InstanceName
   if sapinstance_status; then
     if sapinstance_monitor; then
       ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
       return $OCF_RUNNING_MASTER
     fi
     # by nature of the SAP enqueue server we have to make sure
     # that we do a failover to the slave (enqueue replication server)
     # in case the enqueue process has failed. We signal this to the
     # cluster by setting our master preference to a lower value than the slave.
     ${HA_SBIN_DIR}/crm_master -v 10 -l reboot
     return $OCF_FAILED_MASTER
   fi
 
   sapinstance_init $OCF_RESKEY_ERS_InstanceName
   sapinstance_status && sapinstance_monitor
   rc=$?
   if [ $rc -eq $OCF_SUCCESS ]; then
     ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
   fi
   return $rc
 }
 
 
 #
 # sapinstance_promote_clone: In a Master/Slave configuration get Master by starting the SCS instance and stopping the ERS instance
 #                            The order is important here to behave correct from the application levels view
 #
 sapinstance_promote_clone() {
   local rc
 
   sapinstance_init $OCF_RESKEY_InstanceName
   ocf_log info "Promoting $SID-$InstanceName to running Master."
   sapinstance_start
   rc=$?
 
   if [ $rc -eq $OCF_SUCCESS ]; then
     sapinstance_init $OCF_RESKEY_ERS_InstanceName
     sapinstance_stop
     rc=$?
   fi
 
   return $rc
 }
 
 
 #
 # sapinstance_demote_clone: In a Master/Slave configuration get Slave by stopping the SCS instance and starting the ERS instance
 #
 sapinstance_demote_clone() {
   local rc
 
   sapinstance_init $OCF_RESKEY_InstanceName
   ocf_log info "Demoting $SID-$InstanceName to a slave."
   sapinstance_stop
   rc=$?
 
   if [ $rc -eq $OCF_SUCCESS ]; then
     sapinstance_init $OCF_RESKEY_ERS_InstanceName
     sapinstance_start
     rc=$?
   fi
 
   return $rc
 }
 
 
 #
 # sapinstance_notify: Handle master scoring - to make sure a slave gets the next master
 #
 sapinstance_notify() {
   local n_type="$OCF_RESKEY_CRM_meta_notify_type"
   local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
 
   if [ "${n_type}_${n_op}" = "post_promote" ]; then
     # After promotion of one master in the cluster, we make sure that all clones reset their master
     # value back to 100. This is because a failed monitor on a master might have degree one clone
     # instance to score 10.
     ${HA_SBIN_DIR}/crm_master -v 100 -l reboot
   elif [ "${n_type}_${n_op}" = "pre_demote" ]; then
     # if we are a slave and a demote event is anounced, make sure we have the highes wish to became master
     # that is, when a slave resource was startet after the promote event of a already running master (e.g. node of slave was down)
     # We also have to make sure to overrule the globaly set resource_stickiness or any fail-count factors => INFINITY
     local n_uname="$OCF_RESKEY_CRM_meta_notify_demote_uname"
     if [ ${n_uname} != ${NODENAME} ]; then
       ${HA_SBIN_DIR}/crm_master -v INFINITY -l reboot
     fi
   fi
 }
 
 
 #
 #	'main' starts here...
 #
 
 ## GLOBALS
 SID=""
 sidadm=""
 InstanceName=""
 InstanceNr=""
 SAPVIRHOST=""
 DIR_EXECUTABLE=""
 SAPSTARTSRV=""
 SAPCONTROL=""
 DIR_PROFILE=""
 SAPSTARTPROFILE=""
 CLONE=0
 NODENAME=$(ocf_local_nodename)
 
 
 if
   ( [ $# -ne 1 ] )
 then
   sapinstance_usage
   exit $OCF_ERR_ARGS
 fi
 
 ACTION=$1
 if [ "$ACTION" = "status" ]; then
   ACTION=monitor
 fi
 
 # These operations don't require OCF instance parameters to be set
 case "$ACTION" in
   usage|methods)                sapinstance_$ACTION
                                 exit $OCF_SUCCESS;;
   meta-data)                    sapinstance_meta_data
                                 exit $OCF_SUCCESS;;
   notify)                       sapinstance_notify
                                 exit $OCF_SUCCESS;;
   *);;
 esac
 
 if ! ocf_is_root
 then
   ocf_log err "$0 must be run as root"
   exit $OCF_ERR_PERM
 fi
 
 # parameter check
 if  [ -z "$OCF_RESKEY_InstanceName" ]
 then
   ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!"
   exit $OCF_ERR_ARGS
 fi
 
 is_clone; CLONE=$?
 if [ ${CLONE} -eq 1 ]
 then
   CLACT=_clone
 else
   if [ "$ACTION" = "promote" -o "$ACTION" = "demote" ]
   then
     ocf_log err "$ACTION called in a non master/slave environment"
     exit $OCF_ERR_ARGS
   fi
   sapinstance_init $OCF_RESKEY_InstanceName
 fi
 
 # What kind of method was invoked?
 case "$ACTION" in
   start|stop|monitor|promote|demote)      sapinstance_$ACTION$CLACT
                                           exit $?;;
   validate-all)                           sapinstance_validate
                                           exit $?;;
   *)                                      sapinstance_methods
                                           exit $OCF_ERR_UNIMPLEMENTED;;
 esac
diff --git a/heartbeat/apache b/heartbeat/apache
index 09d5ded49..040da6d1a 100755
--- a/heartbeat/apache
+++ b/heartbeat/apache
@@ -1,656 +1,698 @@
 #!/bin/sh
 #
 #	High-Availability Apache/IBMhttp control script
 # 
 # apache (aka IBMhttpd)
 #
 # Description:	starts/stops apache web servers.
 #
 # Author:	Alan Robertson
 #		Sun Jiang Dong
 #
 # Support:	linux-ha@lists.linux-ha.org
 #
 # License:	GNU General Public License (GPL)
 #
 # Copyright:	(C) 2002-2005 International Business Machines
 #
 #
 # An example usage in /etc/ha.d/haresources: 
 #	 node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf
 #	 node1 10.0.0.170 IBMhttpd
 #
 # Our parsing of the Apache config files is very rudimentary.
 # It'll work with lots of different configurations - but not every
 # possible configuration.
 #
 # Patches are being accepted ;-)
 #
 # OCF parameters:
 #  OCF_RESKEY_configfile
 #  OCF_RESKEY_httpd
 #  OCF_RESKEY_port
 #  OCF_RESKEY_statusurl
 #  OCF_RESKEY_options
 #  OCF_RESKEY_testregex
 #  OCF_RESKEY_client
 #  OCF_RESKEY_testurl
 #  OCF_RESKEY_testregex10
 #  OCF_RESKEY_testconffile
 #  OCF_RESKEY_testname
 #  OCF_RESKEY_envfiles
 
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 . ${OCF_FUNCTIONS_DIR}/apache-conf.sh
 . ${OCF_FUNCTIONS_DIR}/http-mon.sh
 HA_VARRUNDIR=${HA_VARRUN}
 
 #######################################################################
 #
 #	Configuration options - usually you don't need to change these
 #
 #######################################################################
 #
 IBMHTTPD=/opt/IBMHTTPServer/bin/httpd
 HTTPDLIST="/sbin/httpd2 /usr/sbin/httpd2 /usr/sbin/apache2 /sbin/httpd /usr/sbin/httpd /usr/sbin/apache $IBMHTTPD"
 MPM=/usr/share/apache2/find_mpm
 if [ -x $MPM ]; then
 	HTTPDLIST="$HTTPDLIST `$MPM 2>/dev/null`"
 fi
 
 LOCALHOST="http://localhost"
 HTTPDOPTS="-DSTATUS"
 DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf
 DEFAULT_SUSECONFIG="/etc/apache2/httpd.conf"
 DEFAULT_RHELCONFIG="/etc/httpd/conf/httpd.conf"
+DEFAULT_DEBIANCONFIG="/etc/apache2/apache2.conf"
 #
 # You can also set
 #	HTTPD
 #	PORT
 #	STATUSURL
 #	CONFIGFILE
 # in this section if what we're doing doesn't work for you...
 #
 #	End of Configuration options
 #######################################################################
 
 CMD=`basename $0`
 
 #	The config-file-pathname is the pathname to the configuration
 #	file for this web server. Various appropriate defaults are
 #	assumed if no config file is specified. If this command is
 #	invoked as *IBM*, then the default config file name is
 #	$DEFAULT_IBMCONFIG, otherwise the default config file
 #	will be either $DEFAULT_RHELCONFIG or $DEFAULT_SUSECONFIG depending
 #	on which is detected.
 usage() {
 cat <<-END
 usage: $0 action
 
 action:
 	start	start the web server
 
 	stop	stop the web server
 
 	status	return the status of web server, run or down
 
 	monitor	return TRUE if the web server appears to be working.
 		For this to be supported you must configure mod_status
 		and give it a server-status URL. You have to have 
 		installed either curl or wget for this to work.
 
 	meta-data	show meta data message
 
 	validate-all	validate the instance parameters
 END
 }
 
 get_pid() {
 	if [ -f $PidFile ]; then
 		cat $PidFile
 	else
 		false
 	fi
 }
 #
 # return TRUE if a process with given PID is running
 #
 ProcessRunning() {
 	local pid=$1
 
 	# Use /proc if it looks like it's here...
 	if [ -d /proc -a -d /proc/1 ]; then
 		[ -d /proc/$pid ]
 	else
 		# This assumes we're running as root...
 		kill -s 0 "$pid" >/dev/null 2>&1
 	fi
 }
 silent_status() {
 	local pid
 
 	pid=`get_pid`
 	if [ -n "$pid" ]; then
 		ProcessRunning $pid
 	else
 		: No pid file
 		false
 	fi
 }
 
 # May be useful to add other distros in future
 validate_default_config() {
 	if [ -e /etc/SuSE-release ]; then
 		validate_default_suse_config
+	elif [ -e /etc/debian_version ]; then
+		validate_default_debian_config
 	else
 		return 0
 	fi
 }
 
 # When using the default /etc/apache2/httpd.conf on SUSE, the file
 # /etc/apache2/sysconfig.d/include.conf is required to be present,
 # but this is only generated if you run the apache init script
 # (with contents derived from /etc/sysconfig/apache2).  So, here,
 # if we're using the default system config file and it requires
 # that include, we run "/etc/init.d/apache2 configtest" to ensure
 # the relevant config is generated and valid.  We're also taking
 # this opportunity to enable mod_status if it's not present.
 validate_default_suse_config() {
 	if [ "$CONFIGFILE" = "$DEFAULT_SUSECONFIG" ] && \
 		grep -Eq '^Include[[:space:]]+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE"
 	then
 		[ -x "/usr/sbin/a2enmod" ] && ocf_run -q /usr/sbin/a2enmod status
 		# init script style, for crusty old SUSE
 		if [ -e "/etc/init.d/apache2" ]; then
 			ocf_run -q /etc/init.d/apache2 configtest || return 1
 		# systemd style, for shiny new SUSE
 		elif [ -e "/usr/sbin/start_apache2" ]; then
 			ocf_run -q /usr/sbin/start_apache2 -t || return 1
 		fi
 	fi
 	return 0
 }
 
+# Debian's default configuration uses a lock directory, /var/lock/apache2,
+# which is only created when the LSB init script runs a configtest. So run
+# a configtest before startup to make sure those directories exist. This is
+# done only if CONFIGFILE is exactly one of the /etc/apache2*/apache2.conf files.
+#
+# To support multiple apache instances the Debian scripts and configs
+# obey apache2/envvars: copy /etc/apache2 -> /etc/apache2-instance,
+# adjust (SUFFIX) in envvars, and set OCF_RESKEY_envfiles.
+validate_default_debian_config() {
+	if find /etc/apache2* -name apache2.conf | grep -Fxq -- "$CONFIGFILE"
+	then
+		export APACHE_CONFDIR="$(dirname "$CONFIGFILE")"
+		[ -x "/usr/sbin/a2enmod" ] && ocf_run -q /usr/sbin/a2enmod status
+		ocf_run -q /usr/sbin/apache2ctl configtest || return 1
+	fi
+	return 0
+}
+
 apache_start() {
 	if
 		silent_status
 	then
 		ocf_log info "$CMD already running (pid `get_pid`)"
 		return $OCF_SUCCESS
 	fi
 
 	validate_default_config || return $OCF_ERR_CONFIGURED
-	# https://bugs.launchpad.net/ubuntu/+source/apache2/+bug/603211
-	[ -d /var/run/apache2 ] || mkdir /var/run/apache2
 
 	if [ -z $PIDFILE_DIRECTIVE ]; then
 		ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE
 	else
 		ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE -c "PidFile $PidFile"
 	fi
 
 	tries=0
 	while : # wait until the user set timeout
 	do
 		apache_monitor
 		ec=$?
 		if [ $ec -eq $OCF_NOT_RUNNING ]
 		then
 			tries=`expr $tries + 1`
 			ocf_log info "waiting for apache $CONFIGFILE to come up"
 			sleep 1
 		else
 			break
 		fi
 	done
 
 	if [ $ec -ne 0 ] && silent_status; then
 		apache_stop
 	fi
 	return $ec
 }
 
 signal_children()
 {
 	for sig in SIGTERM SIGHUP SIGKILL ; do
 		if pgrep -f $HTTPD.*$CONFIGFILE >/dev/null ; then
 			pkill -$sig -f $HTTPD.*$CONFIGFILE >/dev/null
 			ocf_log info "signal $sig sent to apache children"
 			sleep 1
 		else
 			break
 		fi
 	done
 }
 
 graceful_stop()
 {
 	local tries=10
 	local pid=$1
 
 	# Try graceful stop for half timeout period if timeout period is present
 	if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
 		tries=$((($OCF_RESKEY_CRM_meta_timeout/1000) / 2))
 	fi
 
 	ocf_log info "Attempting graceful stop of apache PID $pid"
 	kill -WINCH $pid >/dev/null
 	while
 		ProcessRunning $pid &&
 		[ $tries -gt 0 ]
 	do
 		sleep 1
 		tries=`expr $tries - 1`
 	done
 
 	if [ $tries -eq 0 ]; then
 		# graceful stop didn't work, process still up.
 		return 1
 	fi
 
 	return 0
 }
 
 kill_stop()
 {
 	local tries=0
 	local pid=$1
 
 	ocf_log info "Killing apache PID $pid"
 	while
 		ProcessRunning $pid &&
 		[ $tries -lt 10 ]
 	do
 		if [ $tries -ne 0 ]; then
 			# don't sleep on the first try
 			sleep 1
 		fi
 		kill $pid >/dev/null 
 		tries=`expr $tries + 1`
 	done
 }
 
 apache_stop() {
 	local ret=$OCF_SUCCESS
 	local pid
 
 	if ! silent_status; then
 		ocf_log info "$CMD is not running."
 		signal_children
 		return $ret
 	fi
 
 	pid=`get_pid`
 	graceful_stop $pid
 	if [ $? -ne 0 ]; then
 		kill_stop $pid
 
 		if ProcessRunning $pid; then
 			ocf_exit_reason "$CMD still running ($pid). Killing pid failed."
 			ret=$OCF_ERR_GENERIC
 		fi
 	fi
 
 	if [ $ret -eq 0 ]; then
 		ocf_log info "$CMD stopped."
 	fi
 
 	signal_children
 	return $ret
 }
 
 apache_monitor_10() {
-	if [ "$TESTCONFFILE" ]; then
+	if [ -f "$TESTCONFFILE" ] && [ -r "$TESTCONFFILE" ]; then
 		readtestconf < $TESTCONFFILE
 	else
 		test_url="$TESTURL"
 		test_regex="$TESTREGEX10"
 	fi
 
 	whattorun=`gethttpclient`
 	fixtesturl
 	is_testconf_sane ||
 		return $OCF_ERR_CONFIGURED
 
 	if $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null
 	then
 		return $OCF_SUCCESS
 	else
 		if ! ocf_is_probe; then
 			ocf_exit_reason "Failed to access httpd status page."
 		fi
 		return $OCF_ERR_GENERIC
 	fi
 }
 
 # If the user has not provided any basic monitoring 
 # information, allow the agent to verify the server is
 # healthy and capable of processing requests by requesting
 # the http header of website's index 
 attempt_index_monitor_request() {
 	local indexpage=""
 	
 	if [ -n "$OCF_RESKEY_testregex" ]; then
 		return 1;
 	fi
 	if [ -n "$OCF_RESKEY_testregex10" ]; then
 		return 1;
 	fi
 	if [ -n "$OCF_RESKEY_testurl" ]; then
 		return 1;
 	fi
 	if [ -n "$OCF_RESKEY_statusurl" ]; then
 		return 1;
 	fi
 	if [ -n "$OCF_RESKEY_testconffile" ]; then
 		return 1;
 	fi
 
 	indexpage=$(buildlocalurl)
 
 	request_url_header $indexpage
 	if [ $? -ne 0 ]; then
 		return $OCF_ERR_GENERIC
 	fi
 	ocf_log info "Successfully retrieved http header at $indexpage" 
 	return 0
 }
 
 apache_monitor_basic() {
 	if ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null
 	then
 		return $OCF_SUCCESS
 	fi
 
 	attempt_index_monitor_request
 	if [ $? -eq 0 ]; then
 		return $OCF_SUCCESS
 	fi
 
 	if ! ocf_is_probe; then
 		ocf_exit_reason "Failed to access httpd status page."
 	fi
 	return $OCF_ERR_GENERIC
 }
 apache_monitor() {
 	silent_status
 	if [ $? -ne 0 ]; then
 		ocf_log info "$CMD not running"
 		return $OCF_NOT_RUNNING
 	fi
 
 	ourhttpclient=`findhttpclient`  # we'll need one
 	if [ -z "$ourhttpclient" ]; then
 		ocf_exit_reason "could not find a http client; make sure that either wget or curl is available"
 		return $OCF_ERR_INSTALLED
 	fi
 
 	case `ocf_check_level 10` in
 		0) apache_monitor_basic;;
 		10) apache_monitor_10;;
 	esac
 }
 
 detect_default_config()
 {
 	if [ -f $DEFAULT_SUSECONFIG ]; then
 		echo $DEFAULT_SUSECONFIG
+	elif [ -f $DEFAULT_DEBIANCONFIG ]; then
+		echo $DEFAULT_DEBIANCONFIG
 	else
 		echo $DEFAULT_RHELCONFIG
 	fi
 }
 
 
 apache_meta_data(){
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="apache">
 <version>1.0</version>
 
 <longdesc lang="en">
 This is the resource agent for the Apache Web server.
 This resource agent operates both version 1.x and version 2.x Apache
 servers.
 
 The start operation ends with a loop in which monitor is
 repeatedly called to make sure that the server started and that
 it is operational. Hence, if the monitor operation does not
 succeed within the start operation timeout, the apache resource
 will end with an error status.
 
 The monitor operation by default loads the server status page
 which depends on the mod_status module and the corresponding
 configuration file (usually /etc/apache2/mod_status.conf).
 Make sure that the server status page works and that the access
 is allowed *only* from localhost (address 127.0.0.1).
 See the statusurl and testregex attributes for more details.
 
 See also http://httpd.apache.org/
 </longdesc>
 <shortdesc lang="en">Manages an Apache Web server instance</shortdesc>
 
 <parameters>
 <parameter name="configfile" required="0" unique="1">
 <longdesc lang="en">
 The full pathname of the Apache configuration file.
 This file is parsed to provide defaults for various other
 resource agent parameters.
 </longdesc>
 <shortdesc lang="en">configuration file path</shortdesc>
 <content type="string" default="$(detect_default_config)" />
 </parameter>
 
 <parameter name="httpd">
 <longdesc lang="en">
 The full pathname of the httpd binary (optional).
 </longdesc>
 <shortdesc lang="en">httpd binary path</shortdesc>
 <content type="string" default="/usr/sbin/httpd" />
 </parameter>
 
 <parameter name="port" >
 <longdesc lang="en">
 A port number that we can probe for status information
 using the statusurl.
 This will default to the port number found in the
 configuration file, or 80, if none can be found
 in the configuration file.
 
 </longdesc>
 <shortdesc lang="en">httpd port</shortdesc>
 <content type="integer" />
 </parameter>
 
 <parameter name="statusurl">
 <longdesc lang="en">
 The URL to monitor (the apache server status page by default).
 If left unspecified, it will be inferred from
 the apache configuration file.
 
 If you set this, make sure that it succeeds *only* from the
 localhost (127.0.0.1). Otherwise, it may happen that the cluster
 complains about the resource being active on multiple nodes.
 </longdesc>
 <shortdesc lang="en">url name</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="testregex">
 <longdesc lang="en">
 Regular expression to match in the output of statusurl.
 Case insensitive.
 </longdesc>
 <shortdesc lang="en">monitor regular expression</shortdesc>
 <content type="string" default="exists, but impossible to show in a human readable format (try grep testregex)"/>
 </parameter>
 
 <parameter name="client">
 <longdesc lang="en">
 Client to use to query to Apache. If not specified, the RA will
 try to find one on the system. Currently, wget and curl are
 supported. For example, you can set this parameter to "curl" if
 you prefer that to wget.
 </longdesc>
 <shortdesc lang="en">http client</shortdesc>
 <content type="string" default=""/>
 </parameter>
 
 <parameter name="testurl">
 <longdesc lang="en">
 URL to test. If it does not start with "http", then it's
 considered to be relative to the Listen address.
 </longdesc>
 <shortdesc lang="en">test url</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="testregex10">
 <longdesc lang="en">
 Regular expression to match in the output of testurl.
 Case insensitive.
 </longdesc>
 <shortdesc lang="en">extended monitor regular expression</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="testconffile">
 <longdesc lang="en">
 A file which contains test configuration. Could be useful if
 you have to check more than one web application or in case sensitive
 info should be passed as arguments (passwords). Furthermore,
 using a config file is the only way to specify certain
 parameters.
 
 Please see README.webapps for examples and file description.
 </longdesc>
 <shortdesc lang="en">test configuration file</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="testname">
 <longdesc lang="en">
 Name of the test within the test configuration file.
 </longdesc>
 <shortdesc lang="en">test name</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="options">
 <longdesc lang="en">
 Extra options to apply when starting apache. See man httpd(8).
 </longdesc>
 <shortdesc lang="en">command line options</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="envfiles">
 <longdesc lang="en">
 Files (one or more) which contain extra environment variables.
 If you want to prevent script from reading the default file, set
 this parameter to empty string.
 </longdesc>
 <shortdesc lang="en">environment settings files</shortdesc>
 <content type="string" default="/etc/apache2/envvars"/>
 </parameter>
 
 <parameter name="use_ipv6">
 <longdesc lang="en">
 We will try to detect if the URL (for monitor) is IPv6, but if
 that doesn't work set this to true to enforce IPv6.
 </longdesc>
 <shortdesc lang="en">use ipv6 with http clients</shortdesc>
 <content type="boolean" default="false"/>
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"   timeout="40s" />
 <action name="stop"    timeout="60s" />
 <action name="status"  timeout="30s" />
 <action name="monitor" depth="0"  timeout="20s" interval="10" />
 <action name="meta-data"  timeout="5" />
 <action name="validate-all"  timeout="5" />
 </actions>
 </resource-agent>
 END
 	return $OCF_SUCCESS
 }
 
 apache_validate_all() {
 	if [ -z "$HTTPD" ]; then
 		ocf_exit_reason "apache httpd program not found"
 		return $OCF_ERR_INSTALLED
 	fi
 	if [ ! -x "$HTTPD" ]; then
 		ocf_exit_reason "HTTPD $HTTPD not found or is not an executable!"
 		return $OCF_ERR_INSTALLED
 	fi
 	if [ ! -f $CONFIGFILE ]; then
 		ocf_exit_reason "Configuration file $CONFIGFILE not found!"
 		return $OCF_ERR_INSTALLED
 	fi
+
+	# validate testconffile/testurl up front, before apache_monitor_10() uses them
+	if [ -n "$TESTCONFFILE" ]; then
+		if [ ! -f "$TESTCONFFILE" ] || [ ! -r "$TESTCONFFILE" ]; then
+			ocf_exit_reason "Configuration file $TESTCONFFILE not found, or not readable."
+			return $OCF_ERR_INSTALLED
+		fi
+	else
+		if [ -n "$TESTURL" ]; then
+			# strip leading/trailing blanks; '%s' keeps URL escapes such as %20 literal
+			local temp="$(printf '%s' "$TESTURL" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
+
+			if [ -z "$temp" ]; then
+				ocf_exit_reason "testurl: \"$TESTURL\" seems to be an empty string?"
+				return $OCF_ERR_CONFIGURED
+			fi
+		fi
+
+		# FIXME: TESTREGEX10 will need validating as well if an empty regex is not allowed.
+	fi
+
 	ocf_mkstatedir root 755 `dirname $PidFile` || return $OCF_ERR_INSTALLED
 	return $OCF_SUCCESS
 }
 
 find_httpd_prog() {
 	case $0 in
 		*IBM*)
 			HTTPD=$IBMHTTPD
 			DefaultConfig=$DEFAULT_IBMCONFIG;;
 		*)	
 			HTTPD=
 			for h in $HTTPDLIST
 			do
 				if [ -f $h -a -x $h ]; then
 					HTTPD=$h
 					break
 				fi
 			done
 
 			# Let the user know that the $HTTPD used is not the one (s)he specified via $OCF_RESKEY_httpd
 			if [ "X$OCF_RESKEY_httpd" != X -a "X$HTTPD" != X ]; then
 				ocf_log info "Using $HTTPD as HTTPD"
 			fi
 			DefaultConfig=$(detect_default_config)
 			;;
 	esac
 }
 
 apache_getconfig() {
 	# these variables are global
 	HTTPD="$OCF_RESKEY_httpd"
 	PORT="$OCF_RESKEY_port"
 	STATUSURL="$OCF_RESKEY_statusurl"
 	CONFIGFILE="$OCF_RESKEY_configfile"
 	OPTIONS="$OCF_RESKEY_options"
 	CLIENT=${OCF_RESKEY_client}
 	TESTREGEX=${OCF_RESKEY_testregex:-'</ *html *>'}
 	TESTURL="$OCF_RESKEY_testurl"
 	TESTREGEX10=${OCF_RESKEY_testregex10}
 	TESTCONFFILE="$OCF_RESKEY_testconffile"
 	TESTNAME="$OCF_RESKEY_testname"
 	: ${OCF_RESKEY_envfiles="/etc/apache2/envvars"}
 	source_envfiles $OCF_RESKEY_envfiles
 
 	if [ "X$HTTPD" = X -o ! -f "$HTTPD" -o ! -x "$HTTPD" ]; then
 		find_httpd_prog
 	fi
 
 	CONFIGFILE=${CONFIGFILE:-$DefaultConfig}
 	if [ -n "$HTTPD" ]; then
 		httpd_basename=`basename $HTTPD`
 		case $httpd_basename in
 			*-*)	httpd_basename=`echo "$httpd_basename" | sed -e 's%\-.*%%'`;;
 		esac
 	fi
 	GetParams $CONFIGFILE
 }
 
 OCF_REQUIRED_PARAMS=""
 OCF_REQUIRED_BINARIES=""
 ocf_rarun $*
diff --git a/heartbeat/awseip b/heartbeat/awseip
new file mode 100755
index 000000000..a1bee44f1
--- /dev/null
+++ b/heartbeat/awseip
@@ -0,0 +1,247 @@
+#!/bin/sh
+#
+#
+#    Manage Elastic IP with Pacemaker
+#
+#
+# Copyright 2016 guessi <guessi@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+
+#
+#  Prerequisites:
+#
+#  - preconfigured AWS CLI running environment (AccessKey, SecretAccessKey, etc.)
+#  - a reserved secondary private IP address for EC2 instances high availability
+#  - IAM user role with the following permissions:
+#    * DescribeInstances
+#    * AssociateAddress
+#    * DisassociateAddress
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+
+#
+# Defaults (the AWS CLI executable is named "aws")
+#
+OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_api_delay_default="1"
+
+: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
+: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
+# meta_data: print the OCF resource-agent metadata (XML) for awseip on stdout.
+meta_data() {
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="awseip" version="0.9">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource Agent for Amazon AWS Elastic IP Addresses. It associates an Elastic IP with the local EC2 instance using the AWS CLI.
+</longdesc>
+<shortdesc lang="en">Amazon AWS Elastic IP Address Resource Agent</shortdesc>
+
+<parameters>
+
+<parameter name="awscli" unique="0">
+<longdesc lang="en">
+command line tools for aws services
+</longdesc>
+<shortdesc lang="en">aws cli tools</shortdesc>
+<content type="string" default="${OCF_RESKEY_awscli_default}" />
+</parameter>
+
+<parameter name="elastic_ip" unique="1" required="1">
+<longdesc lang="en">
+reserved elastic ip for ec2 instance
+</longdesc>
+<shortdesc lang="en">reserved elastic ip for ec2 instance</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="allocation_id" unique="1" required="0">
+<longdesc lang="en">
+reserved allocation id for ec2 instance
+</longdesc>
+<shortdesc lang="en">reserved allocation id for ec2 instance</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="private_ip_address" unique="1" required="0">
+<longdesc lang="en">
+predefined private ip address for ec2 instance
+</longdesc>
+<shortdesc lang="en">predefined private ip address for ec2 instance</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="api_delay" unique="0">
+<longdesc lang="en">
+a short delay between API calls, to avoid sending API too quick
+</longdesc>
+<shortdesc lang="en">a short delay between API calls</shortdesc>
+<content type="integer" default="${OCF_RESKEY_api_delay_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start"        timeout="10" />
+<action name="stop"         timeout="10" />
+<action name="monitor"      timeout="10" interval="20" depth="0" />
+<action name="reload"       timeout="10" />
+<action name="migrate_to"   timeout="10" />
+<action name="migrate_from" timeout="10" />
+<action name="meta-data"    timeout="5" />
+<action name="validate"     timeout="10" />
+<action name="validate-all" timeout="10" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+# awseip_usage: print the list of actions this agent accepts.
+awseip_usage() {
+    cat <<END
+usage: $0 {start|stop|monitor|reload|migrate_to|migrate_from|validate|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+# awseip_start: associate the Elastic IP with this instance (no-op if awseip_monitor already succeeds).
+awseip_start() {
+    awseip_monitor && return $OCF_SUCCESS
+
+    if [ -n "${ALLOCATION_ID}" ] && [ -n "${PRIVATE_IP_ADDRESS}" ]; then
+        $AWSCLI ec2 associate-address \
+            --instance-id "${INSTANCE_ID}" \
+            --network-interface-id "${NETWORK_ID}" \
+            --allocation-id "${ALLOCATION_ID}" \
+            --private-ip-address "${PRIVATE_IP_ADDRESS}"
+    else
+        $AWSCLI ec2 associate-address \
+            --instance-id "${INSTANCE_ID}" \
+            --public-ip "${ELASTIC_IP}"
+    fi
+    RET=$?    # status of whichever associate-address call ran above
+
+    # delay to avoid sending request too fast
+    sleep ${OCF_RESKEY_api_delay}
+
+    if [ $RET -ne 0 ]; then
+        ocf_exit_reason "failed to associate elastic_ip (${ELASTIC_IP}) with instance ${INSTANCE_ID}"
+        return $OCF_NOT_RUNNING
+    fi
+    ocf_log info "elastic_ip have been successfully bring up (${ELASTIC_IP})"
+    return $OCF_SUCCESS
+}
+
+# awseip_stop: disassociate the Elastic IP (no-op if awseip_monitor reports it is not associated).
+awseip_stop() {
+    awseip_monitor || return $OCF_SUCCESS
+
+    $AWSCLI ec2 disassociate-address --public-ip "${ELASTIC_IP}"
+    RET=$?
+
+    # delay to avoid sending request too fast
+    sleep ${OCF_RESKEY_api_delay}
+
+    if [ $RET -ne 0 ]; then
+        ocf_exit_reason "failed to disassociate elastic_ip (${ELASTIC_IP})"
+        return $OCF_NOT_RUNNING
+    fi
+    ocf_log info "elastic_ip have been successfully bring down (${ELASTIC_IP})"
+    return $OCF_SUCCESS
+}
+
+# awseip_monitor: succeed only if describe-instances output contains ELASTIC_IP as a literal whole word.
+awseip_monitor() {
+    $AWSCLI ec2 describe-instances --instance-id "${INSTANCE_ID}" | grep -Fqw -- "${ELASTIC_IP}"
+    RET=$?
+    if [ $RET -ne 0 ]; then
+        return $OCF_NOT_RUNNING
+    fi
+    return $OCF_SUCCESS
+}
+
+# awseip_validate: check that the AWS CLI binary exists and that an instance id was obtained.
+awseip_validate() {
+    check_binary ${AWSCLI}
+
+    if [ -z "${INSTANCE_ID}" ]; then
+        ocf_log info "instant_id could not been found, is this EC2 instance?"
+        return $OCF_ERR_GENERIC
+    fi
+    return $OCF_SUCCESS
+}
+
+# Static actions are answered first so that they never have to contact the
+# EC2 instance metadata service or run the AWS CLI.
+case $__OCF_ACTION in
+    meta-data)  meta_data;    exit $OCF_SUCCESS ;;
+    usage|help) awseip_usage; exit $OCF_SUCCESS ;;
+esac
+
+# Runtime settings; INSTANCE_ID and NETWORK_ID are looked up over the network.
+AWSCLI="${OCF_RESKEY_awscli}"
+ELASTIC_IP="${OCF_RESKEY_elastic_ip}"
+ALLOCATION_ID="${OCF_RESKEY_allocation_id}"
+PRIVATE_IP_ADDRESS="${OCF_RESKEY_private_ip_address}"
+INSTANCE_ID="$(curl -s http://169.254.169.254/latest/meta-data/instance-id)"
+NETWORK_ID="$($AWSCLI ec2 describe-instances --instance-id "${INSTANCE_ID}" | grep -m 1 'eni' | awk -F'"' '{print$4}')"
+
+case $__OCF_ACTION in
+    start)
+        awseip_start
+        ;;
+    stop)
+        awseip_stop
+        ;;
+    monitor)
+        awseip_monitor
+        ;;
+    migrate_to)
+        ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
+        awseip_stop
+        ;;
+    migrate_from)
+        ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}."
+        awseip_start
+        ;;
+    reload)
+        ocf_log info "Reloading ${OCF_RESOURCE_INSTANCE} ..."
+        ;;
+    validate|validate-all)
+        awseip_validate
+        ;;
+    *)
+        awseip_usage
+        exit $OCF_ERR_UNIMPLEMENTED
+        ;;
+esac
+
+# Exit with the status of the action handler that ran above.
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
new file mode 100755
index 000000000..b848af367
--- /dev/null
+++ b/heartbeat/awsvip
@@ -0,0 +1,222 @@
+#!/bin/sh
+#
+#
+#    Manage Secondary Private IP with Pacemaker
+#
+#
+# Copyright 2016 guessi <guessi@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+
+#
+#  Prerequisites:
+#
+#  - preconfigured AWS CLI running environment (AccessKey, SecretAccessKey, etc.)
+#  - a reserved secondary private IP address for EC2 instances high availability
+#  - IAM user role with the following permissions:
+#    * DescribeInstances
+#    * AssignPrivateIpAddresses
+#    * UnassignPrivateIpAddresses
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+
+#
+# Defaults (the AWS CLI executable is "aws", the same path the awseip agent uses)
+#
+OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_api_delay_default="1"
+
+: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
+: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
+
+meta_data() {    # print the OCF resource-agent metadata XML on stdout
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="awsvip" version="0.9">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent that manages an AWS EC2 secondary private IP address: start assigns the address to this instance's network interface through the AWS CLI, stop unassigns it.
+</longdesc>
+<shortdesc lang="en">Manages an AWS EC2 secondary private IP address</shortdesc>
+
+<parameters>
+
+<parameter name="awscli" unique="0">
+<longdesc lang="en">
+command line tools for aws services
+</longdesc>
+<shortdesc lang="en">aws cli tools</shortdesc>
+<content type="string" default="${OCF_RESKEY_awscli_default}" />
+</parameter>
+
+<parameter name="secondary_private_ip" unique="1" required="1">
+<longdesc lang="en">
+reserved secondary private ip for ec2 instance
+</longdesc>
+<shortdesc lang="en">reserved secondary private ip for ec2 instance</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="api_delay" unique="0">
+<longdesc lang="en">
+a short delay between API calls, to avoid sending API too quick
+</longdesc>
+<shortdesc lang="en">a short delay between API calls</shortdesc>
+<content type="integer" default="${OCF_RESKEY_api_delay_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start"        timeout="10" />
+<action name="stop"         timeout="10" />
+<action name="monitor"      timeout="10" interval="20" depth="0" />
+<action name="reload"       timeout="10" />
+<action name="migrate_to"   timeout="10" />
+<action name="migrate_from" timeout="10" />
+<action name="meta-data"    timeout="5" />
+<action name="validate"     timeout="10" />
+<action name="validate-all" timeout="10" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+awsvip_usage() {
+    cat <<END
+usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+awsvip_start() {
+    awsvip_monitor && return $OCF_SUCCESS
+    # monitor did not find the address: ask EC2 to assign it to NETWORK_ID
+    $AWSCLI ec2 assign-private-ip-addresses \
+        --network-interface-id ${NETWORK_ID} \
+        --private-ip-addresses ${SECONDARY_PRIVATE_IP} \
+        --allow-reassignment
+    RET=$?
+
+    # delay to avoid sending request too fast
+    sleep ${OCF_RESKEY_api_delay}
+
+    if [ $RET -ne 0 ]; then
+        return $OCF_NOT_RUNNING
+    fi
+
+    ocf_log info "secondary_private_ip have been successfully bring up (${SECONDARY_PRIVATE_IP})"
+    return $OCF_SUCCESS
+}
+
+awsvip_stop() {
+    awsvip_monitor || return $OCF_SUCCESS
+    # monitor still finds the address: ask EC2 to unassign it from NETWORK_ID
+    $AWSCLI ec2 unassign-private-ip-addresses \
+        --network-interface-id ${NETWORK_ID} \
+        --private-ip-addresses ${SECONDARY_PRIVATE_IP}
+    RET=$?
+
+    # delay to avoid sending request too fast
+    sleep ${OCF_RESKEY_api_delay}
+
+    if [ $RET -ne 0 ]; then
+        return $OCF_NOT_RUNNING
+    fi
+
+    ocf_log info "secondary_private_ip have been successfully bring down (${SECONDARY_PRIVATE_IP})"
+    return $OCF_SUCCESS
+}
+
+awsvip_monitor() {
+    # -F: dots in the address are literal; -w: "10.0.0.1" must not match inside "10.0.0.10"
+    $AWSCLI ec2 describe-instances --instance-id "${INSTANCE_ID}" | grep -qwF "${SECONDARY_PRIVATE_IP}"
+    RET=$?
+    if [ $RET -ne 0 ]; then
+        return $OCF_NOT_RUNNING
+    fi
+    return $OCF_SUCCESS
+}
+
+awsvip_validate() {
+    check_binary ${AWSCLI}
+    # INSTANCE_ID comes from the EC2 metadata service; empty means the lookup returned nothing
+    if [ -z "${INSTANCE_ID}" ]; then
+        ocf_log info "instant_id could not been found, is this EC2 instance?"
+        return $OCF_ERR_GENERIC
+    fi
+
+    return $OCF_SUCCESS
+}
+
+: ${OCF_RESKEY_awscli="/usr/bin/aws"}
+AWSCLI="${OCF_RESKEY_awscli}"
+SECONDARY_PRIVATE_IP="${OCF_RESKEY_secondary_private_ip}"
+INSTANCE_ID="$(curl -s http://169.254.169.254/latest/meta-data/instance-id)"
+NETWORK_ID="$($AWSCLI ec2 describe-instances --instance-id ${INSTANCE_ID} | grep -m 1 'eni' | awk -F'"' '{print$4}')"
+
+case $__OCF_ACTION in
+    meta-data)
+        meta_data
+        exit $OCF_SUCCESS
+        ;;
+    start)
+        awsvip_start
+        ;;
+    stop)
+        awsvip_stop
+        ;;
+    monitor)
+        awsvip_monitor
+        ;;
+    migrate_to)
+        ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
+	awsvip_stop
+        ;;
+    migrate_from)
+        ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}."
+        awsvip_start
+        ;;
+    reload)
+        ocf_log info "Reloading ${OCF_RESOURCE_INSTANCE} ..."
+        ;;
+    validate|validate-all)
+        awsvip_validate
+        ;;
+    usage|help)
+        awsvip_usage
+        exit $OCF_SUCCESS
+        ;;
+    *)
+        awsvip_usage
+        exit $OCF_ERR_UNIMPLEMENTED
+        ;;
+esac
+
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
diff --git a/heartbeat/docker b/heartbeat/docker
index 5af1782c8..d173e90aa 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -1,456 +1,465 @@
 #!/bin/sh
 #
 # The docker HA resource agent creates and launches a docker container
 # based off a supplied docker image. Containers managed by this agent
 # are both created and removed upon the agent's start and stop actions.
 #
 # Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
 #                    All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 #######################################################################
 
 meta_data()
 {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="docker" version="0.9">
 <version>1.0</version>
 
 <longdesc lang="en">
 The docker HA resource agent creates and launches a docker container
 based off a supplied docker image. Containers managed by this agent
 are both created and removed upon the agent's start and stop actions.
 </longdesc>
 <shortdesc lang="en">Docker container resource agent.</shortdesc>
 
 <parameters>
 <parameter name="image" required="1" unique="0">
 <longdesc lang="en">
 The docker image to base this container off of.
 </longdesc>
 <shortdesc lang="en">docker image</shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="name" required="0" unique="0">
 <longdesc lang="en">
 The name to give the created container. By default this will
 be that resource's instance name.
 </longdesc>
 <shortdesc lang="en">docker container name</shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="allow_pull" unique="0">
 <longdesc lang="en">
 Allow the image to be pulled from the configured docker registry when
 the image does not exist locally. NOTE, this can drastically increase
 the time required to start the container if the image repository is
 pulled over the network.
 </longdesc>
 <shortdesc lang="en">Allow pulling non-local images</shortdesc>
 <content type="boolean"/>
 </parameter>
 
 <parameter name="run_opts" required="0" unique="0">
 <longdesc lang="en">
 Add options to be appended to the 'docker run' command which is used
 when creating the container during the start action. This option allows
 users to do things such as setting a custom entry point and injecting
 environment variables into the newly created container. Note the '-d'
 option is supplied regardless of this value to force containers to run
 in the background.
 
 NOTE: Do not explicitly specify the --name argument in the run_opts. This
 agent will set --name using either the resource's instance or the name
 provided in the 'name' argument of this agent.
 
 </longdesc>
 <shortdesc lang="en">run options</shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="run_cmd" required="0" unique="0">
 <longdesc lang="en">
 Specifiy a command to launch within the container once
 it has initialized.
 </longdesc>
 <shortdesc lang="en">run command</shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="monitor_cmd" required="0" unique="0">
 <longdesc lang="en">
 Specifiy the full path of a command to launch within the container to check
 the health of the container. This command must return 0 to indicate that
 the container is healthy. A non-zero return code will indicate that the
 container has failed and should be recovered.
 
-The command is executed using nsenter. In the future 'docker exec' will
-be used once it is more widely supported.
+If 'docker exec' is supported, it is used to execute the command. If not,
+nsenter is used.
 </longdesc>
 <shortdesc lang="en">monitor command</shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="force_kill" required="0" unique="0">
 <longdesc lang="en">
 Kill a container immediately rather than waiting for it to gracefully
 shutdown
 </longdesc>
 <shortdesc lang="en">force kill</shortdesc>
 <content type="boolean"/>
 </parameter>
 
 <parameter name="reuse" required="0" unique="0">
 <longdesc lang="en">
 Allow the container to be reused after stopping the container. By default
 containers are removed after stop. With the reuse option containers
 will persist after the container stops.
 </longdesc>
 <shortdesc lang="en">reuse container</shortdesc>
 <content type="boolean"/>
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"        timeout="90" />
 <action name="stop"         timeout="90" />
 <action name="monitor"      timeout="30" interval="30" depth="0" />
 <action name="meta-data"    timeout="5" />
 <action name="validate-all"   timeout="30" />
 </actions>
 </resource-agent>
 END
 }
 
 #######################################################################
 REQUIRE_IMAGE_PULL=0
 
 docker_usage()
 {
 	cat <<END
 usage: $0 {start|stop|monitor|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 
 monitor_cmd_exec()
 {
 	local rc=$OCF_SUCCESS
 	local out
 
 	if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
 		return $rc
 	fi
 
-	out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
-	rc=$?
+	if docker exec --help >/dev/null 2>&1; then
+		out=$(docker exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
+		rc=$?
+	else
+		out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
+		rc=$?
+	fi
+
 	if [ $rc -ne 0 ]; then
 		ocf_log info "monitor cmd exit code = $rc"
 		ocf_log info "stdout/stderr: $out"
 
 		if [ $rc -eq 127 ]; then
 			ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."
 			# there is no recovering from this, exit immediately
 			exit $OCF_ERR_ARGS
 		fi
 		rc=$OCF_ERR_GENERIC
 	else
 		ocf_log info "monitor cmd passed: exit code = $rc"
 	fi
 
 	return $rc
 }
 
 container_exists()
 {
 	docker inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1
 }
 
 remove_container()
 {
 	if ocf_is_true "$OCF_RESKEY_reuse"; then
 		# never remove the container if we have reuse enabled.
 		return 0
 	fi
 
 	container_exists
 	if [ $? -ne 0 ]; then
 		# don't attempt to remove a container that doesn't exist
 		return 0
 	fi
 	ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
 	ocf_run docker rm $CONTAINER
 }
 
 docker_simple_status()
 {
 	local val
 
 	container_exists
 	if [ $? -ne 0 ]; then
 		return $OCF_NOT_RUNNING
 	fi
 
 	# retrieve the 'Running' attribute for the container
 	val=$(docker inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
 	if [ $? -ne 0 ]; then
 		#not running as a result of container not being found
 		return $OCF_NOT_RUNNING
 	fi
 
 	if ocf_is_true "$val"; then
 		# container exists and is running
 		return $OCF_SUCCESS
 	fi
 
 	return $OCF_NOT_RUNNING
 }
 
 docker_monitor()
 {
 	local rc=0
 
 	docker_simple_status
 	rc=$?
 
 	if [ $rc -ne 0 ]; then
 		return $rc
 	fi
 
 	monitor_cmd_exec
 }
 
 docker_start()
 {
 	local run_opts="-d --name=${CONTAINER}"
 	# check to see if the container has already started
 	docker_simple_status
 	if [ $? -eq $OCF_SUCCESS ]; then
 		return $OCF_SUCCESS
 	fi
 
 	if [ -n "$OCF_RESKEY_run_opts" ]; then
 		run_opts="$run_opts $OCF_RESKEY_run_opts"
 	fi
 
 	if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then
 		ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}"
 		docker pull "${OCF_RESKEY_image}"
 		if [ $? -ne 0 ]; then
 			ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}"
 			return $OCF_ERR_GENERIC
 		fi
 	fi
 
 	if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then
 		ocf_log info "starting existing container $CONTAINER."
 		ocf_run docker start $CONTAINER
 	else
 		# make sure any previous container matching our container name is cleaned up first.
 		# we already know at this point it wouldn't be running
 		remove_container
 		ocf_log info "running container $CONTAINER for the first time"
 		ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
 	fi
 
 	if [ $? -ne 0 ]; then
 		ocf_exit_reason "docker failed to launch container"
 		return $OCF_ERR_GENERIC
 	fi
 
 
 	# wait for monitor to pass before declaring that the container is started
 	while true; do
 		docker_simple_status
 		if [ $? -ne $OCF_SUCCESS ]; then
 			ocf_exit_reason "Newly created docker container exited after start"
 			return $OCF_ERR_GENERIC
 		fi
 
 		monitor_cmd_exec
 		if [ $? -eq $OCF_SUCCESS ]; then
 			ocf_log notice "Container $CONTAINER  started successfully"
 			return $OCF_SUCCESS
 		fi
 
 		ocf_exit_reason "waiting on monitor_cmd to pass after start"
 		sleep 1
 	done
 }
 
 docker_stop()
 {
 	local timeout=60
 	docker_simple_status
 	if [ $? -eq  $OCF_NOT_RUNNING ]; then
 		remove_container
 		return $OCF_SUCCESS
 	fi
 
 	if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
 		timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 ))
 		if [ $timeout -lt 10 ]; then
 			timeout=10
 		fi
 	fi
 
 	if ocf_is_true "$OCF_RESKEY_force_kill"; then
 		ocf_run docker kill $CONTAINER
 	else
 		ocf_log debug "waiting $timeout second[s] before killing container"
 		ocf_run docker stop -t=$timeout $CONTAINER
 	fi
 
 	if [ $? -ne 0 ]; then
 		ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
 		return $OCF_ERR_GENERIC
 	fi
 
 	remove_container
 	if [ $? -ne 0 ]; then
 		ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
 		return $OCF_ERR_GENERIC
 	fi
 
 	return $OCF_SUCCESS
 }
 
 image_exists()
 {
 	# assume that OCF_RESKEY_name have been validated
 	local IMAGE_NAME="$(echo ${OCF_RESKEY_image} | awk -F':' '{print $1}')"
 
 	# if no tag was specified, use default "latest"
 	local COLON_FOUND=0
 	local IMAGE_TAG="latest"
 
 	COLON_FOUND="$(echo "${OCF_RESKEY_image}" | grep -o ':' | grep -c .)"
 
 	if [ ${COLON_FOUND} -ne 0 ]; then
 		IMAGE_TAG="$(echo ${OCF_RESKEY_image} | awk -F':' '{print $NF}')"
 	fi
 
 	# IMAGE_NAME might be following formats:
 	# - image
 	# - repository/image
 	# - docker.io/image (some distro will display "docker.io/" as prefix)
 	docker images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/)?${IMAGE_NAME}:${IMAGE_TAG}\$"
 	if [ $? -eq 0 ]; then
 		# image found
 		return 0
 	fi
 
 	if ocf_is_true "$OCF_RESKEY_allow_pull"; then
 		REQUIRE_IMAGE_PULL=1
 		ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
 		return 0
 	fi
 	# image not found.
 	return 1
 }
 
 docker_validate()
 {
 	check_binary docker
 	if [ -z "$OCF_RESKEY_image" ]; then
 		ocf_exit_reason "'image' option is required"
 		exit $OCF_ERR_CONFIGURED
 	fi
 
 	if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
-		ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
-		check_binary nsenter
+		# nsenter is only the fallback for docker versions without 'exec'; test the exit status directly
+		if ! docker exec --help >/dev/null 2>&1; then
+			ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
+			check_binary nsenter
+		fi
 	fi
 
 	image_exists
 	if [ $? -ne 0 ]; then
 		ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
 		exit $OCF_ERR_CONFIGURED
 	fi
 
 	return $OCF_SUCCESS
 }
 
 # TODO :
 # When a user starts plural clones in a node in globally-unique, a user cannot appoint plural name parameters.
 # When a user appoints reuse, the resource agent cannot connect plural clones with a container.
 
 if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
 	if [ -n "$OCF_RESKEY_name" ]; then
 		if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ]
 		then
 			ocf_exit_reason "Cannot make plural clones from the same name parameter."
 			exit $OCF_ERR_CONFIGURED
 		fi
 		if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ]
 		then
 			ocf_exit_reason "Cannot make plural master from the same name parameter."
 			exit $OCF_ERR_CONFIGURED
 		fi
 	fi
 	: ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`} 
 else 
 	: ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
 fi
 
 if [ -n "$OCF_RESKEY_container" ]; then
 	# we'll keep the container attribute around for a bit in order not to break
 	# any existing deployments. The 'name' attribute is prefered now though.
 	CONTAINER=$OCF_RESKEY_container
 	ocf_log warn "The 'container' attribute is depreciated"
 else
 	CONTAINER=$OCF_RESKEY_name
 fi
 
 case $__OCF_ACTION in
 meta-data) meta_data
 		exit $OCF_SUCCESS;;
 start)
 	docker_validate
 	docker_start;;
 stop)		docker_stop;;
 monitor)	docker_monitor;;
 validate-all)	docker_validate;;
 usage|help)	docker_usage
 		exit $OCF_SUCCESS
 		;;
 *)		docker_usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 esac
 rc=$?
 ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
 exit $rc
 
diff --git a/heartbeat/exportfs b/heartbeat/exportfs
index 4b88fa1ed..c6ea920fd 100755
--- a/heartbeat/exportfs
+++ b/heartbeat/exportfs
@@ -1,434 +1,442 @@
 #!/bin/sh
 # exportfs
 #
 # Description: Manages nfs exported file system.
 #
 #   (c) 2010 Ben Timby, Florian Haas, Dejan Muhamedagic,
 #            and Linux-HA contributors
 #
 # License: GNU General Public License v2 (GPLv2) and later
 
 #######################################################################
 # Initialization:
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # Defaults
 OCF_RESKEY_unlock_on_stop_default=1
 OCF_RESKEY_wait_for_leasetime_on_stop_default=0
 OCF_RESKEY_rmtab_backup_default=".rmtab"
 
 : ${OCF_RESKEY_unlock_on_stop=${OCF_RESKEY_unlock_on_stop_default}}
 : ${OCF_RESKEY_wait_for_leasetime_on_stop=${OCF_RESKEY_wait_for_leasetime_on_stop_default}}
 : ${OCF_RESKEY_rmtab_backup=${OCF_RESKEY_rmtab_backup_default}}
 #######################################################################
 
 exportfs_meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="exportfs">
 <version>1.0</version>
 
 <longdesc lang="en">
 Exportfs uses the exportfs command to add/remove nfs exports.
 It does NOT manage the nfs server daemon.
 It depends on Linux specific NFS implementation details,
 so is considered not portable to other platforms yet.
 </longdesc>
 
 <shortdesc lang="en">
 Manages NFS exports
 </shortdesc>
 
 <parameters>
 
 <parameter name="clientspec" unique="0" required="1">
 <longdesc lang="en">
 The client specification allowing remote machines to mount the directory
 (or directories) over NFS.
 </longdesc>
 <shortdesc lang="en">
 Client ACL.
 </shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="options" unique="0" required="0">
 <longdesc lang="en">
 The options to pass to exportfs for the exported directory
 or directories.
 </longdesc>
 <shortdesc lang="en">
 Export options.
 </shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="directory" unique="0" required="1">
 <longdesc lang="en">
 The directory or directories to be exported using NFS. Multiple
 directories are separated by white space.
 </longdesc>
 <shortdesc lang="en">
 The directory or directories to export.
 </shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="fsid" unique="1" required="1">
 <longdesc lang="en">
 The fsid option to pass to exportfs. This can be a unique positive
 integer, a UUID, or the special string "root" which is functionally
 identical to numeric fsid of 0.
 If multiple directories are being exported, then they are
 assigned ids sequentially starting with this fsid (fsid, fsid+1,
 fsid+2, ...). Obviously, in that case the fsid must be an
 integer.
 0 (root) identifies the export as the root of an NFSv4
 pseudofilesystem -- avoid this setting unless you understand its
 special status.
 This value will override any fsid provided via the options parameter.
 </longdesc>
 <shortdesc lang="en">
 Unique fsid within cluster or starting fsid for multiple exports.
 </shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="unlock_on_stop">
 <longdesc lang="en">
 Relinquish NFS locks associated with this filesystem when the resource
 stops. Enabling this parameter is highly recommended unless the path exported
 by this ${__SCRIPT_NAME} resource is also exported by a different resource.
 
 Note: Unlocking is only possible on Linux systems where
 /proc/fs/nfsd/unlock_filesystem exists and is writable. If your system does
 not fulfill this requirement (on account of having an nonrecent kernel,
 for example), you may set this parameter to 0 to silence the associated
 warning.
 </longdesc>
 <shortdesc lang="en">
 Unlock filesystem on stop?
 </shortdesc>
 <content type="boolean" default="${OCF_RESKEY_unlock_on_stop_default}" />
 </parameter>
 
 <parameter name="wait_for_leasetime_on_stop">
 <longdesc lang="en">
 When stopping (unexporting), wait out the NFSv4 lease time.
 Only after all leases have expired does the NFS kernel server
 relinquish all server-side handles on the exported filesystem.
 If this ${__SCRIPT_NAME} resource manages an export that resides
 on a mount point designed to fail over along with the NFS export
 itself, then enabling this parameter will ensure such failover
 is working properly. Note that when this parameter is set, your
 stop timeout MUST accommodate for the wait period. This parameter
 is safe to disable if none of your NFS clients are using NFS
 version 4 or later.
 </longdesc>
 <shortdesc lang="en">
 Ride out the NFSv4 lease time on resource stop?
 </shortdesc>
 <content type="boolean" default="${OCF_RESKEY_wait_for_leasetime_on_stop_default}" />
 </parameter>
 
 <parameter name="rmtab_backup">
 <longdesc lang="en">
 Back up those entries from the NFS rmtab that apply to the exported
 directory, to the specified backup file. The filename is interpreted
 as relative to the exported directory. This backup is required if
 clients are connecting to the export via NFSv3 over TCP. Note that a
 configured monitor operation is required for this functionality.
 
 To disable rmtab backups, set this parameter to the special
 string "none".
 </longdesc>
 <shortdesc lang="en">
 Location of the rmtab backup, relative to directory.
 </shortdesc>
 <content type="string" default="${OCF_RESKEY_rmtab_backup_default}" />
 </parameter>
 </parameters>
 
 <actions>
 <action name="start"   timeout="40" />
 <action name="stop"    timeout="120" />
 <action name="monitor" depth="0"  timeout="20" interval="10" />
 <action name="meta-data"  timeout="5" />
 <action name="validate-all"  timeout="30" />
 </actions>
 </resource-agent>
 END
 
 return $OCF_SUCCESS
 }
 
 exportfs_methods() {
   cat <<-!
 	start
 	stop
 	status
 	monitor
 	validate-all
 	methods
 	meta-data
 	usage
 	!
 }
 
 reset_fsid() {
 	CURRENT_FSID=$OCF_RESKEY_fsid
 }
 bump_fsid() {
 	CURRENT_FSID=$((CURRENT_FSID+1))
 }
 get_fsid() {
 	echo $CURRENT_FSID
 }
 
 # run a function on all directories
 forall() {
 	local func=$1
 	shift 1
 	local fast_exit=""
 	local dir rc=0
 	if [ "$2" = fast_exit ]; then
 		fast_exit=1
 		shift 1
 	fi
 	reset_fsid
 	for dir in $OCF_RESKEY_directory; do
 		$func $dir "$@"
 		rc=$(($rc | $?))
 		[ $NUMDIRS -gt 1 ] && bump_fsid
 		[ "$fast_exit" ] && continue
 		[ $rc -ne 0 ] && return $rc
 	done
 	return $rc
 }
 
 backup_rmtab() {
 	local dir=$1
 	local rmtab_backup
 	rmtab_backup="$dir/${OCF_RESKEY_rmtab_backup}"
 	grep ":$dir:" /var/lib/nfs/rmtab > ${rmtab_backup}
 }
 
 restore_rmtab() {
 	local dir=$1
 	local rmtab_backup
 	rmtab_backup="$dir/${OCF_RESKEY_rmtab_backup}"
 	if [ -r ${rmtab_backup} ]; then
 		local tmpf=`mktemp`
 		sort -u ${rmtab_backup} /var/lib/nfs/rmtab > $tmpf &&
 		install -o root -m 644 $tmpf /var/lib/nfs/rmtab
 		rm -f $tmpf
 		ocf_log debug "Restored `wc -l ${rmtab_backup}` rmtab entries from ${rmtab_backup}."
 	else
 		ocf_log warn "rmtab backup ${rmtab_backup} not found or not readable."
 	fi
 }
 
 exportfs_usage() {
 	cat <<END
 		usage: $0 {start|stop|monitor|status|validate-all|meta-data}
 END
 }
 
 format_exports() {
 	# exportfs output wraps lines for long export directory names.
 	# We unwrap here with sed.
 	# We then do a literal match on the full line (grep -x -F)
 	exportfs |
 		sed -e '$! N; s/\n[[:space:]]\+/ /; t; s/[[:space:]]\+\([^[:space:]]\+\)\(\n\|$\)/ \1\2/g; P;D;'
 }
 is_exported() {
 	local dir=$1
 	local spec=$2
 	local rc
 	format_exports | grep -q -x -F "$dir $spec"
 	rc=$?
 	if [ $rc -ne 0 -a "$spec" = "*" ]; then
 		# on some platforms, exportfs may print
 		# "<world>" instead of "*"
 		format_exports | grep -q -x -F "$dir <world>"
 		rc=$?
 	fi
 	# log something only for monitors
 	if [ $rc -ne 0 -a "$__OCF_ACTION" = "monitor" ]; then
 		local sev="info"
 		ocf_is_probe || sev="err"
 		ocf_log $sev "$dir not exported to $spec (stopped)."
 	fi
 	return $rc
 }
 
 exportfs_monitor ()
 {
-	if forall is_exported "${OCF_RESKEY_clientspec}"; then
+	if ! ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" monitor; then
+		return $OCF_NOT_RUNNING
+	fi
+
+	if forall is_exported "$(echo "${OCF_RESKEY_clientspec}" | tr -d '[]')"; then
 		if [ ${OCF_RESKEY_rmtab_backup} != "none" ]; then
 			forall backup_rmtab
 		fi
 		return $OCF_SUCCESS
 	else
 		return $OCF_NOT_RUNNING
 	fi
 }
 
 export_one() {
 	local dir=$1
 	local opts sep
 	sep=""
 	if [ -n "$OCF_RESKEY_options" ]; then
 		opts="$OCF_RESKEY_options"
 		sep=","
 	fi
 	if echo "$opts" | grep fsid >/dev/null; then
 		#replace fsid in options list
 		opts=`echo "$opts" | sed "s/fsid=[0-9]\+/fsid=$(get_fsid)/g"`
 	else
 		#tack the fsid option onto our options list.
 		opts="${opts}${sep}fsid=$(get_fsid)"
 	fi
 	opts="-o $opts"
 
 	# if any of directories fails to export we can exit
 	# immediately
 	ocf_run exportfs -v $opts "${OCF_RESKEY_clientspec}:$dir"
 	if [ $? -ne 0 ]; then
 		ocf_exit_reason "exportfs failed - exportfs -v $opts ${OCF_RESKEY_clientspec}:$dir"
 		exit $OCF_ERR_GENERIC
 	fi
 
 	ocf_log info "directory $dir exported"
 	return $OCF_SUCCESS
 }
 exportfs_start ()
 {
 	if exportfs_monitor; then
 		ocf_log debug "already exported"
 		return $OCF_SUCCESS
 	fi
 	ocf_log info "Exporting file system(s) ..."
+
+	ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" start
 	forall export_one
 
 	# Restore the rmtab to ensure smooth NFS-over-TCP failover
 	if [ ${OCF_RESKEY_rmtab_backup} != "none" ]; then
 		forall restore_rmtab
 	fi
 }
 
 unlock_fs() {
 	local dir=$1
 	local unlockfile
 	unlockfile=/proc/fs/nfsd/unlock_filesystem
 	if [ -w ${unlockfile} ]; then
 		echo "$dir" > ${unlockfile}
 		ocf_log info "Unlocked NFS export $dir"
 	else
 		ocf_log warn "Unable to unlock NFS export $dir, ${unlockfile} not found or not writable"
 	fi
 }
 wait_for_leasetime() {
 	local leasetimefile
 	local sleeptime
 	leasetimefile=/proc/fs/nfsd/nfsv4leasetime
 	if [ -r ${leasetimefile} ]; then
 		sleeptime=$((`cat ${leasetimefile}`+2))
 		ocf_log info "Sleeping ${sleeptime} seconds to accommodate for NFSv4 lease expiry"
 		sleep ${sleeptime}s
 	else
 		ocf_log warn "Unable to read NFSv4 lease time from ${leasetimefile}, file not found or not readable"
 	fi
 }
 cleanup_export_cache() {
 	# see if the cache is blocking unexport
 	local contentfile=/proc/net/rpc/nfsd.export/content
 	local fsid_re
 	local i=1
 	fsid_re="fsid=(echo `forall get_fsid`|sed 's/ /|/g'),"
 	while :; do
 		grep -E -q "$fsid_re" $contentfile ||
 			break
 		ocf_log info "Cleanup export cache ... (try $i)"
 		ocf_run exportfs -f
 		sleep 0.5
 		i=$((i + 1))
 	done
 }
 unexport_one() {
 	local dir=$1
 	ocf_run exportfs -v -u ${OCF_RESKEY_clientspec}:$dir
 }
 exportfs_stop ()
 {
 	local rc
 
 	exportfs_monitor
 	if [ $? -eq $OCF_NOT_RUNNING ]; then
 		ocf_log debug "not exported"
 		return $OCF_SUCCESS
 	fi
 
 	ocf_log info "Un-exporting file system ..."
 
 	# Backup the rmtab to ensure smooth NFS-over-TCP failover
 	if [ ${OCF_RESKEY_rmtab_backup} != "none" ]; then
 		forall backup_rmtab
 	fi
 
 	forall unexport_one
 	rc=$?
 
 	if ocf_is_true ${OCF_RESKEY_unlock_on_stop}; then
 		forall unlock_fs
 	fi
 
 	if ocf_is_true ${OCF_RESKEY_wait_for_leasetime_on_stop}; then
 		wait_for_leasetime
 	fi
 
 	if [ $rc -eq 0 ]; then
 		cleanup_export_cache
+		ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" stop
+
 		ocf_log info "Un-exported file system(s)"
 		return $OCF_SUCCESS
 	else
 		ocf_exit_reason "Failed to un-export file system(s)"
 		return $OCF_ERR_GENERIC
 	fi
 }
 
 testdir() {
 	if [ ! -d $1 ]; then
 		ocf_is_probe ||
 			ocf_log err "$1 does not exist or is not a directory"
 		return 1
 	fi
 	return 0
 }
 exportfs_validate_all ()
 {
 	if [ $NUMDIRS -gt 1 ] &&
 			! ocf_is_decimal "$OCF_RESKEY_fsid"; then
 		ocf_exit_reason "use integer fsid when exporting multiple directories"
 		return $OCF_ERR_CONFIGURED
 	fi
 	if ! forall testdir; then
 		return $OCF_ERR_INSTALLED
 	fi
 }
 
 # If someone puts a trailing slash at the end of the export directory,
 # this agent is going to fail in some unexpected ways due to how
 # export strings are matched.  The simplest solution here is to strip off
 # a trailing '/' in the directory before processing anything.
 newdir=$(echo "$OCF_RESKEY_directory" | sed -n -e 's/^\(.*\)\/$/\1/p')
 if [ -n "$newdir" ]; then
 	OCF_RESKEY_directory=$newdir
 fi
 
 NUMDIRS=`echo "$OCF_RESKEY_directory" | wc -w`
 OCF_REQUIRED_PARAMS="directory fsid clientspec"
 OCF_REQUIRED_BINARIES="exportfs"
 ocf_rarun $*
diff --git a/heartbeat/galera b/heartbeat/galera
index 7be2b00b1..543200d59 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -1,800 +1,977 @@
 #!/bin/sh
 #
 # Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
 #                    All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 ##
 # README.
 # 
 # This agent only supports being configured as a multistate Master
 # resource.
 #
 # Slave vs Master role:
 #
 # During the 'Slave' role, galera instances are in read-only mode and
 # will not attempt to connect to the cluster. This role exists as
 # a means to determine which galera instance is the most up-to-date. The
 # most up-to-date node will be used to bootstrap a galera cluster that
 # has no current members.
 #
 # The galera instances will only begin to be promoted to the Master role
 # once all the nodes in the 'wsrep_cluster_address' connection address
 # have entered read-only mode. At that point the node containing the
 # database that is most current will be promoted to Master.
 #
 # Once the first Master instance bootstraps the galera cluster, the
 # other nodes will join the cluster and start synchronizing via SST.
 # They will stay in Slave role as long as the SST is running. Their
 # promotion to Master will happen once synchronization is finished.
 #
-# Example: Create a galera cluster using nodes rhel7-node1 rhel7-node2 rhel7-node3
+# Example: Create a galera cluster using nodes rhel7-auto1 rhel7-auto2 rhel7-auto3
 #
 # pcs resource create db galera enable_creation=true \
 # wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
 #
 # By setting the 'enable_creation' option, the database will be automatically 
 # generated at startup. The meta attribute 'master-max=3' means that all 3
 # nodes listed in the wsrep_cluster_address list will be allowed to connect
 # to the galera cluster and perform replication.
 #
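-# NOTE: If you have more nodes in the pacemaker cluster then you wish
-# to have in the galera cluster, make sure to use location contraints to prevent
-# pacemaker from attempting to place a galera instance on a node that is
-# not in the 'wsrep_cluster_address" list. 
+# NOTE: If you have more nodes in the pacemaker cluster than you wish
+# to have in the galera cluster, make sure to use location constraints to prevent
+# pacemaker from attempting to place a galera instance on a node that is
+# not in the 'wsrep_cluster_address' list.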
 #
 ##
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 . ${OCF_FUNCTIONS_DIR}/mysql-common.sh
 
 # It is common for some galera instances to store
 # check user that can be used to query status
 # in this file
+# (the /etc/sysconfig location is tried first and /etc/default is the
+# fallback; at most one of the two files is sourced)
 if [ -f "/etc/sysconfig/clustercheck" ]; then
     . /etc/sysconfig/clustercheck
+elif [ -f "/etc/default/clustercheck" ]; then
+    . /etc/default/clustercheck
 fi
 
 #######################################################################
 
+# usage: print a short help text listing the supported actions on stdout.
 usage() {
   cat <<UEND
 usage: $0 (start|stop|validate-all|meta-data|monitor|promote|demote)
 
 $0 manages a galera Database as an HA resource.
 
 The 'start' operation starts the database.
 The 'stop' operation stops the database.
 The 'status' operation reports whether the database is running
 The 'monitor' operation reports whether the database seems to be working
 The 'promote' operation makes this mysql server run as master
 The 'demote' operation makes this mysql server run as slave
 The 'validate-all' operation reports whether the parameters are valid
 
 UEND
 }
 
 meta_data() {
    cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="galera">
 <version>1.0</version>
 
 <longdesc lang="en">
 Resource script for managing galara database.
 </longdesc>
 <shortdesc lang="en">Manages a galara instance</shortdesc>
 <parameters>
 
 <parameter name="binary" unique="0" required="0">
 <longdesc lang="en">
 Location of the MySQL server binary
 </longdesc>
 <shortdesc lang="en">MySQL server binary</shortdesc>
 <content type="string" default="${OCF_RESKEY_binary_default}" />
 </parameter>
 
 <parameter name="client_binary" unique="0" required="0">
 <longdesc lang="en">
 Location of the MySQL client binary
 </longdesc>
 <shortdesc lang="en">MySQL client binary</shortdesc>
 <content type="string" default="${OCF_RESKEY_client_binary_default}" />
 </parameter>
 
 <parameter name="config" unique="0" required="0">
 <longdesc lang="en">
 Configuration file
 </longdesc>
 <shortdesc lang="en">MySQL config</shortdesc>
 <content type="string" default="${OCF_RESKEY_config_default}" />
 </parameter>
 
 <parameter name="datadir" unique="0" required="0">
 <longdesc lang="en">
 Directory containing databases
 </longdesc>
 <shortdesc lang="en">MySQL datadir</shortdesc>
 <content type="string" default="${OCF_RESKEY_datadir_default}" />
 </parameter>
 
 <parameter name="user" unique="0" required="0">
 <longdesc lang="en">
 User running MySQL daemon
 </longdesc>
 <shortdesc lang="en">MySQL user</shortdesc>
 <content type="string" default="${OCF_RESKEY_user_default}" />
 </parameter>
 
 <parameter name="group" unique="0" required="0">
 <longdesc lang="en">
 Group running MySQL daemon (for logfile and directory permissions)
 </longdesc>
 <shortdesc lang="en">MySQL group</shortdesc>
 <content type="string" default="${OCF_RESKEY_group_default}"/>
 </parameter>
 
 <parameter name="log" unique="0" required="0">
 <longdesc lang="en">
 The logfile to be used for mysqld.
 </longdesc>
 <shortdesc lang="en">MySQL log file</shortdesc>
 <content type="string" default="${OCF_RESKEY_log_default}"/>
 </parameter>
 
 <parameter name="pid" unique="0" required="0">
 <longdesc lang="en">
 The pidfile to be used for mysqld.
 </longdesc>
 <shortdesc lang="en">MySQL pid file</shortdesc>
 <content type="string" default="${OCF_RESKEY_pid_default}"/>
 </parameter>
 
 <parameter name="socket" unique="0" required="0">
 <longdesc lang="en">
 The socket to be used for mysqld.
 </longdesc>
 <shortdesc lang="en">MySQL socket</shortdesc>
 <content type="string" default="${OCF_RESKEY_socket_default}"/>
 </parameter>
 
 <parameter name="enable_creation" unique="0" required="0">
 <longdesc lang="en">
 If the MySQL database does not exist, it will be created
 </longdesc>
 <shortdesc lang="en">Create the database if it does not exist</shortdesc>
 <content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
 </parameter>
 
 <parameter name="additional_parameters" unique="0" required="0">
 <longdesc lang="en">
 Additional parameters which are passed to the mysqld on startup.
 (e.g. --skip-external-locking or --skip-grant-tables)
 </longdesc>
 <shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
 <content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
 </parameter>
 
 
 <parameter name="wsrep_cluster_address" unique="0" required="1">
 <longdesc lang="en">
 The galera cluster address. This takes the form of:
 gcomm://node,node,node
 
 Only nodes present in this node list will be allowed to start a galera instance.
 It is expected that the galera node names listed in this address match valid
 pacemaker node names.
 </longdesc>
 <shortdesc lang="en">Galera cluster address</shortdesc>
 <content type="string" default=""/>
 </parameter>
 
 <parameter name="check_user" unique="0" required="0">
 <longdesc lang="en">
 Cluster check user.
 </longdesc>
 <shortdesc lang="en">MySQL test user</shortdesc>
 <content type="string" default="root" />
 </parameter>
 
 <parameter name="check_passwd" unique="0" required="0">
 <longdesc lang="en">
 Cluster check user password
 </longdesc>
 <shortdesc lang="en">check password</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start" timeout="120" />
 <action name="stop" timeout="120" />
 <action name="status" timeout="60" />
 <action name="monitor" depth="0" timeout="30" interval="20" />
 <action name="monitor" role="Master" depth="0" timeout="30" interval="10" />
 <action name="monitor" role="Slave" depth="0" timeout="30" interval="30" />
 <action name="promote" timeout="300" />
 <action name="demote" timeout="120" />
 <action name="validate-all" timeout="5" />
 <action name="meta-data" timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
+# get_option_variable KEY: run "SHOW VARIABLES like 'KEY'" through the
+# check client ($MYSQL $MYSQL_OPTIONS_CHECK) and print only the last line
+# of its output.
 get_option_variable()
 {
     local key=$1
 
     $MYSQL $MYSQL_OPTIONS_CHECK  -e "SHOW VARIABLES like '$key';" | tail -1
 }
 
+# get_status_variable KEY: run "show status like 'KEY'" through the check
+# client ($MYSQL $MYSQL_OPTIONS_CHECK) and print only the last line of its
+# output.
 get_status_variable()
 {
     local key=$1
 
     $MYSQL $MYSQL_OPTIONS_CHECK -e "show status like '$key';" | tail -1
 }
 
+# set_bootstrap_node NODE: set the "-l reboot" node attribute
+# "${INSTANCE_ATTR_NAME}-bootstrap" to "true" on NODE.
 set_bootstrap_node()
 {
     local node=$1
 
     ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true"
 }
 
+# clear_bootstrap_node: delete the "${INSTANCE_ATTR_NAME}-bootstrap"
+# attribute of the local node ($NODENAME).
 clear_bootstrap_node()
 {
     ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -D
 }
 
+# is_bootstrap: query the "${INSTANCE_ATTR_NAME}-bootstrap" attribute of
+# the local node. Callers use both the text printed by crm_attribute
+# (bootstrap=$(is_bootstrap)) and its exit status (if is_bootstrap).
 is_bootstrap()
 {
     ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -Q 2>/dev/null
 
 }
 
+# set_no_grastate: flag the local node ($NODENAME) by setting its
+# "${INSTANCE_ATTR_NAME}-no-grastate" attribute to "true".
+set_no_grastate()
+{
+    local attr="${INSTANCE_ATTR_NAME}-no-grastate"
+
+    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -v "true"
+}
+
+# clear_no_grastate: delete the "${INSTANCE_ATTR_NAME}-no-grastate"
+# attribute of the local node ($NODENAME).
+clear_no_grastate()
+{
+    local attr="${INSTANCE_ATTR_NAME}-no-grastate"
+
+    ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$attr" -D
+}
+
+# is_no_grastate NODE: query the "${INSTANCE_ATTR_NAME}-no-grastate"
+# attribute of NODE; the caller tests the exit status of crm_attribute.
+is_no_grastate()
+{
+    local attr="${INSTANCE_ATTR_NAME}-no-grastate"
+
+    ${HA_SBIN_DIR}/crm_attribute -N $1 -l reboot --name "$attr" -Q 2>/dev/null
+}
+
+# clear_last_commit: delete the "${INSTANCE_ATTR_NAME}-last-committed"
+# attribute of the local node ($NODENAME).
 clear_last_commit()
 {
     ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D
 }
 
+# set_last_commit VALUE: store VALUE in the
+# "${INSTANCE_ATTR_NAME}-last-committed" attribute of the local node.
 set_last_commit()
 {
     ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -v $1
 }
 
+# get_last_commit [NODE]: query the "${INSTANCE_ATTR_NAME}-last-committed"
+# attribute of NODE, or of the local node ($NODENAME) when NODE is empty.
+# Callers capture the text printed by crm_attribute.
 get_last_commit()
 {
     local node=$1
 
     if [ -z "$node" ]; then
        ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -Q 2>/dev/null
     else 
        ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -Q 2>/dev/null
     fi
 }
 
+# wait_for_sync: poll wsrep_local_state once per second until it reads "4".
+# The loop has no timeout of its own.
 wait_for_sync()
 {
     local state=$(get_status_variable "wsrep_local_state")
 
     ocf_log info "Waiting for database to sync with the cluster. "
     while [ "$state" != "4" ]; do
         sleep 1
         state=$(get_status_variable "wsrep_local_state")
     done
     ocf_log info "Database synced."
 }
 
+# set_sync_needed: set the "${INSTANCE_ATTR_NAME}-sync-needed" attribute
+# of the local node ($NODENAME) to "true".
 set_sync_needed()
 {
     ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-sync-needed" -v "true"
 }
 
+# clear_sync_needed: delete the "${INSTANCE_ATTR_NAME}-sync-needed"
+# attribute of the local node ($NODENAME).
 clear_sync_needed()
 {
     ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-sync-needed" -D
 }
 
+# check_sync_needed: query the "${INSTANCE_ATTR_NAME}-sync-needed"
+# attribute of the local node; callers test the exit status of
+# crm_attribute.
 check_sync_needed()
 {
     ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-sync-needed" -Q 2>/dev/null
 }
 
+
+# this function is called when attribute sync-needed is set in the CIB
+#
+# Returns $OCF_ERR_GENERIC when the wsrep status cannot be read or the
+# node is not in "Primary" state, $OCF_SUCCESS otherwise. Once the node
+# reports wsrep_local_state 4 and wsrep_ready ON, the sync-needed
+# attribute is cleared and the master score is set.
 check_sync_status()
 {
-    local state=$(get_status_variable "wsrep_local_state")
-    local ready=$(get_status_variable "wsrep_ready")
+    # if the pidfile is created, mysqld is up and running
+    # an IST might still be in progress, check wsrep status
+    # (the path is quoted: unquoted, an empty value would degrade the
+    # test to "[ -e ]", which is always true)
+    if [ -e "$OCF_RESKEY_pid" ]; then
+        local cluster_status=$(get_status_variable "wsrep_cluster_status")
+        local state=$(get_status_variable "wsrep_local_state")
+        local ready=$(get_status_variable "wsrep_ready")
+
+        if [ -z "$cluster_status" -o -z "$state" -o -z "$ready" ]; then
+            ocf_exit_reason "Unable to retrieve state transfer status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
+            return $OCF_ERR_GENERIC
+        fi
 
-    if [ -z "$state" -o -z "$ready" ]; then
-        ocf_exit_reason "Unable to retrieve state transfer status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
-        return $OCF_ERR_GENERIC
-    fi
+        if [ "$cluster_status" != "Primary" ]; then
+            ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
+            return $OCF_ERR_GENERIC
+        fi
 
-    if [ "$state" == "4" -a "$ready" == "ON" ]; then
-        ocf_log info "local node synced with the cluster"
-        # when sync is finished, we are ready to switch to Master
-        clear_sync_needed
-        set_master_score
-        return $OCF_SUCCESS
-    else
-        ocf_log info "local node syncing"
-        return $OCF_SUCCESS
+        if [ "$state" = "4" -a "$ready" = "ON" ]; then
+            ocf_log info "local node synced with the cluster"
+            # when sync is finished, we are ready to switch to Master
+            clear_sync_needed
+            set_master_score
+            return $OCF_SUCCESS
+        fi
     fi
+
+    # if we pass here, an IST or SST is still in progress
+    ocf_log info "local node syncing"
+    return $OCF_SUCCESS
 }
 
+# is_primary: return 0 if wsrep_cluster_status is "Primary", 1 otherwise.
+# An empty status is reported as an exit reason, any other value is only
+# logged.
+# NOTE(review): cluster_status is not declared local, so it is left set
+# in the caller's scope.
 is_primary()
 {
     cluster_status=$(get_status_variable "wsrep_cluster_status")
     if [ "$cluster_status" = "Primary" ]; then
         return 0
     fi
 
     if [ -z "$cluster_status" ]; then
         ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
     else
         ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}"
     fi
     return 1
 }
 
+# is_readonly: return 0 only if the read_only variable is true AND
+# wsrep_cluster_status is "Disconnected"; return 1 in every other case.
+# NOTE(review): cluster_status is not declared local, so it is left set
+# in the caller's scope.
 is_readonly()
 {
     local res=$(get_option_variable "read_only")
 
     if ! ocf_is_true "$res"; then
         return 1
     fi
 
     cluster_status=$(get_status_variable "wsrep_cluster_status")
     if ! [ "$cluster_status" = "Disconnected" ]; then
         return 1
     fi
 
     return 0
 }
 
+# master_exists: return 0 if the crm_mon XML output contains a line for
+# this resource with role Master, active, not orphaned and not failed.
+# During a "demote" action it always returns 1 (see below).
 master_exists()
 {
     if [ "$__OCF_ACTION" = "demote" ]; then
         # We don't want to detect master instances during demote.
         # 1. we could be detecting ourselves as being master, which is no longer the case.
         # 2. we could be detecting other master instances that are in the process of shutting down.
         # by not detecting other master instances in "demote" we are deferring this check
         # to the next recurring monitor operation which will be much more accurate
         return 1
     fi
     # determine if a master instance is already up and is healthy
     crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
     return $?
 }
 
+# clear_master_score [NODE]: run "$CRM_MASTER -D", adding "-N NODE" when
+# NODE is given.
 clear_master_score()
 {
     local node=$1
     if [ -z "$node" ]; then
         $CRM_MASTER -D
     else 
         $CRM_MASTER -D -N $node
     fi
 }
 
+# set_master_score [NODE]: run "$CRM_MASTER -v 100", adding "-N NODE"
+# when NODE is given.
 set_master_score()
 {
     local node=$1
 
     if [ -z "$node" ]; then
         $CRM_MASTER -v 100
     else 
         $CRM_MASTER -N $node -v 100
     fi
 }
 
+# greater_than_equal_long A B: return 0 if awk evaluates A >= B, non-zero
+# otherwise. awk prints "true" or "false" and the exit status of
+# "grep -q true" becomes the result.
 greater_than_equal_long()
 {
     # there are values we need to compare in this script
     # that are too large for shell -gt to process
     echo | awk -v n1="$1" -v n2="$2"  '{if (n1>=n2) printf ("true"); else printf ("false");}' |  grep -q "true"
 }
 
+# detect_first_master: choose the node that will bootstrap the cluster.
+# Every node listed in wsrep_cluster_address must have published a
+# last-committed attribute; if any is missing the function returns without
+# choosing. Otherwise the node with the highest value gets the master
+# score and the bootstrap attribute.
+# NOTE(review): the loop variable "node" is not declared local.
 detect_first_master()
 {
     local best_commit=0
     local best_node="$NODENAME"
     local last_commit=0
     local missing_nodes=0
+    local nodes=""
+    local nodes_recovered=""
 
+    # avoid selecting a recovered node as bootstrap if possible
     for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
+        if is_no_grastate $node; then
+            nodes_recovered="$nodes_recovered $node"
+        else
+            nodes="$nodes $node"
+        fi
+    done
+
+    # recovered nodes are visited first; because the comparison below is
+    # ">=", a later node with an equal commit replaces them as best_node
+    for node in $nodes_recovered $nodes; do
         last_commit=$(get_last_commit $node)
 
         if [ -z "$last_commit" ]; then
             ocf_log info "Waiting on node <${node}> to report database status before Master instances can start."
             missing_nodes=1
             continue
         fi
 
         # this means -1, or that no commit has occured yet.
         if [ "$last_commit" = "18446744073709551615" ]; then
             last_commit="0"
         fi
 
         greater_than_equal_long "$last_commit" "$best_commit"
         if [ $? -eq 0 ]; then
             best_node=$node
             best_commit=$last_commit
         fi
 
     done
 
     if [ $missing_nodes -eq 1 ]; then
         return
     fi
 
     ocf_log info "Promoting $best_node to be our bootstrap node"
     set_master_score $best_node
     set_bootstrap_node $best_node
 }
 
+# detect_galera_pid: succeed if the process list shows a command line
+# carrying our --pid-file argument, after dropping the lines that mention
+# $OCF_RESKEY_binary or grep itself.
+detect_galera_pid()
+{
+    ps auxww \
+        | grep -v -e "${OCF_RESKEY_binary}" -e grep \
+        | grep -q -e "--pid-file=$OCF_RESKEY_pid"
+}
+
+# galera_status LOGLEVEL: report whether the server is running.
+# With a pidfile present the decision is delegated to mysql_common_status;
+# without one, a process carrying our --pid-file argument counts as
+# running. Returns the mysql_common_status result, $OCF_SUCCESS or
+# $OCF_NOT_RUNNING.
+galera_status()
+{
+    local loglevel=$1
+
+    # quoted so that an empty pid path cannot degrade the test to
+    # "[ -e ]", which is always true
+    if [ -e "$OCF_RESKEY_pid" ]; then
+        mysql_common_status $loglevel
+        return $?
+    fi
+
+    # if pidfile is not created, the server may
+    # still be starting up, e.g. running SST
+    if detect_galera_pid; then
+        return $OCF_SUCCESS
+    fi
+
+    ocf_log $loglevel "MySQL is not running"
+    return $OCF_NOT_RUNNING
+}
+
+# galera_start_nowait EXTRA_PARAMS: launch the server in the background
+# and return as soon as detect_galera_pid sees a process carrying our
+# --pid-file argument; it does not wait for the pidfile itself.
+# Returns $OCF_SUCCESS once the process is seen, $OCF_ERR_GENERIC if the
+# launched command exits first.
+galera_start_nowait()
+{
+    local mysql_extra_params="$1"
+    local pid
+
+    ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
+    --pid-file=$OCF_RESKEY_pid \
+    --socket=$OCF_RESKEY_socket \
+    --datadir=$OCF_RESKEY_datadir \
+    --log-error=$OCF_RESKEY_log \
+    --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \
+    $mysql_extra_params >/dev/null 2>&1 &
+    pid=$!
+
+    # Spin waiting for the server to be spawned.
+    # Let the CRM/LRM time us out if required.
+    while :; do
+        if ! ps $pid > /dev/null 2>&1; then
+            # the launched command is gone: collect its exit status
+            # for the message
+            wait $pid
+            ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation"
+            return $OCF_ERR_GENERIC
+        fi
+        if detect_galera_pid; then
+            # server process found: leave without a further sleep
+            break
+        fi
+        ocf_log info "MySQL is not running"
+        sleep 2
+    done
+
+    return $OCF_SUCCESS
+}
+
+# galera_start_local_node: actually launch the local server.
+# With a master present the node joins using wsrep_cluster_address; with
+# no master it must hold the bootstrap attribute and starts with
+# "gcomm://"; otherwise it fails with $OCF_ERR_GENERIC.
+# A bootstrap node is started synchronously and checked (status, not
+# read-only, Primary). Any other node is only launched and flagged
+# sync-needed, leaving the checks to the monitor action.
 galera_start_local_node()
 {
     local rc
     local extra_opts
     local bootstrap
 
     bootstrap=$(is_bootstrap)
     
     master_exists
     if [ $? -eq 0 ]; then
         # join without bootstrapping
         ocf_log info "Node <${NODENAME}> is joining the cluster"
         extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}"
     elif ocf_is_true $bootstrap; then
         ocf_log info "Node <${NODENAME}> is bootstrapping the cluster"
         extra_opts="--wsrep-cluster-address=gcomm://"
     else
         ocf_exit_reason "Failure, Attempted to join cluster of $OCF_RESOURCE_INSTANCE before master node has been detected."
         clear_last_commit
         return $OCF_ERR_GENERIC
     fi
 
     # clear last_commit before we start galera to make sure there
     # won't be discrepency between the cib and galera if this node
     # processes a few transactions and fails before we detect it
     clear_last_commit
 
     mysql_common_prepare_dirs
-    mysql_common_start "$extra_opts"
-    rc=$?
-    if [ $rc != $OCF_SUCCESS ]; then
-        return $rc
-    fi
 
-    mysql_common_status info
-    rc=$?
+    # At start time, if galera requires a SST rather than an IST, the
+    # mysql server's pidfile won't be available until SST finishes,
+    # which can be longer than the start timeout.  So we only check
+    # bootstrap node extensively. Joiner nodes are monitored in the
+    # "monitor" op
+    if ocf_is_true $bootstrap; then
+        # start server and wait until it's up and running
+        mysql_common_start "$extra_opts"
+        rc=$?
+        if [ $rc != $OCF_SUCCESS ]; then
+            return $rc
+        fi
 
-    if [ $rc != $OCF_SUCCESS ]; then
-        ocf_exit_reason "Failed initial monitor action"
-        return $rc
-    fi
+        mysql_common_status info
+        rc=$?
 
-    is_readonly
-    if [ $? -eq 0 ]; then
-        ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration."
-        return $OCF_ERR_GENERIC
-    fi
+        if [ $rc != $OCF_SUCCESS ]; then
+            ocf_exit_reason "Failed initial monitor action"
+            return $rc
+        fi
 
-    is_primary
-    if [ $? -ne 0 ]; then
-        ocf_exit_reason "Failure. Master instance started, but is not in Primary mode."
-        return $OCF_ERR_GENERIC
-    fi
+        is_readonly
+        if [ $? -eq 0 ]; then
+            ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration."
+            return $OCF_ERR_GENERIC
+        fi
+
+        is_primary
+        if [ $? -ne 0 ]; then
+            ocf_exit_reason "Failure. Master instance started, but is not in Primary mode."
+            return $OCF_ERR_GENERIC
+        fi
 
-    if ocf_is_true $bootstrap; then
         clear_bootstrap_node
+        # clear attribute no-grastate. if last shutdown was
+        # not clean, we cannot be extra-cautious by requesting a SST
+        # since this is the bootstrap node
+        clear_no_grastate
     else
+        # only start server, defer full checks to "monitor" op
+        galera_start_nowait "$extra_opts"
+        rc=$?
+        if [ $rc != $OCF_SUCCESS ]; then
+            return $rc
+        fi
+
         set_sync_needed
+        # attribute no-grastate will be cleared once the joiner
+        # has finished syncing and is promoted to Master
     fi
 
     ocf_log info "Galera started"
     return $OCF_SUCCESS
 }
 
+# detect_last_commit: find the last write sequence number of the local
+# database and publish it with set_last_commit.
+# The seqno is read from grastate.dat; if it is missing or -1, the server
+# binary is run with --wsrep-recover and the position is parsed from its
+# output. Returns $OCF_SUCCESS when a value was found, otherwise clears
+# the last-committed attribute and returns $OCF_ERR_GENERIC.
+detect_last_commit()
+{
+    local last_commit
+    local recover_args="--defaults-file=$OCF_RESKEY_config \
+                        --pid-file=$OCF_RESKEY_pid \
+                        --socket=$OCF_RESKEY_socket \
+                        --datadir=$OCF_RESKEY_datadir \
+                        --user=$OCF_RESKEY_user"
+    local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'
+
+    ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
+    last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
+    if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
+        local tmp=$(mktemp)
+        local tmperr=$(mktemp)
+
+        # if we pass here because grastate.dat doesn't exist,
+        # try not to bootstrap from this node if possible
+        if [ ! -f ${OCF_RESKEY_datadir}/grastate.dat ]; then
+            set_no_grastate
+        fi
+
+        ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
+
+        ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr
+
+        # the sed script is passed quoted: it contains "*" and "[...]",
+        # which an unquoted expansion would expose to pathname expansion
+        last_commit="$(cat $tmp | sed -n "$recovered_position_regex")"
+        if [ -z "$last_commit" ]; then
+            # Galera uses InnoDB's 2pc transactions internally. If
+            # server was stopped in the middle of a replication, the
+            # recovery may find a "prepared" XA transaction in the
+            # redo log, and mysql won't recover automatically
+
+            cat $tmperr | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null
+            if [ $? -eq 0 ]; then
+                # we can only rollback the transaction, but that's OK
+                # since the DB will get resynchronized anyway
+                ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
+                ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
+                                     --tc-heuristic-recover=rollback > $tmp 2>/dev/null
+
+                last_commit="$(cat $tmp | sed -n "$recovered_position_regex")"
+                if [ ! -z "$last_commit" ]; then
+                    ocf_log warn "State recovered. force SST at next restart for full resynchronization"
+                    rm -f ${OCF_RESKEY_datadir}/grastate.dat
+                    # try not to bootstrap from this node if possible
+                    set_no_grastate
+                fi
+            fi
+        fi
+        rm -f $tmp $tmperr
+    fi
+
+    if [ ! -z "$last_commit" ]; then
+        ocf_log info "Last commit version found:  $last_commit"
+        set_last_commit $last_commit
+        return $OCF_SUCCESS
+    else
+        ocf_exit_reason "Unable to detect last known write sequence number"
+        clear_last_commit
+        return $OCF_ERR_GENERIC
+    fi
+}
 
+# galera_promote: with no master present, a node holding the bootstrap
+# attribute starts the server via galera_start_local_node (any other node
+# fails with $OCF_ERR_GENERIC). With a master present, promotion succeeds
+# only once the sync-needed attribute is gone.
+# NOTE(review): the locals extra_opts and bootstrap are declared but not
+# used in this function.
 galera_promote()
 {
     local rc
     local extra_opts
     local bootstrap
 
     master_exists
     if [ $? -ne 0 ]; then
         # promoting the first master will bootstrap the cluster
         if is_bootstrap; then
             galera_start_local_node
             rc=$?
             return $rc
         else
             ocf_exit_reason "Attempted to start the cluster without being a bootstrap node."
             return $OCF_ERR_GENERIC
         fi
     else
         # promoting other masters only performs sanity checks
         # as the joining nodes were started during the "monitor" op
         if ! check_sync_needed; then
+            # sync is done, clear info about last startup
+            clear_no_grastate
             return $OCF_SUCCESS
         else
             ocf_exit_reason "Attempted to promote local node while sync was still needed."
             return $OCF_ERR_GENERIC
         fi
     fi
 }
 
 galera_demote()
 {
     mysql_common_stop
     rc=$?
     if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
         ocf_exit_reason "Failed to stop Master galera instance during demotion to Master"
         return $rc
     fi
 
     # if this node was previously a bootstrap node, that is no longer the case.
     clear_bootstrap_node
     clear_last_commit
     clear_sync_needed
+    clear_no_grastate
 
-    # record last commit by "starting" galera. start is just detection of the last sequence number
-    galera_start
+    # record last commit for next promotion
+    detect_last_commit
+    rc=$?
+    return $rc
 }
 
+# galera_start: the "start" action records state only; it does not launch
+# the server. It checks that the local node appears in
+# wsrep_cluster_address, refuses to continue if galera_status reports
+# anything other than "not running", publishes the last commit and, when
+# no master exists yet, clears the master score and runs the bootstrap
+# election.
 galera_start()
 {
-    local last_commit
+    local rc
 
+    # NOTE(review): grep treats $NODENAME as an unanchored pattern, so a
+    # name that is a substring of another listed node also matches
     echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
     if [ $? -ne 0 ]; then
         ocf_exit_reason "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>to start this galera instance"
         return $OCF_ERR_CONFIGURED
     fi
 
-    mysql_common_status info
+    galera_status info
     if [ $? -ne $OCF_NOT_RUNNING ]; then
         ocf_exit_reason "master galera instance started outside of the cluster's control"
         return $OCF_ERR_GENERIC
     fi
 
     mysql_common_prepare_dirs
 
-    ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
-    last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
-    if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
-        ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
-        local tmp=$(mktemp)
-        ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
-            --pid-file=$OCF_RESKEY_pid \
-            --socket=$OCF_RESKEY_socket \
-            --datadir=$OCF_RESKEY_datadir \
-            --user=$OCF_RESKEY_user \
-            --wsrep-recover > $tmp 2>&1
-
-        last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')"
-        rm -f $tmp
-
-        if [ "$last_commit" = "-1" ]; then
-            last_commit="0"
-        fi
-    fi
-
-    if [ -z "$last_commit" ]; then
-        ocf_exit_reason "Unable to detect last known write sequence number"
-        clear_last_commit
-        return $OCF_ERR_GENERIC
+    detect_last_commit
+    rc=$?
+    if [ $rc -ne $OCF_SUCCESS ]; then
+        return $rc
     fi
-    ocf_log info "Last commit version found:  $last_commit"
-
-    set_last_commit $last_commit
 
     master_exists
     if [ $? -eq 0 ]; then
         ocf_log info "Master instances are already up, local node will join in when started"
     else
         clear_master_score
         detect_first_master
     fi
 
     return $OCF_SUCCESS
 }
 
+# galera_monitor: recurring monitor and probe.
+# - server not running, no last-committed attribute: $OCF_NOT_RUNNING
+# - server not running, attribute present: $OCF_SUCCESS; outside of a
+#   probe it either runs the bootstrap election or starts the local node
+#   to join (then the galera_start_local_node status is returned)
+# - server running with sync-needed set: check_sync_status result
+#   ($OCF_SUCCESS during a probe)
+# - server running, no sync needed: $OCF_RUNNING_MASTER when Primary,
+#   $OCF_ERR_GENERIC otherwise
+# NOTE(review): last_commit is not declared local in this function.
 galera_monitor()
 {
     local rc
     local status_loglevel="err"
 
     # Set loglevel to info during probe
     if ocf_is_probe; then
         status_loglevel="info"
     fi
 
-    mysql_common_status $status_loglevel
+    # Check whether mysql is running or about to start after sync
+    galera_status $status_loglevel
     rc=$?
 
     if [ $rc -eq $OCF_NOT_RUNNING ]; then
         last_commit=$(get_last_commit $NODENAME)
         if [ -n "$last_commit" ];then
             rc=$OCF_SUCCESS
 
             if ocf_is_probe; then
                 # prevent state change during probe
                 return $rc
             fi
 
             master_exists
             if [ $? -ne 0 ]; then
                 detect_first_master
             else
                 # a master instance exists and is healthy.
                 # start this node and mark it as "pending sync"
                 ocf_log info "cluster is running. start local node to join in"
                 galera_start_local_node
                 rc=$?
             fi
         fi
         return $rc
     elif [ $rc -ne $OCF_SUCCESS ]; then
         return $rc
     fi
 
-    # if we make it here, mysql is running. Check cluster status now.
+    # if we make it here, mysql is running or about to start after sync.
+    # Check cluster status now.
 
     echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
     if [ $? -ne 0 ]; then
         ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
         return $OCF_ERR_GENERIC
     fi
 
-    is_primary
+    check_sync_needed
     if [ $? -eq 0 ]; then
-        check_sync_needed
-        if [ $? -eq 0 ]; then
-            # galera running and sync is needed: slave state
-            if ocf_is_probe; then
-                # prevent state change during probe
-                rc=$OCF_SUCCESS
-            else
-                check_sync_status
-                rc=$?
-            fi
+        # galera running and sync is needed: slave state
+        if ocf_is_probe; then
+            # prevent state change during probe
+            rc=$OCF_SUCCESS
+        else
+            check_sync_status
+            rc=$?
+        fi
+    else
+        is_primary
+        if [ $? -ne 0 ]; then
+            ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
+            rc=$OCF_ERR_GENERIC
         else
             # galera running, no need to sync: master state and everything's clear
             rc=$OCF_RUNNING_MASTER
 
             if ocf_is_probe; then
                 # restore master score during probe
                 # if we detect this is a master instance
                 set_master_score
             fi
         fi
-    else
-        ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
-        rc=$OCF_ERR_GENERIC
     fi
 
     return $rc
 }
 
+# galera_stop: stop the server and clear the last-committed, master score,
+# bootstrap, sync-needed and no-grastate state of the local node.
+# The cleanup runs regardless of the stop result; the mysql_common_stop
+# status is returned.
 galera_stop()
 {
     local rc
     # make sure the process is stopped
     mysql_common_stop
     rc=$?
 
     clear_last_commit
     clear_master_score
     clear_bootstrap_node
     clear_sync_needed
+    clear_no_grastate
     return $rc
 }
 
+# galera_validate: return $OCF_ERR_CONFIGURED unless the resource is
+# multistate (ocf_is_ms) and wsrep_cluster_address is non-empty; otherwise
+# the status of mysql_common_validate is the result.
 galera_validate()
 {
     if ! ocf_is_ms; then
         ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
         return $OCF_ERR_CONFIGURED
     fi
 
     if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then
         ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value."
         return $OCF_ERR_CONFIGURED
     fi
 
     mysql_common_validate
 }
 
 case "$1" in
   meta-data)    meta_data
         exit $OCF_SUCCESS;;
   usage|help)   usage
         exit $OCF_SUCCESS;;
 esac
 
 galera_validate
 rc=$?
 LSB_STATUS_STOPPED=3
 if [ $rc -ne 0 ]; then
     case "$1" in
         stop) exit $OCF_SUCCESS;;
         monitor) exit $OCF_NOT_RUNNING;;
         status) exit $LSB_STATUS_STOPPED;;
         *) exit $rc;;
     esac
 fi
 
 if [ -z "${OCF_RESKEY_check_passwd}" ]; then
     # This value is automatically sourced from /etc/sysconfig/checkcluster if available
     OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
 fi
 if [ -z "${OCF_RESKEY_check_user}" ]; then
     # This value is automatically sourced from /etc/sysconfig/checkcluster if available
     OCF_RESKEY_check_user=${MYSQL_USERNAME}
 fi
 : ${OCF_RESKEY_check_user="root"}
 
 MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
 if [ -n "${OCF_RESKEY_check_passwd}" ]; then
     MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
 fi
 
 # This value is automatically sourced from /etc/sysconfig/checkcluster if available
 if [ -n "${MYSQL_HOST}" ]; then
     MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
 fi
 
 # This value is automatically sourced from /etc/sysconfig/checkcluster if available
 if [ -n "${MYSQL_PORT}" ]; then
     MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
 fi
 
 
 
 # What kind of method was invoked?
 case "$1" in
   start)    galera_start;;
   stop)     galera_stop;;
-  status)   mysql_common_status err;;
+  status)   galera_status err;;
   monitor)  galera_monitor;;
   promote)  galera_promote;;
   demote)   galera_demote;;
   validate-all) exit $OCF_SUCCESS;;
 
  *)     usage
         exit $OCF_ERR_UNIMPLEMENTED;;
 esac
 
 # vi:sw=4:ts=4:et:
diff --git a/heartbeat/garbd b/heartbeat/garbd
new file mode 100755
index 000000000..950df76bb
--- /dev/null
+++ b/heartbeat/garbd
@@ -0,0 +1,417 @@
+#!/bin/sh
+#
+# Copyright (c) 2015 Damien Ciabrini <dciabrin@redhat.com>
+#                    All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+##
+# README.
+#
+# Resource agent for garbd, the Galera arbitrator
+#
+# You can use this agent if you run an even number of galera nodes,
+# and you want an additional node to avoid split-brain situations.
+#
+# garbd requires that a Galera cluster is running, so make sure to
+# add a proper ordering constraint to the cluster, e.g.:
+#
+#   pcs constraint order galera-master then garbd
+#
+# If you add garbd to the cluster while Galera is not running, you
+# might want to disable it before setting up ordering constraint, e.g.:
+#
+#   pcs resource create garbd garbd \
+#      wsrep_cluster_address=gcomm://node1:4567,node2:4567 \
+#      meta target-role=stopped
+#
+# Use colocation and location constraints to avoid running galera and
+# garbd on the same node, e.g.:
+#
+#   pcs constraint colocation add garbd with galera-master -INFINITY
+#   pcs constraint location garbd prefers node3=INFINITY
+#
+##
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+# Set default parameter values
+
+OCF_RESKEY_binary_default="/usr/sbin/garbd"
+OCF_RESKEY_log_default="/var/log/garbd.log"
+OCF_RESKEY_pid_default="/var/run/garbd.pid"
+OCF_RESKEY_user_default="mysql"
+if [ "X${HOSTOS}" = "XOpenBSD" ];then
+    OCF_RESKEY_group_default="_mysql"
+else
+    OCF_RESKEY_group_default="mysql"
+fi
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}}
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
+: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}
+
+usage() {
+  cat <<UEND
+usage: $0 (start|stop|validate-all|meta-data|status|monitor)
+
+$0 manages a Galera arbitrator.
+
+The 'start' operation starts the arbitrator.
+The 'stop' operation stops the arbitrator.
+The 'status' operation reports whether the arbitrator is running
+The 'monitor' operation reports whether the arbitrator seems to be working
+The 'validate-all' operation reports whether the parameters are valid
+
+UEND
+}
+
+meta_data() {
+   cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="garbd">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource script for managing Galera arbitrator.
+</longdesc>
+<shortdesc lang="en">Manages a galera arbitrator instance</shortdesc>
+<parameters>
+
+<parameter name="binary" unique="0" required="0">
+<longdesc lang="en">
+Location of the Galera arbitrator binary
+</longdesc>
+<shortdesc lang="en">garbd server binary</shortdesc>
+<content type="string" default="${OCF_RESKEY_binary_default}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User running the garbd process
+</longdesc>
+<shortdesc lang="en">garbd user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user_default}" />
+</parameter>
+
+<parameter name="group" unique="0" required="0">
+<longdesc lang="en">
+Group running garbd (for logfile permissions)
+</longdesc>
+<shortdesc lang="en">garbd group</shortdesc>
+<content type="string" default="${OCF_RESKEY_group_default}"/>
+</parameter>
+
+<parameter name="log" unique="0" required="0">
+<longdesc lang="en">
+The logfile to be used for garbd.
+</longdesc>
+<shortdesc lang="en">Galera arbitrator log file</shortdesc>
+<content type="string" default="${OCF_RESKEY_log_default}"/>
+</parameter>
+
+<parameter name="pid" unique="0" required="0">
+<longdesc lang="en">
+The pidfile to be used for garbd.
+</longdesc>
+<shortdesc lang="en">Galera arbitrator pidfile</shortdesc>
+<content type="string" default="${OCF_RESKEY_pid_default}"/>
+</parameter>
+
+<parameter name="options" unique="0" required="0">
+<longdesc lang="en">
+Additional parameters which are passed to garbd on startup.
+</longdesc>
+<shortdesc lang="en">Additional parameters to pass to garbd</shortdesc>
+<content type="string" default=""/>
+</parameter>
+
+<parameter name="wsrep_cluster_address" unique="0" required="1">
+<longdesc lang="en">
+The galera cluster address. This takes the form of:
+gcomm://node:port,node:port,node:port
+
+Unlike Galera servers, port is mandatory for garbd.
+</longdesc>
+<shortdesc lang="en">Galera cluster address</shortdesc>
+<content type="string" default=""/>
+</parameter>
+
+<parameter name="wsrep_cluster_name" unique="0" required="1">
+<longdesc lang="en">
+The group name of the Galera cluster to connect to.
+</longdesc>
+<shortdesc lang="en">Galera cluster name</shortdesc>
+<content type="string" default=""/>
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="monitor" depth="0" timeout="20" interval="20" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
+
+
+# Start garbd as $OCF_RESKEY_user, record its pid and wait until it has
+# joined the cluster. Returns OCF_SUCCESS or OCF_ERR_GENERIC.
+garbd_start()
+{
+    local rc pid garbd_params
+
+    garbd_status info
+    rc=$?
+    if [ $rc -eq $OCF_SUCCESS ]; then
+        ocf_exit_reason "garbd started outside of the cluster's control"
+        return $OCF_ERR_GENERIC;
+    fi
+
+    touch $OCF_RESKEY_log
+    chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log
+    chmod 0640 $OCF_RESKEY_log
+    [ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log
+
+    garbd_params="--address=${OCF_RESKEY_wsrep_cluster_address} \
+                  --group ${OCF_RESKEY_wsrep_cluster_name} \
+                  --log ${OCF_RESKEY_log}"
+
+    if [ ! -z "${OCF_RESKEY_options}" ]; then
+        garbd_params="${garbd_params} --options=${OCF_RESKEY_options}"
+    fi
+
+    # garbd has no parameter to run as a specific user,
+    # so we need to start it by our own means
+    pid=$(su - -s /bin/sh $OCF_RESKEY_user -c "${OCF_RESKEY_binary} ${garbd_params} >/dev/null 2>&1 & echo \$!")
+
+    # garbd doesn't create a pidfile either, so we create our own.
+    # Save the write status right away: the [ ] test below overwrites $?.
+    echo $pid > $OCF_RESKEY_pid
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_exit_reason "Cannot create pidfile for garbd at $OCF_RESKEY_pid (rc=$rc), please check your installation"
+        return $OCF_ERR_GENERIC
+    fi
+
+    # Spin waiting for garbd to connect to the cluster.
+    # Let the CRM/LRM time us out if required.
+    while :; do
+        garbd_monitor info
+        rc=$?
+        if [ $rc -eq $OCF_NOT_RUNNING ]; then
+            ocf_exit_reason "garbd failed to start (pid=$pid), check logs in ${OCF_RESKEY_log}"
+            return $OCF_ERR_GENERIC
+        elif [ $rc -eq $OCF_SUCCESS ]; then
+            break
+        fi
+        sleep 2
+    done
+
+    ocf_log info "garbd connected to cluster \"${OCF_RESKEY_wsrep_cluster_name}\""
+    return $OCF_SUCCESS
+}
+
+garbd_status()
+{
+    local loglevel=$1
+    local rc
+    ocf_pidfile_status $OCF_RESKEY_pid
+    rc=$?
+
+    if [ $rc -eq 0 ]; then
+        return $OCF_SUCCESS
+    elif [ $rc -eq 2 ]; then
+        return $OCF_NOT_RUNNING
+    else
+        # clean up if pidfile is stale
+        if [ $rc -eq 1 ]; then
+            ocf_log $loglevel "garbd not running: removing old PID file"
+            rm -f $OCF_RESKEY_pid
+        fi
+        return $OCF_ERR_GENERIC
+    fi
+}
+
+# Check that garbd runs and is connected; $1 is the log level for failures.
+garbd_monitor()
+{
+    local rc pid
+    local loglevel=$1
+
+    # Set loglevel to info during probe
+    if ocf_is_probe; then
+        loglevel="info"
+    fi
+
+    garbd_status $loglevel
+    rc=$?
+
+    # probe just wants to know if garbd is running or not.
+    # ocf_is_probe must be run as a command: inside [ ] it is a mere string.
+    if ocf_is_probe && [ $rc -ne $OCF_SUCCESS ]; then
+        rc=$OCF_NOT_RUNNING
+    fi
+
+    # Consider garbd is working if it's connected to at least one node
+    # in the galera cluster. Note: a Galera node in Non-Primary state
+    # will be stopped by the galera RA, so we can assume that garbd
+    # will always be connected to the right partition.
+    if [ $rc -eq $OCF_SUCCESS ]; then
+        pid=`cat $OCF_RESKEY_pid 2> /dev/null `
+        # anchor the pid so that e.g. pid 123 doesn't match "4123/garbd"
+        if ! netstat -tnp 2>/dev/null | grep -s -q "ESTABLISHED[[:space:]][[:space:]]*${pid}/"; then
+            ocf_log $loglevel "garbd disconnected from cluster \"${OCF_RESKEY_wsrep_cluster_name}\""
+            rc=$OCF_ERR_GENERIC
+        fi
+    fi
+
+    return $rc
+}
+
+garbd_stop()
+{
+    local rc
+    local pid
+
+    if [ ! -f $OCF_RESKEY_pid ]; then
+        ocf_log info "garbd is not running"
+        return $OCF_SUCCESS
+    fi
+
+    pid=`cat $OCF_RESKEY_pid 2> /dev/null `
+
+    ocf_log info "stopping garbd"
+
+    # make sure the process is stopped
+    ocf_stop_processes TERM 10 $pid
+    rc=$?
+
+    if [ $rc -ne 0 ]; then
+        return $OCF_ERR_GENERIC
+    else
+        rm -f $OCF_RESKEY_pid
+        ocf_log info "garbd stopped"
+        return $OCF_SUCCESS
+    fi
+}
+
+garbd_validate()
+{
+    if ! have_binary "$OCF_RESKEY_binary"; then
+        ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_binary"
+        return $OCF_ERR_INSTALLED;
+    fi
+
+    if ! have_binary "netstat"; then
+        ocf_exit_reason "Setup problem: couldn't find command: netstat"
+        return $OCF_ERR_INSTALLED;
+    fi
+
+    if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then
+        ocf_exit_reason "garbd must be configured with a wsrep_cluster_address value."
+        return $OCF_ERR_CONFIGURED
+    fi
+
+    # unlike galera RA, ports must be set in cluster address for garbd
+    # https://github.com/codership/galera/issues/98
+    for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
+        echo $node | grep -s -q ':[1-9][0-9]*$'
+        if [ $? -ne 0 ]; then
+            ocf_exit_reason "wsrep_cluster_address must specify ports (gcomm://node1:port,node2:port)."
+            return $OCF_ERR_CONFIGURED
+        fi
+    done
+
+    # Ensure that the encryption method is set if garbd is configured
+    # to use SSL.
+    echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_(key|cert)='
+    if [ $? -eq 0 ]; then
+        echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_cipher='
+        if [ $? -ne 0 ]; then
+            ocf_exit_reason "option socket.ssl_cipher must be set if SSL is enabled."
+            return $OCF_ERR_CONFIGURED
+        fi
+    fi
+
+    if [ -z "$OCF_RESKEY_wsrep_cluster_name" ]; then
+        ocf_exit_reason "garbd must be configured with a wsrep_cluster_name value."
+        return $OCF_ERR_CONFIGURED
+    fi
+
+    if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then
+        ocf_exit_reason "User $OCF_RESKEY_user doesn't exist"
+        return $OCF_ERR_INSTALLED
+    fi
+
+    if ! getent group $OCF_RESKEY_group >/dev/null 2>&1; then
+        ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist"
+        return $OCF_ERR_INSTALLED
+    fi
+
+    return $OCF_SUCCESS
+}
+
+case "$1" in
+  meta-data)    meta_data
+        exit $OCF_SUCCESS;;
+  usage|help)   usage
+        exit $OCF_SUCCESS;;
+esac
+
+garbd_validate
+rc=$?
+
+# trap configuration errors early, but don't block stop in such cases
+LSB_STATUS_STOPPED=3
+if [ $rc -ne 0 ]; then
+    case "$1" in
+        stop) exit $OCF_SUCCESS;;
+        status) exit $LSB_STATUS_STOPPED;;
+        *) exit $rc;;
+    esac
+fi
+
+# What kind of method was invoked?
+case "$1" in
+  start)    garbd_start;;
+  stop)     garbd_stop;;
+  status)   garbd_status err;;
+  monitor)  garbd_monitor err;;
+  # no garbd_promote/garbd_demote handlers exist in this agent
+  promote|demote) exit $OCF_ERR_UNIMPLEMENTED;;
+  validate-all) exit $OCF_SUCCESS;;
+
+ *)     usage
+        exit $OCF_ERR_UNIMPLEMENTED;;
+esac
diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit
index 0b1670f30..c1bf11dbb 100755
--- a/heartbeat/iSCSILogicalUnit
+++ b/heartbeat/iSCSILogicalUnit
@@ -1,677 +1,690 @@
 #!/bin/bash
 #
 #
 #   iSCSILogicalUnit OCF RA. Exports and manages iSCSI Logical Units.
 #
 #   (c) 2013 LINBIT, Lars Ellenberg
 #   (c) 2009-2010 Florian Haas, Dejan Muhamedagic,
 #       and Linux-HA contributors
 #
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # Defaults
 # Set a default implementation based on software installed
 if have_binary ietadm; then
 	OCF_RESKEY_implementation_default="iet"
 elif have_binary tgtadm; then
 	OCF_RESKEY_implementation_default="tgt"
 elif have_binary lio_node; then
 	OCF_RESKEY_implementation_default="lio"
 elif have_binary targetcli; then
 	OCF_RESKEY_implementation_default="lio-t"
 fi
 : ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}}
 
 # Use a default SCSI ID and SCSI SN that is unique across the cluster,
 # and persistent in the event of resource migration.
 # SCSI IDs are limited to 24 bytes, but only 16 bytes are known to be
 # supported by all iSCSI implementations this RA cares about. Thus,
 # for a default, use the first 16 characters of
 # $OCF_RESOURCE_INSTANCE.
 OCF_RESKEY_scsi_id_default="${OCF_RESOURCE_INSTANCE:0:16}"
 : ${OCF_RESKEY_scsi_id=${OCF_RESKEY_scsi_id_default}}
 # To have a reasonably unique default SCSI SN, use the first 8 bytes
 # of an MD5 hash of of $OCF_RESOURCE_INSTANCE
 sn=`echo -n "${OCF_RESOURCE_INSTANCE}" | openssl md5 | sed -e 's/(stdin)= //'`
 OCF_RESKEY_scsi_sn_default=${sn:0:8}
 : ${OCF_RESKEY_scsi_sn=${OCF_RESKEY_scsi_sn_default}}
 # set 0 as a default value for lio iblock device number
 OCF_RESKEY_lio_iblock_default=0
 OCF_RESKEY_lio_iblock=${OCF_RESKEY_lio_iblock:-$OCF_RESKEY_lio_iblock_default}
 
 ## tgt specifics
 # tgt has "backing store type" and "backing store open flags",
 # as well as device-type.
 #
 # suggestions how to make this generic accross all supported implementations?
 # how should they be named, how should they be mapped to implementation specifics?
 #
 # OCF_RESKEY_tgt_bstype
 # OCF_RESKEY_tgt_bsoflags
+# OCF_RESKEY_tgt_bsopts
 # OCF_RESKEY_tgt_device_type
 
 #######################################################################
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="iSCSILogicalUnit" version="0.9">
 <version>0.9</version>
 
 <longdesc lang="en">
 Manages iSCSI Logical Unit. An iSCSI Logical unit is a subdivision of 
 an SCSI Target, exported via a daemon that speaks the iSCSI protocol.
 </longdesc>
 <shortdesc lang="en">Manages iSCSI Logical Units (LUs)</shortdesc>
 
 <parameters>
 <parameter name="implementation" required="0" unique="0">
 <longdesc lang="en">
 The iSCSI target daemon implementation. Must be one of "iet", "tgt",
 "lio", or "lio-t".  If unspecified, an implementation is selected based on the
 availability of management utilities, with "iet" being tried first,
 then "tgt", then "lio", then "lio-t".
 </longdesc>
 <shortdesc lang="en">iSCSI target daemon implementation</shortdesc>
 <content type="string" default="${OCF_RESKEY_implementation_default}"/>
 </parameter>
 
 <parameter name="target_iqn" required="1" unique="0">
 <longdesc lang="en">
 The iSCSI Qualified Name (IQN) that this Logical Unit belongs to.
 </longdesc>
 <shortdesc lang="en">iSCSI target IQN</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="lun" required="1" unique="0">
 <longdesc lang="en">
 The Logical Unit number (LUN) exposed to initiators.
 </longdesc>
 <shortdesc lang="en">Logical Unit number (LUN)</shortdesc>
 <content type="integer" />
 </parameter>
 
 <parameter name="path" required="1" unique="0">
 <longdesc lang="en">
 The path to the block device exposed. Some implementations allow this
 to be a regular file, too.
 </longdesc>
 <shortdesc lang="en">Block device (or file) path</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="scsi_id" required="0" unique="1">
 <longdesc lang="en">
 The SCSI ID to be configured for this Logical Unit. The default
 is the resource name, truncated to 24 bytes.
 </longdesc>
 <shortdesc lang="en">SCSI ID</shortdesc>
 <content type="string" default="${OCF_RESKEY_scsi_id_default}"/>
 </parameter>
 
 <parameter name="scsi_sn" required="0" unique="1">
 <longdesc lang="en">
 The SCSI serial number to be configured for this Logical Unit.
 The default is a hash of the resource name, truncated to 8 bytes.
 </longdesc>
 <shortdesc lang="en">SCSI serial number</shortdesc>
 <content type="string" default="${OCF_RESKEY_scsi_sn_default}"/>
 </parameter>
 
 <parameter name="vendor_id" required="0" unique="0">
 <longdesc lang="en">
 The SCSI vendor ID to be configured for this Logical Unit.
 </longdesc>
 <shortdesc lang="en">SCSI vendor ID</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="product_id" required="0" unique="0">
 <longdesc lang="en">
 The SCSI product ID to be configured for this Logical Unit.
 </longdesc>
 <shortdesc lang="en">SCSI product ID</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="tgt_bstype" required="0" unique="0">
 <longdesc lang="en">
 TGT specific backing store type. If you want to use aio,
 make sure your tgtadm is built against libaio.
 See tgtadm(8).
 </longdesc>
 <shortdesc lang="en">TGT backing store type</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="tgt_bsoflags" required="0" unique="0">
 <longdesc lang="en">
 TGT specific backing store open flags (direct|sync).
 See tgtadm(8).
 </longdesc>
 <shortdesc lang="en">TGT backing store open flags</shortdesc>
 <content type="string" />
 </parameter>
 
+<parameter name="tgt_bsopts" required="0" unique="0">
+<longdesc lang="en">
+TGT specific backing store options.
+See tgtadm(8).
+</longdesc>
+<shortdesc lang="en">TGT backing store options</shortdesc>
+<content type="string" />
+</parameter>
+
 <parameter name="tgt_device_type" required="0" unique="0">
 <longdesc lang="en">
 TGT specific device type.
 See tgtadm(8).
 </longdesc>
 <shortdesc lang="en">TGT device type</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="additional_parameters" required="0" unique="0">
 <longdesc lang="en">
 Additional LU parameters. A space-separated list of "name=value" pairs
 which will be passed through to the iSCSI daemon's management
 interface. The supported parameters are implementation
 dependent. Neither the name nor the value may contain whitespace.
 </longdesc>
 <shortdesc lang="en">List of iSCSI LU parameters</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="allowed_initiators" required="0" unique="0">
 <longdesc lang="en">
 Allowed initiators. A space-separated list of initiators allowed to
 connect to this lun. Initiators may be listed in any syntax
 the target implementation allows. If this parameter is empty or
 not set, access to this lun will not be allowed from any initiator,
 if target is not in demo mode.
 
 This parameter is only necessary when using LIO.
 </longdesc>
 <shortdesc lang="en">List of iSCSI initiators allowed to connect
 to this lun.</shortdesc>
 <content type="string" default=""/>
 </parameter>
 
 <parameter name="lio_iblock" required="0" unique="0">
 <longdesc lang="en">
 LIO iblock device name, a number starting from 0.
 
 Using distinct values here avoids a warning in LIO "LEGACY: SHARED HBA";
 and it is necessary when using multiple LUNs started at the same time
 (eg. on node failover) to prevent a race condition in tcm_core on mkdir()
 in /sys/kernel/config/target/core/.
 </longdesc>
 <shortdesc lang="en">LIO iblock device number</shortdesc>
 <content type="integer" default="0"/>
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"         timeout="10" />
 <action name="stop"          timeout="10" />
 <action name="status"        timeout="10" interval="10" depth="0" />
 <action name="monitor"       timeout="10" interval="10" depth="0" />
 <action name="meta-data"     timeout="5" />
 <action name="validate-all"  timeout="10" />
 </actions>
 </resource-agent>
 END
 }
 
 #######################################################################
 
 iSCSILogicalUnit_usage() {
 	cat <<END
 usage: $0 {start|stop|status|monitor|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 iSCSILogicalUnit_start() {
 	iSCSILogicalUnit_monitor
 	if [ $? =  $OCF_SUCCESS ]; then
 		return $OCF_SUCCESS
 	fi
 
 	local params
 
 	case $OCF_RESKEY_implementation in
 	iet)
 		params="Path=${OCF_RESKEY_path}"
 		# use blockio if path points to a block device, fileio
 		# otherwise.
 		if [ -b "${OCF_RESKEY_path}" ]; then
 			params="${params} Type=blockio"
 		else
 			params="${params} Type=fileio"
 		fi
 		# in IET, we have to set LU parameters on creation
 		if [ -n "${OCF_RESKEY_scsi_id}" ]; then
 			params="${params} ScsiId=${OCF_RESKEY_scsi_id}"
 		fi
 		if [ -n "${OCF_RESKEY_scsi_sn}" ]; then
 			params="${params} ScsiSN=${OCF_RESKEY_scsi_sn}"
 		fi
 		params="${params} ${OCF_RESKEY_additional_parameters}"
 		ocf_run ietadm --op new \
 			--tid=${TID} \
 			--lun=${OCF_RESKEY_lun} \
 			--params ${params// /,} || exit $OCF_ERR_GENERIC
 		;;
 	tgt)
 		# tgt requires that we create the LU first, then set LU
 		# parameters
 		params=""
 		local var
 		local envar
 		for var in scsi_id scsi_sn vendor_id product_id; do
 			envar="OCF_RESKEY_${var}"
 			if [ -n "${!envar}" ]; then
 				params="${params} ${var}=${!envar}"
 			fi
 		done
 		params="${params} ${OCF_RESKEY_additional_parameters}"
 
 		# cleanup: tgt (as of tgtadm version 1.0.24) does not like an explicit "bsoflags=direct"
 		# when used with "bstype=aio" (which always uses O_DIRECT)
 		[[ $OCF_RESKEY_tgt_bstype/$OCF_RESKEY_tgt_bsoflags = "aio/direct" ]] && OCF_RESKEY_tgt_bsoflags=""
 
 		tgt_args=""
 		[[ $OCF_RESKEY_tgt_bstype ]]	&& tgt_args="$tgt_args --bstype=$OCF_RESKEY_tgt_bstype"
 		[[ $OCF_RESKEY_tgt_bsoflags ]]	&& tgt_args="$tgt_args --bsoflags=$OCF_RESKEY_tgt_bsoflags"
+		[[ $OCF_RESKEY_tgt_bsopts ]]	&& tgt_args="$tgt_args --bsopts=$OCF_RESKEY_tgt_bsopts"
 		[[ $OCF_RESKEY_tgt_device_type ]]	&& tgt_args="$tgt_args --device-type=$OCF_RESKEY_tgt_device_type"
 
 		ocf_run tgtadm --lld iscsi --op new --mode logicalunit \
 			--tid=${TID} \
 			--lun=${OCF_RESKEY_lun} \
 			$tgt_args \
 			--backing-store ${OCF_RESKEY_path} || exit $OCF_ERR_GENERIC
 		if [ -z "$params" ]; then
 			return $OCF_SUCCESS
 		else
 			ocf_run tgtadm --lld iscsi --op update --mode logicalunit \
 				--tid=${TID} \
 				--lun=${OCF_RESKEY_lun} \
 				--params ${params// /,} || exit $OCF_ERR_GENERIC
 		fi
 		;;
 	lio)
 		# For lio, we first have to create a target device, then
 		# add it to the Target Portal Group as an LU.
 
 		block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path"
 		if [ ! -e "${block_configfs_path}" ]; then
 			ocf_run tcm_node --createdev=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} \
 				${OCF_RESKEY_path} || exit $OCF_ERR_GENERIC
 		elif [ -e "$block_configfs_path" ] && [ $(cat "$block_configfs_path") != "${OCF_RESKEY_path}" ]; then
 			ocf_exit_reason "Existing iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} has incorrect path: $(cat "$block_configfs_path") != ${OCF_RESKEY_path}"
 			exit $OCF_ERR_GENERIC
 		else
 			ocf_log info "iscsi iblock already exists: ${block_configfs_path}"
 		fi
 
 		if [ -n "${OCF_RESKEY_scsi_sn}" ]; then
 			ocf_run tcm_node --setunitserial=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} \
 				${OCF_RESKEY_scsi_sn} || exit $OCF_ERR_GENERIC
 		fi
 
 		lun_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/${OCF_RESOURCE_INSTANCE}/udev_path"
 		if [ ! -e "${lun_configfs_path}" ]; then
 			ocf_run lio_node --addlun=${OCF_RESKEY_target_iqn} 1 ${OCF_RESKEY_lun} \
 				${OCF_RESOURCE_INSTANCE} iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC
 		else
 			ocf_log info "iscsi lun already exists: ${lun_configfs_path}"
 		fi
 
 		if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
 			for initiator in ${OCF_RESKEY_allowed_initiators}; do
 				acl_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/acls/${initiator}/lun_${OCF_RESKEY_lun}"
 				if [ ! -e "${acl_configfs_path}" ]; then
 					ocf_run lio_node --addlunacl=${OCF_RESKEY_target_iqn} 1 \
 					${initiator} ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
 				else
 					ocf_log info "iscsi acl already exists: ${acl_configfs_path}"
 				fi
 			done
 		fi
 		;;
 	lio-t)
 		# For lio, we first have to create a target device, then
 		# add it to the Target Portal Group as an LU.
 		ocf_run targetcli /backstores/block create name=${OCF_RESOURCE_INSTANCE} dev=${OCF_RESKEY_path} || exit $OCF_ERR_GENERIC
 		if [ -n "${OCF_RESKEY_scsi_sn}" ]; then
 			echo ${OCF_RESKEY_scsi_sn} > /sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/wwn/vpd_unit_serial
 		fi
 		ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns create /backstores/block/${OCF_RESOURCE_INSTANCE} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
 
 		if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
 			for initiator in ${OCF_RESKEY_allowed_initiators}; do
 				ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls create ${initiator} add_mapped_luns=False || exit $OCF_ERR_GENERIC
 				ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} create ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
 			done
 		fi
 		;;
 	esac
 
 	# Force the monitor operation to pass before start is considered a success.
 	iSCSILogicalUnit_monitor
 }
 
 iSCSILogicalUnit_stop() {
 	iSCSILogicalUnit_monitor
 	if [ $? -eq $OCF_NOT_RUNNING ]; then
 		return $OCF_SUCCESS
 	fi
 
 	case $OCF_RESKEY_implementation in
 
 	iet)
 	# IET allows us to remove LUs while they are in use
 	ocf_run ietadm --op delete \
 		--tid=${TID} \
 		--lun=${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
 		;;
 	tgt)
 		# tgt will fail to remove an LU while it is in use,
 		# but at the same time does not allow us to
 		# selectively shut down a connection that is using a
 		# specific LU. Thus, we need to loop here until tgtd
 		# decides that the LU is no longer in use, or we get
 		# timed out by the LRM.
 		while ! ocf_run -warn tgtadm --lld iscsi --op delete --mode logicalunit \
 			--tid ${TID} \
 			--lun=${OCF_RESKEY_lun}; do
 			sleep 1
 		done
 		;;
 	lio)
 
 		acls_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/acls"
 		for initiatorpath in ${acls_configfs_path}/*; do
 			initiator=$(basename "${initiatorpath}")
 			if [ -e "${initiatorpath}/lun_${OCF_RESKEY_lun}" ]; then
 				ocf_log info "deleting acl at ${initiatorpath}/lun_${OCF_RESKEY_lun}"
 				ocf_run lio_node --dellunacl=${OCF_RESKEY_target_iqn} 1 \
 					${initiator} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
 			fi
 		done
 		lun_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/"
 		if [ -e "${lun_configfs_path}" ]; then
 			ocf_run lio_node --dellun=${OCF_RESKEY_target_iqn} 1 ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
 		fi
 		block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path"
 		if [ -e "${block_configfs_path}" ]; then
 			ocf_run tcm_node --freedev=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC
 		fi
 		;;
 	lio-t)
 		# "targetcli delete" will fail if the LUN is already
 		# gone. Log a warning and still push ahead.
 		ocf_run -warn targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns delete ${OCF_RESKEY_lun}
 		if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
 			for initiator in ${OCF_RESKEY_allowed_initiators}; do
 				if targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} status | grep "Mapped LUNs: 0" >/dev/null ; then
 					ocf_run -warn targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/ delete ${initiator}
 				fi
 			done
 		fi
 
 		# If we've proceeded down to here and we're unable to
 		# delete the backstore, then something is seriously
 		# wrong and we need to fail the stop operation
 		# (potentially causing fencing)
 		ocf_run targetcli /backstores/block delete ${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC
 		;;
 	esac
 
 	return $OCF_SUCCESS
 }
 
 iSCSILogicalUnit_monitor() {
-	# If our backing device (or file) doesn't even exist, we're not running
-	[ -e ${OCF_RESKEY_path} ] || return $OCF_NOT_RUNNING
+	if [ x"${OCF_RESKEY_tgt_bstype}" != x"rbd" ]; then
+		# If our backing device (or file) doesn't even exist, we're not running
+		[ -e ${OCF_RESKEY_path} ] || return $OCF_NOT_RUNNING
+	fi
 
 	case $OCF_RESKEY_implementation in
 	iet)
 		# Figure out and set the target ID
 		TID=`sed -ne "s/tid:\([[:digit:]]\+\) name:${OCF_RESKEY_target_iqn}$/\1/p" < /proc/net/iet/volume`
 		if [ -z "${TID}" ]; then
 			# Our target is not configured, thus we're not
 			# running.
 			return $OCF_NOT_RUNNING
 		fi
 		# FIXME: this looks for a matching LUN and path, but does
 		# not actually test for the correct target ID.
 		grep -E -q "[[:space:]]+lun:${OCF_RESKEY_lun}.*path:${OCF_RESKEY_path}$" /proc/net/iet/volume && return $OCF_SUCCESS
 		;;
 	tgt)
 		# Figure out and set the target ID
 		TID=`tgtadm --lld iscsi --op show --mode target \
 			| sed -ne "s/^Target \([[:digit:]]\+\): ${OCF_RESKEY_target_iqn}$/\1/p"`
 		if [ -z "$TID" ]; then
 			# Our target is not configured, thus we're not
 			# running.
 			return $OCF_NOT_RUNNING
 		fi
 		# This only looks for the backing store, but does not test
 		# for the correct target ID and LUN.
 		tgtadm --lld iscsi --op show --mode target \
 			| grep -E -q "[[:space:]]+Backing store.*: ${OCF_RESKEY_path}$" && return $OCF_SUCCESS
 		;;
 	lio)
 		configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/${OCF_RESOURCE_INSTANCE}/udev_path"
 		[ -e ${configfs_path} ] && [ `cat ${configfs_path}` = "${OCF_RESKEY_path}" ] && return $OCF_SUCCESS
 
 		# if we aren't activated, is a block device still left over?
 		block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path"
 		[ -e ${block_configfs_path} ] && ocf_log warn "existing block without an active lun: ${block_configfs_path}"
 		[ -e ${block_configfs_path} ] && return $OCF_ERR_GENERIC
 
 		;;
 	lio-t)
 		configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/*/udev_path"
 		[ -e ${configfs_path} ] && [ `cat ${configfs_path}` = "${OCF_RESKEY_path}" ] && return $OCF_SUCCESS
 
 		# if we aren't activated, is a block device still left over?
 		block_configfs_path="/sys/kernel/config/target/core/iblock_*/${OCF_RESOURCE_INSTANCE}/udev_path"
 		[ -e ${block_configfs_path} ] && ocf_log warn "existing block without an active lun: ${block_configfs_path}"
 		[ -e ${block_configfs_path} ] && return $OCF_ERR_GENERIC
 		;;
 	esac
 
 	return $OCF_NOT_RUNNING
 }
 
 iSCSILogicalUnit_validate() {
 	# Do we have all required variables?
 	for var in target_iqn lun path; do
 	param="OCF_RESKEY_${var}"
 	if [ -z "${!param}" ]; then
 		ocf_exit_reason "Missing resource parameter \"$var\"!"
 		exit $OCF_ERR_CONFIGURED
 	fi
 	done
 
 	# Is the configured implementation supported?
 	case "$OCF_RESKEY_implementation" in
 	"iet"|"tgt"|"lio"|"lio-t")
 		;;
 	"")
 		# The user didn't specify an implementation, and we were
 		# unable to determine one from installed binaries (in
 		# other words: no binaries for any supported
 		# implementation could be found)
 		ocf_exit_reason "Undefined iSCSI target implementation"
 		exit $OCF_ERR_INSTALLED
 		;;
 	*)
 		ocf_exit_reason "Unsupported iSCSI target implementation \"$OCF_RESKEY_implementation\"!"
 		exit $OCF_ERR_CONFIGURED
 		;;
 	esac
 
 	# Do we have a valid LUN?
 	case $OCF_RESKEY_implementation in
 	iet)
 		# IET allows LUN 0 and up
 		[ $OCF_RESKEY_lun -ge 0 ]
 		case $? in
 		0)
 			# OK
 			;;
 		1)
 			ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be a non-negative integer)."
 			exit $OCF_ERR_CONFIGURED
 			;;
 		*)
 			ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be an integer)."
 			exit $OCF_ERR_CONFIGURED
 			;;
 		esac
 		;;
 	tgt)
 		# tgt reserves LUN 0 for its own purposes
 		[ $OCF_RESKEY_lun -ge 1 ]
 		case $? in
 		0)
 				# OK
 			;;
 		1)
 			ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be greater than 0)."
 			exit $OCF_ERR_CONFIGURED
 			;;
 		*)
 			ocf_log err "Invalid LUN $OCF_RESKEY_lun (must be an integer)."
 			exit $OCF_ERR_CONFIGURED
 			;;
 		esac
 		;;
 	esac
 
 	# Do we have any configuration parameters that the current
 	# implementation does not support?
 	local unsupported_params
 	local var
 	local envar
 	case $OCF_RESKEY_implementation in
 	iet)
 		# IET does not support setting the vendor and product ID
 		# (it always uses "IET" and "VIRTUAL-DISK")
-		unsupported_params="vendor_id product_id allowed_initiators lio_iblock tgt_bstype tgt_bsoflags tgt_device_type"
+		unsupported_params="vendor_id product_id allowed_initiators lio_iblock tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type"
 		;;
 	tgt)
 		unsupported_params="allowed_initiators lio_iblock"
 		;;
 	lio)
-		unsupported_params="scsi_id vendor_id product_id tgt_bstype tgt_bsoflags tgt_device_type"
+		unsupported_params="scsi_id vendor_id product_id tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type"
 		;;
 	lio-t)
-		unsupported_params="scsi_id vendor_id product_id tgt_bstype tgt_bsoflags tgt_device_type lio_iblock"
+		unsupported_params="scsi_id vendor_id product_id tgt_bstype tgt_bsoflags tgt_bsopts tgt_device_type lio_iblock"
 		;;
 	esac
 
 	for var in ${unsupported_params}; do
 		envar=OCF_RESKEY_${var}
 		defvar=OCF_RESKEY_${var}_default
 		if [ -n "${!envar}" ]; then
 			if  [[ "${!envar}" != "${!defvar}" ]];then 
 				case "$__OCF_ACTION" in
 				start|validate-all)
 					ocf_log warn "Configuration parameter \"${var}\"" \
 						"is not supported by the iSCSI implementation" \
 						"and will be ignored." ;;
 				esac
 			fi
 		fi
 	done
 
 	if ! ocf_is_probe; then
 	# Do we have all required binaries?
 	case $OCF_RESKEY_implementation in
 	iet)
 		check_binary ietadm
 		;;
 	tgt)
 		check_binary tgtadm
 		;;
 	lio)
 		check_binary tcm_node
 		check_binary lio_node
 		;;
 	lio-t)
 		check_binary targetcli
 		;;
 	esac
 
 	# Is the required kernel functionality available?
 	case $OCF_RESKEY_implementation in
 	iet)
 		[ -d /proc/net/iet ]
 		if [ $? -ne 0 ]; then
 			ocf_log err "/proc/net/iet does not exist or is not a directory -- check if required modules are loaded."
 			exit $OCF_ERR_INSTALLED
 		fi
 		;;
 	tgt)
 		# tgt is userland only
 		;;
 	esac
 	fi
 
 	return $OCF_SUCCESS
 }
 
 case $1 in
 meta-data)
 	meta_data
 	exit $OCF_SUCCESS
 	;;
 usage|help)
 	iSCSILogicalUnit_usage
 	exit $OCF_SUCCESS
 	;;
 esac
 
 # Everything except usage and meta-data must pass the validate test
 iSCSILogicalUnit_validate
 
 case $__OCF_ACTION in
 start)		iSCSILogicalUnit_start;;
 stop)		iSCSILogicalUnit_stop;;
 monitor|status)	iSCSILogicalUnit_monitor;;
 reload)		ocf_log err "Reloading..."
 			iSCSILogicalUnit_start
 		;;
 validate-all)	;;
 *)		iSCSILogicalUnit_usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 esac
 
 rc=$?
 ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
 exit $rc
diff --git a/heartbeat/iSCSITarget b/heartbeat/iSCSITarget
index b71a21f29..08832cd64 100755
--- a/heartbeat/iSCSITarget
+++ b/heartbeat/iSCSITarget
@@ -1,663 +1,683 @@
 #!/bin/bash
 #
 #
 #     iSCSITarget OCF RA. Exports and manages iSCSI targets.
 #
 #   (c) 2009-2010 Florian Haas, Dejan Muhamedagic,
 #                 and Linux-HA contributors
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # Defaults
 # Set a default implementation based on software installed
 if have_binary ietadm; then
 	OCF_RESKEY_implementation_default="iet"
 elif have_binary tgtadm; then
 	OCF_RESKEY_implementation_default="tgt"
 elif have_binary lio_node; then
 	OCF_RESKEY_implementation_default="lio"
 elif have_binary targetcli; then
 	OCF_RESKEY_implementation_default="lio-t"
 fi
 : ${OCF_RESKEY_implementation=${OCF_RESKEY_implementation_default}}
 
 # Listen on 0.0.0.0:3260 by default
 OCF_RESKEY_portals_default="0.0.0.0:3260"
 : ${OCF_RESKEY_portals=${OCF_RESKEY_portals_default}}
 
 # Lockfile, used for selecting a target ID
 LOCKFILE=${HA_RSCTMP}/iSCSITarget-${OCF_RESKEY_implementation}.lock
 #######################################################################
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="iSCSITarget" version="0.9">
 <version>0.9</version>
 
 <longdesc lang="en">
 Manages iSCSI targets. An iSCSI target is a collection of SCSI Logical
 Units (LUs) exported via a daemon that speaks the iSCSI protocol.
 </longdesc>
 <shortdesc lang="en">iSCSI target export agent</shortdesc>
 
 <parameters>
 <parameter name="implementation" required="0" unique="0">
 <longdesc lang="en">
 The iSCSI target daemon implementation. Must be one of "iet", "tgt",
 "lio", or "lio-t".  If unspecified, an implementation is selected based on the
 availability of management utilities, with "iet" being tried first,
 then "tgt", then "lio", then "lio-t".
 </longdesc>
 <shortdesc lang="en">Specifies the iSCSI target implementation
 ("iet", "tgt", "lio", or "lio-t").</shortdesc>
 <content type="string" default="${OCF_RESKEY_implementation_default}"/>
 </parameter>
 
 <parameter name="iqn" required="1" unique="1">
 <longdesc lang="en">
 The target iSCSI Qualified Name (IQN). Should follow the conventional
 "iqn.yyyy-mm.&lt;reversed domain name&gt;[:identifier]" syntax.
 </longdesc>
 <shortdesc lang="en">iSCSI target IQN</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="tid" required="0" unique="1">
 <longdesc lang="en">
 The iSCSI target ID. Required for tgt.
 </longdesc>
 <shortdesc lang="en">iSCSI target ID</shortdesc>
 <content type="integer" />
 </parameter>
 
 <parameter name="portals" required="0" unique="0">
 <longdesc lang="en">
 iSCSI network portal addresses. Not supported by all
 implementations. If unset, the default is to create one portal that
-listens on ${OCF_RESKEY_portal_default}.
+listens on ${OCF_RESKEY_portals_default}.
 </longdesc>
 <shortdesc lang="en">iSCSI portal addresses</shortdesc>
 <content type="string" default="${OCF_RESKEY_portals_default}"/>
 </parameter>
 
+<parameter name="iser_portals" required="0" unique="0">
+<longdesc lang="en">
+iSCSI iSER network portal addresses, given without the port. iSER is
+enabled on the matching portals. Only honored by the "lio-t" implementation.
+</longdesc>
+<shortdesc lang="en">iSCSI iSER enabled portal addresses</shortdesc>
+<content type="string"/>
+</parameter>
+
 <parameter name="allowed_initiators" required="0" unique="0">
 <longdesc lang="en">
 Allowed initiators. A space-separated list of initiators allowed to
 connect to this target. Initiators may be listed in any syntax
 the target implementation allows. If this parameter is empty or
 not set, access to this target will be allowed from any initiator.
 </longdesc>
 <shortdesc lang="en">List of iSCSI initiators allowed to connect
 to this target</shortdesc>
 <content type="string" default=""/>
 </parameter>
 
 <parameter name="incoming_username" required="0" unique="1">
 <longdesc lang="en">
 A username used for incoming initiator authentication. If unspecified,
 allowed initiators will be able to log in without authentication.
 This is a unique parameter, as it not allowed to re-use a single
 username across multiple target instances.
 </longdesc>
 <shortdesc lang="en">Incoming account username</shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="incoming_password" required="0" unique="0">
 <longdesc lang="en">
 A password used for incoming initiator authentication.
 </longdesc>
 <shortdesc lang="en">Incoming account password</shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="additional_parameters" required="0" unique="0">
 <longdesc lang="en">
 Additional target parameters. A space-separated list of "name=value"
 pairs which will be passed through to the iSCSI daemon's management
 interface. The supported parameters are implementation
 dependent. Neither the name nor the value may contain whitespace.
 </longdesc>
 <shortdesc lang="en">List of iSCSI target parameters</shortdesc>
 <content type="string" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"		timeout="10" />
 <action name="stop"		 timeout="10" />
 <action name="status"	   timeout="10" interval="10" depth="0" />
 <action name="monitor"	  timeout="10" interval="10" depth="0" />
 <action name="meta-data"	timeout="5" />
 <action name="validate-all"   timeout="10" />
 </actions>
 </resource-agent>
 END
 }
 
 #######################################################################
 
 iSCSITarget_usage() {
 	cat <<END
 usage: $0 {start|stop|status|monitor|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 iSCSITarget_start() {
 	iSCSITarget_monitor
 	if [ $? =  $OCF_SUCCESS ]; then
 		return $OCF_SUCCESS
 	fi
 
 	local param
 	local name
 	local value
 	local initiator
 	local portal
 
 	case $OCF_RESKEY_implementation in
 	iet)
 		local lasttid
 		local tid
 		if [ "${OCF_RESKEY_tid}" ]; then
 			tid="${OCF_RESKEY_tid}"
 		else
 			# Figure out the last used target ID, add 1 to get the new
 			# target ID.
 			ocf_take_lock $LOCKFILE
 			ocf_release_lock_on_exit $LOCKFILE
 			lasttid=`sed -ne "s/tid:\([[:digit:]]\+\) name:.*/\1/p" < /proc/net/iet/volume | sort -n | tail -n1`
 			[ -z "${lasttid}" ] && lasttid=0
 			tid=$((++lasttid))
 		fi
 
 		# Create the target.
 		ocf_run ietadm --op new \
 			--tid=${tid} \
 			--params Name=${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
 
 		# Set additional parameters.
 		for param in ${OCF_RESKEY_additional_parameters}; do
 			name=${param%=*}
 			value=${param#*=}
 			ocf_run ietadm --op update \
 				--tid=${tid} \
 				--params ${name}=${value} || exit $OCF_ERR_GENERIC
 		done
 
 		# Legacy versions of IET allow targets by default, current
 		# versions deny. To be safe we manage both the .allow and
 		# .deny files.
 		if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
 			echo "${OCF_RESKEY_iqn} ALL" >> /etc/initiators.deny
 			echo "${OCF_RESKEY_iqn} ${OCF_RESKEY_allowed_initiators// /,}" >> /etc/initiators.allow
 		else
 			echo "${OCF_RESKEY_iqn} ALL" >> /etc/initiators.allow
 		fi
 		# In iet, adding a new user and assigning it to a target
 		# is one operation.
 		if [ -n "${OCF_RESKEY_incoming_username}" ]; then
 			ocf_run ietadm --op new --user \
 				--tid=${tid} \
 				--params=IncomingUser=${OCF_RESKEY_incoming_username},Password=${OCF_RESKEY_incoming_password} \
 				|| exit $OCF_ERR_GENERIC
 		fi
 		;;
 	tgt)
 		local tid
 		tid="${OCF_RESKEY_tid}"
 		# Create the target.
 		ocf_run tgtadm --lld iscsi --op new --mode target \
 			--tid=${tid} \
 			--targetname ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
 
 		# Set parameters.
 		for param in ${OCF_RESKEY_additional_parameters}; do
 			name=${param%=*}
 			value=${param#*=}
 			ocf_run tgtadm --lld iscsi --op update --mode target \
 				--tid=${tid} \
 				--name=${name} --value=${value} || exit $OCF_ERR_GENERIC
 		done
 
 		# For tgt, we always have to add access per initiator;
 		# access to targets is denied by default. If
 		# "allowed_initiators" is unset, we must use the special
 		# keyword ALL.
 		for initiator in ${OCF_RESKEY_allowed_initiators=ALL}; do
 			ocf_run tgtadm --lld iscsi --op bind --mode target \
 				--tid=${tid} \
 				--initiator-address=${initiator} || exit $OCF_ERR_GENERIC
 		done
 
 		# In tgt, we must first create a user account, then assign
 		# it to a target using the "bind" operation.
 		if [ -n "${OCF_RESKEY_incoming_username}" ]; then
 			ocf_run tgtadm --lld iscsi --mode account --op new \
 				--user=${OCF_RESKEY_incoming_username} \
 				--password=${OCF_RESKEY_incoming_password} || exit $OCF_ERR_GENERIC
 			ocf_run tgtadm --lld iscsi --mode account --op bind \
 				--tid=${tid} \
 				--user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC
 		fi
 		;;
 	lio)
 		# lio distinguishes between targets and target portal
 		# groups (TPGs). We will always create one TPG, with the
 		# number 1. In lio, creating a network portal
 		# automatically creates the corresponding target if it
 		# doesn't already exist.
 		for portal in ${OCF_RESKEY_portals}; do
 			ocf_run lio_node --addnp ${OCF_RESKEY_iqn} 1 \
 				${portal} || exit $OCF_ERR_GENERIC
 		done
 
 		# in lio, we can set target parameters by manipulating
 		# the appropriate configfs entries
 		for param in ${OCF_RESKEY_additional_parameters}; do
 			name=${param%=*}
 			value=${param#*=}
 			configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/param/${name}"
 			if [ -e ${configfs_path} ]; then
 				echo ${value} > ${configfs_path} || exit $OCF_ERR_GENERIC
 			else
 				ocf_log warn "Unsupported iSCSI target parameter ${name}: will be ignored."
 			fi
 		done
 
 		# lio does per-initiator filtering by default. To disable
 		# this, we need to switch the target to "permissive mode".
 		if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
 			for initiator in ${OCF_RESKEY_allowed_initiators}; do
 				ocf_run lio_node --addnodeacl ${OCF_RESKEY_iqn} 1 \
 				${initiator} || exit $OCF_ERR_GENERIC
 			done
 		else
 			ocf_run lio_node --permissive ${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC
 			# permissive mode enables read-only access by default,
 			# so we need to change that to RW to be in line with
 			# the other implementations.
 			echo 0 > "/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/attrib/demo_mode_write_protect"
 			if [ `cat /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/attrib/demo_mode_write_protect` -ne 0 ]; then
 				ocf_log err "Failed to disable write protection for target ${OCF_RESKEY_iqn}."
 				exit $OCF_ERR_GENERIC
 			fi
 		fi
 
 		# TODO: add CHAP authentication support when it gets added
 		# back into LIO
 		ocf_run lio_node --disableauth ${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC
 		# Finally, we need to enable the target to allow
 		# initiators to connect
 		ocf_run lio_node --enabletpg=${OCF_RESKEY_iqn} 1 || exit $OCF_ERR_GENERIC
 		;;
 	lio-t)
 		# lio distinguishes between targets and target portal
 		# groups (TPGs). We will always create one TPG, with the
 		# number 1. In lio, creating a network portal
 		# automatically creates the corresponding target if it
 		# doesn't already exist.
 		ocf_run targetcli /iscsi set global auto_add_default_portal=false || exit $OCF_ERR_GENERIC
 		ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
 		for portal in ${OCF_RESKEY_portals}; do
 			if [ $portal != ${OCF_RESKEY_portals_default} ] ; then
 				IFS=':' read -a sep_portal <<< "$portal"
 				ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/portals create "${sep_portal[0]}" "${sep_portal[1]}" || exit $OCF_ERR_GENERIC
 			else
 				ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
 			fi
 		done
 		# in lio, we can set target parameters by manipulating
 		# the appropriate configfs entries
 		for param in ${OCF_RESKEY_additional_parameters}; do
 			name=${param%=*}
 			value=${param#*=}
 			configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/param/${name}"
 			if [ -e ${configfs_path} ]; then
 				echo ${value} > ${configfs_path} || exit $OCF_ERR_GENERIC
 			else
 				ocf_log warn "Unsupported iSCSI target parameter ${name}: will be ignored."
 			fi
 		done
+		
+		# Enable iSER on every network portal that exists for each listed address.
+		# The address is quoted so glob characters in it (e.g. IPv6 brackets) stay literal.
+		local iser_portal np_iser
+		for iser_portal in ${OCF_RESKEY_iser_portals}; do
+			for np_iser in "/sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/np/${iser_portal}":*/iser; do
+				[ -f "${np_iser}" ] || { ocf_log warn "Unable to set iSER on: $iser_portal"; continue; }
+				echo "1" > "${np_iser}" || exit $OCF_ERR_GENERIC
+			done
+		done
+		
 		# lio does per-initiator filtering by default. To disable
 		# this, we need to switch the target to "permissive mode".
 		if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
 			for initiator in ${OCF_RESKEY_allowed_initiators}; do
 				ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/acls create ${initiator} || exit $OCF_ERR_GENERIC
 			done
 		else
 			ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/ set attribute authentication=0 demo_mode_write_protect=0 generate_node_acls=1 cache_dynamic_acls=1 || exit $OCF_ERR_GENERIC
 		fi
 		# TODO: add CHAP authentication support when it gets added
 		# back into LIO
 		ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/ set attribute authentication=0 || exit $OCF_ERR_GENERIC
 #			   ocf_run targetcli /iscsi 
 		;;
 	esac
 
 	iSCSITarget_monitor
 }
 
 iSCSITarget_stop() {
 	iSCSITarget_monitor
 	if [ $? -eq $OCF_NOT_RUNNING ]; then
 		return $OCF_SUCCESS
 	fi
 
 	local tid
 	case $OCF_RESKEY_implementation in
 	iet)
 		# Figure out the target ID
 		tid=`sed -ne "s/tid:\([[:digit:]]\+\) name:${OCF_RESKEY_iqn}/\1/p" < /proc/net/iet/volume`
 		if [ -z "${tid}" ]; then
 			ocf_log err "Failed to retrieve target ID for IQN ${OCF_RESKEY_iqn}"
 			exit $OCF_ERR_GENERIC
 		fi
 		# Close existing connections. There is no other way to
 		# do this in IET than to parse the contents of
 		# /proc/net/iet/session.
 		set -- $(sed -ne '/^tid:'${tid}' /,/^tid/ {
 						  /^[[:space:]]*sid:\([0-9]\+\)/ {
 							 s/^[[:space:]]*sid:\([0-9]*\).*/--sid=\1/; h;
 						  };
 						  /^[[:space:]]*cid:\([0-9]\+\)/ { 
 							  s/^[[:space:]]*cid:\([0-9]*\).*/--cid=\1/; G; p; 
 						  }; 
 					  }' < /proc/net/iet/session)
 		while [[ -n $2 ]]; do
 			# $2 $1 looks like "--sid=X --cid=Y"
 			ocf_run ietadm --op delete \
 				 --tid=${tid} $2 $1
 			shift 2
 		done
 			# In iet, unassigning a user from a target and
 		# deleting the user account is one operation.
 		if [ -n "${OCF_RESKEY_incoming_username}" ]; then
 			ocf_run ietadm --op delete --user \
 			--tid=${tid} \
 			--params=IncomingUser=${OCF_RESKEY_incoming_username} \
 			|| exit $OCF_ERR_GENERIC
 		fi
 		# Loop on delete. Keep trying until we time out, if
 		# necessary.
 		while true; do
 			if ietadm --op delete --tid=${tid}; then
 				ocf_log debug "Removed target ${OCF_RESKEY_iqn}."
 				break
 			else
 				ocf_log warn "Failed to remove target ${OCF_RESKEY_iqn}, retrying."
 				sleep 1
 			fi
 		done
 		# Avoid stale /etc/initiators.{allow,deny} entries
 		# for this target
 		if [ -e /etc/initiators.deny ]; then
 			ocf_run sed -e "/^${OCF_RESKEY_iqn}[[:space:]]/d" \
 			-i /etc/initiators.deny
 		fi
 		if [ -e /etc/initiators.allow ]; then
 			ocf_run sed -e "/^${OCF_RESKEY_iqn}[[:space:]]/d" \
 			-i /etc/initiators.allow
 		fi
 		;;
 	tgt)
 		tid="${OCF_RESKEY_tid}"
 		# Close existing connections. There is no other way to
 		# do this in tgt than to parse the output of "tgtadm --op
 		# show".
 		set -- $(tgtadm --lld iscsi --op show --mode target \
 			| sed -ne '/^Target '${tid}':/,/^Target/ {
 						  /^[[:space:]]*I_T nexus: \([0-9]\+\)/ {
 							 s/^.*: \([0-9]*\).*/--sid=\1/; h;
 						  };
 						  /^[[:space:]]*Connection: \([0-9]\+\)/ { 
 							  s/^.*: \([0-9]*\).*/--cid=\1/; G; p; 
 						  }; 
 						  /^[[:space:]]*LUN information:/ q; 
 					  }')
 		while [[ -n $2 ]]; do
 			# $2 $1 looks like "--sid=X --cid=Y"
 			ocf_run tgtadm --lld iscsi --op delete --mode connection \
 			--tid=${tid} $2 $1
 			shift 2
 		done
 			# In tgt, we must first unbind the user account from
 		# the target, then remove the account itself.
 		if [ -n "${OCF_RESKEY_incoming_username}" ]; then
 			ocf_run tgtadm --lld iscsi --mode account --op unbind \
 				--tid=${tid} \
 				--user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC
 			ocf_run tgtadm --lld iscsi --mode account --op delete \
 				--user=${OCF_RESKEY_incoming_username} || exit $OCF_ERR_GENERIC
 		fi
 		# Loop on delete. Keep trying until we time out, if
 		# necessary.
 		while true; do
 			if tgtadm --lld iscsi --op delete --mode target --tid=${tid}; then
 				ocf_log debug "Removed target ${OCF_RESKEY_iqn}."
 				break
 			else
 				ocf_log warn "Failed to remove target ${OCF_RESKEY_iqn}, retrying."
 				sleep 1
 			fi
 		done
 		# In tgt, we don't have to worry about our ACL
 		# entries. They are automatically removed upon target
 		# deletion.
 		;;
 	lio)
 		# In lio, removing a target automatically removes all
 		# associated TPGs, network portals, and LUNs.
 		ocf_run lio_node --deliqn ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
 		;;
 	lio-t)
 		ocf_run targetcli /iscsi delete ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
 		;;
 	esac
 
 	return $OCF_SUCCESS
 }
 
 iSCSITarget_monitor() {
 	case $OCF_RESKEY_implementation in
 	iet)
 		grep -Eq "tid:[0-9]+ name:${OCF_RESKEY_iqn}" /proc/net/iet/volume && return $OCF_SUCCESS
 		;;
 	tgt)
 		tgtadm --lld iscsi --op show --mode target \
 		| grep -Eq "Target [0-9]+: ${OCF_RESKEY_iqn}" && return $OCF_SUCCESS
 		;;
 	lio | lio-t)
 		# if we have no configfs entry for the target, it's
 		# definitely stopped
 		[ -d /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn} ] || return $OCF_NOT_RUNNING
 		# if the target is there, but its TPG is not enabled, then
 		# we also consider it stopped
 		[ `cat /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn}/tpgt_1/enable` -eq 1 ] || return $OCF_NOT_RUNNING
 		return $OCF_SUCCESS
 		;;
 	esac
 	
 	return $OCF_NOT_RUNNING
 }
 
 iSCSITarget_validate() {
 	# Do we have all required variables?
 	local required_vars
 	case $OCF_RESKEY_implementation in
 	iet)
 		required_vars="iqn"
 		;;
 	tgt)
 		required_vars="iqn tid"
 		;;
 	esac
 
 	for var in ${required_vars}; do
 		param="OCF_RESKEY_${var}"
 		if [ -z "${!param}" ]; then
 			ocf_exit_reason "Missing resource parameter \"$var\"!"
 			exit $OCF_ERR_CONFIGURED
 		fi
 	done
 
 	# Is the configured implementation supported?
 	case "$OCF_RESKEY_implementation" in
 	"iet"|"tgt"|"lio"|"lio-t")
 		;;
 	"")
 		# The user didn't specify an implementation, and we were
 		# unable to determine one from installed binaries (in
 		# other words: no binaries for any supported
 		# implementation could be found)
 		ocf_exit_reason "Undefined iSCSI target implementation"
 		exit $OCF_ERR_INSTALLED
 		;;
 	*)
 		ocf_exit_reason "Unsupported iSCSI target implementation \"$OCF_RESKEY_implementation\"!"
 		exit $OCF_ERR_CONFIGURED
 		;;
 	esac
 
 	# Do we have any configuration parameters that the current
 	# implementation does not support?
 	local unsupported_params
 	local var
 	local envar
 	case $OCF_RESKEY_implementation in
 	iet|tgt)
 		# IET and tgt do not support binding a target portal to a
 		# specific IP address.
 		unsupported_params="portals"
 		;;
 	lio|lio-t)
 		# TODO: Remove incoming_username and incoming_password
 		# from this check when LIO 3.0 gets CHAP authentication
 		unsupported_params="tid incoming_username incoming_password"
 		;;
 	esac
 
 	for var in ${unsupported_params}; do
 		envar=OCF_RESKEY_${var}
 		defvar=OCF_RESKEY_${var}_default
 		if [ -n "${!envar}" ]; then
 			if  [[ "${!envar}" != "${!defvar}" ]];then
 				case "$__OCF_ACTION" in
 					start|validate-all)
 						ocf_log warn "Configuration parameter \"${var}\"" \
 							"is not supported by the iSCSI implementation" \
 							"and will be ignored." ;;
 				esac
 			fi
 		fi
 	done
 
 	if ! ocf_is_probe; then
 		# Do we have all required binaries?
 	case $OCF_RESKEY_implementation in
 	iet)
 		check_binary ietadm
 		;;
 	tgt)
 		check_binary tgtadm
 		;;
 	lio)
 		check_binary tcm_node
 		check_binary lio_node
 		;;
 	lio-t)
 		check_binary targetcli
 		;;
 	esac
 
 	# Is the required kernel functionality available?
 	case $OCF_RESKEY_implementation in
 	iet)
 		[ -d /proc/net/iet ]
 		if [ $? -ne 0 ]; then
 			ocf_log err "/proc/net/iet does not exist or is not a directory -- check if required modules are loaded."
 			exit $OCF_ERR_INSTALLED
 		fi
 		;;
 	tgt)
 			# tgt is userland only
 		;;
 	lio)
 			# lio needs configfs to be mounted
 		if ! grep -Eq "^.*/sys/kernel/config[[:space:]]+configfs" /proc/mounts; then
 			ocf_log err "configfs not mounted at /sys/kernel/config -- check if required modules are loaded."
 			exit $OCF_ERR_INSTALLED
 		fi
 			# check for configfs entries created by target_core_mod
 		if [ ! -d /sys/kernel/config/target ]; then
 			ocf_log err "/sys/kernel/config/target does not exist or is not a directory -- check if required modules are loaded."
 			exit $OCF_ERR_INSTALLED
 		fi
 		;;
 	lio-t)
 		#targetcli loads the needed kernel modules
 		;;
 	esac
 	fi
 
 	return $OCF_SUCCESS
 }
 
 
 case $1 in
   meta-data)
 	meta_data
 	exit $OCF_SUCCESS
 	;;
   usage|help)
 	iSCSITarget_usage
 	exit $OCF_SUCCESS
 	;;
 esac
 
 # Everything except usage and meta-data must pass the validate test
 iSCSITarget_validate
 
 case $__OCF_ACTION in
 start)		iSCSITarget_start;;
 stop)		iSCSITarget_stop;;
 monitor|status)	iSCSITarget_monitor;;
 reload)		ocf_log err "Reloading..."
 			iSCSITarget_start
 		;;
 validate-all)	;;
 *)		iSCSITarget_usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 esac
 rc=$?
 ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
 exit $rc
diff --git a/heartbeat/iscsi b/heartbeat/iscsi
index ef0236e47..81cd78eba 100755
--- a/heartbeat/iscsi
+++ b/heartbeat/iscsi
@@ -1,514 +1,516 @@
 #!/bin/sh
 #
 # iSCSI OCF resource agent
 # Description: manage iSCSI disks (add/remove) using open-iscsi
 #
 # Copyright Dejan Muhamedagic <dejan@suse.de>
 # (C) 2007 Novell Inc. All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 # See usage() and meta_data() below for more details...
 #
 # OCF instance parameters:
 #	OCF_RESKEY_portal: the iSCSI portal address or host name (required)
 #	OCF_RESKEY_target: the iSCSI target (required)
 #	OCF_RESKEY_iscsiadm: iscsiadm program path (optional)
 #	OCF_RESKEY_discovery_type: discovery type (optional; default: sendtargets)
 #	OCF_RESKEY_try_recovery: wait for iSCSI recovery in monitor (optional; default: false)
 #
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # Defaults
 # Each *_default value is both advertised in meta_data and applied below
 # to any instance parameter the cluster manager left unset.
 OCF_RESKEY_udev_default="yes"
 OCF_RESKEY_iscsiadm_default="iscsiadm"
 OCF_RESKEY_discovery_type_default="sendtargets"
 OCF_RESKEY_try_recovery_default="false"
 
 : ${OCF_RESKEY_udev=${OCF_RESKEY_udev_default}}
 : ${OCF_RESKEY_iscsiadm=${OCF_RESKEY_iscsiadm_default}}
 : ${OCF_RESKEY_discovery_type=${OCF_RESKEY_discovery_type_default}}
 # try_recovery has a declared default too; apply it like the others so
 # the parameter is never empty when open_iscsi_monitor reads it.
 : ${OCF_RESKEY_try_recovery=${OCF_RESKEY_try_recovery_default}}
 
 # Print the command synopsis followed by a one-line summary of each
 # operation. The method list in the synopsis is the output of
 # iscsi_methods with its whitespace collapsed and joined by '|'.
 usage() {
   methods=$(echo $(iscsi_methods) | tr ' ' '|')
   cat <<EOF
 	usage: $0 {$methods}
 
 	$0 manages an iSCSI target
 
 	The 'start' operation starts (adds) the iSCSI target.
 	The 'stop' operation stops (removes) the iSCSI target.
 	The 'status' operation reports whether the iSCSI target is connected
 	The 'monitor' operation reports whether the iSCSI target is connected
 	The 'validate-all' operation reports whether the parameters are valid
 	The 'methods' operation reports on the methods $0 supports
 
 EOF
 }
 
 # Print the resource agent metadata (description, parameters, actions)
 # as XML on stdout. The heredoc delimiter is unquoted, so the
 # ${OCF_RESKEY_*_default} references are expanded to the default values
 # defined near the top of this script.
 meta_data() {
 	cat <<EOF
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="iscsi">
 <version>1.0</version>
 
 <longdesc lang="en">
 OCF Resource Agent for iSCSI. Add (start) or remove (stop) iSCSI
 targets.
 </longdesc>
 <shortdesc lang="en">Manages a local iSCSI initiator and its connections to iSCSI targets</shortdesc>
 
 <parameters>
 
 <parameter name="portal" unique="0" required="1">
 <longdesc lang="en">
 The iSCSI portal address in the form: {ip_address|hostname}[":"port]
 </longdesc>
 <shortdesc lang="en">Portal address</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="target" unique="1" required="1">
 <longdesc lang="en">
 The iSCSI target IQN.
 </longdesc>
 <shortdesc lang="en">Target IQN</shortdesc>
 <content type="string" />
 </parameter>
 
 <parameter name="discovery_type" unique="0" required="0">
 <longdesc lang="en">
 Target discovery type. Check the open-iscsi documentation for
 supported discovery types.
 </longdesc>
 <shortdesc lang="en">Target discovery type</shortdesc>
 <content type="string" default="${OCF_RESKEY_discovery_type_default}" />
 </parameter>
 
 <parameter name="iscsiadm" unique="0" required="0">
 <longdesc lang="en">
 open-iscsi administration utility binary.
 </longdesc>
 <shortdesc lang="en">iscsiadm binary</shortdesc>
 <content type="string" default="${OCF_RESKEY_iscsiadm_default}" />
 </parameter>
 
 <parameter name="udev" unique="0" required="0">
 <longdesc lang="en">
 If the next resource depends on the udev creating a device then
 we wait until it is finished. On a normally loaded host this
 should be done quickly, but you may be unlucky. If you are not
 using udev set this to "no", otherwise we will spin in a loop
 until a timeout occurs.
 </longdesc>
 <shortdesc lang="en">udev</shortdesc>
 <content type="string" default="${OCF_RESKEY_udev_default}" />
 </parameter>
 
 <parameter name="try_recovery" unique="0" required="0">
 <longdesc lang="en">
 If the iSCSI session exists but is currently inactive/broken,
 which is most probably due to network problems, the iSCSI layer
 will try to recover. If this parameter is set to true, we'll wait
 for the recovery to succeed. In that case the monitor operation
 can only time out so you should set the monitor op timeout
 attribute appropriately.
 </longdesc>
 <shortdesc lang="en">On error wait for iSCSI recovery in monitor</shortdesc>
 <content type="boolean" default="${OCF_RESKEY_try_recovery_default}" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start" timeout="120" />
 <action name="stop" timeout="120" />
 <action name="status" timeout="30" />
 <action name="monitor" depth="0" timeout="30" interval="120" />
 <action name="validate-all" timeout="5" />
 <action name="methods" timeout="5" />
 <action name="meta-data" timeout="5" />
 </actions>
 </resource-agent>
 EOF
 }
 
 # List every operation this agent understands, one per line, each line
 # prefixed with a single tab.
 iscsi_methods() {
   printf '\t%s\n' \
 	start \
 	stop \
 	status \
 	monitor \
 	validate-all \
 	methods \
 	meta-data \
 	usage
 }
 
 #
 # open-iscsi interface
 #
 
 # Succeed if some process command line in the "ps" listing contains
 # "iscsid". The bracketed first letter in the pattern keeps grep from
 # matching its own command line.
 is_iscsid_running() {
 	ps -e -o cmd | grep -qs '[i]scsid'
 }
 # Bind the generic operation hooks (discovery, add_disk, remove_disk,
 # disk_status) to their open-iscsi implementations and check that the
 # initiator tooling is usable.
 # Return codes:
 #   0: iscsid is running
 #   1: iscsid is not running, but iscsid.conf has an iscsid.startup entry
 #   2: iscsid is not running and no startup entry was found
 #   3: the iscsiadm binary is not available
 open_iscsi_setup() {
 	iscsiadm=${OCF_RESKEY_iscsiadm}
 	disk_status=open_iscsi_monitor
 	remove_disk=open_iscsi_remove
 	add_disk=open_iscsi_add
 	discovery=open_iscsi_discovery
 
 	if ! have_binary ${iscsiadm}; then
 		return 3
 	fi
 
 	is_iscsid_running && return 0
 
 	# apparently on RedHat (perhaps elsewhere?), there is a
 	# kind of iscsid autostart once root invokes some
 	# open_iscsi command; the iscsid.startup hook should take
 	# care of it; reported by m.richardson@ed.ac.uk (see also
 	# the discussion at the linux-ha-dev ML)
 	if grep -qs '^iscsid.startup' /etc/iscsi/iscsid.conf; then
 		return 1
 	fi
 
 	ocf_exit_reason "iscsid not running; please start open-iscsi utilities"
 	return 2
 }
 
 #
 # discovery return codes:
 #   0: ok (variable portal set)
 #   1: target not found
 #   2: target found but can't connect it unambiguously
 #   3: iscsiadm returned error
 #
 # open-iscsi >= "2.0-872" changed discovery semantics
 # see http://www.mail-archive.com/open-iscsi@googlegroups.com/msg04883.html
 # there's a new discoverydb command which should be used instead of discovery
  
 # Run target discovery against $OCF_RESKEY_portal and, on success, set
 # the global PORTAL to the portal address advertised for
 # $OCF_RESKEY_target. Reads $discovery_type, which disk_discovery sets
 # before calling. Returns 0-3 as described in the "discovery return
 # codes" comment preceding this function.
 open_iscsi_discovery() {
 	local output
 	local discovery_variant="discovery"
 	local options=""
 	local cmd
 	# third whitespace-separated word of "iscsiadm --version"
 	local version=`$iscsiadm --version | awk '{print $3}'`
 
 	ocf_version_cmp "$version" "2.0-871"
 	if [ $? -eq 2 ]; then # newer than 2.0-871?
 		discovery_variant="discoverydb"
 		# -D is only added for sendtargets discovery
 		[ "$discovery_type" = "sendtargets" ] &&
 			options="-D"
 	fi
 	cmd="$iscsiadm -m $discovery_variant -p $OCF_RESKEY_portal -t $discovery_type $options"
 	output=`$cmd`
 	# Fail on a non-zero status or on empty output; the failure is only
 	# reported when there is some output to show.
 	if [ $? -ne 0 -o x = "x$output" ]; then
 		[ x != "x$output" ] && {
 			ocf_exit_reason "$cmd FAILED"
 			echo "$output"
 		}
 		return 3
 	fi
 	# Keep the records whose last field equals the target and print their
 	# portal field with everything from the first comma on removed.
 	PORTAL=`echo "$output" |
 		awk -v target="$OCF_RESKEY_target" '
 		$NF==target{
 			if( NF==3 ) portal=$2; # sles compat mode
 			else portal=$1;
 			sub(",.*","",portal);
 			print portal;
 		}'`
 
 	# The number of words in PORTAL is the number of portals found.
 	case `echo "$PORTAL" | wc -w` in
 	0) #target not found
 		echo "$output"
 		ocf_exit_reason "target $OCF_RESKEY_target not found at portal $OCF_RESKEY_portal"
 		return 1
 	;;
 	1) #we're ok
 		return 0
 	;;
 	*) # handle multihome hosts reporting multiple portals
 		# Accept only a portal that is exactly the configured one.
 		# NOTE(review): the loop variable p is not declared local.
 		for p in $PORTAL; do
 			if [ "$OCF_RESKEY_portal" = "$p" ]; then
 				PORTAL="$OCF_RESKEY_portal"
 				return 0
 			fi
 		done
 		echo "$output"
 		ocf_exit_reason "sorry, can't handle multihomed hosts unless you specify the portal exactly"
 		return 2
 	;;
 	esac
 }
 # Log in to target $2 at portal $1 (iscsiadm node mode with -l).
 # Returns the exit status of iscsiadm.
 open_iscsi_add() {
 	$iscsiadm -m node -p $1 -T $2 -l
 }
 open_iscsi_get_session_id() {
 	local target="$1"
+	local portal="$2"
 	$iscsiadm -m session 2>/dev/null |
 		grep -E "$target($|[[:space:]])" |
+		grep -E "] $portal" |
 		awk '{print $2}' | tr -d '[]'
 }
 # Log out of the session to target $1 at $OCF_RESKEY_portal.
 # Returns the exit status of iscsiadm, or 1 (after recording an exit
 # reason) when no session id can be found for the target.
 open_iscsi_remove() {
 	local target="$1"
 	local session_id
-	session_id=`open_iscsi_get_session_id "$target"`
+	session_id=`open_iscsi_get_session_id "$target" "$OCF_RESKEY_portal"`
 	if [ "$session_id" ]; then
 		# -u logs out of the session selected with -r
 		$iscsiadm -m session -r $session_id -u
 	else
 		ocf_exit_reason "cannot find session id for target $target"
 		return 1
 	fi
 }
 # open_iscsi_monitor return codes:
 #   0: target running (logged in)
 #   1: target not running and target record exists
 #   2: iscsiadm -m session error (unexpected)
 #   3: target record does not exist (discovery necessary)
 #
 open_iscsi_monitor() {
 	local target="$1"
 	local session_id conn_state outp
 	local prev_state
 	local recov
 
 	recov=${2:-$OCF_RESKEY_try_recovery}
-	session_id=`open_iscsi_get_session_id "$target"`
+	session_id=`open_iscsi_get_session_id "$target" "$OCF_RESKEY_portal"`
 	prev_state=""
 	if [ -z "$session_id" ]; then
 		if $iscsiadm -m node -p $OCF_RESKEY_portal -T $target >/dev/null 2>&1; then
 			return 1 # record found
 		else
 			return 3
 		fi
 	fi
 	while :; do
 		outp=`$iscsiadm -m session -r $session_id -P 1` ||
 			return 2
 		conn_state=`echo "$outp" | sed -n '/Connection State/s/.*: //p'`
 		# some drivers don't return connection state, in that case
 		# we'll assume that we're still connected
 		case "$conn_state" in
 			"LOGGED IN")
 				[ -n "$msg_logged" ] &&
 					ocf_log info "connection state $conn_state. Session restored."
 				return 0;;
 			"Unknown"|"") # this is also probably OK
 				[ -n "$msg_logged" ] &&
 					ocf_log info "connection state $conn_state. Session restored."
 				return 0;;
 			*) # failed
 				if [ "$__OCF_ACTION" != stop ] && ! ocf_is_probe && ocf_is_true $recov; then
 					if [ "$conn_state" != "$prev_state" ]; then
 						ocf_log warning "connection state $conn_state, waiting for recovery..."
 						prev_state="$conn_state"
 					fi
 					sleep 1
 				else
 					ocf_exit_reason "iscsiadm output: $outp"
 					return 2
 				fi
 			;;
 		esac
 	done
 }
 
 # Run the configured discovery hook, which sets up the real portal
 # string (address). Returns normally only when discovery did not report
 # 1, 2 or 3; for those codes the agent exits:
 #   1 or 2 -> $OCF_ERR_GENERIC
 #   3      -> $OCF_ERR_INSTALLED if iscsid is not running (with a hint
 #             about iscsid.startup when setup_rc is 1), otherwise
 #             $OCF_ERR_GENERIC
 disk_discovery() {
 	local discovery_rc
 
 	discovery_type=${OCF_RESKEY_discovery_type}
 	$discovery
 	discovery_rc=$?
 
 	if [ $discovery_rc -eq 1 ] || [ $discovery_rc -eq 2 ]; then
 		exit $OCF_ERR_GENERIC
 	fi
 
 	if [ $discovery_rc -eq 3 ]; then
 		if is_iscsid_running; then
 			exit $OCF_ERR_GENERIC
 		fi
 		if [ $setup_rc -eq 1 ]; then
 			ocf_log warning "iscsid.startup probably not correctly set in /etc/iscsi/iscsid.conf"
 		fi
 		exit $OCF_ERR_INSTALLED
 	fi
 
 	return 0
 }
 
 #
 # NB: this is udev specific!
 #
 # Poll once per second until "ls" finds at least one path starting with
 # the by-path name built from $PORTAL and $OCF_RESKEY_target, logging a
 # warning before each retry. There is no retry limit in this function.
 wait_for_udev() {
 	dev=/dev/disk/by-path/ip-$PORTAL-iscsi-$OCF_RESKEY_target
 	until ls $dev* >/dev/null 2>&1; do
 		ocf_log warning "waiting for udev to create $dev"
 		sleep 1
 	done
 	return 0
 }
 # Translate the status hook's result into an OCF return code. Any
 # arguments are passed on to the hook after the target.
 #   0      -> $OCF_SUCCESS
 #   1 or 3 -> $OCF_NOT_RUNNING
 #   2      -> $OCF_ERR_GENERIC
 # Any other hook status yields 0.
 iscsi_monitor() {
 	local status_rc
 
 	$disk_status $OCF_RESKEY_target $*
 	status_rc=$?
 
 	if [ $status_rc -eq 0 ]; then
 		return $OCF_SUCCESS
 	fi
 	if [ $status_rc -eq 2 ]; then
 		return $OCF_ERR_GENERIC
 	fi
 	if [ $status_rc -eq 1 ] || [ $status_rc -eq 3 ]; then
 		return $OCF_NOT_RUNNING
 	fi
 	return 0
 }
 # Start (log in to) the configured target.
 # Runs discovery first when the status hook reports that no target
 # record exists (3). A target that is already logged in is a success.
 # After a fresh login, and unless OCF_RESKEY_udev is not "yes"/"Yes",
 # waits for udev, then confirms the result through iscsi_monitor with
 # recovery waiting forced on.
 # Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
 iscsi_start() {
 	local state
 
 	$disk_status $OCF_RESKEY_target
 	state=$?
 	if [ $state -eq 3 ]; then
 		disk_discovery
 		$disk_status $OCF_RESKEY_target
 		state=$?
 	fi
 
 	if [ $state -eq 0 ]; then
 		ocf_log info "iscsi $PORTAL $OCF_RESKEY_target already running"
 		return $OCF_SUCCESS
 	fi
 
 	if [ $state -eq 1 ]; then
 		if ! $add_disk $PORTAL $OCF_RESKEY_target; then
 			return $OCF_ERR_GENERIC
 		fi
 		case "$OCF_RESKEY_udev" in
 		[Yy]es)
 			if ! wait_for_udev; then
 				return $OCF_ERR_GENERIC
 			fi
 		;;
 		esac
 	else
 		# the session exists, but it's broken
 		ocf_log warning "iscsi $PORTAL $OCF_RESKEY_target in failed state"
 	fi
 
 	iscsi_monitor 1 # enforce wait
 	[ $? -eq $OCF_SUCCESS ] && return $OCF_SUCCESS
 	return $OCF_ERR_GENERIC
 }
 # Stop (log out of) the configured target.
 # A target that is already reported as not running is a success.
 # Otherwise the remove hook is called and the result is confirmed with a
 # second monitor run.
 # Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
 iscsi_stop() {
 	iscsi_monitor
 	if [ $? -eq $OCF_NOT_RUNNING ]; then
 		ocf_log info "iscsi $OCF_RESKEY_target already stopped"
 		return $OCF_SUCCESS
 	fi
 
 	if ! $remove_disk $OCF_RESKEY_target; then
 		return $OCF_ERR_GENERIC
 	fi
 
 	iscsi_monitor
 	if [ $? -eq $OCF_NOT_RUNNING ]; then
 		return $OCF_SUCCESS
 	fi
 	return $OCF_ERR_GENERIC
 }
 
 #
 #	'main' starts here...
 #
 
 # Exactly one argument (the operation name) is accepted.
 if [ $# -ne 1 ]; then
 	usage
 	exit $OCF_ERR_ARGS
 fi
 
 # These operations don't require OCF instance parameters to be set
 case "$1" in
 	meta-data)	meta_data
 		exit $OCF_SUCCESS;;
 	usage) usage
 		exit $OCF_SUCCESS;;
 	methods) iscsi_methods
 		exit $OCF_SUCCESS;;
 esac
 
 # Both required parameters must be non-empty from here on.
 if [ x = "x$OCF_RESKEY_target" ]; then
 	ocf_exit_reason "target parameter not set"
 	exit $OCF_ERR_CONFIGURED
 fi
 
 if [ x = "x$OCF_RESKEY_portal" ]; then
 	ocf_exit_reason "portal parameter not set"
 	exit $OCF_ERR_CONFIGURED
 fi
 
 # Pick the setup routine for this platform. Both branches select
 # open_iscsi_setup; anything other than Linux only adds a log message.
 case `uname` in
 Linux) setup=open_iscsi_setup
 ;;
 *) ocf_log info "platform `uname` may not be supported"
 	setup=open_iscsi_setup
 ;;
 esac
 
 PORTAL="$OCF_RESKEY_portal" # updated by discovery
 # exit code used by the "status" operation when setup failed
 LSB_STATUS_STOPPED=3
 $setup
 setup_rc=$?
 # Setup codes above 1 mean the initiator tooling is unusable. In that
 # case stop succeeds and monitor/status report "not running"; every
 # other operation fails with $OCF_ERR_INSTALLED.
 if [ $setup_rc -gt 1 ]; then
 	ocf_exit_reason "iscsi initiator utilities not installed or not setup"
 	case "$1" in
 		stop) exit $OCF_SUCCESS;;
 		monitor) exit $OCF_NOT_RUNNING;;
 		status) exit $LSB_STATUS_STOPPED;;
 		*) exit $OCF_ERR_INSTALLED;;
 	esac
 fi
 
 # All remaining operations are refused unless the effective uid is 0.
 if [ `id -u` != 0 ]; then
 	ocf_exit_reason "$0 must be run as root"
 	exit $OCF_ERR_PERM
 fi
 
 # which method was invoked?
 # start, stop and monitor have no explicit exit, so their status is the
 # status of the last command of the script.
 case "$1" in
 	start)
 		iscsi_start
 	;;
 	stop)	iscsi_stop
 	;;
 	# status additionally prints a one-line human readable state
 	status)	iscsi_monitor
 		rc=$?
 		case $rc in
 		$OCF_SUCCESS)
 		  echo iscsi target $OCF_RESKEY_target running
 		;;
 		$OCF_NOT_RUNNING)
 		  echo iscsi target $OCF_RESKEY_target stopped
 		;;
 		*)
 		  echo iscsi target $OCF_RESKEY_target failed
 		;;
 		esac
 		exit $rc
 		;;
 	monitor)	iscsi_monitor
 	;;
 	validate-all)	# everything already validated
 		# just exit successfully here.
 		exit $OCF_SUCCESS;;
 	# unknown operation: list the supported ones
 	*)		iscsi_methods
 		exit $OCF_ERR_UNIMPLEMENTED;;
 esac
 
 #
 # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0
diff --git a/heartbeat/mysql b/heartbeat/mysql
index e2d54dd17..be914d3b2 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -1,1044 +1,1045 @@
 #!/bin/sh
 #
 #
 # MySQL
 #
 # Description:  Manages a MySQL database as Linux-HA resource
 #
 # Authors:  Alan Robertson:               DB2 Script
 #           Jakub Janczak:                rewrite as MySQL
 #           Andrew Beekhof:               cleanup and import
 #           Sebastian Reitenbach:         add OpenBSD defaults, more cleanup
 #           Narayan Newton:               add Gentoo/Debian defaults
 #           Marian Marinov, Florian Haas: add replication capability
 #           Yves Trudeau, Baron Schwartz: add VIP support and improve replication
 #
 # Support:  linux-ha@lists.linux-ha.org
 # License:  GNU General Public License (GPL)
 #
 # (c) 2002-2005 International Business Machines, Inc.
 #     2005-2010 Linux-HA contributors
 #
 # An example usage in /etc/ha.d/haresources:
 #       node1  10.0.0.170 mysql
 #
 # See usage() function below for more details...
 #
 # OCF instance parameters:
 #   OCF_RESKEY_binary
 #   OCF_RESKEY_client_binary
 #   OCF_RESKEY_config
 #   OCF_RESKEY_datadir
 #   OCF_RESKEY_user
 #   OCF_RESKEY_group
 #   OCF_RESKEY_test_table
 #   OCF_RESKEY_test_user
 #   OCF_RESKEY_test_passwd
 #   OCF_RESKEY_enable_creation
 #   OCF_RESKEY_additional_parameters
 #   OCF_RESKEY_log
 #   OCF_RESKEY_pid
 #   OCF_RESKEY_socket
 #   OCF_RESKEY_replication_user
 #   OCF_RESKEY_replication_passwd
 #   OCF_RESKEY_replication_port
 #   OCF_RESKEY_max_slave_lag
 #   OCF_RESKEY_evict_outdated_slaves
 #   OCF_RESKEY_reader_attribute
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 . ${OCF_FUNCTIONS_DIR}/mysql-common.sh
 #######################################################################
 
 # Print the command synopsis and a one-line summary of each operation.
 # NOTE(review): the synopsis line does not list 'status' although the
 # text below describes it, and 'notify' is listed but not described --
 # confirm which is intended.
 usage() {
   cat <<UEND
 usage: $0 (start|stop|validate-all|meta-data|monitor|promote|demote|notify)
 
 $0 manages a MySQL Database as an HA resource.
 
 The 'start' operation starts the database.
 The 'stop' operation stops the database.
 The 'status' operation reports whether the database is running
 The 'monitor' operation reports whether the database seems to be working
 The 'promote' operation makes this mysql server run as master
 The 'demote' operation makes this mysql server run as slave
 The 'validate-all' operation reports whether the parameters are valid
 
 UEND
 }
 
 # Print the resource agent metadata (description, parameters, actions)
 # as XML on stdout. The heredoc delimiter is unquoted, so every
 # ${OCF_RESKEY_*_default} reference is expanded when this runs; the one
 # escaped reference, \${INSTANCE_ATTR_NAME}, is emitted literally.
 # The *_default variables are not defined in this part of the file --
 # presumably they come from the sourced mysql-common.sh; verify.
 meta_data() {
    cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="mysql">
 <version>1.0</version>
 
 <longdesc lang="en">
 Resource script for MySQL.
 May manage a standalone MySQL database, a clone set with externally
 managed replication, or a complete master/slave replication setup.
 Note, when master/slave replication is in use, the resource must
 be setup to use notifications. Set 'notify=true' in the metadata
 attributes when defining a MySQL master/slave instance.
 
 While managing replication, the default behavior is to use uname -n 
 values in the change master to command.  Other IPs can be specified 
 manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP
 giving the IP to use for replication.  For example, if the mysql primitive 
 you are using is p_mysql, the attribute to set will be 
 p_mysql_mysql_master_IP.
 </longdesc>
 <shortdesc lang="en">Manages a MySQL database instance</shortdesc>
 <parameters>
 
 <parameter name="binary" unique="0" required="0">
 <longdesc lang="en">
 Location of the MySQL server binary
 </longdesc>
 <shortdesc lang="en">MySQL server binary</shortdesc>
 <content type="string" default="${OCF_RESKEY_binary_default}" />
 </parameter>
 
 <parameter name="client_binary" unique="0" required="0">
 <longdesc lang="en">
 Location of the MySQL client binary
 </longdesc>
 <shortdesc lang="en">MySQL client binary</shortdesc>
 <content type="string" default="${OCF_RESKEY_client_binary_default}" />
 </parameter>
 
 <parameter name="config" unique="0" required="0">
 <longdesc lang="en">
 Configuration file
 </longdesc>
 <shortdesc lang="en">MySQL config</shortdesc>
 <content type="string" default="${OCF_RESKEY_config_default}" />
 </parameter>
 
 <parameter name="datadir" unique="0" required="0">
 <longdesc lang="en">
 Directory containing databases
 </longdesc>
 <shortdesc lang="en">MySQL datadir</shortdesc>
 <content type="string" default="${OCF_RESKEY_datadir_default}" />
 </parameter>
 
 <parameter name="user" unique="0" required="0">
 <longdesc lang="en">
 User running MySQL daemon
 </longdesc>
 <shortdesc lang="en">MySQL user</shortdesc>
 <content type="string" default="${OCF_RESKEY_user_default}" />
 </parameter>
 
 <parameter name="group" unique="0" required="0">
 <longdesc lang="en">
 Group running MySQL daemon (for logfile and directory permissions)
 </longdesc>
 <shortdesc lang="en">MySQL group</shortdesc>
 <content type="string" default="${OCF_RESKEY_group_default}"/>
 </parameter>
 
 <parameter name="log" unique="0" required="0">
 <longdesc lang="en">
 The logfile to be used for mysqld.
 </longdesc>
 <shortdesc lang="en">MySQL log file</shortdesc>
 <content type="string" default="${OCF_RESKEY_log_default}"/>
 </parameter>
 
 <parameter name="pid" unique="0" required="0">
 <longdesc lang="en">
 The pidfile to be used for mysqld.
 </longdesc>
 <shortdesc lang="en">MySQL pid file</shortdesc>
 <content type="string" default="${OCF_RESKEY_pid_default}"/>
 </parameter>
 
 <parameter name="socket" unique="0" required="0">
 <longdesc lang="en">
 The socket to be used for mysqld.
 </longdesc>
 <shortdesc lang="en">MySQL socket</shortdesc>
 <content type="string" default="${OCF_RESKEY_socket_default}"/>
 </parameter>
 
 <parameter name="test_table" unique="0" required="0">
 <longdesc lang="en">
 Table to be tested in monitor statement (in database.table notation)
 </longdesc>
 <shortdesc lang="en">MySQL test table</shortdesc>
 <content type="string" default="${OCF_RESKEY_test_table_default}" />
 </parameter>
 
 <parameter name="test_user" unique="0" required="0">
 <longdesc lang="en">
 MySQL test user, must have select privilege on test_table
 </longdesc>
 <shortdesc lang="en">MySQL test user</shortdesc>
 <content type="string" default="${OCF_RESKEY_test_user_default}" />
 </parameter>
 
 <parameter name="test_passwd" unique="0" required="0">
 <longdesc lang="en">
 MySQL test user password
 </longdesc>
 <shortdesc lang="en">MySQL test user password</shortdesc>
 <content type="string" default="${OCF_RESKEY_test_passwd_default}" />
 </parameter>
 
 <parameter name="enable_creation" unique="0" required="0">
 <longdesc lang="en">
 If the MySQL database does not exist, it will be created
 </longdesc>
 <shortdesc lang="en">Create the database if it does not exist</shortdesc>
 <content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
 </parameter>
 
 <parameter name="additional_parameters" unique="0" required="0">
 <longdesc lang="en">
 Additional parameters which are passed to the mysqld on startup.
 (e.g. --skip-external-locking or --skip-grant-tables)
 </longdesc>
 <shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
 <content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
 </parameter>
 
 <parameter name="replication_user" unique="0" required="0">
 <longdesc lang="en">
 MySQL replication user. This user is used for starting and stopping
 MySQL replication, for setting and resetting the master host, and for
 setting and unsetting read-only mode. Because of that, this user must
 have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, PROCESS and RELOAD
 privileges on all nodes within the cluster. Mandatory if you define a
 master-slave resource.
 </longdesc>
 <shortdesc lang="en">MySQL replication user</shortdesc>
 <content type="string" default="${OCF_RESKEY_replication_user_default}" />
 </parameter>
 
 <parameter name="replication_passwd" unique="0" required="0">
 <longdesc lang="en">
 MySQL replication password. Used for replication client and slave.
 Mandatory if you define a master-slave resource.
 </longdesc>
 <shortdesc lang="en">MySQL replication user password</shortdesc>
 <content type="string" default="${OCF_RESKEY_replication_passwd_default}" />
 </parameter>
 
 <parameter name="replication_port" unique="0" required="0">
 <longdesc lang="en">
 The port on which the Master MySQL instance is listening.
 </longdesc>
 <shortdesc lang="en">MySQL replication port</shortdesc>
 <content type="string" default="${OCF_RESKEY_replication_port_default}" />
 </parameter>
 
 <parameter name="max_slave_lag" unique="0" required="0">
 <longdesc lang="en">
 The maximum number of seconds a replication slave is allowed to lag
 behind its master. Do not set this to zero. What the cluster manager
 does in case a slave exceeds this maximum lag is determined by the
 evict_outdated_slaves parameter.
 </longdesc>
 <shortdesc lang="en">Maximum time (seconds) a MySQL slave is allowed
 to lag behind a master</shortdesc>
 <content type="integer" default="${OCF_RESKEY_max_slave_lag_default}"/>
 </parameter>
 
 <parameter name="evict_outdated_slaves" unique="0" required="0">
 <longdesc lang="en">
 If set to true, any slave which is more than max_slave_lag seconds
 behind the master has its MySQL instance shut down. If this parameter
 is set to false in a primitive or clone resource, it is simply
 ignored. If set to false in a master/slave resource, then exceeding
 the maximum slave lag will merely push down the master preference so
 the lagging slave is never promoted to the new master.
 </longdesc>
 <shortdesc lang="en">Determines whether to shut down badly lagging
 slaves</shortdesc>
 <content type="boolean" default="${OCF_RESKEY_evict_outdated_slaves_default}" />
 </parameter>
 
 <parameter name="reader_attribute" unique="1" required="0">
 <longdesc lang="en">
 An attribute that the RA can manage to specify whether a node
 can be read from. This node attribute will be 1 if it's fine to
 read from the node, and 0 otherwise (for example, when a slave
 has lagged too far behind the master).
 
 A typical example for the use of this attribute would be to tie
 a set of IP addresses to MySQL slaves that can be read from.
 
 This parameter is only meaningful in master/slave set configurations.
 </longdesc>
 <shortdesc lang="en">Sets the node attribute that determines
 whether a node is usable for clients to read from.</shortdesc>
 <content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
 </parameter>
 </parameters>
 
 <actions>
 <action name="start" timeout="120" />
 <action name="stop" timeout="120" />
 <action name="status" timeout="60" />
 <action name="monitor" depth="0" timeout="30" interval="20" />
 <action name="monitor" role="Master" depth="0" timeout="30" interval="10" />
 <action name="monitor" role="Slave" depth="0" timeout="30" interval="30" />
 <action name="promote" timeout="120" />
 <action name="demote" timeout="120" />
 <action name="notify" timeout="90" />
 <action name="validate-all" timeout="5" />
 <action name="meta-data" timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 # Convenience functions
 
 set_read_only() {
     # Sets or unsets read-only mode. Accepts one boolean as its
     # optional argument. If invoked without any arguments, defaults to
     # enabling read only mode. Should only be set in master/slave
     # setups.
     # Returns $OCF_SUCCESS if the operation succeeds, or
     # $OCF_ERR_GENERIC if it fails.
     local ro_val
     # An absent argument must select "on": ocf_is_true on its own treats
     # an empty value as false, which would switch read-only mode off.
     if [ $# -eq 0 ] || ocf_is_true "$1"; then
         ro_val="on"
     else
         ro_val="off"
     fi
     ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
         -e "SET GLOBAL read_only=${ro_val}"
 }
 
 get_read_only() {
     # Succeeds (0) when the server's read_only variable is reported as
     # "ON" in the SHOW VARIABLES output, fails (1) otherwise.
     local ro_flag
 
     ro_flag=$($MYSQL $MYSQL_OPTIONS_REPL -e "SHOW VARIABLES" |
         grep -w read_only |
         awk '{print $2}')
 
     [ "$ro_flag" = "ON" ]
 }
 
 is_slave() {
     # Tells whether this instance currently acts as a MySQL replication
     # slave. Returns 0 only when all of the following hold, 1 otherwise:
     #   - the resource is a master/slave resource (ocf_is_ms),
     #   - the server is in read-only mode,
     #   - SHOW SLAVE STATUS produced a result, and
     #   - a master_log_file is defined (it is deleted by reset slave).
     local rc
     local tmpfile
 
     # Check whether this machine should be slave
     ocf_is_ms || return 1
     get_read_only || return 1
 
     get_slave_info
     rc=$?
     # get_slave_info leaves its temporary file behind for us to delete
     rm -f $tmpfile
 
     # "SHOW SLAVE STATUS" returns an empty set if instance is not a
     # replication slave
     [ $rc -eq 0 ] || return 1
 
     if [ "$master_log_file" ]; then
         return 0
     fi
     return 1
 }
 
 parse_slave_info() {
     # Prints the value of field $1 found in file $2, which holds the
     # result of "SHOW SLAVE STATUS\G". Prints nothing if the field is
     # not present.
     local field="$1"
     local status_file="$2"
 
     sed -n -e "s/^.* ${field}: \(.*\)$/\1/p" < $status_file
 }
 
 get_slave_info() {
     # Loads the replication state of this instance into the global
     # variables master_host, master_user, master_port, master_log_file,
     # master_log_pos, slave_sql, slave_io, last_errno and secs_behind.
     # Does nothing if master_log_file and master_host are already set.
     #
     # Warning: this sets $tmpfile and LEAVES this file! You must delete
     # it after use!
     #
     # Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when SHOW SLAVE STATUS
     # produced no output.
     local mysql_options
 
     # variables are already defined, get_slave_info has been run before
     if [ "$master_log_file" -a "$master_host" ]; then
         return $OCF_SUCCESS
     fi
 
     tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
 
     $MYSQL $MYSQL_OPTIONS_REPL \
     -e 'SHOW SLAVE STATUS\G' > $tmpfile
 
     if [ ! -s $tmpfile ]; then
         # Instance produced an empty "SHOW SLAVE STATUS" output --
         # instance is not a slave
         ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
         return $OCF_ERR_GENERIC
     fi
 
     master_host=`parse_slave_info Master_Host $tmpfile`
     master_user=`parse_slave_info Master_User $tmpfile`
     master_port=`parse_slave_info Master_Port $tmpfile`
     master_log_file=`parse_slave_info Master_Log_File $tmpfile`
     master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
     slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
     slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
     last_errno=`parse_slave_info Last_Errno $tmpfile`
     secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
     ocf_log debug "MySQL instance running as a replication slave"
 
     return $OCF_SUCCESS
 }
 
 check_slave() {
     # Checks slave status
     #
     # Evaluates the replication state loaded by get_slave_info and
     # updates the reader attribute (set_reader_attr) and, for
     # master/slave resources, the master preference ($CRM_MASTER).
     # Every problem branch below ends in "exit", i.e. terminates the
     # agent instead of returning to the caller; those branches do not
     # delete $tmpfile (several of them point the log reader at it).
     local rc new_master
 
     get_slave_info
     rc=$?
 
     if [ $rc -eq 0 ]; then
         # Did we receive an error other than max_connections?
         if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
             # Whoa. Replication ran into an error. This slave has
             # diverged from its master. Make sure this resource
             # doesn't restart in place.
             ocf_exit_reason "MySQL instance configured for replication, but replication has failed."
             ocf_log err "See $tmpfile for details"
 
             # Just pull the reader VIP away, killing MySQL here would be pretty evil
             # on a loaded server
 
             set_reader_attr 0
             exit $OCF_SUCCESS
 
         fi
 
         # If we got max_connections, let's remove the vip
         if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
             set_reader_attr 0
             exit $OCF_SUCCESS
         fi
 
         if [ "$slave_io" != 'Yes' ]; then
             # Not necessarily a bad thing. The master may have
             # temporarily shut down, and the slave may just be
             # reconnecting. A warning can't hurt, though.
             ocf_log warn "MySQL Slave IO threads currently not running."
 
             # Sanity check, are we at least on the right master
             # (first '|'-separated field of the replication info attribute)
             new_master=`$CRM_ATTR_REPL_INFO --query  -q | cut -d'|' -f1`
 
             if [ "$master_host" != "$new_master" ]; then
                # Not pointing to the right master, not good, removing the VIPs
                set_reader_attr 0
 
                exit $OCF_SUCCESS
             fi
 
         fi
 
         if [ "$slave_sql" != 'Yes' ]; then
             # We don't have a replication SQL thread running. Not a
             # good thing. Try to recover by restarting the SQL thread
             # and remove reader vip.  Prevent MySQL restart.
             ocf_exit_reason "MySQL Slave SQL threads currently not running."
             ocf_log err "See $tmpfile for details"
 
             # Remove reader vip
             set_reader_attr 0
 
             # try to restart slave
             ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
                 -e "START SLAVE"
 
             # Return success to prevent a restart
             exit $OCF_SUCCESS
         fi
 
         if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then
             # We're supposed to bail out if we lag too far
             # behind. Let's check our lag.
             if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
                 ocf_exit_reason "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)."
                 ocf_log err "See $tmpfile for details"
 
                 # Remove reader vip
                 set_reader_attr 0
 
                 exit $OCF_ERR_INSTALLED
             fi
         elif ocf_is_ms; then
             # Even if we're not set to evict lagging slaves, we can
             # still use the seconds behind master value to set our
             # master preference.
             # NOTE(review): secs_behind is compared against "NULL"
             # elsewhere in this function but used in arithmetic here
             # without that check -- confirm the NULL case is handled.
             local master_pref
             master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
             if [ $master_pref -lt 0 ]; then
                 # Sanitize a below-zero preference to just zero
                 master_pref=0
             fi
             $CRM_MASTER -v $master_pref
         fi
 
         # is the slave ok to have a VIP on it
         if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
             set_reader_attr 0
         else
             set_reader_attr 1
         fi
 
         ocf_log debug "MySQL instance running as a replication slave"
         rm -f $tmpfile
     else
         # Instance produced an empty "SHOW SLAVE STATUS" output --
         # instance is not a slave
         # TODO: Needs to handle when get_slave_info will return too many connections error
         rm -f $tmpfile
         ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
         exit $OCF_ERR_GENERIC
     fi
 }
 
 # Configure replication from the master published in the cluster.
 #
 # Reads the "host|binlog file|binlog position" record from
 # $CRM_ATTR_REPL_INFO and issues CHANGE MASTER TO for that host.  No
 # positional arguments are used.
 # NOTE(review): master_host and tmpfile are not assigned in this function;
 # presumably get_slave_info (defined elsewhere) provides them -- confirm.
 set_master() {
     local new_master master_log_file master_log_pos
     local master_params
 
     # First field of the replication info record: the master host
     new_master=`$CRM_ATTR_REPL_INFO --query  -q | cut -d'|' -f1`
 
     # Keep replication position
     get_slave_info
 
     if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
         # Already pointing at the published master: keep the current
         # position and return without issuing CHANGE MASTER TO.
         #	master_params=", MASTER_LOG_FILE='$master_log_file', \
         #	    MASTER_LOG_POS=$master_log_pos"
         ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
         rm -f $tmpfile
         return
     else
         # Fields 2 and 3 of the record: binlog file and position.  They are
         # only appended to the statement when both are non-empty.
         master_log_file=`$CRM_ATTR_REPL_INFO --query  -q | cut -d'|' -f2`
         master_log_pos=`$CRM_ATTR_REPL_INFO --query  -q | cut -d'|' -f3`
         if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
             master_params=", MASTER_LOG_FILE='$master_log_file', \
             MASTER_LOG_POS=$master_log_pos"
             ocf_log info "Restored master pos for $new_master : $master_log_file:$master_log_pos"
         fi
     fi
 
     # Informs the MySQL server of the master to replicate from. The
     # host is the one read from the replication info record above; no
     # positional argument is used. The master must either be unchanged
     # from the last master the slave replicated from, or freshly
     # reset with RESET MASTER.
 
     ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
         -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
+        MASTER_PORT=$OCF_RESKEY_replication_port, \
         MASTER_USER='$OCF_RESKEY_replication_user', \
         MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
     rm -f $tmpfile
 }
 
 # Instructs the MySQL server to stop replicating from a master host.
 #
 # Sequence: wait until the I/O thread has nothing more to read, stop the
 # I/O thread, wait until the relay log has been processed, then issue
 # STOP SLAVE and RESET SLAVE.
 #
 # Returns $OCF_SUCCESS without doing anything when the instance is not a
 # slave.  Exits with $OCF_ERR_GENERIC when one of the STOP/RESET
 # statements fails; the temporary file is removed before exiting.
 unset_master(){
     local tmpfile
 
     # If we're currently not configured to be replicating from any
     # host, then there's nothing to do. But we do log a warning as
     # no-one but the CRM should be touching the MySQL master/slave
     # configuration.
     if ! is_slave; then
         ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave"
         return $OCF_SUCCESS
     fi
 
     tmpfile=`mktemp ${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX`
 
     # At this point, the master is read only so there should not be much binlogs to transfer
     # Let's wait for the last bits
     while true; do
         $MYSQL $MYSQL_OPTIONS_REPL \
         -e 'SHOW PROCESSLIST\G' > "$tmpfile"
         if grep -i 'Waiting for master to send event' "$tmpfile" >/dev/null; then
             ocf_log info "MySQL slave has finished reading master binary log"
             break
         fi
         if grep -i 'Reconnecting after a failed master event read' "$tmpfile" >/dev/null; then
             ocf_log info "Master is down, no more binary logs to come"
             break
         fi
         if grep -i 'Connecting to master' "$tmpfile" >/dev/null; then
             ocf_log info "Master is down, no more binary logs to come"
             break
         fi
         # No 'system user' entry means no replication thread is listed
         if ! grep 'system user' "$tmpfile" >/dev/null; then
             ocf_log info "Slave is not running - not waiting to finish"
             break
         fi
 
         sleep 1
     done
 
     # Now, stop the slave I/O thread and wait for relay log
     # processing to complete
     if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "STOP SLAVE IO_THREAD"; then
         # The temporary file still exists here; do not leave it behind
         rm -f "$tmpfile"
         ocf_exit_reason "Error stopping slave IO thread"
         exit $OCF_ERR_GENERIC
     fi
 
     while true; do
         $MYSQL $MYSQL_OPTIONS_REPL \
             -e 'SHOW PROCESSLIST\G' > "$tmpfile"
         if grep -i 'Has read all relay log' "$tmpfile" >/dev/null; then
             ocf_log info "MySQL slave has finished processing relay log"
             break
         fi
         if ! grep -q 'system user' "$tmpfile"; then
             ocf_log info "Slave not runnig - not waiting to finish"
             break
         fi
         ocf_log info "Waiting for MySQL slave to finish processing relay log"
         sleep 1
     done
     rm -f "$tmpfile"
 
     # Now, stop all slave activity and unset the master host
     if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "STOP SLAVE"; then
         ocf_exit_reason "Error stopping rest slave threads"
         exit $OCF_ERR_GENERIC
     fi
 
     # The /*!50516 ALL */ version comment makes the ALL keyword visible
     # only to servers that understand it
     if ! ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "RESET SLAVE /*!50516 ALL */;"; then
         ocf_exit_reason "Failed to reset slave"
         exit $OCF_ERR_GENERIC
     fi
 }
 
 # Start replication as slave.
 # Issues START SLAVE through ocf_run; the function's exit status is the
 # one ocf_run reports.
 start_slave() {
     ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "START SLAVE"
 }
 
 # Set the attribute controlling the readers VIP.
 # Arguments:
 #  $1 The value to store (callers in this file pass 0 or 1).
 # The attribute is written only when the stored value differs from $1.
 set_reader_attr() {
     local curr_attr_value
 
     curr_attr_value=$(get_reader_attr)
 
     # Compare as strings.  A numeric test ([ ... -ne ... ]) errors out when
     # the stored value is empty or not an integer, which would silently
     # skip the update.
     if [ "$curr_attr_value" != "$1" ]; then
         $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v "$1"
     fi
 
 }
 
 # Print the current value of the attribute controlling the readers VIP.
 # When the attribute query fails, -1 is printed instead.
 get_reader_attr() {
     local value
     local status
 
     value=`$CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q`
     status=$?
 
     # Query failed: report the sentinel and stop here
     if [ "$status" -ne 0 ]; then
         echo -1
         return
     fi
 
     echo $value
 }
 
 # Saves the output of SHOW MASTER STATUS to a per-instance file.
 # master_status_file is not declared local: get_master_status reads the
 # same variable to find the file.
 update_data_master_status() {
    master_status_file="${HA_RSCTMP}/master_status.${OCF_RESOURCE_INSTANCE}"
    $MYSQL $MYSQL_OPTIONS_REPL \
        -e "SHOW MASTER STATUS\G" > $master_status_file
 }
 
 
 # Prints one field from the stored copy of SHOW MASTER STATUS.
 # Call update_data_master_status first so that $master_status_file exists.
 # Arguments:
 #  $1 The name of the field to print (matched against "<name>:").
 get_master_status() {
    awk -v var="$1" '
        {
            # Lines look like "<name>: <value>"; print what follows ": "
            if ($1 == (var ":")) {
                print substr($0, index($0, ":") + 2)
            }
        }
    ' "$master_status_file"
 }
 
 
 # Prints the address to publish for this host, i.e. the one that will be
 # used for the change master to command.
 # The value is read from the node attribute
 # ${INSTANCE_ATTR_NAME}_mysql_master_IP (queried with -l forever).  If that
 # query fails, the fallback is the output of uname -n.
 get_local_ip() {
    local IP
    if IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G`; then
       echo $IP
    else
       uname -n
    fi
 }
 
 #######################################################################
 
 # Functions invoked by resource manager actions
 
 # Monitor action.
 #
 # Runs the common status check first and returns its code unchanged when
 # it is not $OCF_SUCCESS.  With OCF_CHECK_LEVEL > 0 and a test table
 # configured, it also checks slave status (when the instance is a slave)
 # and selects from the test table.
 #
 # Returns $OCF_RUNNING_MASTER for a master/slave resource that is not
 # read-only, $OCF_SUCCESS otherwise, $OCF_ERR_GENERIC when the test
 # table query fails.
 mysql_monitor() {
     local rc
     local status_loglevel="err"
 
     # Set loglevel to info during probe
     if ocf_is_probe; then
         status_loglevel="info"
     fi
 
     mysql_common_status $status_loglevel
 
     rc=$?
 
     # TODO: check max connections error
 
     # If status returned an error, return that immediately
     if [ $rc -ne $OCF_SUCCESS ]; then
         return $rc
     fi
 
     if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then
         # Check if this instance is configured as a slave, and if so
         # check slave status
         if is_slave; then
             check_slave
         fi
 
         # Check for test table
         ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
             -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table"
         rc=$?
 
         if [ $rc -ne 0 ]; then
             # Report the configured table name ($test_table is never set)
             ocf_exit_reason "Failed to select from $OCF_RESKEY_test_table";
             return $OCF_ERR_GENERIC;
         fi
     fi
 
     if ocf_is_ms && ! get_read_only; then
         ocf_log debug "MySQL monitor succeeded (master)";
         return $OCF_RUNNING_MASTER
     else
         ocf_log debug "MySQL monitor succeeded";
         return $OCF_SUCCESS
     fi
 }
 
 # Start action.
 #
 # Returns $OCF_SUCCESS right away when the status check reports the
 # instance as running.  Otherwise prepares the directories, starts the
 # server and, for a master/slave resource, puts the instance in read-only
 # mode and either attaches it to the current master or clears its
 # replication state.  An initial monitor is run before reporting success.
 mysql_start() {
     local rc
 
     if ocf_is_ms; then
         # Initialize the ReaderVIP attribute, monitor will enable it
         set_reader_attr 0
     fi
 
     mysql_common_status info
     if [ $? = $OCF_SUCCESS ]; then
         ocf_log info "MySQL already running"
         return $OCF_SUCCESS
     fi
 
     mysql_common_prepare_dirs
 
     # Uncomment to perform permission cleansing
     # - not convinced this should be enabled by default
     #
     #chmod 0755 $OCF_RESKEY_datadir
     #chown -R $OCF_RESKEY_user $OCF_RESKEY_datadir
     #chgrp -R $OCF_RESKEY_group $OCF_RESKEY_datadir
     # In master/slave mode the server is started with --skip-slave-start;
     # replication is started explicitly further down.
     mysql_extra_params=
     if ocf_is_ms; then
         mysql_extra_params="--skip-slave-start"
     fi
 
     mysql_common_start $mysql_extra_params
     rc=$?
     if [ $rc != $OCF_SUCCESS ]; then
         return $rc
     fi
 
     if ocf_is_ms; then
         # We're configured as a stateful resource. We must start as
         # slave by default. At this point we don't know if the CRM has
         # already promoted a master. So, we simply start in read only
         # mode.
         set_read_only on
 
         # Now, let's see whether there is a master. We might be a new
         # node that is just joining the cluster, and the CRM may have
         # promoted a master before.
         master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "`
         if [ "$master_host" -a "$master_host" != ${NODENAME} ]; then
             ocf_log info "Changing MySQL configuration to replicate from $master_host."
             set_master
             # Only the status of start_slave is checked here
             start_slave
             if [ $? -ne 0 ]; then
                 ocf_exit_reason "Failed to start slave"
                 return $OCF_ERR_GENERIC
             fi
         else
             ocf_log info "No MySQL master present - clearing replication state"
             unset_master
         fi
 
         # We also need to set a master preference, otherwise Pacemaker
         # won't ever promote us in the absence of any explicit
         # preference set by the administrator. We choose a low
         # greater-than-zero preference.
         $CRM_MASTER -v 1
 
     fi
 
     # Initial monitor action
     # The check level is raised to 10 only when test table, user and
     # password are all configured.
     if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then
         OCF_CHECK_LEVEL=10
     fi
     mysql_monitor
     rc=$?
     # Both "running" and "running as master" count as a successful start
     if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
         ocf_exit_reason "Failed initial monitor action"
         return $rc
     fi
 
     ocf_log info "MySQL started"
     return $OCF_SUCCESS
 }
 
 # Stop action.
 # For a master/slave resource the master preference is deleted and the
 # reader attribute is set to 0 before the common stop routine runs.  The
 # exit status is the one mysql_common_stop reports.
 mysql_stop() {
     # Plain (non master/slave) resource: nothing to clean up first
     if ! ocf_is_ms; then
         mysql_common_stop
         return
     fi
 
     # clear preference for becoming master
     $CRM_MASTER -D
 
     # Remove VIP capability
     set_reader_attr 0
 
     mysql_common_stop
 }
 
 # Promote action.
 #
 # Stops replication, publishes "ip|binlog file|binlog position" through
 # $CRM_ATTR_REPL_INFO, switches read-only off, sets the master preference
 # to max_slave_lag + 1 and sets the reader attribute to 1.
 #
 # Returns $OCF_NOT_RUNNING when the status check fails, $OCF_ERR_GENERIC
 # when read-only cannot be switched off, $OCF_SUCCESS otherwise.
 mysql_promote() {
     local master_info
 
     # The parentheses run the status check in a subshell
     if ( ! mysql_common_status err ); then
         return $OCF_NOT_RUNNING
     fi
     # The status of STOP SLAVE is not checked
     ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
         -e "STOP SLAVE"
 
     # Set Master Info in CIB, cluster level attribute
     update_data_master_status
     master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
     ${CRM_ATTR_REPL_INFO} -v "$master_info"
     # NOTE(review): tmpfile is not assigned in this function, and
     # update_data_master_status writes to $master_status_file -- confirm
     # what this is meant to remove.
     rm -f $tmpfile
 
     set_read_only off || return $OCF_ERR_GENERIC
 
     # Existing master gets a higher-than-default master preference, so
     # the cluster manager does not shuffle the master role around
     # unnecessarily
     $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))
 
     # A master can accept reads
     set_reader_attr 1
 
     return $OCF_SUCCESS
 }
 
 # Demote action.
 # Returns $OCF_NOT_RUNNING when the status check fails; otherwise the
 # exit status is the one reported by the $CRM_MASTER call.
 mysql_demote() {
     mysql_common_status err || return $OCF_NOT_RUNNING
 
     # Return master preference to default, so the cluster manager gets
     # a chance to select a new master
     $CRM_MASTER -v 1
 }
 
 # Notify action: reacts to pre/post promote/demote notifications.
 #
 # The notification is identified by
 # "${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}":
 #   pre-promote   nothing to do
 #   post-promote  every node but the new master re-points replication
 #   pre-demote    the node being demoted goes read-only and kills the
 #                 remaining client threads
 #   post-demote   every node but the demoted one clears its replication
 #                 configuration
 # Any other notification returns $OCF_SUCCESS.
 mysql_notify() {
     # If not configured as a Stateful resource, we make no sense of
     # notifications.
     if ! ocf_is_ms; then
         ocf_log info "This agent makes no use of notifications unless running in master/slave mode."
         return $OCF_SUCCESS
     fi
 
     local type_op
     type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
 
     ocf_log debug "Received $type_op notification."
 
     case "$type_op" in
         'pre-promote')
             # Nothing to do now here, new replication info not yet published
 
         ;;
         'post-promote')
             # The master has completed its promotion. Now is a good
             # time to check whether our replication slave is working
             # correctly.
             master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "`
             if [ "$master_host" = "${NODENAME}" ]; then
                 ocf_log info "This will be the new master, ignoring post-promote notification."
             else
                 ocf_log info "Resetting replication"
                 unset_master
                 if [ $? -ne 0 ]; then
                     return $OCF_ERR_GENERIC
                 fi
 
                 ocf_log info "Changing MySQL configuration to replicate from $master_host"
                 set_master
                 if [ $? -ne 0 ]; then
                     return $OCF_ERR_GENERIC
                 fi
 
                 start_slave
                 if [ $? -ne 0 ]; then
                     ocf_exit_reason "Failed to start slave"
                     return $OCF_ERR_GENERIC
                 fi
             fi
             return $OCF_SUCCESS
         ;;
         'pre-demote')
             demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
             # Quoted so that an empty host name compares as "not me"
             # instead of making the test itself fail
             if [ "$demote_host" = "${NODENAME}" ]; then
                 ocf_log info "pre-demote notification for $demote_host"
                 set_read_only on
                 if [ $? -ne 0 ]; then
                     ocf_exit_reason "Failed to set read-only";
                     return $OCF_ERR_GENERIC;
                 fi
 
                 # Must kill all existing user threads because they are still Read/write
                 # in order for the slaves to complete the read of binlogs
                 local tmpfile
                 tmpfile=`mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX`
                 $MYSQL $MYSQL_OPTIONS_REPL \
                 -e "SHOW PROCESSLIST" > "$tmpfile"
 
                 # Keep only rows starting with a thread id, and skip the
                 # replication, scheduler and our own SHOW PROCESSLIST rows
                 for thread in `awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' "$tmpfile"`
                 do
                     ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
                         -e "KILL ${thread}"
                 done
 
                 # The thread list is no longer needed
                 rm -f "$tmpfile"
             else
                ocf_log info "Ignoring pre-demote notification except for my own demotion."
             fi
             return $OCF_SUCCESS
         ;;
         'post-demote')
             demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
             if [ "$demote_host" = "${NODENAME}" ]; then
                 ocf_log info "Ignoring post-demote notification for my own demotion."
                 return $OCF_SUCCESS
             fi
             ocf_log info "post-demote notification for $demote_host."
             # The former master has just been gracefully demoted.
             unset_master
         ;;
         *)
             return $OCF_SUCCESS
         ;;
     esac
 }
 
 #######################################################################
 
 
 ##########################################################################
 # If DEBUG_LOG is set, make this resource agent easy to debug: set up the
 # debug log and direct all output to it.
 # The checks made here are: the log file is writable, it is not a symlink,
 # and its parent path is a directory.  Only when the first two hold and
 # the parent is not a directory is fd 9 pointed at /dev/null; when the
 # log file is not writable nothing is redirected at all.
 # NOTE(review): ownership and 0700 permissions of the log directory are
 # expected by convention but are not verified by this code.
 ##########################################################################
 DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
 if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
     # Strip the last path component to get the directory
     DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
     if [ -d "${DEBUG_LOG_DIR}" ]; then
         # Append to the log on fd 9, send stderr there, record the time,
         # arguments and OCF_ environment, then trace every command
         exec 9>>"$DEBUG_LOG"
         exec 2>&9
         date >&9
         echo "$*" >&9
         env | grep OCF_ | sort >&9
         set -x
     else
         exec 9>/dev/null
     fi
 fi
 
 # meta-data and usage/help are answered before any validation is done
 case "$1" in
   meta-data)    meta_data
         exit $OCF_SUCCESS;;
   usage|help)   usage
         exit $OCF_SUCCESS;;
 esac
 
 # Validate the environment.  When validation fails, the reaction depends
 # on the requested action.
 mysql_common_validate
 rc=$?
 LSB_STATUS_STOPPED=3
 if [ $rc -ne 0 ]; then
     case "$1" in
         # stop continues to the dispatcher below despite the failure
         stop) ;;
         monitor)
             mysql_common_status "info"
             if [ $? -eq $OCF_SUCCESS ]; then
                 # if validation fails and pid is active, always treat this as an error
                 ocf_exit_reason "environment validation failed, active pid is in unknown state."
                 exit $OCF_ERR_GENERIC
             fi
             # validation failed and pid is not active, it's safe to say this instance is inactive.
             exit $OCF_NOT_RUNNING;;
 
         status) exit $LSB_STATUS_STOPPED;;
         # every other action fails with the validation result
         *) exit $rc;;
     esac
 fi
 
 # What kind of method was invoked?
 # No explicit exit follows the action functions: the script ends with
 # the status of the function that ran.
 case "$1" in
   start)    mysql_start;;
   stop)     mysql_stop;;
   status)   mysql_common_status err;;
   monitor)  mysql_monitor;;
   promote)  mysql_promote;;
   demote)   mysql_demote;;
   notify)   mysql_notify;;
   validate-all) exit $OCF_SUCCESS;;
 
  *)     usage
         exit $OCF_ERR_UNIMPLEMENTED;;
 esac
 
 # vi:sw=4:ts=4:et:
diff --git a/heartbeat/nagios b/heartbeat/nagios
index e61306cf4..d2067bc38 100755
--- a/heartbeat/nagios
+++ b/heartbeat/nagios
@@ -1,246 +1,246 @@
 #!/bin/sh
 #
 #  License:      GNU General Public License (GPL)
 #  (c) 2015 T.J. Yang, O. Albrigtsen
 #           and Linux-HA contributors
 #
 # -----------------------------------------------------------------------------
 #      O C F    R E S O U R C E    S C R I P T   S P E C I F I C A T I O N
 # -----------------------------------------------------------------------------
 #
 # NAME
 #       nagios : OCF resource agent script for Nagios Server
 #
 
 # Initialization:
 # OCF_FUNCTIONS_DIR falls back to ${OCF_ROOT}/lib/heartbeat when it is
 # unset; the shared OCF shell functions are then sourced from it.
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # Defaults
 # These values are also printed as the parameter defaults in the
 # meta-data output.
 OCF_RESKEY_user_default="nagios"
 OCF_RESKEY_group_default="nagios"
 OCF_RESKEY_binary_default="/usr/sbin/nagios"
 OCF_RESKEY_config_default="/etc/nagios/nagios.cfg"
 OCF_RESKEY_log_default="/var/log/nagios/nagios.log"
 OCF_RESKEY_retention_default="/var/log/nagios/retention.dat"
 OCF_RESKEY_command_default="/var/log/nagios/rw/nagios.cmd"
 OCF_RESKEY_pid_default="/var/run/nagios.pid"
 
 # Apply the defaults.  ${VAR=default} assigns only when VAR is unset, so
 # a value already present in the environment is kept.
 : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
 : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}
 : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
 : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
 : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}}
 : ${OCF_RESKEY_retention=${OCF_RESKEY_retention_default}}
 : ${OCF_RESKEY_command=${OCF_RESKEY_command_default}}
 : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
 
 
 # Prints the usage text to stdout.
 # NOTE(review): the usage line does not list 'status', although the text
 # below describes it and the action dispatcher accepts it.
 nagios_usage() {
   cat <<END
     usage: $0 (start|stop|validate-all|meta-data|help|usage|monitor)
     $0 manages a Nagios instance as an OCF HA resource.
     The 'start' operation starts the instance.
     The 'stop' operation stops the instance.
     The 'status' operation reports whether the instance is running
     The 'monitor' operation reports whether the instance seems to be working
     The 'validate-all' operation reports whether the parameters are valid
 END
 }
 
 # Prints the resource agent meta-data XML to stdout.
 # The here-document is unquoted, so each ${OCF_RESKEY_*_default} below is
 # expanded to the default defined at the top of this script.
 nagios_meta_data() {
         cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="nagios">
 <version>0.75</version>
 
 <longdesc lang="en">OCF Resource script for Nagios 3.x or 4.x. It manages a Nagios instance as a HA resource.</longdesc>
 <shortdesc lang="en">Nagios resource agent</shortdesc>
 
 <parameters>
 
 <parameter name="user">
     <longdesc lang="en">User running Nagios daemon (for file permissions)</longdesc>
     <shortdesc lang="en">Nagios user</shortdesc>
     <content type="string" default="${OCF_RESKEY_user_default}" />
 </parameter>
 
 <parameter name="group">
     <longdesc lang="en">Group running Nagios daemon (for file permissions)</longdesc>
     <shortdesc lang="en">Nagios group</shortdesc>
     <content type="string" default="${OCF_RESKEY_group_default}" />
 </parameter>
 
 <parameter name="binary">
     <longdesc lang="en">Location of the Nagios binary</longdesc>
     <shortdesc lang="en">Nagios binary</shortdesc>
     <content type="string" default="${OCF_RESKEY_binary_default}" />
 </parameter>
 
 <parameter name="config">
     <longdesc lang="en">Configuration file</longdesc>
     <shortdesc lang="en">Nagios config</shortdesc>
     <content type="string" default="${OCF_RESKEY_config_default}" />
 </parameter>
 
 <parameter name="log">
     <longdesc lang="en">Location of the Nagios log</longdesc>
     <shortdesc lang="en">Nagios log</shortdesc>
     <content type="string" default="${OCF_RESKEY_log_default}" />
 </parameter>
 
 <parameter name="retention">
     <longdesc lang="en">Location of the Nagios retention file</longdesc>
     <shortdesc lang="en">Nagios retention file</shortdesc>
     <content type="string" default="${OCF_RESKEY_retention_default}" />
 </parameter>
 
 <parameter name="command">
     <longdesc lang="en">Location of the Nagios external command file</longdesc>
     <shortdesc lang="en">Nagios command file</shortdesc>
     <content type="string" default="${OCF_RESKEY_command_default}" />
 </parameter>
 
 <parameter name="pid">
     <longdesc lang="en">Location of the Nagios pid/lock</longdesc>
     <shortdesc lang="en">Nagios pid file</shortdesc>
     <content type="string" default="${OCF_RESKEY_pid_default}" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start" timeout="20" />
 <action name="stop" timeout="20" />
 <action name="status" timeout="20" />
 <action name="monitor" depth="0" timeout="20" interval="10" start-delay="10" />
 <action name="validate-all" timeout="20" />
 <action name="meta-data" timeout="20" />
 </actions>
 </resource-agent>
 END
 }
 
 
 # Start action.
 # Validates the configuration, returns $OCF_SUCCESS if the instance is
 # already running, otherwise prepares the log, retention and pid files,
 # starts the daemon and polls nagios_monitor once per second until it
 # succeeds.  The polling loop has no timeout of its own.
 nagios_start() {
     nagios_validate_all
     rc=$?
     if [ $rc -ne 0 ]; then
         return $rc
     fi
 
-
     # if resource is already running,no need to continue code after this.
     if nagios_monitor; then
         ocf_log info "Nagios is already running"
         return $OCF_SUCCESS
     fi
 
     # Remove ${OCF_RESKEY_pid} if it exists
-    rm -f ${OCF_RESKEY_pid}
+    rm -f "${OCF_RESKEY_pid}"
 
     # Create the log, retention and pid files and hand them to the
     # configured user and group
     ocf_run -q touch ${OCF_RESKEY_log} ${OCF_RESKEY_retention} ${OCF_RESKEY_pid}
     chown ${OCF_RESKEY_user}:${OCF_RESKEY_group} ${OCF_RESKEY_log} ${OCF_RESKEY_retention} ${OCF_RESKEY_pid}
-    rm -f ${OCF_RESKEY_command}
+    rm -f "${OCF_RESKEY_command}"
+
     # restorecon is only run when /sbin/restorecon is executable
     [ -x /sbin/restorecon ] && /sbin/restorecon ${OCF_RESKEY_pid}
     ocf_run -q ${OCF_RESKEY_binary} -d ${OCF_RESKEY_config}
 
     # Wait until the monitor reports the instance as running
     while ! nagios_monitor; do
         sleep 1
     done
 
     # $? here is the exit status of the while loop above
-    if [ $? -eq "0" ]; then
+    if [ $? -eq 0 ]; then
         ocf_log info "Nagios started"
         return ${OCF_SUCCESS}
     fi
 
     return $OCF_SUCCESS
 }
 
 # Stop action.
 # If the monitor does not report success, the pid file is removed and
 # $OCF_SUCCESS is returned.  Otherwise the pid read from the pid file is
 # sent the default kill signal and nagios_monitor is polled once per
 # second until it no longer succeeds.  The polling loop has no timeout of
 # its own.
 nagios_stop() {
     nagios_monitor
-    if [ "$?" -ne "$OCF_SUCCESS" ]; then
+    if [ $? -ne $OCF_SUCCESS ]; then
         # Currently not running. Nothing to do.
         ocf_log info "Resource is already stopped"
         rm -f ${OCF_RESKEY_pid}
 
         return $OCF_SUCCESS
     fi
 
     kill `cat ${OCF_RESKEY_pid}`
 
     # Wait for process to stop
     while nagios_monitor; do
         sleep 1
     done
-    
+
     return $OCF_SUCCESS
 }
 
 # Monitor action: maps the result of ocf_pidfile_status for the pid file
 # onto an OCF return code.
 #   0      -> $OCF_SUCCESS
 #   1 or 2 -> $OCF_NOT_RUNNING
 #   other  -> $OCF_ERR_GENERIC
 # The code is also left in the (non-local) variable rc.
 nagios_monitor(){
     if ocf_pidfile_status ${OCF_RESKEY_pid} > /dev/null 2>&1; then
         rc=$OCF_SUCCESS
     else
         # $? still holds the status of ocf_pidfile_status here
         case "$?" in
             1|2) rc=$OCF_NOT_RUNNING ;;
             *)   rc=$OCF_ERR_GENERIC ;;
         esac
     fi
     return $rc
 }
 
 # Validate-all action.
 # Checks that the binary is available, that the configuration file exists
 # and that the binary accepts the configuration when run with -v.
 # Returns ${OCF_ERR_INSTALLED} when the file is missing or the check
 # fails.
 nagios_validate_all(){
-    check_binary ${OCF_RESKEY_binary}
-    
-    if [ ! -f ${OCF_RESKEY_config} ]; then
+    check_binary "${OCF_RESKEY_binary}"
+
+    if [ ! -f "${OCF_RESKEY_config}" ]; then
         ocf_exit_reason "Configuration file ${OCF_RESKEY_config} not found"
         return ${OCF_ERR_INSTALLED}
     fi
-    
-    ${OCF_RESKEY_binary} -v ${OCF_RESKEY_config} > /dev/null 2>&1;
-    if [ $? -ne "0" ]; then
+
     # Output of the configuration check is discarded; only its exit
     # status is used
+    ${OCF_RESKEY_binary} -v ${OCF_RESKEY_config} >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
         ocf_exit_reason "Configuration check failed"
         return ${OCF_ERR_INSTALLED}
     fi
 }
 
 
 # **************************** MAIN SCRIPT ************************************
 
 # Make sure meta-data and usage always succeed
 # (they are handled before the root check below)
 case $__OCF_ACTION in
 meta-data)      nagios_meta_data
                 exit $OCF_SUCCESS
                 ;;
 usage|help)     nagios_usage
                 exit $OCF_SUCCESS
                 ;;
 esac
 
 # This OCF agent script needs to be run as the root user.
 # The message is printed to stdout and also logged at debug level.
 if ! ocf_is_root; then
         echo  "$0 agent script need to be run as root user."
         ocf_log debug "$0 agent script need to be run as root user."
         exit $OCF_ERR_GENERIC
 fi
 
 # Translate each action into the appropriate function call
 # status and monitor share one implementation; unknown actions print the
 # usage text and exit with $OCF_ERR_UNIMPLEMENTED.
 case $__OCF_ACTION in
 start)          nagios_start;;
 stop)           nagios_stop;;
 status|monitor) nagios_monitor;;
 validate-all)   nagios_validate_all;;
 *)              nagios_usage
                 exit $OCF_ERR_UNIMPLEMENTED
                 ;;
 esac
 # Exit with the return code of the function that handled the action
 rc=$?
 
 exit $rc
-  
+
 # End of this script
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index 3cec5c8e7..d1e6259e3 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -1,660 +1,866 @@
 #!/bin/sh
 # nfsserver
 #
 # Description: Manages nfs server as OCF resource
 # by hxinwei@gmail.com
 # License: GNU General Public License v2 (GPLv2) and later
 
 if [ -n "$OCF_DEBUG_LIBRARY" ]; then
     . $OCF_DEBUG_LIBRARY
 else
     : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 fi
 
 if is_redhat_based; then
 	. ${OCF_FUNCTIONS_DIR}/nfsserver-redhat.sh
 fi
 
+DEFAULT_INIT_SCRIPT_LIST="/etc/init.d/nfsserver /etc/init.d/nfs /etc/init.d/nfs-kernel-server"
 DEFAULT_INIT_SCRIPT="/etc/init.d/nfsserver"
-if ! [ -f $DEFAULT_INIT_SCRIPT ]; then
-	# On some systems, the script is just called nfs
-	DEFAULT_INIT_SCRIPT="/etc/init.d/nfs"
-fi
+# Use the first candidate that exists as an executable regular file;
+# if none does, DEFAULT_INIT_SCRIPT keeps the value assigned above.
+for script in $DEFAULT_INIT_SCRIPT_LIST; do
+	if [ -f "$script" ] && [ -x "$script" ]; then
+		DEFAULT_INIT_SCRIPT=$script
+		break
+	fi
+done
 
 DEFAULT_NOTIFY_CMD=`which sm-notify`
 DEFAULT_NOTIFY_CMD=${DEFAULT_NOTIFY_CMD:-"/sbin/sm-notify"}
 DEFAULT_NOTIFY_FOREGROUND="false"
 DEFAULT_RPCPIPEFS_DIR="/var/lib/nfs/rpc_pipefs"
 EXEC_MODE=0
 SELINUX_ENABLED=-1
 STATD_PATH="/var/lib/nfs"
 STATD_DIR=""
 
 nfsserver_meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="nfsserver">
 <version>1.0</version>
 
 <longdesc lang="en">
 Nfsserver helps to manage the Linux nfs server as a failover-able resource in Linux-HA.
 It depends on Linux specific NFS implementation details, so is considered not portable to other platforms yet.
 </longdesc>
 
 <shortdesc lang="en">Manages an NFS server</shortdesc>
 
 <parameters>
 
 <parameter name="nfs_init_script" unique="0" required="0">
 <longdesc lang="en">
 The default init script shipped with the Linux distro.
 The nfsserver resource agent offloads the start/stop/monitor 
 work to the init script because the procedure to start/stop/monitor 
 nfsserver varies on different Linux distro. In the event that this
 option is not set, this agent will attempt to use an init script at 
 this location, ${DEFAULT_INIT_SCRIPT}, or detect a systemd unit-file 
 to use in the event that no init script is detected.
 </longdesc>
 <shortdesc lang="en">
 Init script for nfsserver
 </shortdesc>
 <content type="string" default="auto detected" />
 </parameter>
 
 <parameter name="nfs_no_notify" unique="0" required="0">
 <longdesc lang="en">
 Do not send reboot notifications to NFSv3 clients during server startup.
 </longdesc>
 <shortdesc lang="en">
 Disable NFSv3 server reboot notifications
 </shortdesc>
 <content type="boolean" default="false" />
 </parameter>
 
 <parameter name="nfs_notify_foreground" unique="0" required="0">
 <longdesc lang="en">
 Keeps the sm-notify attached to its controlling terminal and running in the foreground.
 </longdesc>
 <shortdesc lang="en">
 Keeps the notify tool running in the foreground.
 </shortdesc>
 <content type="boolean" default="$DEFAULT_NOTIFY_FOREGROUND" />
 </parameter>
 
 <parameter name="nfs_smnotify_retry_time" unique="0" required="0">
 <longdesc lang="en">
 Specifies the length of sm-notify retry time, in minutes, to continue retrying notifications to unresponsive hosts.  
 If this option is not specified, sm-notify attempts to send notifications for 15 minutes. Specifying a value of 0 
 causes sm-notify to continue sending notifications to unresponsive peers until it is manually killed.
 </longdesc>
 <shortdesc lang="en">
 Specifies the length of sm-notify retry time (minutes).
 </shortdesc>
 <content type="integer" default="" />
 </parameter>
 
 <parameter name="nfs_ip" unique="0" required="0">
 <longdesc lang="en">
 Comma separated list of floating IP addresses used to access the nfs service
 </longdesc>
 <shortdesc lang="en">
 IP addresses.
 </shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="nfs_shared_infodir" unique="0" required="0">
 <longdesc lang="en">
 The nfsserver resource agent will save nfs related information in this specific directory.
 And this directory must be able to fail-over before nfsserver itself.
 </longdesc>
 <shortdesc lang="en">
 Directory to store nfs server related information.
 </shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="rpcpipefs_dir" unique="0" required="0">
 <longdesc lang="en">
 The mount point for the sunrpc file system. Default is $DEFAULT_RPCPIPEFS_DIR. 
 This script will mount (bind) nfs_shared_infodir on /var/lib/nfs/ (cannot be changed),
 and this script will mount the sunrpc file system on $DEFAULT_RPCPIPEFS_DIR (default, can be changed by this parameter).
 If you want to move only rpc_pipefs/ (e.g. to keep rpc_pipefs/ local) from default, please set this value.
 </longdesc>
 <shortdesc lang="en">
 The mount point for the sunrpc file system.
 </shortdesc>
 <content type="string" default="$DEFAULT_RPCPIPEFS_DIR" />
 </parameter>
 
 $(
 is_redhat_based && nfsserver_redhat_meta_data
 )
 
 </parameters>
 
 <actions>
 <action name="start"   timeout="40" />
 <action name="stop"    timeout="20s" />
 <action name="monitor" depth="0"  timeout="20s" interval="10" />
 <action name="meta-data"  timeout="5" />
 <action name="validate-all"  timeout="30" />
 </actions>
 </resource-agent>
 END
 
 return $OCF_SUCCESS
 }
 
 nfsserver_usage() {
 	cat <<END
 		usage: $0 {start|stop|monitor|status|validate-all|meta-data}
 END
 }
 
 if [ $# -ne 1 ]; then
 	nfsserver_usage
 	exit $OCF_ERR_ARGS
 fi
 
 case $__OCF_ACTION in
 	meta-data)  nfsserver_meta_data
 		exit $OCF_SUCCESS
 		;;
 	usage|help) nfsserver_usage
 		exit $OCF_SUCCESS
 		;;
 	*)
 		;;	
 esac
 
 fp="$OCF_RESKEY_nfs_shared_infodir"
 : ${OCF_RESKEY_nfs_notify_cmd="$DEFAULT_NOTIFY_CMD"}
 : ${OCF_RESKEY_nfs_notify_foreground="$DEFAULT_NOTIFY_FOREGROUND"}
 
 if [ -z ${OCF_RESKEY_rpcpipefs_dir} ]; then
 	rpcpipefs_make_dir=$fp/rpc_pipefs
 	rpcpipefs_umount_dir=${DEFAULT_RPCPIPEFS_DIR}
 else
 	rpcpipefs_make_dir=${OCF_RESKEY_rpcpipefs_dir}
 	rpcpipefs_umount_dir=${OCF_RESKEY_rpcpipefs_dir}
 fi
 
 # Use statd folder if it exists
 if [ -d "/var/lib/nfs/statd" ]; then
 	STATD_DIR="statd"
 	STATD_PATH="/var/lib/nfs/statd"
 fi
 
 # SELinux information. We are taking the permissions from
 # the current statd dir and applying it to the HA one that is
 # being mounted in its place.
 which restorecon > /dev/null 2>&1 && selinuxenabled
 SELINUX_ENABLED=$?
 if [ $SELINUX_ENABLED -eq 0 ]; then
 	export SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')"
 fi
 
 ##
 # EXEC_MODE values
 # 1  user init script or default init script
 # 2  systemd (with nfs-lock.service)
 # 3  systemd (with rpc-statd.service)
 #
 # On error, this function will terminate the process
 # with error code $OCF_ERR_INSTALLED
 ##
 set_exec_mode()
 {
 
 	##
 	# If EXEC_MODE is already set, we don't need to run this function again.
 	## 
 	if [ $EXEC_MODE -ne 0 ]; then
 		return 0;
 	fi
 
 	##
 	# If the user defined an init script, It must exist for us to continue
 	##
 	if [ -n "$OCF_RESKEY_nfs_init_script" ]; then
 		# check_binary will exit the process if init script does not exist
 		check_binary ${OCF_RESKEY_nfs_init_script}
 		EXEC_MODE=1
 		return 0
 	fi
 
 	##
 	# Check to see if the default init script exists, if so we'll use that.
 	##
 	if which $DEFAULT_INIT_SCRIPT > /dev/null 2>&1; then
 		OCF_RESKEY_nfs_init_script=$DEFAULT_INIT_SCRIPT
 		EXEC_MODE=1
 		return 0
 	fi
 
 	##
 	# Attempt systemd (with nfs-lock.service).
 	##
 	if which systemctl > /dev/null 2>&1; then
 		if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep nfs-lock > /dev/null; then
 			EXEC_MODE=2
 			# when using systemd, the nfs-lock service file handles nfsv3 locking daemons for us.
 			return 0
 		fi
 	fi
 
 	##
 	# Attempt systemd (with rpc-statd.service).
 	##
 	if which systemctl > /dev/null 2>&1; then
 		if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep rpc-statd > /dev/null; then
 			EXEC_MODE=3
 			return 0
 		fi
 	fi
 
 	ocf_exit_reason "No init script or systemd unit file detected for nfs server"
 	exit $OCF_ERR_INSTALLED
 }
 
 ##
 # wrapper for init script and systemd calls.
 ##
 nfs_exec()
 {
 	local cmd=$1
+	local svc=$2
 	set_exec_mode
 
 	case $EXEC_MODE in 
 		1) ${OCF_RESKEY_nfs_init_script} $cmd;;
-		2) systemctl $cmd nfs-server.service ;;
-		3) systemctl $cmd nfs-server.service ;;
+		# Modes 2 and 3 both go through systemctl, so they share one arm.
+		[23]) # Append ".service" unless $svc already contains a dot,
+		   # as in the "nfs-lock.service" passed by v3locking_exec.
+		   case "$svc" in
+			*.*) ;;
+			*) svc="${svc}.service" ;;
+		   esac
+		   systemctl $cmd $svc
+		   ;;
+		# Any other EXEC_MODE value matches no arm and runs nothing.
 	esac
 }
 
 v3locking_exec()
 {
 	local cmd=$1
 	set_exec_mode
 
 	if [ $EXEC_MODE -eq 2 ]; then
-		systemctl $cmd nfs-lock.service
+		nfs_exec $cmd nfs-lock.service
 	elif [ $EXEC_MODE -eq 3 ]; then
-		systemctl $cmd rpc-statd.service
+		nfs_exec $cmd rpc-statd.service
 	else 
 		case $cmd in
 			start) locking_start;;
 			stop) locking_stop;;
 			status) locking_status;;
 		esac
 	fi
 }
 
+# Monitor for the systemd exec modes (2 and 3): check each NFS helper in
+# turn, then nfs-server itself.  Returns $OCF_SUCCESS when everything is up,
+# $OCF_NOT_RUNNING when a helper is down, 3 when nfsd has no threads,
+# $OCF_ERR_GENERIC when the thread count cannot be read, and otherwise the
+# exit status of "systemctl is-active" for nfs-server.
+nfsserver_systemd_monitor()
+{
+	local threads_num
+	local rc
+	local out
+
+	ocf_log debug "Status: rpcbind"
+	if ! rpcinfo > /dev/null 2>&1; then
+		ocf_exit_reason "rpcbind is not running"
+		return $OCF_NOT_RUNNING
+	fi
+
+	ocf_log debug "Status: nfs-mountd"
+	if ! rpcinfo -t localhost 100005 > /dev/null 2>&1; then
+		ocf_exit_reason "nfs-mountd is not running"
+		return $OCF_NOT_RUNNING
+	fi
+
+	ocf_log debug "Status: nfs-idmapd"
+	# Capture the status text directly instead of going through a temp file.
+	# nfs_exec runs in a subshell here, which is fine because the caller has
+	# already resolved EXEC_MODE.
+	out=$(nfs_exec status nfs-idmapd 2>&1)
+	rc=$?
+	ocf_log debug "$out"
+	if [ "$rc" -ne 0 ]; then
+		ocf_exit_reason "nfs-idmapd is not running"
+		return $OCF_NOT_RUNNING
+	fi
+
+	ocf_log debug "Status: rpc-statd"
+	if ! rpcinfo -t localhost 100024 > /dev/null 2>&1; then
+		ocf_exit_reason "rpc-statd is not running"
+		return $OCF_NOT_RUNNING
+	fi
+
+	nfs_exec is-active nfs-server
+	rc=$?
+	if [ "$rc" -ne 0 ]; then
+		return $rc
+	fi
+
+	# "systemctl is-active" cannot detect the failure of a kernel process such
+	# as nfsd, so even when it reports success, read the nfsd thread count from
+	# /proc/fs/nfsd/threads to make sure the server is really running.
+	if ! threads_num=`cat /proc/fs/nfsd/threads 2>/dev/null`; then
+		return $OCF_ERR_GENERIC
+	fi
+
+	# Quoted so that an empty or malformed value cannot break the test syntax.
+	if [ "$threads_num" -gt 0 ]; then
+		return $OCF_SUCCESS
+	fi
+
+	# 3 is the LSB "not running" status; nfsserver_monitor maps it to
+	# $OCF_NOT_RUNNING.
+	return 3
+}
+
 nfsserver_monitor ()
 {
+	local fn
+
+	set_exec_mode
 	fn=`mktemp`
-	nfs_exec status > $fn 2>&1 
+	case $EXEC_MODE in
+               1) nfs_exec status nfs-server > $fn 2>&1;;
+            [23]) nfsserver_systemd_monitor > $fn 2>&1;;
+	esac
 	rc=$?
 	ocf_log debug "$(cat $fn)"
 	rm -f $fn
 
 	#Adapte LSB status code to OCF return code
 	if [ $rc -eq 0 ]; then
 		# don't report success if nfs servers are up
 		# without locking daemons.
 		v3locking_exec "status"
 		rc=$?
 		if [ $rc -ne 0 ]; then
 			ocf_exit_reason "NFS server is up, but the locking daemons are down"
 			rc=$OCF_ERR_GENERIC
 		fi
 		return $rc
-	elif [ $rc -eq 3 ]; then
+	elif [ $rc -eq 3 ] || [ $rc -eq $OCF_NOT_RUNNING ]; then
 		return $OCF_NOT_RUNNING
 	else
 		return $OCF_ERR_GENERIC
 	fi
 }
 
 prepare_directory ()
 {
 	if [ -z "$fp" ]; then
 		return
 	fi
 
 	[ -d "$fp" ] || mkdir -p $fp
 	[ -d "$rpcpipefs_make_dir" ] || mkdir -p $rpcpipefs_make_dir
 	[ -d "$fp/v4recovery" ] || mkdir -p $fp/v4recovery
 
 	[ -d "$fp/$STATD_DIR" ] || mkdir -p "$fp/$STATD_DIR"
 	[ -d "$fp/$STATD_DIR/sm" ] || mkdir -p "$fp/$STATD_DIR/sm"
 	[ -d "$fp/$STATD_DIR/sm.ha" ] || mkdir -p "$fp/$STATD_DIR/sm.ha"
 	[ -d "$fp/$STATD_DIR/sm.bak" ] || mkdir -p "$fp/$STATD_DIR/sm.bak"
 	[ -n "`id -u rpcuser 2>/dev/null`" -a "`id -g rpcuser 2>/dev/null`" ] &&
 		chown -R rpcuser.rpcuser "$fp/$STATD_DIR"
 
 	[ -f "$fp/etab" ] || touch "$fp/etab"
 	[ -f "$fp/xtab" ] || touch "$fp/xtab"
 	[ -f "$fp/rmtab" ] || touch "$fp/rmtab"
 
 	dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 >/dev/null 2>&1
 	[ -n "`id -u rpcuser 2>/dev/null`" -a "`id -g rpcuser 2>/dev/null`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state"
 	[ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp"
 }
 
 is_bound ()
 {
 	if mount | grep -q "on $1 type"; then
 		return 0
 	fi
 
 	return 1
 }
 
 bind_tree ()
 {
 	if [ -z "$fp" ]; then
 		return
 	fi
 
 	if is_bound /var/lib/nfs; then
 		ocf_log debug "$fp is already bound to /var/lib/nfs"
 		return 0
 	fi
 	mount --bind $fp /var/lib/nfs
 	[ $SELINUX_ENABLED -eq 0 ] && restorecon /var/lib/nfs
 }
 
 unbind_tree ()
 {
 	if `mount | grep -q " on $rpcpipefs_umount_dir"`; then
 		umount -t rpc_pipefs $rpcpipefs_umount_dir
 	fi
 	if is_bound /var/lib/nfs; then
 		umount /var/lib/nfs
 	fi
 }
 
 binary_status()
 {
 	local binary=$1
 	local pid
 
 	pid=$(pgrep ${binary})
 	case $? in
 		0)
 			echo "$pid"
 			return $OCF_SUCCESS;;
 		1)
 			return $OCF_NOT_RUNNING;;
 		*)
 			return $OCF_ERR_GENERIC;;
 	esac
 }
 
 locking_status()
 {
 	binary_status "rpc.statd" > /dev/null 2>&1
 }
 
 locking_start()
 {
 	local ret=$OCF_SUCCESS
 
 	ocf_log info "Starting rpc.statd."
 
 	rpc.statd $STATDARG
 
 	ret=$?
 	if [ $ret -ne 0 ]; then
 		ocf_log err "Failed to start rpc.statd"
 		return $ret
 	fi
 	[ -d /var/lock/subsys ] && touch /var/lock/subsys/nfslock
 
 	return $ret
 }
 
 terminate()
 {
 	local pids
 	local i=0
 
 	while : ; do
 		pids=$(binary_status $1)
 		[ -z "$pids" ] && return 0
 	 	kill $pids
 		sleep 1
 		i=$((i + 1))
 		[ $i -gt 3 ] && return 1
 	done
 }
 
 
 killkill()
 {
 	local pids
 	local i=0
 
 	while : ; do
 		pids=$(binary_status $1)
 		[ -z "$pids" ] && return 0
 	 	kill -9 $pids
 		sleep 1
 		i=$((i + 1))
 		[ $i -gt 3 ] && return 1
 	done
 }
 
 stop_process()
 {
 	local process=$1
 
 	ocf_log info "Stopping $process"
 	if terminate $process; then
 		ocf_log debug "$process is stopped"
 	else
 		if killkill $process; then
 			ocf_log debug "$process is stopped"
 		else
 			ocf_log debug "Failed to stop $process"
 			return 1
 		fi
 	fi
 	return 0
 }
 
 locking_stop()
 {
 	ret=0
 
 	# sm-notify can prevent umount of /var/lib/nfs/statd if
 	# it is still trying to notify unresponsive clients.
 	stop_process sm-notify
 	if [ $? -ne 0 ]; then
 		ret=$OCF_ERR_GENERIC
 	fi
 
 	stop_process rpc.statd
 	if [ $? -ne 0 ]; then
 		ret=$OCF_ERR_GENERIC
 	fi
 
 	return $ret
 }
 
 notify_locks()
 {
 	if ocf_is_true "$OCF_RESKEY_nfs_no_notify"; then
 		# we've been asked not to notify clients
 		return;
 	fi
 
 	# run in foreground, if requested
 	if ocf_is_true "$OCF_RESKEY_nfs_notify_foreground"; then
 		opts="-d"
 	fi
 
 	if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then
 		opts="$opts -m $OCF_RESKEY_nfs_smnotify_retry_time"
 	fi
 
 	if [ -n "$OCF_RESKEY_statd_outgoing_port" ]; then
 		opts="$opts -p $OCF_RESKEY_statd_outgoing_port"
 	fi
 
 	# forces re-notificaiton regardless if notifies have already gone out
 	opts="$opts -f"
 
 	ocf_log info "executing sm-notify"
 	if [ -n "$OCF_RESKEY_nfs_ip" ]; then
 		for ip in `echo ${OCF_RESKEY_nfs_ip} | sed 's/,/ /g'`; do
 			cp -rpfn $STATD_PATH/sm.ha/* $STATD_PATH/  > /dev/null 2>&1
 			sm-notify $opts -v $ip
 		done
 	else
 		sm-notify $opts
 	fi
 }
 
 nfsserver_start ()
 {
 	local rc;
+	local fn
 
 	if nfsserver_monitor; then
 		ocf_log debug "NFS server is already started"
 		return $OCF_SUCCESS
 	fi
 
 	is_redhat_based && set_env_args
 	prepare_directory
 	bind_tree
 
 	# remove the sm-notify pid so sm-notify will be allowed to run again without requiring a reboot.
 	rm -f /var/run/sm-notify.pid
 	#
 	# Synchronize these before starting statd
 	#
 	cp -rpfn $STATD_PATH/sm.ha/* $STATD_PATH/ > /dev/null 2>&1
 	rm -rf $STATD_PATH/sm.ha/* > /dev/null 2>&1
 	cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1
 
 	ocf_log info "Starting NFS server ..."
 
 	# mounts /proc/fs/nfsd for us
 	lsmod | grep -q nfsd
 	if [ $? -ne 0 ]; then
 		modprobe nfsd
 	fi
 
+	# systemd
+	case $EXEC_MODE in
+            [23]) nfs_exec start rpcbind
+		  local i=1
+		  while : ; do
+			ocf_log info "Start: rpcbind i: $i"
+			rpcinfo > /dev/null 2>&1
+			rc=$?
+			if [ "$rc" -eq "0" ]; then
+				break;
+			fi
+			sleep 1
+			i=$((i + 1))
+		  done
+		  ;;
+	esac
+
 	# check to see if we need to start rpc.statd
 	v3locking_exec "status"
 	if [ $? -ne $OCF_SUCCESS ]; then
 		v3locking_exec "start"
 		rc=$?
 		if [ $rc -ne 0 ]; then
 			ocf_exit_reason "Failed to start NFS server locking daemons"
 			return $rc
 		fi
 	else
 		ocf_log info "rpc.statd already up"
 	fi
 
+	# systemd
+	case $EXEC_MODE in
+            [23]) nfs_exec start nfs-mountd
+		  local i=1
+		  while : ; do
+			ocf_log info "Start: nfs-mountd i: $i"
+			rpcinfo -t localhost 100005 > /dev/null 2>&1
+			rc=$?
+			if [ "$rc" -eq "0" ]; then
+				break;
+			fi
+			sleep 1
+			i=$((i + 1))
+		  done
+
+		  nfs_exec start nfs-idmapd
+		  local i=1
+		  while : ; do
+			ocf_log info "Start: nfs-idmapd i: $i"
+			fn=`mktemp`
+			nfs_exec status nfs-idmapd > $fn 2>&1
+			rc=$?
+			ocf_log debug "$(cat $fn)"
+			rm -f $fn
+			if [ "$rc" -eq "0" ]; then
+				break;
+			fi
+			sleep 1
+			i=$((i + 1))
+		  done
+
+		  nfs_exec start rpc-statd
+		  local i=1
+		  while : ; do
+			ocf_log info "Start: rpc-statd i: $i"
+			rpcinfo -t localhost 100024 > /dev/null 2>&1
+			rc=$?
+			if [ "$rc" -eq "0" ]; then
+				break;
+			fi
+			sleep 1
+			i=$((i + 1))
+		  done
+	esac
+
+
 	fn=`mktemp`
-	nfs_exec start > $fn 2>&1
+	nfs_exec start nfs-server > $fn 2>&1
 	rc=$?
 	ocf_log debug "$(cat $fn)"
 	rm -f $fn
 
 	if [ $rc -ne 0 ]; then
 		ocf_exit_reason "Failed to start NFS server"
 		return $rc
-	fi	
+	fi
+
+	tfn="/proc/fs/nfsd/threads"
+	if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then
+		ocf_exit_reason "Failed to start NFS server: /proc/fs/nfsd/threads"
+		return $OCF_ERR_GENERIC
+	fi
 
 	notify_locks
 
 	ocf_log info "NFS server started"
 	return $OCF_SUCCESS
 }
 
 nfsserver_stop ()
 {
+	local fn
+
 	ocf_log info "Stopping NFS server ..."
 
 	# backup the current sm state information to the ha folder before stopping.
 	# the ha folder will be synced after startup, restoring the statd client state
 	rm -rf $STATD_PATH/sm.ha/* > /dev/null 2>&1
 	cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1
 
 	fn=`mktemp`
-	nfs_exec stop > $fn 2>&1
+	nfs_exec stop nfs-server > $fn 2>&1
 	rc=$?
 	ocf_log debug "$(cat $fn)"
 	rm -f $fn
 
+	if [ $rc -ne 0 ]; then
+		ocf_exit_reason "Failed to stop NFS server"
+		return $rc
+	fi
+
+	# systemd
+	case $EXEC_MODE in
+            [23]) ocf_log info "Stop: threads"
+		  tfn="/proc/fs/nfsd/threads"
+		  if [ -f "$tfn" ] && [ "$(cat $tfn)" -gt "0" ]; then
+			ocf_exit_reason "NFS server failed to stop: /proc/fs/nfsd/threads"
+			return $OCF_ERR_GENERIC
+		  fi
+
+		  nfs_exec stop rpc-statd > /dev/null 2>&1
+		  ocf_log info "Stop: rpc-statd"
+		  rpcinfo -t localhost 100024 > /dev/null 2>&1
+		  rc=$?
+		  if [ "$rc" -eq "0" ]; then
+			ocf_exit_reason "Failed to stop rpc-statd"
+			return $OCF_ERR_GENERIC
+		  fi
+
+		  nfs_exec stop nfs-idmapd > /dev/null 2>&1
+		  ocf_log info "Stop: nfs-idmapd"
+		  fn=`mktemp`
+		  nfs_exec status nfs-idmapd > $fn 2>&1
+		  rc=$?
+		  ocf_log debug "$(cat $fn)"
+		  rm -f $fn
+		  if [ "$rc" -eq "0" ]; then
+			ocf_exit_reason "Failed to stop nfs-idmapd"
+			return $OCF_ERR_GENERIC
+		  fi
+
+		  nfs_exec stop nfs-mountd > /dev/null 2>&1
+		  ocf_log info "Stop: nfs-mountd"
+		  rpcinfo -t localhost 100005 > /dev/null 2>&1
+		  rc=$?
+		  if [ "$rc" -eq "0" ]; then
+			ocf_exit_reason "Failed to stop nfs-mountd"
+			return $OCF_ERR_GENERIC
+		  fi
+	esac
+
+
 	v3locking_exec "stop"
 	if [ $? -ne 0 ]; then
 		ocf_exit_reason "Failed to stop NFS locking daemons"
 		rc=$OCF_ERR_GENERIC
 	fi
 
-	if [ $rc -eq 0 ]; then
-		unbind_tree 
-		ocf_log info "NFS server stopped"
-	else 
-		ocf_exit_reason "Failed to stop NFS server"
-	fi
-	return $rc
+	# systemd
+	case $EXEC_MODE in
+            [23]) nfs_exec stop rpcbind > /dev/null 2>&1
+		  ocf_log info "Stop: rpcbind"
+
+		  nfs_exec stop rpc-gssd > /dev/null 2>&1
+		  ocf_log info "Stop: rpc-gssd"
+	esac
+
+	unbind_tree
+	ocf_log info "NFS server stopped"
+	return 0
 }
 
 nfsserver_validate ()
 {
 	##
 	# set_exec_mode will exit if nfs server is not installed
 	##
 	set_exec_mode
 	check_binary ${OCF_RESKEY_nfs_notify_cmd}
 
 
 	if [ -n "$OCF_RESKEY_CRM_meta_clone" ] && [ -n "$OCF_RESKEY_nfs_shared_infodir" ]; then
 		ocf_exit_reason "This RA does not support clone mode when a shared info directory is in use."
 		exit $OCF_ERR_CONFIGURED
 	fi
 
 	if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then
 		if ! ocf_is_decimal "$OCF_RESKEY_nfs_smnotify_retry_time"; then
 			ocf_exit_reason "Invalid nfs_smnotify_retry_time [$OCF_RESKEY_nfs_smnotify_retry_time]"
 			exit $OCF_ERR_CONFIGURED
 		fi
 	fi
 
 	case ${OCF_RESKEY_nfs_notify_cmd##*/} in
 	sm-notify|rpc.statd) ;;
 	*)
 		ocf_exit_reason "Invalid nfs_notify_cmd [$OCF_RESKEY_nfs_notify_cmd]"
 		exit $OCF_ERR_CONFIGURED
 		;;
 	esac
 
 	return $OCF_SUCCESS
 }
 
 nfsserver_validate
 
 case $__OCF_ACTION in
 	start)      nfsserver_start
 		;;
 	stop)       nfsserver_stop
 		;;
 	monitor)    nfsserver_monitor
 		;;
 	validate-all)   exit $OCF_SUCCESS
 		;;
 	*)      nfsserver_usage
 	exit $OCF_ERR_UNIMPLEMENTED
 	;;
 esac
 
diff --git a/heartbeat/ocf-directories.in b/heartbeat/ocf-directories.in
index 6e0a9d542..8d7077627 100644
--- a/heartbeat/ocf-directories.in
+++ b/heartbeat/ocf-directories.in
@@ -1,22 +1,22 @@
 # Binaries and binary options for use in Resource Agents
 
 prefix=@prefix@
 exec_prefix=@exec_prefix@
 : ${INITDIR:=@INITDIR@}
 : ${HA_DIR:=@sysconfdir@/ha.d}
 : ${HA_RCDIR:=$HA_DIR/rc.d}
 : ${HA_CONFDIR=$HA_DIR/conf}
 : ${HA_CF:=$HA_DIR/ha.cf}
 : ${HA_VARLIB:=@localstatedir@/lib/heartbeat}
 : ${HA_RSCTMP:=@HA_RSCTMPDIR@}
 : ${HA_RSCTMP_OLD:=@HA_VARRUNDIR@/heartbeat/rsctmp}
 : ${HA_FIFO:=@localstatedir@/lib/heartbeat/fifo}
 : ${HA_BIN:=@libexecdir@/heartbeat}
 : ${HA_SBIN_DIR:=@sbindir@}
 : ${HA_DATEFMT:="%Y/%m/%d_%T "}
 : ${HA_DEBUGLOG:=/dev/null}
 : ${HA_RESOURCEDIR:=$HA_DIR/resource.d}
 : ${HA_DOCDIR:=@datadir@/doc/heartbeat}
 : ${__SCRIPT_NAME:=`basename $0`}
-: ${HA_VARRUN:=@localstatedir@/run/}
-: ${HA_VARLOCK:=@localstatedir@/lock/subsys/}
+: ${HA_VARRUN:=@localstatedir@/run}
+: ${HA_VARLOCK:=@localstatedir@/lock/subsys}
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index e00df4487..6d9669d17 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -1,923 +1,922 @@
 #
 #
 # 	Common helper functions for the OCF Resource Agents supplied by
 # 	heartbeat.
 #
 # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée
 #                    All Rights Reserved.
 #
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
 #
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 # 
 
 # Build version: $Format:%H$
 
 # TODO: Some of this should probably split out into a generic OCF
 # library for shell scripts, but for the time being, we'll just use it
 # ourselves...
 #
 
 # TODO wish-list:
 # - Generic function for evaluating version numbers
 # - Generic function(s) to extract stuff from our own meta-data
 # - Logging function which automatically adds resource identifier etc
 #   prefixes
 # TODO: Move more common functionality for OCF RAs here.
 #
 
 # This was common throughout all legacy Heartbeat agents
 unset LC_ALL; export LC_ALL
 unset LANGUAGE; export LANGUAGE
 
 __SCRIPT_NAME=`basename $0`
 
 if [ -z "$OCF_ROOT" ]; then
     : ${OCF_ROOT=@OCF_ROOT_DIR@}
 fi
 
 if [ "$OCF_FUNCTIONS_DIR" = ${OCF_ROOT}/resource.d/heartbeat ]; then  # old
 	unset OCF_FUNCTIONS_DIR
 fi
 
 : ${OCF_FUNCTIONS_DIR:=${OCF_ROOT}/lib/heartbeat}
 
 . ${OCF_FUNCTIONS_DIR}/ocf-binaries
 . ${OCF_FUNCTIONS_DIR}/ocf-returncodes
 . ${OCF_FUNCTIONS_DIR}/ocf-directories
 . ${OCF_FUNCTIONS_DIR}/ocf-rarun
 . ${OCF_FUNCTIONS_DIR}/ocf-distro
 
 # Define OCF_RESKEY_CRM_meta_interval in case it isn't already set,
 # to make sure that ocf_is_probe() always works
 : ${OCF_RESKEY_CRM_meta_interval=0}
 
 ocf_is_root() {
 	if [ X`id -u` = X0 ]; then
 		true
 	else
 		false
 	fi
 }
 
 ocf_maybe_random() {
 	local rnd="$RANDOM"
 	# Something sane-ish in case a shell doesn't support $RANDOM
 	[ -n "$rnd" ] || rnd=$$
 	echo $rnd
 }
 
 # Portability comments:
 # o The following rely on Bourne "sh" pattern-matching, which is usually
 #   that for filename generation (note: not regexp).
 # o The "*) true ;;" clause is probably unnecessary, but is included
 #   here for completeness.
 # o The negation in the pattern uses "!".  This seems to be common
 #   across many OSes (whereas the alternative "^" fails on some).
 # o If an OS is encountered where this negation fails, then a possible
 #   alternative would be to replace the function contents by (e.g.):
 #	[ -z "`echo $1 | tr -d '[0-9]'`" ]
 #
 ocf_is_decimal() {
 	case "$1" in
 	""|*[!0-9]*)	# empty, or at least one non-decimal
 		false ;;
 	*)
 		true ;;
 	esac
 }
 
 ocf_is_true() {
 	case "$1" in
 	yes|true|1|YES|TRUE|ja|on|ON) true ;;
 	*)	false ;;
 	esac
 }
 
 ocf_is_hex() {
 	case "$1" in
         ""|*[!0-9a-fA-F]*)	# empty, or at least one non-hex
 		false ;;
 	*)
 		true ;;
 	esac
 }
 
 ocf_is_octal() {
 	case "$1" in
         ""|*[!0-7]*)	# empty, or at least one non-octal
 		false ;;
 	*)
 		true ;;
 	esac
 }
 
 __ocf_set_defaults() {
 	__OCF_ACTION="$1"
 
 	# Return to sanity for the agents...
 	unset LANG
 	LC_ALL=C
 	export LC_ALL
 
 	# TODO: Review whether we really should source this. Or rewrite
 	# to match some emerging helper function syntax...? This imports
 	# things which no OCF RA should be using...
 
 	# Strip the OCF_RESKEY_ prefix from this particular parameter
 	if [ -z "$OCF_RESKEY_OCF_CHECK_LEVEL" ]; then
 		: ${OCF_CHECK_LEVEL:=0}
 	else
 		: ${OCF_CHECK_LEVEL:=$OCF_RESKEY_OCF_CHECK_LEVEL}
 	fi
 
 	if [ ! -d "$OCF_ROOT" ]; then
 		ha_log "ERROR: OCF_ROOT points to non-directory $OCF_ROOT."
 		exit $OCF_ERR_GENERIC
 	fi
 
 	if [ -z "$OCF_RESOURCE_TYPE" ]; then
 		: ${OCF_RESOURCE_TYPE:=$__SCRIPT_NAME}
 	fi
 
+	if [ "x$__OCF_ACTION" = "xmeta-data" ]; then
+		: ${OCF_RESOURCE_INSTANCE:="RESOURCE_ID"}
+	fi
+
 	if [ -z "$OCF_RA_VERSION_MAJOR" ]; then
 		: We are being invoked as an init script.
 		: Fill in some things with reasonable values.
 		: ${OCF_RESOURCE_INSTANCE:="default"}
 		return 0
         fi
 
-	if [ "x$__OCF_ACTION" = "xmeta-data" ]; then
-		OCF_RESOURCE_INSTANCE="undef"
-	fi	
-
 	if [ -z "$OCF_RESOURCE_INSTANCE" ]; then
 		ha_log "ERROR: Need to tell us our resource instance name."
 		exit $OCF_ERR_ARGS
 	fi
 }
 
 hadate() {
   date "+${HA_DATEFMT}"
 }
 
 set_logtag() {
 	if [ -z "$HA_LOGTAG" ]; then
 		if [ -n "$OCF_RESOURCE_INSTANCE" ]; then
 			HA_LOGTAG="$__SCRIPT_NAME($OCF_RESOURCE_INSTANCE)[$$]"
 		else
 			HA_LOGTAG="$__SCRIPT_NAME[$$]"
 		fi
 	fi
 }
 
 __ha_log() {
 	local ignore_stderr=false
 	local loglevel
 
 	[ "x$1" = "x--ignore-stderr" ] && ignore_stderr=true && shift
 
 	[ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=""
 	# if we're connected to a tty, then output to stderr
 	if tty >/dev/null; then
 		if [ "x$HA_debug" = "x0" -a "x$loglevel" = xdebug ] ; then
 			return 0
 		elif [ "$ignore_stderr" = "true" ]; then
 			# something already printed this error to stderr, so ignore
 			return 0
 		fi
 		if [ "$HA_LOGTAG" ]; then
 			echo "$HA_LOGTAG: $*"
 		else
 			echo "$*"
 		fi >&2
 		return 0
 	fi
 
 	set_logtag
 
 	if [ "x${HA_LOGD}" = "xyes" ] ; then 
 		ha_logger -t "${HA_LOGTAG}" "$@"
 		if [ "$?" -eq "0" ] ; then
 			return 0
 		fi
 	fi
 
 	if
 	  [ -n "$HA_LOGFACILITY" ]
         then
 	  : logging through syslog
 	  # loglevel is unknown, use 'notice' for now
           loglevel=notice
           case "${*}" in
             *ERROR*)		loglevel=err;;
             *WARN*)		loglevel=warning;;
             *INFO*|info)	loglevel=info;;
 	  esac
 	  logger -t "$HA_LOGTAG" -p ${HA_LOGFACILITY}.${loglevel} "${*}"
         fi	
         if
 	  [ -n "$HA_LOGFILE" ]
 	then
 	  : appending to $HA_LOGFILE
 	  echo "$HA_LOGTAG:	"`hadate`"${*}" >> $HA_LOGFILE
 	fi
 	if
 	  [ -z "$HA_LOGFACILITY" -a -z "$HA_LOGFILE" ] && ! [ "$ignore_stderr" = "true" ]
 	then
 	  : appending to stderr
 	  echo `hadate`"${*}" >&2
 	fi
         if
           [ -n "$HA_DEBUGLOG" ]
         then
           : appending to $HA_DEBUGLOG
 		  if [ "$HA_LOGFILE"x != "$HA_DEBUGLOG"x ]; then
             echo "$HA_LOGTAG:	"`hadate`"${*}" >> $HA_DEBUGLOG
           fi
         fi
 }
 
 ha_log()
 {
 	__ha_log "$@"
 }
 
 ha_debug() {
 
         if [ "x${HA_debug}" = "x0" ] ; then
                 return 0
         fi
 	if tty >/dev/null; then
 		if [ "$HA_LOGTAG" ]; then
 			echo "$HA_LOGTAG: $*"
 		else
 			echo "$*"
 		fi >&2
 		return 0
 	fi
 
 	set_logtag
 
         if [ "x${HA_LOGD}" = "xyes" ] ; then  
 		ha_logger -t "${HA_LOGTAG}" -D "ha-debug" "$@"
                 if [ "$?" -eq "0" ] ; then
                         return 0
                 fi
         fi
 
 	[ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=""
 
 	if
 	  [ -n "$HA_LOGFACILITY" ]
 	then
 	  : logging through syslog
 	  logger -t "$HA_LOGTAG" -p "${HA_LOGFACILITY}.debug" "${*}"
 	fi
         if
 	  [ -n "$HA_DEBUGLOG" ]
 	then
 	  : appending to $HA_DEBUGLOG
 	  echo "$HA_LOGTAG:	"`hadate`"${*}" >> $HA_DEBUGLOG
 	fi
 	if
 	  [ -z "$HA_LOGFACILITY" -a -z "$HA_DEBUGLOG" ]
 	then
 	  : appending to stderr
 	  echo "$HA_LOGTAG:	`hadate`${*}:	${HA_LOGFACILITY}" >&2
 	fi
 }
 
 ha_parameter() {
 	local VALUE
     VALUE=`sed -e 's%[	][	]*% %' -e 's%^ %%' -e 's%#.*%%'   $HA_CF | grep -i "^$1 " | sed 's%[^ ]* %%'`
     if
 	[ "X$VALUE" = X ]
     then
 	
 	case $1 in
 	    keepalive)	VALUE=2;;
 	    deadtime)
 		ka=`ha_parameter keepalive`
 		VALUE=`expr $ka '*' 2 '+' 1`;;
 	esac
     fi
     echo $VALUE
 }
 
 ocf_log() {
 	# TODO: Revisit and implement internally.
 	if
           [ $# -lt 2 ]
         then
           ocf_log err "Not enough arguments [$#] to ocf_log."
         fi
         __OCF_PRIO="$1"
         shift
         __OCF_MSG="$*"
 
         case "${__OCF_PRIO}" in
           crit)	__OCF_PRIO="CRIT";;
           err)	__OCF_PRIO="ERROR";;
           warn)	__OCF_PRIO="WARNING";;
           info)	__OCF_PRIO="INFO";;
           debug)__OCF_PRIO="DEBUG";;
           *)	__OCF_PRIO=`echo ${__OCF_PRIO}| tr '[a-z]' '[A-Z]'`;;
 	esac
 
 	if [ "${__OCF_PRIO}" = "DEBUG" ]; then
 		ha_debug "${__OCF_PRIO}: $__OCF_MSG"
 	else
 		ha_log "${__OCF_PRIO}: $__OCF_MSG"
 	fi
 }
 
 #
 # ocf_exit_reason: print exit error string to stderr
 # Usage:           Allows the OCF script to provide a string
 #                  describing why the exit code was returned.
 # Arguments:   reason - required, The string that represents why the error
 #                       occured.
 #
 ocf_exit_reason()
 {
 	local cookie="$OCF_EXIT_REASON_PREFIX"
 	local fmt
 	local msg
 
 	# No argument is likely not intentional.
 	# Just one argument implies a printf format string of just "%s".
 	# "Least surprise" in case some interpolated string from variable
 	# expansion or other contains a percent sign.
 	# More than one argument: first argument is going to be the format string.
 	case $# in
 	0)	ocf_log err "Not enough arguments to ocf_log_exit_msg." ;;
 	1)	fmt="%s" ;;
 
 	*)	fmt=$1
 		shift
 		case $fmt in
 		*%*) : ;; # ok, does look like a format string
 		*) ocf_log warn "Does not look like format string: [$fmt]" ;;
 		esac ;;
 	esac
 
 	if [ -z "$cookie" ]; then
 		# use a default prefix
 		cookie="ocf-exit-reason:"
 	fi
 
 	msg=$(printf "${fmt}" "$@")
 	printf >&2 "%s%s\n" "$cookie" "$msg"
 	__ha_log --ignore-stderr "ERROR: $msg"
 }
 
 #
 # ocf_deprecated: Log a deprecation warning
 # Usage:          ocf_deprecated [param-name]
 # Arguments:      param-name optional, name of a boolean resource
 #                            parameter that can be used to suppress
 #                            the warning (default
 #                            "ignore_deprecation")
 ocf_deprecated() {
     local param
     param=${1:-ignore_deprecation}
     # don't use ${!param} here, it's a bashism
     if ! ocf_is_true $(eval echo \$OCF_RESKEY_$param); then
 	ocf_log warn "This resource agent is deprecated" \
 	    "and may be removed in a future release." \
 	    "See the man page for details." \
 	    "To suppress this warning, set the \"${param}\"" \
 	    "resource parameter to true."
     fi
 }
 
 #
 # Ocf_run: Run a script, and log its output.
 # Usage:   ocf_run [-q] [-info|-warn|-err] <command>
 #	-q: don't log the output of the command if it succeeds
 #	-info|-warn|-err: log the output of the command at given
 #		severity if it fails (defaults to err)
 #
 ocf_run() {
 	local rc
 	local output
 	local verbose=1
 	local loglevel=err
 	local var
 
 	for var in 1 2
 	do
 	    case "$1" in
 		"-q")
 		    verbose=""
 		    shift 1;;
 		"-info"|"-warn"|"-err")
 		    loglevel=`echo $1 | sed -e s/-//g`
 		    shift 1;;
 		*)
 		    ;;		
 	    esac
 	done
 
 	output=`"$@" 2>&1`
 	rc=$?
 	output=`echo $output`
 	if [ $rc -eq 0 ]; then 
 	    if [ "$verbose" -a ! -z "$output" ]; then
 		ocf_log info "$output"
 	    fi
 	    return $OCF_SUCCESS
 	else
 	    if [ ! -z "$output" ]; then
 		ocf_log $loglevel "$output"
 	    else
 		ocf_log $loglevel "command failed: $*"
 	    fi
 	    return $rc
 	fi
 }
 
 ocf_pidfile_status() {
     local pid pidfile=$1
     if [ ! -e $pidfile ]; then
 	# Not exists
 	return 2
     fi
     pid=`cat $pidfile`
     kill -0 $pid 2>&1 > /dev/null
     if [ $? = 0 ]; then
 	return 0
     fi
 
     # Stale
     return 1
 }
 
 ocf_take_lock() {
     local lockfile=$1
     local rnd=$(ocf_maybe_random)
 
     sleep 0.$rnd
     while 
 	ocf_pidfile_status $lockfile
     do
 	ocf_log info "Sleeping until $lockfile is released..."
 	sleep 0.$rnd
     done
     echo $$ > $lockfile
 }
 
 
 ocf_release_lock_on_exit() {
     local lockfile=$1
     trap "rm -f $lockfile" EXIT
 }
 
 # returns true if the CRM is currently running a probe. A probe is
 # defined as a monitor operation with a monitoring interval of zero.
 ocf_is_probe() {
     [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" = 0 ]
 }
 
 # returns true if the resource is configured as a clone. This is
 # defined as a resource where the clone-max meta attribute is present,
 # and set to greater than zero.
 ocf_is_clone() {
     [ ! -z "${OCF_RESKEY_CRM_meta_clone_max}" ] && [ "${OCF_RESKEY_CRM_meta_clone_max}" -gt 0 ]
 }
 
 # returns true if the resource is configured as a multistate
 # (master/slave) resource. This is defined as a resource where the
 # master-max meta attribute is present, and set to greater than zero.
 ocf_is_ms() {
     [ ! -z "${OCF_RESKEY_CRM_meta_master_max}" ] && [ "${OCF_RESKEY_CRM_meta_master_max}" -gt 0 ]
 }
 
 # version check functions
 # allow . and - to delimit version numbers
 # max version number is 999
 # letters and such are effectively ignored
 #
 ocf_is_ver() {
 	echo $1 | grep '^[0-9][0-9.-]*[0-9]$' >/dev/null 2>&1
 }
 ocf_ver2num() {
 	echo $1 | awk -F'[.-]' '
 	{for(i=1; i<=NF; i++) s=s*1000+$i; print s}
 	'
 }
 ocf_ver_level(){
 	echo $1 | awk -F'[.-]' '{print NF}'
 }
 ocf_ver_complete_level(){
 	local ver="$1"
 	local level="$2"
 	local i=0
 	while [ $i -lt $level ]; do
 		ver=${ver}.0
 		i=`expr $i + 1`
 	done
 	echo $ver
 }
 
 # usage: ocf_version_cmp VER1 VER2
 #     version strings can contain digits, dots, and dashes
 #     must start and end with a digit
 # returns:
 #     0: VER1 smaller (older) than VER2
 #     1: versions equal
 #     2: VER1 greater (newer) than VER2
 #     3: bad format
 ocf_version_cmp() {
 	ocf_is_ver "$1" || return 3
 	ocf_is_ver "$2" || return 3
 	local v1=$1
 	local v2=$2
 	local v1_level=`ocf_ver_level $v1`
 	local v2_level=`ocf_ver_level $v2`
 	local level_diff
 	if [ $v1_level -lt $v2_level ]; then
 		level_diff=`expr $v2_level - $v1_level`
 		v1=`ocf_ver_complete_level $v1 $level_diff`
 	elif [ $v1_level -gt $v2_level ]; then
 		level_diff=`expr $v1_level - $v2_level`
 		v2=`ocf_ver_complete_level $v2 $level_diff`
 	fi
 	v1=`ocf_ver2num $v1`
 	v2=`ocf_ver2num $v2`
 	if [ $v1 -eq $v2 ]; then
 		return 1
 	elif [ $v1 -lt $v2 ]; then
 		return 0
 	else
 		return 2 # -1 would look funny in shell ;-)
 	fi
 }
 
 ocf_local_nodename() {
 	# use crm_node -n for pacemaker > 1.1.8
 	which pacemakerd > /dev/null 2>&1
 	if [ $? -eq 0 ]; then
 		local version=$(pacemakerd -$ | grep "Pacemaker .*" | awk '{ print $2 }')
 		version=$(echo $version | awk -F- '{ print $1 }')
 		ocf_version_cmp "$version" "1.1.8"
 		if [ $? -eq 2 ]; then
 			which crm_node > /dev/null 2>&1
 			if [ $? -eq 0 ]; then
 				crm_node -n
 				return
 			fi
 		fi
 	fi
 
 	# otherwise use uname -n
 	uname -n
 }
 
 # usage: dirname DIR
 dirname()
 {
 	local a
 	local b
 
 	[ $# = 1 ] || return 1
 	a="$1"
 	while [ 1 ]; do
 		b="${a%/}"
 		[ "$a" = "$b" ] && break
 		a="$b"
 	done
 	b=${a%/*}
 	[ -z "$b" -o "$a" = "$b"  ] && b="."
 
 	echo "$b"
 	return 0
 }
 
 #
 # pseudo_resource status tracking function...
 #
 # This allows pseudo resources to give correct status information.  As we add
 # resource monitoring, and better resource tracking in general, this will
 # become essential.
 #
-# These scripts work because ${HA_RSCTMP} is cleaned out every time
-# heartbeat is started.
+# These scripts work because ${HA_RSCTMP} is cleaned on node reboot.
 #
 # We create "resource-string" tracking files under ${HA_RSCTMP} in a
 # very simple way:
 #
 #	Existence of "${HA_RSCTMP}/resource-string" means that we consider
 #	the resource named by "resource-string" to be running.
 #
 # Note that "resource-string" needs to be unique.  Using the resource type
 # plus the resource instance arguments to make up the resource string
 # is probably sufficient...
 #
 # usage: ha_pseudo_resource resource-string op [tracking_file]
 # 	where op is {start|stop|monitor|status|restart|reload|print}
 #	print is a special op which just prints the tracking file location
 #	user can override our choice of the tracking file location by
 #		specifying it as the third arg
 #	Note that all operations are silent...
 #
 ha_pseudo_resource()
 {
   local ha_resource_tracking_file="${3:-${HA_RSCTMP}/$1}"
   case $2 in
     start|restart|reload)  touch "$ha_resource_tracking_file";;
     stop) rm -f "$ha_resource_tracking_file";;
     status|monitor)
            if
              [ -f "$ha_resource_tracking_file" ]
            then
              return 0
            else
              case $2 in
                status)	return 3;;
                *)	return 7;;
              esac
            fi;;
     print)  echo "$ha_resource_tracking_file";;
     *)	return 3;;
   esac
 }
 
 # usage: rmtempdir TMPDIR
 rmtempdir()
 {
 	[ $# = 1 ] || return 1
 	if [ -e "$1" ]; then
 		rmdir "$1" || return 1
 	fi
 	return 0
 }
 
 # usage: maketempfile [-d]
 maketempfile()
 {
 	if [ $# = 1 -a "$1" = "-d" ]; then
 		mktemp -d
 		return -0
 	elif [ $# != 0 ]; then
 		return 1
 	fi
 
 	mktemp
 	return 0
 }
 
 # usage: rmtempfile TMPFILE
 rmtempfile ()
 {
 	[ $# = 1 ] || return 1
 	if [ -e "$1" ]; then
 		rm "$1" || return 1
 	fi
 	return 0
 }
 
 # echo the first lower supported check level
 # pass set of levels supported by the agent
 # (in increasing order, 0 is optional)
 ocf_check_level()
 {
 	local lvl prev
 	lvl=0
 	prev=0
 	if ocf_is_decimal "$OCF_CHECK_LEVEL"; then
 		# the level list should be very short
 		for lvl; do
 			if [ "$lvl" -eq "$OCF_CHECK_LEVEL" ]; then
 				break
 			elif [ "$lvl" -gt "$OCF_CHECK_LEVEL" ]; then
 				lvl=$prev # the previous one
 				break
 			fi
 			prev=$lvl
 		done
 	fi
 	echo $lvl
 }
 
 # usage: ocf_stop_processes SIGNALS WAIT_TIME PIDS
 #
 # we send signals (use quotes for more than one!) in the order
 # given; if one or more processes are still running we try KILL;
 # the wait_time is the _total_ time we'll spend in this function
 # this time may be slightly exceeded if the processes won't leave
 # 
 # returns:
 #     0: all processes left
 #     1: some processes still running
 #
 # example:
 #
 # ocf_stop_processes TERM 5 $pids
 # 
 ocf_stop_processes() {
 	local signals="$1"
 	local wait_time="$(($2/`echo $signals|wc -w`))"
 	shift 2
 	local pids="$*"
 	local sig i
 	test -z "$pids" &&
 		return 0
 	for sig in $signals KILL; do
 		kill -s $sig $pids 2>/dev/null
 		# try to leave early, and yet leave processes time to exit
 		sleep 0.2
 		for i in `seq $wait_time`; do
 			kill -s 0 $pids 2>/dev/null ||
 				return 0
 			sleep 1
 		done
 	done
 	return 1
 }
 
 #
 # create a given status directory
 # if the directory path doesn't start with $HA_VARRUN, then
 # we return with error (most of the calls would be with the user
 # supplied configuration, hence we need to do necessary
 # protection)
 # used mostly for PID files
 #
 # usage: ocf_mkstatedir owner permissions path
 #
 # owner: user.group
 # permissions: permissions
 # path: directory path
 #
 # example:
 #	ocf_mkstatedir named 755 `dirname $pidfile`
 #
 ocf_mkstatedir()
 {
 	local owner
 	local perms
 	local path
 
 	owner=$1
 	perms=$2
 	path=$3
 
 	test -d $path && return 0
 	[ $(id -u) = 0 ] || return 1
 
 	case $path in
 	$HA_VARRUN/*) : this path is ok ;;
 	*) ocf_log err "cannot create $path (does not start with $HA_VARRUN)"
 		return 1
 	;;
 	esac
 
 	mkdir -p $path &&
 	chown $owner $path &&
 	chmod $perms $path
 }
 
 #
 # create a unique status directory in $HA_VARRUN
 # used mostly for PID files
 # the directory is by default set to
 #   $HA_VARRUN/$OCF_RESOURCE_INSTANCE
 # the directory name is printed to stdout
 #
 # usage: ocf_unique_rundir owner permissions name
 #
 # owner: user.group (default: "root")
 # permissions: permissions (default: "755")
 # name: some unique string (default: "$OCF_RESOURCE_INSTANCE")
 #
 # to use the default either don't set the parameter or set it to
 # empty string ("")
 # example:
 #
 #	STATEDIR=`ocf_unique_rundir named "" myownstatedir`
 #
 ocf_unique_rundir()
 {
 	local path
 	local owner
 	local perms
 	local name
 
 	owner=${1:-"root"}
 	perms=${2:-"755"}
 	name=${3:-"$OCF_RESOURCE_INSTANCE"}
 	path=$HA_VARRUN/$name
 	if [ ! -d $path ]; then
 		[ $(id -u) = 0 ] || return 1
 		mkdir -p $path &&
 		chown $owner $path &&
 		chmod $perms $path || return 1
 	fi
 	echo $path
 }
 
 #
 # RA tracing may be turned on by setting OCF_TRACE_RA
 # the trace output will be saved to OCF_TRACE_FILE, if set, or
 # by default to
 #   $HA_VARLIB/trace_ra/<type>/<id>.<action>.<timestamp>
 #   e.g. $HA_VARLIB/trace_ra/oracle/db.start.2012-11-27.08:37:08
 #
 # OCF_TRACE_FILE:
 # - FD (small integer [3-9]) in that case it is up to the callers
 #   to capture output; the FD _must_ be open for writing
 # - absolute path
 #
 # NB: FD 9 may be used for tracing with bash >= v4 in case
 # OCF_TRACE_FILE is set to a path.
 #
 ocf_is_bash4() {
 	echo "$SHELL" | grep bash > /dev/null &&
 			[ ${BASH_VERSINFO[0]} = "4" ]
 }
 ocf_trace_redirect_to_file() {
 	local dest=$1
 	if ocf_is_bash4; then
 		exec 9>$dest
 		BASH_XTRACEFD=9
 	else
 		exec 2>$dest
 	fi
 }
 ocf_trace_redirect_to_fd() {
 	local fd=$1
 	if ocf_is_bash4; then
 		BASH_XTRACEFD=$fd
 	else
 		exec 2>&$fd
 	fi
 }
 __ocf_test_trc_dest() {
 	local dest=$1
 	if ! touch $dest; then
 		ocf_log warn "$dest not writable, trace not going to happen"
 		__OCF_TRC_DEST=""
 		__OCF_TRC_MANAGE=""
 		return 1
 	fi
 	return 0
 }
 ocf_default_trace_dest() {
 	tty >/dev/null && return
 	if [ -n "$OCF_RESOURCE_TYPE" -a \
 			-n "$OCF_RESOURCE_INSTANCE" -a -n "$__OCF_ACTION" ]; then
 		local ts=`date +%F.%T`
 		__OCF_TRC_DEST=$HA_VARLIB/trace_ra/${OCF_RESOURCE_TYPE}/${OCF_RESOURCE_INSTANCE}.${__OCF_ACTION}.$ts
 		__OCF_TRC_MANAGE="1"
 	fi
 }
 
 ocf_start_trace() {
 	export __OCF_TRC_DEST="" __OCF_TRC_MANAGE=""
 	case "$OCF_TRACE_FILE" in
 	[3-9]) ocf_trace_redirect_to_fd "$OCF_TRACE_FILE" ;;
 	/*/*) __OCF_TRC_DEST=$OCF_TRACE_FILE ;;
 	"") ocf_default_trace_dest ;;
 	*)
 		ocf_log warn "OCF_TRACE_FILE must be set to either FD (open for writing) or absolute file path"
 		ocf_default_trace_dest
 		;;
 	esac
 	if [ "$__OCF_TRC_DEST" ]; then
 		mkdir -p `dirname $__OCF_TRC_DEST`
 		__ocf_test_trc_dest $__OCF_TRC_DEST ||
 			return
 		ocf_trace_redirect_to_file "$__OCF_TRC_DEST"
 	fi
 	if [ -n "$BASH_VERSION" ]; then
 		PS4='+ `date +"%T"`: ${FUNCNAME[0]:+${FUNCNAME[0]}:}${LINENO}: '
 	fi
 	set -x
 	env=$( echo; printenv | sort )
 }
 ocf_stop_trace() {
 	set +x
 }
 
 __ocf_set_defaults "$@"
 
 : ${OCF_TRACE_RA:=$OCF_RESKEY_trace_ra}
 ocf_is_true "$OCF_TRACE_RA" && ocf_start_trace
 
 # pacemaker sets HA_use_logd, some others use HA_LOGD :/
 if ocf_is_true "$HA_use_logd"; then
 	: ${HA_LOGD:=yes}
 fi
diff --git a/heartbeat/oracle b/heartbeat/oracle
index 951221c5e..6fad5bc6f 100755
--- a/heartbeat/oracle
+++ b/heartbeat/oracle
@@ -1,763 +1,770 @@
 #!/bin/sh
 #
 # 
 # oracle
 #
 # Description:	Manages an Oracle Database as a High-Availability
 #		resource
 #
 #
 # Author:	Dejan Muhamedagic
 # Support:	linux-ha@lists.linux-ha.org
 # License:	GNU General Public License (GPL)
 # Copyright:	(C) 2006 International Business Machines, Inc.
 #
 #		This code inspired by the DB2 resource script
 #		written by Alan Robertson
 #
 # An example usage in /etc/ha.d/haresources: 
 #       node1  10.0.0.170 oracle::RK1::/oracle/10.2::orark1
 #
 # See oracle_usage() function below for more details...
 #
 # OCF instance parameters:
 #	OCF_RESKEY_sid
 #	OCF_RESKEY_home (optional; else read it from /etc/oratab)
 #	OCF_RESKEY_user (optional; figure it out by checking file ownership)
 #	OCF_RESKEY_ipcrm (optional; defaults to "instance")
 #	OCF_RESKEY_clear_backupmode (optional; default to "false")
 #	OCF_RESKEY_shutdown_method (optional; default to "checkpoint/abort")
 #	OCF_RESKEY_monuser (optional; defaults to "OCFMON")
 #	OCF_RESKEY_monpassword (optional; defaults to "OCFMON")
 #	OCF_RESKEY_monprofile (optional; defaults to "OCFMONPROFILE")
 #
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 . ${OCF_FUNCTIONS_DIR}/ora-common.sh
 
 #######################################################################
 
 oracle_usage() {
   methods=`oracle_methods`
   methods=`echo $methods | tr ' ' '|'`
   cat <<-!
 	usage: $0 {$methods}
 
 	$0 manages an Oracle Database instance as an HA resource.
 
 	The 'start' operation starts the database.
 	The 'stop' operation stops the database.
 	The 'status' operation reports whether the database is running
 	The 'monitor' operation reports whether the database seems to be working
 	The 'dumpinstipc' operation prints IPC resources used by the instance
 	The 'cleanup' operation tries to clean up after Oracle was brutally stopped
 	The 'validate-all' operation reports whether the parameters are valid
 	The 'methods' operation reports on the methods $0 supports
 
 	!
 }
 
 # Defaults
 OCF_RESKEY_monuser_default="OCFMON"
 OCF_RESKEY_monpassword_default="OCFMON"
 OCF_RESKEY_monprofile_default="OCFMONPROFILE"
 
 oracle_meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="oracle">
 <version>1.0</version>
 
 <longdesc lang="en">
 Resource script for oracle. Manages an Oracle Database instance
 as an HA resource.
 </longdesc>
 <shortdesc lang="en">Manages an Oracle Database instance</shortdesc>
 
 <parameters>
 
 <parameter name="sid" unique="1" required="1">
 <longdesc lang="en">
 The Oracle SID (aka ORACLE_SID).
 </longdesc>
 <shortdesc lang="en">sid</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="home" unique="0">
 <longdesc lang="en">
 The Oracle home directory (aka ORACLE_HOME).
 If not specified, then the SID along with its home should be listed in
 /etc/oratab.
 </longdesc>
 <shortdesc lang="en">home</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="user" unique="0">
 <longdesc lang="en">
 The Oracle owner (aka ORACLE_OWNER).
 If not specified, then it is set to the owner of
 file \$ORACLE_HOME/dbs/*\${ORACLE_SID}.ora.
 If this does not work for you, just set it explicitely.
 </longdesc>
 <shortdesc lang="en">user</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="monuser" unique="0">
 <longdesc lang="en">
 Monitoring user name. Every connection as
 sysdba is logged in an audit log. This can
 result in a large number of new files created.
 A new user is created (if it doesn't exist) in
 the start action and subsequently used in monitor.
 It should have very limited rights. Make sure
 that the password for this user does not expire.
 </longdesc>
 <shortdesc lang="en">monuser</shortdesc>
 <content type="string" default="$OCF_RESKEY_monuser_default" />
 </parameter>
 
 <parameter name="monpassword" unique="0">
 <longdesc lang="en">
 Password for the monitoring user. Make sure
 that the password for this user does not expire.
 </longdesc>
 <shortdesc lang="en">monpassword</shortdesc>
 <content type="string" default="$OCF_RESKEY_monpassword_default" />
 </parameter>
 
 <parameter name="monprofile" unique="0">
 <longdesc lang="en">
 Profile used by the monitoring user. If the
 profile does not exist, it will be created
 with a non-expiring password.
 </longdesc>
 <shortdesc lang="en">monprofile</shortdesc>
 <content type="string" default="$OCF_RESKEY_monprofile_default" />
 </parameter>
 
 <parameter name="ipcrm" unique="0">
 <longdesc lang="en">
 Sometimes IPC objects (shared memory segments and semaphores)
 belonging to an Oracle instance might be left behind which
 prevents the instance from starting. It is not easy to figure out
 which shared segments belong to which instance, in particular when
 more instances are running as same user.
 
 What we use here is the "oradebug" feature and its "ipc" trace
 utility. It is not optimal to parse the debugging information, but
 I am not aware of any other way to find out about the IPC
 information. In case the format or wording of the trace report
 changes, parsing might fail. There are some precautions, however,
 to prevent stepping on other peoples toes. There is also a
 dumpinstipc option which will make us print the IPC objects which
 belong to the instance. Use it to see if we parse the trace file
 correctly.
 
 Three settings are possible:
 
 - none: don't mess with IPC and hope for the best (beware: you'll
   probably be out of luck, sooner or later)
 - instance: try to figure out the IPC stuff which belongs to the
   instance and remove only those (default; should be safe)
 - orauser: remove all IPC belonging to the user which runs the
   instance (don't use this if you run more than one instance as same
   user or if other apps running as this user use IPC)
 
 The default setting "instance" should be safe to use, but in that
 case we cannot guarantee that the instance will start. In case IPC
 objects were already left around, because, for instance, someone
 mercilessly killing Oracle processes, there is no way any more to
 find out which IPC objects should be removed. In that case, human
 intervention is necessary, and probably _all_ instances running as
 same user will have to be stopped. The third setting, "orauser",
 guarantees IPC objects removal, but it does that based only on IPC
 objects ownership, so you should use that only if every instance
 runs as separate user.
 
 Please report any problems. Suggestions/fixes welcome.
 </longdesc>
 <shortdesc lang="en">ipcrm</shortdesc>
 <content type="string" default="instance" />
 </parameter>
 
 <parameter name="clear_backupmode" unique="0" required="0">
 <longdesc lang="en">
 The clear of the backup mode of ORACLE.
 </longdesc>
 <shortdesc lang="en">clear_backupmode</shortdesc>
 <content type="boolean" default="false" />
 </parameter>
 
 <parameter name="shutdown_method" unique="0" required="0">
 <longdesc lang="en">
 How to stop Oracle is a matter of taste it seems. The default
 method ("checkpoint/abort") is:
 
 	alter system checkpoint;
 	shutdown abort;
 
 This should be the fastest safe way bring the instance down. If
 you find "shutdown abort" distasteful, set this attribute to
 "immediate" in which case we will
 
 	shutdown immediate;
 
 If you still think that there's even better way to shutdown an
 Oracle instance we are willing to listen.
 </longdesc>
 <shortdesc lang="en">shutdown_method</shortdesc>
 <content type="string" default="checkpoint/abort" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start" timeout="120" />
 <action name="stop" timeout="120" />
 <action name="status" timeout="5" />
 <action name="monitor" depth="0" timeout="30" interval="120" />
 <action name="validate-all" timeout="5" />
 <action name="methods" timeout="5" />
 <action name="meta-data" timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 
 #
 # methods: What methods/operations do we support?
 #
 oracle_methods() {
   cat <<-!
 	start
 	stop
 	status
 	monitor
 	dumpinstipc
 	showdbstat
 	cleanup
 	validate-all
 	methods
 	meta-data
 	usage
 	!
 }
 
 #
 #	Run commands as the Oracle owner...
 #
 execsql() {
 	if [ "$US" = "$ORACLE_OWNER" ]; then
 		sqlplus -S /nolog
 	else
 		su - $ORACLE_OWNER -s /bin/sh -c ". $ORA_ENVF; sqlplus -S /nolog"
 	fi
 }
 
 #
 #	Run commands in the oracle admin sqlplus...
 #
 common_sql_opts() {
 	cat<<EOF
 set feedback off
 set heading off
 set pagesize 0
 EOF
 }
 common_sql_filter() {
 	grep -v '^Connected' |
 		grep -v '^ENV MSG:' |
 		grep -v 'Your password will expire in'
 }
 runsql() {
 	local conn_s="$1"
 	shift 1
 	local func
 	(
 	echo "$conn_s"
 	common_sql_opts
 	for func; do $func; done
 	) |
 	execsql | common_sql_filter
 }
 dbasql() {
 	runsql "connect / as sysdba" $*
 }
 monsql() {
-	runsql "connect $MONUSR/\"$MONPWD\"" $*
+	runsql "connect \"$MONUSR\"/\"$MONPWD\"" $*
 }
 # use dbasql_one if the query should result in a single line output
 # at times people stuff commands in oracle .profile
 # which may produce extra output
 dbasql_one() {
 	dbasql $* | tail -1
 }
 monsql_one() {
 	monsql $* | tail -1
 }
 
 #
 # various interesting sql
 #
 dbstat() {
 	echo 'select status from v$instance;'
 }
 dbmount() {
 	echo 'alter database mount;'
 }
 dbopen() {
 	echo 'alter database open;'
 }
 dbstop_immediate() {
 	echo 'shutdown immediate'
 }
 dbstop_checkpoint_abort() {
 	echo 'alter system checkpoint;'
 	echo 'shutdown abort'
 }
 dbstop() {
 	case "${shutdown_method}" in
 	"immediate")
 		dbstop_immediate
 	;;
 	"checkpoint/abort")
 		dbstop_checkpoint_abort
 	;;
 	esac
 }
 dbstart() {
 	echo 'startup'
 }
 dbstart_mount() {
 	echo 'startup mount'
 }
 dbendbackup() {
 	echo 'alter database end backup;'
 }
 db_backup_mode() {
 	echo "select 'COUNT'||count(*) from v\$backup where status='ACTIVE';"
 }
 is_clear_backupmode_set(){
 	[ x"${clear_backupmode}" = x"true" ]
 }
 is_instance_in_backup_mode() {
 	local count
 	count="`dbasql_one db_backup_mode | sed 's/COUNT//'`"
 	[ x"$count" != x"0" ]
 }
 clear_backup_mode() {
 	local output
 	output="`dbasql dbendbackup`"
 	ocf_log info "Oracle instance $ORACLE_SID alter database end backup: $output"
 }
 getdumpdest() {
 	#echo 'select value from v$parameter where name = \'user_dump_dest\';'
 	echo "select value from v\$parameter where name = 'user_dump_dest';"
 }
 getipc() {
 	echo "oradebug setmypid"
 	echo "oradebug tracefile_name"
 	echo "oradebug ipc"
 }
 show_mon_profile() {
 	echo "select PROFILE from dba_profiles where PROFILE='$MONPROFILE';"
 }
 mk_mon_profile() {
 	cat<<EOF
-create profile $MONPROFILE limit FAILED_LOGIN_ATTEMPTS UNLIMITED PASSWORD_LIFE_TIME UNLIMITED;
+create profile "$MONPROFILE" limit FAILED_LOGIN_ATTEMPTS UNLIMITED PASSWORD_LIFE_TIME UNLIMITED;
 EOF
 }
 show_mon_user() {
 	echo "select USERNAME, ACCOUNT_STATUS from dba_users where USERNAME='$MONUSR';"
 }
 mk_mon_user() {
 	cat<<EOF
-create user $MONUSR identified by "$MONPWD" profile $MONPROFILE;
-grant create session to $MONUSR;
-grant select on v_\$instance to $MONUSR;
+create user "$MONUSR" identified by "$MONPWD" profile "$MONPROFILE";
+grant create session to "$MONUSR";
+grant select on v_\$instance to "$MONUSR";
 EOF
 }
 show_mon_user_profile() {
 	echo "select PROFILE from dba_users where USERNAME='$MONUSR';"
 }
 set_mon_user_profile() {
-	echo "alter user $MONUSR profile $MONPROFILE;"
+	echo "alter user \"$MONUSR\" profile \"$MONPROFILE\";"	# \" keeps the quotes in the SQL: quoted identifiers, as in mk_mon_user
 }
 reset_mon_user_password() {
-	echo "alter user $MONUSR identified by $MONPWD;"
+	echo "alter user \"$MONUSR\" identified by \"$MONPWD\";"	# \" keeps the quotes in the SQL, matching "create user" in mk_mon_user
 }
 check_mon_profile() {
 	local output
 	output=`dbasql show_mon_profile`
 	if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then
 		return 0
 	fi
 	output=`dbasql mk_mon_profile show_mon_profile`
 	if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then
 		return 0
+	elif echo "$output" | grep ORA-65140 >/dev/null 2>&1; then
+		ocf_exit_reason "monprofile must start with C## for container databases"
+		return $OCF_ERR_CONFIGURED
 	else
 		ocf_exit_reason "could not create $MONPROFILE oracle profile"
 		ocf_log err "sqlplus output: $output"
 		return 1
 	fi
 }
 check_mon_user() {
 	local output
 	local output2
 
 	output=`dbasql show_mon_user`
 	if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then
 		if echo "$output" | grep -w "EXPIRED" >/dev/null; then
 			dbasql reset_mon_user_password
 		fi
 		output=`dbasql show_mon_user_profile`
 		if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then
 			return 0
 		else
 			output=`dbasql set_mon_user_profile`
 			output2=`dbasql show_mon_user_profile`
 			if echo "$output2" | grep -iw "^$MONPROFILE" >/dev/null; then
 				return 0
 			fi
 			ocf_exit_reason "could not set profile for $MONUSR oracle user"
 			ocf_log err "sqlplus output: $output( $output2 )"
 			return 1
 		fi
 	fi
 	output=`dbasql mk_mon_user show_mon_user`
 	if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then
 		return 0
+	elif echo "$output" | grep ORA-65096 >/dev/null 2>&1; then
+		ocf_exit_reason "monuser must start with C## for container databases"
+		return $OCF_ERR_CONFIGURED
 	else
 		ocf_exit_reason "could not create $MONUSR oracle user"
 		ocf_log err "sqlplus output: $output"
 		return 1
 	fi
 }
 #
 # print the output of dbstat (for debugging)
 #
 showdbstat() {
 	echo "Full output:"
 	dbstat | execsql
 	echo "Stripped output:"
 	echo "<`dbasql dbstat`>"
 }
 
 #
 # IPC stuff: not overly complex, but quite involved :-/
 #
 
 # Part 1: Oracle
 other_trace_junk() {
 	echo $1 | sed 's/trc$/trm/'
 }
 dumpinstipc() {
 	local output tracef
 	output=`dbasql getipc` # filename in the 2nd line
 	tracef=`echo "$output" | awk 'NR==2' | grep '^/.*trc$'`
 	if [ "$tracef" ]; then
 		echo $tracef
 	else
 		ocf_log warn "'dbasql getipc' failed: $output"
 		return 1
 	fi
 }
 parseipc() {
 	local inf=$1
 	if [ ! -f "$1" ]; then
 		ocf_log warn "$1: no such ipc trace file"
 		return 1
 	fi
 	awk '
 		$3 == "Shmid" {n=1;next}
 		n {
 			if( $3~/^[0-9]+$/ ) print $3;
 			n=0
 		}
 	' $inf |
 	sort -u | sed 's/^/m:/'
 	awk '
 		/Semaphore List/ {insems=1;next}
 		insems {
 			for( i=1; i<=NF; i++ )
 				if( $i~/^[0-9]+$/ ) print $i;
 		}
 		/system semaphore information/ {exit}
 	' $inf |
 	sort -u | sed 's/^/s:/'
 	TMPFILES="$TMPFILES $inf `other_trace_junk $inf`"
 }
 
 # Part 2: OS (ipcs,ipcrm)
 filteroraipc() {  # this portable?
 	grep -w $ORACLE_OWNER | awk '{print $2}'
 }
 ipcdesc() {
 	local what=$1
 	case $what in
 	m) echo "shared memory segment";;
 	s) echo "semaphore";;
 	q) echo "message queue";;
 	esac
 }
 rmipc() {
 	local what=$1 id=$2
 	ipcs -$what | filteroraipc | grep -iw $id >/dev/null 2>&1 ||
 		return
 	ocf_log info "Removing `ipcdesc $what` $id."
 	ipcrm -$what $id
 }
 ipcrm_orauser() {
 	local what id
 	for what in m s q; do
 		for id in `ipcs -$what | filteroraipc`; do
 			rmipc $what $id
 		done
 	done
 }
 ipcrm_instance() {
 	local ipcobj
 	for ipcobj; do
 		rmipc `echo $ipcobj | sed 's/:/ /'`
 	done
 }
 
 #
 # oracle_status: is the Oracle instance running?
 #
 # quick check to see if the instance is up
 is_proc_running() {
 	ps -ef | grep -wiqs "[^ ]*[_]pmon_${ORACLE_SID}"
 }
 # instance in OPEN state?
 instance_live() {
 	local status=`monsql_one dbstat`
 	[ "$status" = OPEN ] && return 0
 	status=`dbasql_one dbstat`
 	if [ "$status" = OPEN ]; then
 		return 0
 	else
 		ocf_log info "$ORACLE_SID instance state is not OPEN (dbstat output: $status)"
 		return 1
 	fi
 }
 
 ora_cleanup() {
 	#rm -fr /tmp/.oracle #???
 	rm -f `ls $ORACLE_HOME/dbs/lk* | grep -i "$ORACLE_SID\$"`
 	#return
 
 	case $IPCRM in
 	none)
 		;;
 	instance)
 		ipcrm_instance $*
 		;;
 	orauser)
 		ipcrm_orauser $*
 		;;
 	esac
 }
 
 oracle_getconfig() {
 	ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user"
 
 	clear_backupmode=${OCF_RESKEY_clear_backupmode:-"false"}
 	shutdown_method=${OCF_RESKEY_shutdown_method:-"checkpoint/abort"}
 	IPCRM=${OCF_RESKEY_ipcrm:-"instance"}
 }
 
 #
 # oracle_start: Start the Oracle instance
 #
 # NOTE: We handle instance in the MOUNTED and STARTED states
 # efficiently
 # We *do not* handle instance in the restricted or read-only
 # mode, i.e. it appears as running, but its availability is
 # "not for general use"
 #
 
 oracle_start() {
 	local status output
 	if is_proc_running; then
 		status="`dbasql_one dbstat`"
 		case "$status" in
 		"OPEN")
 			: nothing to be done, we can leave right now
 			ocf_log info "Oracle instance $ORACLE_SID already running"
 			return $OCF_SUCCESS
 		;;
 		"STARTED")
 			output=`dbasql dbmount`
 		;;
 		"MOUNTED")
 			: we proceed if mounted
 		;;
 		*) # status unknown
 			output=`dbasql dbstop dbstart_mount`
 		;;
 		esac
 	else
 		output="`dbasql dbstart_mount`"
 		# try to cleanup in case of
 		# ORA-01081: cannot start already-running ORACLE - shut it down first
 		if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
 			ocf_log info "ORA-01081 error found, trying to cleanup oracle (dbstart_mount output: $output)"
 			ora_cleanup
+			output=`dbasql dbstop_immediate`
 			output=`dbasql dbstart_mount`
 		fi
 	fi
 
 	# oracle instance should be mounted.
 	status="`dbasql_one dbstat`"
 	case "$status" in
 	"MOUNTED")
 		;;
 	*)
 		: error!!
 		ocf_exit_reason "oracle $ORACLE_SID can not be mounted (status: $status)"
 		return $OCF_ERR_GENERIC
 		;;
 	esac
 
 	# It is examined whether mode is "online backup mode",
 	# and if it is true, makes clear the mode.
 	# Afterwards, DB is opened.
 	if is_clear_backupmode_set && is_instance_in_backup_mode; then
 		clear_backup_mode
 	fi
 	output=`dbasql dbopen`
 
 	# check/create the monitor profile
 	if ! check_mon_profile; then
 		return $OCF_ERR_GENERIC
 	fi
 
 	# check/create the monitor user
 	if ! check_mon_user; then
 		return $OCF_ERR_GENERIC
 	fi
 
 	if ! is_proc_running; then
 		ocf_exit_reason "oracle process not running: $output"
 		return $OCF_ERR_GENERIC
 	elif ! instance_live; then
 		ocf_exit_reason "oracle instance $ORACLE_SID not started: $output"
 		return $OCF_ERR_GENERIC
 	else
 		: cool, we are up and running
 		ocf_log info "Oracle instance $ORACLE_SID started: $output"
 		return $OCF_SUCCESS
 	fi
 }
 
 #
 # oracle_stop: Stop the Oracle instance
 #
 oracle_stop() {
 	local status output ipc=""
 	if is_proc_running; then
 		[ "$IPCRM" = "instance" ] && ipc=$(parseipc `dumpinstipc`)
 		output=`dbasql dbstop`
 	else
 		ocf_log info "Oracle instance $ORACLE_SID already stopped"
 		return $OCF_SUCCESS
 	fi
 	ocf_stop_processes TERM $PROCS_CLEANUP_TIME `proc_pids`  # kill the procs if they hanged
 	if is_proc_running; then
 		ocf_exit_reason "Oracle instance $ORACLE_SID not stopped: $output"
 		return $OCF_ERR_GENERIC
 	else
 		ocf_log info "Oracle instance $ORACLE_SID stopped: $output"
 		sleep 1  # give em a chance to cleanup
 		ocf_log info "Cleaning up for $ORACLE_SID"
 		ora_cleanup "$ipc"
 		return $OCF_SUCCESS
 	fi
 }
 
 #
 # oracle_monitor: Can the Oracle instance do anything useful?
 #
 oracle_monitor() {
 	if ! is_proc_running; then
 		ocf_log info "oracle process not running"
 		return $OCF_NOT_RUNNING
 	fi
 	if ! instance_live; then
 		ocf_exit_reason "oracle instance $ORACLE_SID is down"
 		return $OCF_ERR_GENERIC
 	fi
 	#ocf_log info "Oracle instance $ORACLE_SID is alive"
 	return $OCF_SUCCESS
 }
 
 # other supported actions
 oracle_status() {
 	if is_proc_running
 	then
 	  echo Oracle instance $ORACLE_SID is running
 	  exit $OCF_SUCCESS
 	else
 	  echo Oracle instance $ORACLE_SID is stopped
 	  exit $OCF_NOT_RUNNING
 	fi
 }
 oracle_dumpinstipc() {
 	is_proc_running && parseipc `dumpinstipc`
 }
 oracle_showdbstat() {
 	showdbstat
 }
 oracle_cleanup() {
 	if [ "$IPCRM" = "instance" ]; then
 		ora_cleanup $(parseipc `dumpinstipc`)
 	else
 		ora_cleanup
 	fi
 }
 oracle_validate_all() {
 	case "${shutdown_method}" in
 	"immediate") ;;
 	"checkpoint/abort") ;;
 	*) ocf_exit_reason "unsupported shutdown_method, please read meta-data"
 		return $OCF_ERR_CONFIGURED
 		;;
 	esac
 
 	case "${IPCRM}" in
 	"none"|"instance"|"orauser") ;;
 	*) ocf_exit_reason "unsupported ipcrm setting, please read meta-data"
 		return $OCF_ERR_CONFIGURED
 		;;
 	esac
 
 	ora_common_validate_all
 }
 
 # used in ora-common.sh
 show_procs() {
 	ps -e -o pid,args | grep -i "[o]ra[a-zA-Z0-9_]*$ORACLE_SID$"
 }
 proc_pids() { show_procs | awk '{print $1}'; }
 PROCS_CLEANUP_TIME="30"
 
 MONUSR=${OCF_RESKEY_monuser:-$OCF_RESKEY_monuser_default}
 MONPWD=${OCF_RESKEY_monpassword:-$OCF_RESKEY_monpassword_default}
-MONPROFILE=${OCF_RESKEY_monprofile_default:-$OCF_RESKEY_monprofile_default}
+MONPROFILE=${OCF_RESKEY_monprofile:-$OCF_RESKEY_monprofile_default}
 
-MONUSR=$(echo $MONUSR | awk '{print toupper($0)}')
-MONPROFILE=$(echo $MONPROFILE | awk '{print toupper($0)}')
+MONUSR=$(echo "$MONUSR" | awk '{print toupper($0)}')
+MONPROFILE=$(echo "$MONPROFILE" | awk '{print toupper($0)}')
 OCF_REQUIRED_PARAMS="sid"
 OCF_REQUIRED_BINARIES="sqlplus"
 ocf_rarun $*
 
 #
 # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0
diff --git a/heartbeat/oralsnr b/heartbeat/oralsnr
index c47f12117..622138c6f 100755
--- a/heartbeat/oralsnr
+++ b/heartbeat/oralsnr
@@ -1,281 +1,281 @@
 #!/bin/sh
 #
 # 
 # oralsnr
 #
 # Description:	Manages an Oracle Listener as a High-Availability
 #		resource
 #
 #
 # Author:	Dejan Muhamedagic
 # Support:	linux-ha@lists.linux-ha.org
 # License:	GNU General Public License (GPL)
 # Copyright:	(C) 2006 International Business Machines, Inc.
 #
 #		This code inspired by the DB2 resource script
 #		written by Alan Robertson
 #
 # An example usage in /etc/ha.d/haresources: 
 #       node1  10.0.0.170 oralsnr::sid::home::user::listener
 #
 # See oralsnr_usage() function below for more details...
 #
 # OCF instance parameters:
 #	OCF_RESKEY_sid (mandatory; for the monitor op)
 #	OCF_RESKEY_home (optional; else read it from /etc/oratab)
 #	OCF_RESKEY_user (optional; user to run the listener)
 #	OCF_RESKEY_listener (optional; defaults to LISTENER)
 #
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 . ${OCF_FUNCTIONS_DIR}/ora-common.sh
 
 #######################################################################
 
 SH=/bin/sh
 
 oralsnr_usage() {
   methods=`oralsnr_methods`
   methods=`echo $methods | tr ' ' '|'`
   cat <<-!
 	usage: $0 ($methods)
 
 	$0 manages an Oracle Database instance as an HA resource.
 
 	The 'start' operation starts the database.
 	The 'stop' operation stops the database.
 	The 'status' operation reports whether the database is running
 	The 'monitor' operation reports whether the database seems to be working
 	The 'validate-all' operation reports whether the parameters are valid
 	The 'methods' operation reports on the methods $0 supports
 
 	!
 }
 
 oralsnr_meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="oralsnr">
 <version>1.0</version>
 
 <longdesc lang="en">
 Resource script for Oracle Listener. It manages an
 Oracle Listener instance as an HA resource.
 </longdesc>
 <shortdesc lang="en">Manages an Oracle TNS listener</shortdesc>
 
 <parameters>
 
 <parameter name="sid" unique="1" required="1">
 <longdesc lang="en">
 The Oracle SID (aka ORACLE_SID). Necessary for the monitor op,
 i.e. to do tnsping SID.
 </longdesc>
 <shortdesc lang="en">sid</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="home" unique="0">
 <longdesc lang="en">
 The Oracle home directory (aka ORACLE_HOME).
 If not specified, then the SID should be listed in /etc/oratab.
 </longdesc>
 <shortdesc lang="en">home</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="user" unique="0">
 <longdesc lang="en">
 Run the listener as this user.
 </longdesc>
 <shortdesc lang="en">user</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="listener" unique="1">
 <longdesc lang="en">
 Listener instance to be started (as defined in listener.ora).
 Defaults to LISTENER.
 </longdesc>
 <shortdesc lang="en">listener</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="tns_admin" required="0" unique="1">
 <longdesc lang="en">
 	Full path to the directory that contains the Oracle
 	listener tnsnames.ora configuration file.  The shell
 	variable TNS_ADMIN is set to the value provided.
 </longdesc>
 <shortdesc lang="en">
 	Full path to the directory containing tnsnames.ora
 </shortdesc>
 <content type="string"/>
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start" timeout="120" />
 <action name="stop" timeout="120" />
 <action name="status" timeout="60" />
 <action name="monitor" depth="0" timeout="30" interval="10" />
 <action name="validate-all" timeout="5" />
 <action name="meta-data" timeout="5" />
 <action name="methods" timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 
 #
 # methods: What methods/operations do we support?
 #
 oralsnr_methods() {
   cat <<-!
 	start
 	stop
 	status
 	monitor
 	validate-all
 	methods
 	meta-data
 	usage
 	!
 }
 
 #
 #	Run commands as the Oracle owner...
 #
 runasdba() {
 	if [ "$US" = "$ORACLE_OWNER" ]; then
 		$SH
 	else
 		(
 		echo ". $ORA_ENVF"
 		cat
 		) | su -s $SH - $ORACLE_OWNER
 	fi
 }
 
 #
 # oralsnr_start: Start the Oracle listener instance
 #
 
 oralsnr_start() {
 	if is_proc_running && test_tnsping; then
 		: nothing to be done, we can leave right now
 		ocf_log info "Listener $listener already running"
 		return $OCF_SUCCESS
 	fi
 	output=`echo lsnrctl start $listener | runasdba`
 	if test_tnsping; then
 		: cool, we are up and running
 		ocf_log info "Listener $listener running: $output"
 		return $OCF_SUCCESS
 	else
 		ocf_exit_reason "Listener $listener appears to have started, but is not running properly: $output"
 		ocf_log err "Probable Oracle configuration error"
 		return $OCF_ERR_GENERIC
 	fi
 }
 
 #
 # oralsnr_stop: Stop the Oracle instance
 #
 oralsnr_stop() {
 	if is_proc_running; then
 		output=`echo lsnrctl stop $listener | runasdba`
 	else
 		ocf_log info "Listener $listener already stopped"
 		return $OCF_SUCCESS
 	fi
 	ocf_stop_processes TERM $PROCS_CLEANUP_TIME `proc_pids`  # kill the procs if they hanged
 	if is_proc_running; then
 		ocf_exit_reason "Listener $listener not stopped: $output"
 		return $OCF_ERR_GENERIC
 	else
 		ocf_log info "Listener $listener stopped: $output"
 		return $OCF_SUCCESS
 	fi
 }
 
 #
 # is_proc_running: is the listener running?
 #
 is_proc_running() {
 	show_procs | grep "." > /dev/null
 }
 # the following two should be run only if the process is running
 test_listener() {
 	local output
 	output=`lsnrctl status $listener`
 	if echo "$output" | tail -1 | grep -qs 'completed successfully'
 	then
 		return $OCF_SUCCESS
 	else
 		ocf_exit_reason "$listener status failed: $output"
 		return $OCF_ERR_GENERIC
 	fi
 }
 # and does it work?
 test_tnsping() {
 	local output
 	output=`tnsping $ORACLE_SID`
 	if echo "$output" | tail -1 | grep -qs '^OK'; then
 		return $OCF_SUCCESS
 	else
 		ocf_exit_reason "tnsping $ORACLE_SID failed: $output"
 		return $OCF_ERR_GENERIC
 	fi
 }
 
 #
 # oralsnr_monitor: Can we connect to the listener?
 #
 oralsnr_monitor() {
 	if is_proc_running; then
 		test_listener && test_tnsping
 	else
 		return $OCF_NOT_RUNNING
 	fi
 }
 
 oralsnr_status() {
 	if is_proc_running
 	then
 	  echo Listener $listener is running
 	  exit $OCF_SUCCESS
 	else
 	  echo Listener $listener is stopped
 	  exit $OCF_NOT_RUNNING
 	fi
 }
 
 oralsnr_getconfig() {
 	ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" "$OCF_RESKEY_tns_admin"
 	listener=${OCF_RESKEY_listener:-"LISTENER"}
 }
 
 oralsnr_validate_all() {
 	ora_common_validate_all
 }
 
 # used in ora-common.sh
 show_procs() {
-	ps -e -o pid,user,args |
-		grep '[t]nslsnr' | grep -i -w "$listener" | grep -w "$ORACLE_OWNER"
+	ps -U "$ORACLE_OWNER" -o pid,user,args |
+		grep '[t]nslsnr' | grep -i -w "$listener"
 }
 proc_pids() { show_procs | awk '{print $1}'; }
 PROCS_CLEANUP_TIME="10"
 
 OCF_REQUIRED_PARAMS="sid"
 OCF_REQUIRED_BINARIES="lsnrctl tnsping"
 ocf_rarun $*
 
 #
 # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0
diff --git a/heartbeat/pgagent b/heartbeat/pgagent
new file mode 100644
index 000000000..58054a7c3
--- /dev/null
+++ b/heartbeat/pgagent
@@ -0,0 +1,139 @@
+#!/bin/sh
+#
+# High-Availability pgagent OCF resource agent
+#
+# Description:  Starts/stops pgagent
+# Author:       Oleg Selin
+# License:      GNU General Public License (GPL)
+#
+# OCF parameters:
+#  OCF_RESKEY_connection_string
+#  OCF_RESKEY_user
+#  OCF_RESKEY_options
+#
+#######################################################################
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+OCF_RESKEY_executable_default="`which pgagent`"
+OCF_RESKEY_connection_string_default="user=postgres host=/var/run/postgresql"
+OCF_RESKEY_user_default="postgres"
+OCF_RESKEY_options_default="-r 1 -t 1"
+
+: ${OCF_RESKEY_executable="${OCF_RESKEY_executable_default}"}
+: ${OCF_RESKEY_connection_string="${OCF_RESKEY_connection_string_default}"}
+: ${OCF_RESKEY_user="${OCF_RESKEY_user_default}"}
+: ${OCF_RESKEY_options="${OCF_RESKEY_options_default}"}
+
+# Validate the pgagent binary and the OCF parameters this agent relies on.
+pgagent_validate_all() {
+        check_binary pgagent
+        ocf_log debug "executable: '$OCF_RESKEY_executable'"
+        ocf_log debug "connection string: '$OCF_RESKEY_connection_string'"
+        ocf_log debug "user: '$OCF_RESKEY_user'"
+        ocf_log debug "options: '$OCF_RESKEY_options'"
+        if [ -z "$OCF_RESKEY_connection_string" ]; then
+                ocf_log err "Connection string is not configured!"
+                exit $OCF_ERR_CONFIGURED
+        fi
+        if [ -z "$OCF_RESKEY_user" ]; then
+                ocf_log err "User is not configured!"
+                exit $OCF_ERR_CONFIGURED
+        fi
+        if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then  # quoted so the name is passed as one argument
+                ocf_log err "User $OCF_RESKEY_user doesn't exist"
+                return $OCF_ERR_CONFIGURED
+        fi
+        return $OCF_SUCCESS
+}
+
+pgagent_start() {
+        pgagent_validate_all || return $?   # do not launch with a configuration that failed validation
+        nohup su - $OCF_RESKEY_user -c "'$OCF_RESKEY_executable' $OCF_RESKEY_options '$OCF_RESKEY_connection_string'" > /dev/null 2>&1 &
+        sleep 1                             # give the background command a moment before probing
+        if pgagent_monitor; then            # run the probe; "[ -n pgagent_monitor ]" only tested a literal string
+                return $OCF_SUCCESS
+        fi
+        return $OCF_ERR_GENERIC
+}
+
+pgagent_stop() {
+        pgagent_validate_all
+        pid=`pgrep -f -x -U "$OCF_RESKEY_user" "$OCF_RESKEY_executable $OCF_RESKEY_options $OCF_RESKEY_connection_string"`
+        if [ -n "$pid" ]; then              # nothing matched: treat as already stopped
+                ocf_run kill $pid || return $OCF_ERR_GENERIC  # $pid left unquoted so several PIDs split into separate arguments
+        fi
+        return $OCF_SUCCESS
+}
+
+pgagent_monitor() {
+        if [ -z "$OCF_RESKEY_executable" ]; then  # empty when no executable is set and `which pgagent` printed nothing
+                return $OCF_ERR_INSTALLED
+        fi
+        ocf_run pgrep -f -x -U "$OCF_RESKEY_user" "$OCF_RESKEY_executable $OCF_RESKEY_options $OCF_RESKEY_connection_string" || return $OCF_NOT_RUNNING  # -f -x: pattern must match the whole command line; -U: only this user's processes
+        return $OCF_SUCCESS
+}
+
+meta_data() {
+        cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="pgagent" version="1.0">
+<version>1.0</version>
+<longdesc lang="en">This is a pgagent Resource Agent.</longdesc>
+<shortdesc lang="en">Controls pgagent</shortdesc>
+<parameters>
+<parameter name="connection_string">
+<longdesc lang="en">Connection string for pgagent.</longdesc>
+<shortdesc lang="en">pgagent connection string</shortdesc>
+<content type="string" default="$OCF_RESKEY_connection_string_default" />
+</parameter>
+<parameter name="user">
+<longdesc lang="en">User to run pgagent as.</longdesc>
+<shortdesc lang="en">User to run pgagent</shortdesc>
+<content type="string" default="$OCF_RESKEY_user_default" />
+</parameter>
+<parameter name="options">
+<longdesc lang="en">Options for pgagent.</longdesc>
+<shortdesc lang="en">pgagent run options, see pgagent --help for details</shortdesc>
+<content type="string" default="$OCF_RESKEY_options_default" />
+</parameter>
+</parameters>
+<actions>
+<action name="start" timeout="5" />
+<action name="stop" timeout="5" />
+<action name="monitor" timeout="20" interval="10" depth="0" />
+<action name="meta-data" timeout="5" />
+<action name="validate-all" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
+
+pgagent_usage() {
+        cat <<END
+usage: $0 {start|stop|monitor|meta-data|validate-all}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+case $__OCF_ACTION in
+start)          pgagent_start;;
+stop)           pgagent_stop;;
+monitor)        pgagent_monitor;;
+validate-all)   pgagent_validate_all;;
+usage|help)     pgagent_usage
+                exit $OCF_SUCCESS
+                ;;
+meta-data)      meta_data
+                exit $OCF_SUCCESS
+                ;;
+*)              pgagent_usage
+                exit $OCF_ERR_UNIMPLEMENTED
+                ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index 320db692a..3768a0325 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -1,2087 +1,2093 @@
 #!/bin/sh
 #
 # Description:  Manages a PostgreSQL Server as an OCF High-Availability
 #               resource
 #
 # Authors:      Serge Dubrouski (sergeyfd@gmail.com) -- original RA
 #               Florian Haas (florian@linbit.com) -- makeover
 #               Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication
 #               David Corlette (dcorlette@netiq.com) -- add support for non-standard library locations and non-standard port
 #
 # Copyright:    2006-2012 Serge Dubrouski <sergeyfd@gmail.com>
 #                         and other Linux-HA contributors
 # License:      GNU General Public License (GPL)
 #
 ###############################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # Use runuser if available for SELinux.
 if [ -x /sbin/runuser ]; then
     SU=runuser
 else
     SU=su
 fi
 
 #
 # Get PostgreSQL Configuration parameter
 #
 get_pgsql_param() {
     local param_name
 
     param_name=$1
     perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) {
        \$dir=\$1;
        \$dir =~ s/\s*\#.*//;
        \$dir =~ s/^'(\S*)'/\$1/;
        print \$dir;}"
 
     perl -ne "$perl_code" < $OCF_RESKEY_config
 }
 
 # Defaults
 OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl
 OCF_RESKEY_psql_default=/usr/bin/psql
 OCF_RESKEY_pgdata_default=/var/lib/pgsql/data
 OCF_RESKEY_pgdba_default=postgres
 OCF_RESKEY_pghost_default=""
 OCF_RESKEY_pgport_default=5432
 OCF_RESKEY_pglibs_default=/usr/lib
 OCF_RESKEY_start_opt_default=""
 OCF_RESKEY_ctl_opt_default=""
 OCF_RESKEY_pgdb_default=template1
 OCF_RESKEY_logfile_default=/dev/null
-OCF_RESKEY_stop_escalate_default=30
+OCF_RESKEY_stop_escalate_default=90
 OCF_RESKEY_monitor_user_default=""
 OCF_RESKEY_monitor_password_default=""
 OCF_RESKEY_monitor_sql_default="select now();"
 OCF_RESKEY_check_wal_receiver_default="false"
 # Defaults for replication
 OCF_RESKEY_rep_mode_default=none
 OCF_RESKEY_node_list_default=""
 OCF_RESKEY_restore_command_default=""
 OCF_RESKEY_archive_cleanup_command_default=""
 OCF_RESKEY_recovery_end_command_default=""
 OCF_RESKEY_master_ip_default=""
 OCF_RESKEY_repuser_default="postgres"
 OCF_RESKEY_primary_conninfo_opt_default=""
 OCF_RESKEY_restart_on_promote_default="false"
 OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp"
 OCF_RESKEY_xlog_check_count_default="3"
 OCF_RESKEY_crm_attr_timeout_default="5"
-OCF_RESKEY_stop_escalate_in_slave_default=30
+OCF_RESKEY_stop_escalate_in_slave_default=90
 OCF_RESKEY_replication_slot_name_default=""
 
 : ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}}
 : ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}}
 : ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}}
 : ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}}
 : ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}}
 : ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}}
 : ${OCF_RESKEY_pglibs=${OCF_RESKEY_pglibs_default}}
 : ${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf}
 : ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}}
 : ${OCF_RESKEY_ctl_opt=${OCF_RESKEY_ctl_opt_default}}
 : ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}}
 : ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}}
 : ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}}
 : ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}}
 : ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}}
 : ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}}
 : ${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}}
 
 # for replication
 : ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}}
 : ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}}
 : ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}}
 : ${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}}
 : ${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}}
 : ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}}
 : ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}}
 : ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}}
 : ${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}}
 : ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}}
 : ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}}
 : ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}}
 : ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}}
 : ${OCF_RESKEY_replication_slot_name=${OCF_RESKEY_replication_slot_name_default}}
 
 usage() {
     cat <<EOF
         usage: $0 start|stop|status|monitor|promote|demote|notify|meta-data|validate-all|methods
 
         $0 manages a PostgreSQL Server as an HA resource.
 
         The 'start' operation starts the PostgreSQL server.
         The 'stop' operation stops the PostgreSQL server.
         The 'status' operation reports whether the PostgreSQL is up.
         The 'monitor' operation reports whether the PostgreSQL is running.
         The 'promote' operation promotes the PostgreSQL server.
         The 'demote' operation demotes the PostgreSQL server.
         The 'validate-all' operation reports whether the parameters are valid.
         The 'methods' operation reports on the methods $0 supports.
 EOF
   return $OCF_ERR_ARGS
 }
 
 meta_data() {
     cat <<EOF
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="pgsql">
 <version>1.0</version>
 
 <longdesc lang="en">
 Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource.
 </longdesc>
 <shortdesc lang="en">Manages a PostgreSQL database instance</shortdesc>
 
 <parameters>
 <parameter name="pgctl" unique="0" required="0">
 <longdesc lang="en">
 Path to pg_ctl command.
 </longdesc>
 <shortdesc lang="en">pgctl</shortdesc>
 <content type="string" default="${OCF_RESKEY_pgctl_default}" />
 </parameter>
 
 <parameter name="start_opt" unique="0" required="0">
 <longdesc lang="en">
 Start options (-o start_opt in pg_ctl). "-i -p 5432" for example.
 </longdesc>
 <shortdesc lang="en">start_opt</shortdesc>
 <content type="string" default="${OCF_RESKEY_start_opt_default}" />
 
 </parameter>
 <parameter name="ctl_opt" unique="0" required="0">
 <longdesc lang="en">
 Additional pg_ctl options (-w, -W etc..).
 </longdesc>
 <shortdesc lang="en">ctl_opt</shortdesc>
 <content type="string" default="${OCF_RESKEY_ctl_opt_default}" />
 </parameter>
 
 <parameter name="psql" unique="0" required="0">
 <longdesc lang="en">
 Path to psql command.
 </longdesc>
 <shortdesc lang="en">psql</shortdesc>
 <content type="string" default="${OCF_RESKEY_psql_default}" />
 </parameter>
 
 <parameter name="pgdata" unique="0" required="0">
 <longdesc lang="en">
 Path to PostgreSQL data directory.
 </longdesc>
 <shortdesc lang="en">pgdata</shortdesc>
 <content type="string" default="${OCF_RESKEY_pgdata_default}" />
 </parameter>
 
 <parameter name="pgdba" unique="0" required="0">
 <longdesc lang="en">
 User that owns PostgreSQL.
 </longdesc>
 <shortdesc lang="en">pgdba</shortdesc>
 <content type="string" default="${OCF_RESKEY_pgdba_default}" />
 </parameter>
 
 <parameter name="pghost" unique="0" required="0">
 <longdesc lang="en">
 Hostname/IP address where PostgreSQL is listening
 </longdesc>
 <shortdesc lang="en">pghost</shortdesc>
 <content type="string" default="${OCF_RESKEY_pghost_default}" />
 </parameter>
 
 <parameter name="pgport" unique="0" required="0">
 <longdesc lang="en">
 Port where PostgreSQL is listening
 </longdesc>
 <shortdesc lang="en">pgport</shortdesc>
 <content type="integer" default="${OCF_RESKEY_pgport_default}" />
 </parameter>
 
 <parameter name="pglibs" unique="0" required="0">
 <longdesc lang="en">
 Custom location of the Postgres libraries. If not set, the standard location
 will be used.
 </longdesc>
 <shortdesc lang="en">pglibs</shortdesc>
 <content type="string" default="${OCF_RESKEY_pglibs_default}" />
 </parameter>
 
 <parameter name="monitor_user" unique="0" required="0">
 <longdesc lang="en">
 PostgreSQL user that pgsql RA will user for monitor operations. If it's not set
 pgdba user will be used.
 </longdesc>
 <shortdesc lang="en">monitor_user</shortdesc>
 <content type="string" default="${OCF_RESKEY_monitor_user_default}" />
 </parameter>
 
 <parameter name="monitor_password" unique="0" required="0">
 <longdesc lang="en">
 Password for monitor user.
 </longdesc>
 <shortdesc lang="en">monitor_password</shortdesc>
 <content type="string" default="${OCF_RESKEY_monitor_password_default}" />
 </parameter>
 
 <parameter name="monitor_sql" unique="0" required="0">
 <longdesc lang="en">
 SQL script that will be used for monitor operations.
 </longdesc>
 <shortdesc lang="en">monitor_sql</shortdesc>
 <content type="string" default="${OCF_RESKEY_monitor_sql_default}" />
 </parameter>
 
 <parameter name="config" unique="0" required="0">
 <longdesc lang="en">
 Path to the PostgreSQL configuration file for the instance.
 </longdesc>
 <shortdesc lang="en">Configuration file</shortdesc>
 <content type="string" default="${OCF_RESKEY_pgdata}/postgresql.conf" />
 </parameter>
 
 <parameter name="pgdb" unique="0" required="0">
 <longdesc lang="en">
 Database that will be used for monitoring.
 </longdesc>
 <shortdesc lang="en">pgdb</shortdesc>
 <content type="string" default="${OCF_RESKEY_pgdb_default}" />
 </parameter>
 
 <parameter name="logfile" unique="0" required="0">
 <longdesc lang="en">
 Path to PostgreSQL server log output file.
 </longdesc>
 <shortdesc lang="en">logfile</shortdesc>
 <content type="string" default="${OCF_RESKEY_logfile_default}" />
 </parameter>
 
 <parameter name="socketdir" unique="0" required="0">
 <longdesc lang="en">
 Unix socket directory for PostgreSQL.
 
 If you use PostgreSQL 9.3 or higher and define unix_socket_directories in the postgresql.conf, then you must set socketdir to determine which directory is used for psql command.
 </longdesc>
 <shortdesc lang="en">socketdir</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="stop_escalate" unique="0" required="0">
 <longdesc lang="en">
-Number of shutdown retries (using -m fast) before resorting to -m immediate
+Number of seconds to wait for stop (using -m fast) before resorting to -m immediate
 </longdesc>
 <shortdesc lang="en">stop escalation</shortdesc>
 <content type="integer" default="${OCF_RESKEY_stop_escalate_default}" />
 </parameter>
 
 <parameter name="rep_mode" unique="0" required="0">
 <longdesc lang="en">
 Replication mode may be set to "async" or "sync" or "slave".
 They require PostgreSQL 9.1 or later.
 Once set, "async" and "sync" require node_list, master_ip, and
 restore_command parameters,as well as configuring PostgreSQL
 for replication (in postgresql.conf and pg_hba.conf).
 
 "slave" means that RA only makes recovery.conf before starting
 to connect to primary which is running somewhere.
 It dosen't need master/slave setting.
 It requires master_ip restore_command parameters.
 </longdesc>
 <shortdesc lang="en">rep_mode</shortdesc>
 <content type="string" default="${OCF_RESKEY_rep_mode_default}" />
 </parameter>
 
 <parameter name="node_list" unique="0" required="0">
 <longdesc lang="en">
 All node names. Please separate each node name with a space.
 This is required for replication.
 </longdesc>
 <shortdesc lang="en">node list</shortdesc>
 <content type="string" default="${OCF_RESKEY_node_list_default}" />
 </parameter>
 
 <parameter name="restore_command" unique="0" required="0">
 <longdesc lang="en">
 restore_command for recovery.conf.
 This is required for replication.
 </longdesc>
 <shortdesc lang="en">restore_command</shortdesc>
 <content type="string" default="${OCF_RESKEY_restore_command_default}" />
 </parameter>
 
 <parameter name="archive_cleanup_command" unique="0" required="0">
 <longdesc lang="en">
 archive_cleanup_command for recovery.conf.
 This is used for replication and is optional.
 </longdesc>
 <shortdesc lang="en">archive_cleanup_command</shortdesc>
 <content type="string" default="${OCF_RESKEY_archive_cleanup_command_default}" />
 </parameter>
 
 <parameter name="recovery_end_command" unique="0" required="0">
 <longdesc lang="en">
 recovery_end_command for recovery.conf.
 This is used for replication and is optional.
 </longdesc>
 <shortdesc lang="en">recovery_end_command</shortdesc>
 <content type="string" default="${OCF_RESKEY_recovery_end_command_default}" />
 </parameter>
 
 <parameter name="master_ip" unique="0" required="0">
 <longdesc lang="en">
 Master's floating IP address to be connected from hot standby.
 This parameter is used for "primary_conninfo" in recovery.conf.
 This is required for replication.
 </longdesc>
 <shortdesc lang="en">master ip</shortdesc>
 <content type="string" default="${OCF_RESKEY_master_ip_default}" />
 </parameter>
 
 <parameter name="repuser" unique="0" required="0">
 <longdesc lang="en">
 User used to connect to the master server.
 This parameter is used for "primary_conninfo" in recovery.conf.
 This is required for replication.
 </longdesc>
 <shortdesc lang="en">repuser</shortdesc>
 <content type="string" default="${OCF_RESKEY_repuser_default}" />
 </parameter>
 
 <parameter name="primary_conninfo_opt" unique="0" required="0">
 <longdesc lang="en">
 primary_conninfo options of recovery.conf except host, port, user and application_name.
 This is optional for replication.
 </longdesc>
 <shortdesc lang="en">primary_conninfo_opt</shortdesc>
 <content type="string" default="${OCF_RESKEY_primary_conninfo_opt_default}" />
 </parameter>
 
 <parameter name="restart_on_promote" unique="0" required="0">
 <longdesc lang="en">
 If this is true, RA deletes recovery.conf and restarts PostgreSQL
 on promote to keep Timeline ID. It probably makes fail-over slower.
 It's recommended to set on-fail of promote up as fence.
 This is optional for replication.
 </longdesc>
 <shortdesc lang="en">restart_on_promote</shortdesc>
 <content type="boolean" default="${OCF_RESKEY_restart_on_promote_default}" />
 </parameter>
 
 <parameter name="replication_slot_name" unique="0" required="0">
 <longdesc lang="en">
 Set this option when using replication slots.
 Can only use lower case letters, numbers and underscore for replication_slot_name.
 
 When the master node has 1 slave node,one replication slot would be created with the name "replication_slot_name".
 When the master node has 2 or more slave nodes,the replication slots would be created for each node, with the name adding the node name as postfix.
 For example, replication_slot_name is "sample" and 2 slaves which are "node1" and "node2" connect to
 their slots, the slots names are "sample_node1" and "sample_node2".
 If the node name contains a upper case letter, hyphen and dot, those characters will be converted to a lower case letter or an underscore.
 For example, Node-1.example.com to node_1_example_com.
 
 pgsql RA doesn't monitor and delete the repliation slot.
 When the slave node has been disconnected in failure or the like, execute one of the following manually.
 Otherwise it may eventually cause a disk full because the master node will continue to accumulate the unsent WAL.
 1. recover and reconnect the slave node to the master node as soon as possible.
 2. delete the slot on the master node by following psql command.
 $ select pg_drop_replication_slot('replication_slot_name');
 </longdesc>
 <shortdesc lang="en">replication_slot_name</shortdesc>
 <content type="string" default="${OCF_RESKEY_replication_slot_name_default}" />
 </parameter>
 
 <parameter name="tmpdir" unique="0" required="0">
 <longdesc lang="en">
 Path to temporary directory.
 This is optional for replication.
 </longdesc>
 <shortdesc lang="en">tmpdir</shortdesc>
 <content type="string" default="${OCF_RESKEY_tmpdir_default}" />
 </parameter>
 
 <parameter name="xlog_check_count" unique="0" required="0">
 <longdesc lang="en">
 Number of checks of xlog on monitor before promote.
 This is optional for replication.
 </longdesc>
 <shortdesc lang="en">xlog check count</shortdesc>
 <content type="integer" default="${OCF_RESKEY_xlog_check_count_default}" />
 </parameter>
 
 <parameter name="crm_attr_timeout" unique="0" required="0">
 <longdesc lang="en">
 The timeout of crm_attribute forever update command.
 Default value is 5 seconds.
 This is optional for replication.
 </longdesc>
 <shortdesc lang="en">The timeout of crm_attribute forever update command.</shortdesc>
 <content type="integer" default="${OCF_RESKEY_crm_attr_timeout_default}" />
 </parameter>
 
 <parameter name="stop_escalate_in_slave" unique="0" required="0">
 <longdesc lang="en">
-Number of shutdown retries (using -m fast) before resorting to -m immediate
+Number of seconds to wait for stop (using -m fast) before resorting to -m immediate
 in slave state.
 This is optional for replication.
 </longdesc>
 <shortdesc lang="en">stop escalation_in_slave</shortdesc>
 <content type="integer" default="${OCF_RESKEY_stop_escalate_in_slave_default}" />
 </parameter>
 
 <parameter name="check_wal_receiver" unique="0" required="0">
 <longdesc lang="en">
 If this is true, RA checks wal_receiver process on monitor
 and notifies its status using "(resource name)-receiver-status" attribute.
 It's useful for checking whether PostgreSQL (hot standby) connects to primary.
 The attribute shows status as "normal" or "normal (master)" or "ERROR".
 Note that if you configure PostgreSQL as master/slave resource, then
 wal receiver is not running in the master and the attribute shows status as
 "normal (master)" consistently because it is normal status.
 </longdesc>
 <shortdesc lang="en">check_wal_receiver</shortdesc>
 <content type="boolean" default="${OCF_RESKEY_check_wal_receiver_default}" />
 </parameter>
 </parameters>
 
 <actions>
 <action name="start" timeout="120" />
 <action name="stop" timeout="120" />
 <action name="status" timeout="60" />
 <action name="monitor" depth="0" timeout="30" interval="30"/>
 <action name="monitor" depth="0" timeout="30" interval="29" role="Master" />
 <action name="promote" timeout="120" />
 <action name="demote" timeout="120" />
 <action name="notify"   timeout="90" />
 <action name="meta-data" timeout="5" />
 <action name="validate-all" timeout="5" />
 <action name="methods" timeout="5" />
 </actions>
 </resource-agent>
 EOF
 }
 
 
 #
 #   Run the given command in the Resource owner environment...
 #
 runasowner() {
     local quietrun=""
     local loglevel="-err"
     local var
 
     for var in 1 2
     do
         case "$1" in
             "-q")
                 quietrun="-q"
                 shift 1;;
             "warn"|"err")
                 loglevel="-$1"
                 shift 1;;
             *)
                 ;;
         esac
     done
 
     ocf_run $quietrun $loglevel $SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*"
 }
 
 #
 #       Shell escape
 #
 escape_string() {
     echo "$*" | sed -e "s/'/'\\\\''/g"
 }
 
 
 #
 # methods: What methods/operations do we support?
 #
 
 pgsql_methods() {
     cat <<EOF
     start
     stop
     status
     monitor
     promote
     demote
     notify
     methods
     meta-data
     validate-all
 EOF
 }
 
 
 # Execulte SQL and return the result.
 exec_sql() {
     local sql="$1"
     local output
     local rc
 
     output=`$SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
                 $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
                 -Atc \"$sql\""`
     rc=$?
 
     echo $output
     return $rc
 }
 
 
 # pgsql_real_start: start the local PostgreSQL instance with pg_ctl and wait
 # until pgsql_real_monitor reports it as running.
 #
 # Returns:
 #   the last monitor result ($OCF_SUCCESS or $OCF_RUNNING_MASTER) once started
 #   $OCF_SUCCESS      already running and not in replication mode
 #   $OCF_ERR_GENERIC  already running in replication mode, recovery.conf could
 #                     not be created, pg_ctl failed, or slot creation failed
 #   $OCF_ERR_PERM     check_log_file rejected $OCF_RESKEY_logfile
 pgsql_real_start() {
     local pgctl_options
     local postgres_options
     local rc
 
     # An instance that is already up is only acceptable outside replication mode.
     if pgsql_status; then
         ocf_log info "PostgreSQL is already running. PID=`cat $PIDFILE`"
         if is_replication; then
             return $OCF_ERR_GENERIC
         else
             return $OCF_SUCCESS
         fi
     fi
 
     # Remove postmaster.pid if it exists
     rm -f $PIDFILE
 
     # Remove backup_label if it exists
     if [ -f $BACKUPLABEL ] && ! is_replication; then
         ocf_log info "Removing $BACKUPLABEL. The previous backup might have failed."
         rm -f $BACKUPLABEL
     fi
 
     # Check if we need to create a log file
     if ! check_log_file $OCF_RESKEY_logfile
     then
         ocf_exit_reason "PostgreSQL can't write to the log file: $OCF_RESKEY_logfile"
         return $OCF_ERR_PERM
     fi
 
     # Check socket directory
     if [ -n "$OCF_RESKEY_socketdir" ]
     then
         check_socket_dir
     fi
 
     # rep_mode "slave" is not handled by is_replication, so its recovery.conf
     # is regenerated here on every start.
     if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
         rm -f $RECOVERY_CONF
         make_recovery_conf || return $OCF_ERR_GENERIC
     fi
 
     # Set options passed to pg_ctl
     pgctl_options="$OCF_RESKEY_ctl_opt -D $OCF_RESKEY_pgdata -l $OCF_RESKEY_logfile"
 
     # Set options passed to the PostgreSQL server process
     postgres_options="-c config_file=${OCF_RESKEY_config}"
 
     if [ -n "$OCF_RESKEY_pghost" ]; then
         postgres_options="$postgres_options -h $OCF_RESKEY_pghost"
     fi
     if [ -n "$OCF_RESKEY_start_opt" ]; then
         postgres_options="$postgres_options $OCF_RESKEY_start_opt"
     fi
 
     # Tack pass-through options onto pg_ctl options
     pgctl_options="$pgctl_options -o '$postgres_options'"
 
     # Invoke pg_ctl
     # PGUSER and PGPASSWORD are unset in the owner's shell before pg_ctl runs.
     runasowner "unset PGUSER; unset PGPASSWORD; $OCF_RESKEY_pgctl $pgctl_options start"
 
     if [ $? -eq 0 ]; then
         # Probably started.....
         ocf_log info "PostgreSQL start command sent."
     else
         ocf_exit_reason "Can't start PostgreSQL."
         return $OCF_ERR_GENERIC
     fi
 
     # Poll once per second until the monitor succeeds. The loop has no
     # timeout of its own; monitor failures are logged at "warn" level.
     while :
     do
         pgsql_real_monitor warn
         rc=$?
         if [ $rc -eq $OCF_SUCCESS -o $rc -eq $OCF_RUNNING_MASTER ]; then
             break;
         fi
         sleep 1
         ocf_log debug "PostgreSQL still hasn't started yet. Waiting..."
     done
 
     # create replication slot on the master and slave nodes.
     # creating slot on the slave node is in preparation for failover.
     if use_replication_slot; then
         create_replication_slot
         if [ $? -eq $OCF_ERR_GENERIC ]; then
             ocf_exit_reason "PostgreSQL can't create replication_slot."
             return $OCF_ERR_GENERIC
         fi
     fi
 
     ocf_log info "PostgreSQL is started."
     return $rc
 }
 
 # pgsql_replication_start: start PostgreSQL as a hot standby.
 # Resets this node's cluster attributes, regenerates recovery.conf, refuses
 # to start while $PGSQL_LOCK exists, then calls pgsql_real_start.
 # Returns $OCF_SUCCESS on success, $OCF_ERR_GENERIC otherwise.
 pgsql_replication_start() {
     local rc
 
     # initializing for replication
     change_pgsql_status "$NODENAME" "STOP"
     delete_master_baseline
     exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE
     rm -f ${XLOG_NOTE_FILE}.* $REP_MODE_CONF $RECOVERY_CONF
     make_recovery_conf && delete_xlog_location && set_async_mode_all \
         || return $OCF_ERR_GENERIC
 
     # A leftover lock file means this node may hold inconsistent data.
     if [ -f $PGSQL_LOCK ]; then
         ocf_exit_reason "My data may be inconsistent. You have to remove $PGSQL_LOCK file to force start."
         return $OCF_ERR_GENERIC
     fi
 
     # start
     pgsql_real_start
     rc=$?
     [ $rc -eq $OCF_SUCCESS ] || return $OCF_ERR_GENERIC
 
     change_pgsql_status "$NODENAME" "HS:alone"
     return $OCF_SUCCESS
 }
 
 # pgsql_start: entry point for "start".
 # Dispatches to pgsql_replication_start in replication mode and to
 # pgsql_real_start otherwise; the chosen function's status is returned.
 pgsql_start() {
     if is_replication; then
         pgsql_replication_start
     else
         pgsql_real_start
     fi
     return $?
 }
 
 # pgsql_promote: promote the local hot standby to primary.
 #
 # Marks every other node as DISCONNECT / not promotable, creates $PGSQL_LOCK,
 # records the master baseline, then either restarts PostgreSQL without
 # recovery.conf (restart_on_promote) or runs "pg_ctl promote" and waits for
 # the monitor to report a running master.
 #
 # Returns $OCF_SUCCESS on success, $OCF_ERR_CONFIGURED outside replication
 # mode, $OCF_ERR_GENERIC on failure.
 pgsql_promote() {
     local target
     local rc
 
     if ! is_replication; then
         ocf_exit_reason "Not in a replication mode."
         return $OCF_ERR_CONFIGURED
     fi
     rm -f ${XLOG_NOTE_FILE}.*
 
     # Demote the standing of every peer before this node takes over.
     for target in $NODE_LIST; do
         [ "$target" = "$NODENAME" ] && continue
         change_data_status "$target" "DISCONNECT"
         change_master_score "$target" "$CAN_NOT_PROMOTE"
     done
 
     ocf_log info "Creating $PGSQL_LOCK."
     touch $PGSQL_LOCK
     show_master_baseline
 
     if ocf_is_true ${OCF_RESKEY_restart_on_promote}; then
         ocf_log info "Restarting PostgreSQL instead of promote."
         #stop : this function returns $OCF_SUCCESS only.
         pgsql_real_stop slave
         rm -f $RECOVERY_CONF
         pgsql_real_start
         rc=$?
         if [ $rc -ne $OCF_RUNNING_MASTER ]; then
             ocf_exit_reason "Can't start PostgreSQL as primary on promote."
             # Any result other than "running (as standby)" is recorded as STOP.
             if [ $rc -ne $OCF_SUCCESS ]; then
                 change_pgsql_status "$NODENAME" "STOP"
             fi
             return $OCF_ERR_GENERIC
         fi
     else
         runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata promote"
         if [ $? -eq 0 ]; then
             ocf_log info "PostgreSQL promote command sent."
         else
             ocf_exit_reason "Can't promote PostgreSQL."
             return $OCF_ERR_GENERIC
         fi
 
         # Poll once per second until the monitor reports a master; a generic
         # monitor error aborts the promotion. The loop has no timeout of its own.
         while :
         do
             pgsql_real_monitor warn
             rc=$?
             if [ $rc -eq $OCF_RUNNING_MASTER ]; then
                 break;
             elif [ $rc -eq $OCF_ERR_GENERIC ]; then
                 ocf_exit_reason "Can't promote PostgreSQL."
                 return $rc
             fi
             sleep 1
             ocf_log debug "PostgreSQL still hasn't promoted yet. Waiting..."
         done
         ocf_log info "PostgreSQL is promoted."
     fi
 
     # Publish this node as the primary holding the latest data.
     change_data_status "$NODENAME" "LATEST"
     exec_with_retry 0 $CRM_MASTER -v $PROMOTE_ME
     change_pgsql_status "$NODENAME" "PRI"
     return $OCF_SUCCESS
 }
 
 # pgsql_demote: demote the local primary by stopping PostgreSQL.
 # Withdraws the master score and baseline first, then stops the server if
 # it is running. Returns $OCF_SUCCESS, $OCF_ERR_CONFIGURED outside
 # replication mode, or the failing status of pgsql_real_stop.
 pgsql_demote() {
     local rc
 
     is_replication || {
         ocf_exit_reason "Not in a replication mode."
         return $OCF_ERR_CONFIGURED
     }
 
     exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE
     delete_master_baseline
 
     if pgsql_status; then
         ocf_log info "Stopping PostgreSQL on demote."
         pgsql_real_stop master
         rc=$?
         if [ "$rc" -ne "$OCF_SUCCESS" ]; then
             # The stop failed, so the real state is not known.
             change_pgsql_status "$NODENAME" "UNKNOWN"
             return $rc
         fi
     else
         ocf_log info "PostgreSQL is already stopped on demote."
     fi
 
     change_pgsql_status "$NODENAME" "STOP"
     return $OCF_SUCCESS
 }
 
 # pgsql_real_stop: stop the local PostgreSQL instance.
 #   $1  optional role: "slave" selects stop_escalate_in_slave as the
 #       escalation delay; "master" additionally allows removal of
 #       $PGSQL_LOCK at the end (see below).
 # First tries "pg_ctl stop -m fast", waits up to the escalation delay, then
 # falls back to "-m immediate" and waits until the monitor reports
 # $OCF_NOT_RUNNING. Every path that returns yields $OCF_SUCCESS.
 pgsql_real_stop() {
     local rc
     local count
     local stop_escalate
 
     # Drop the wal-receiver status attribute before stopping.
     if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
         attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -D -q
     fi
 
     if ! pgsql_status
     then
         #Already stopped
         return $OCF_SUCCESS
     fi
 
     stop_escalate=$OCF_RESKEY_stop_escalate
     if [ "$1" = "slave" ]; then
         stop_escalate="$OCF_RESKEY_stop_escalate_in_slave"
     fi
+    # adjust stop_escalate time when it is longer than the timeout
+    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ] && \
+        [ "$stop_escalate" -ge $((OCF_RESKEY_CRM_meta_timeout/1000)) ]; then
+        stop_escalate=$(((OCF_RESKEY_CRM_meta_timeout/1000) - 10))
+        ocf_log info "stop_escalate(or stop_escalate_in_slave) time is adjusted to ${stop_escalate} based on the configured timeout."
+    fi
 
     # Stop PostgreSQL, do not wait for clients to disconnect
     # A delay of 0 or less skips the fast shutdown and the wait loop below.
     if [ $stop_escalate -gt 0 ]; then
-            runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m fast"
+            runasowner "$OCF_RESKEY_pgctl -W -D $OCF_RESKEY_pgdata stop -m fast"
     fi
 
     # stop waiting
     count=0
     while [ $count -lt $stop_escalate ]
     do
         if ! pgsql_status
         then
             #PostgreSQL stopped
             break;
         fi
         count=`expr $count + 1`
         sleep 1
     done
 
     if pgsql_status
     then
         #PostgreSQL is still up. Use another shutdown mode.
         ocf_log info "PostgreSQL failed to stop after ${stop_escalate}s using -m fast. Trying -m immediate..."
-        runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m immediate"
+        runasowner "$OCF_RESKEY_pgctl -W -D $OCF_RESKEY_pgdata stop -m immediate"
     fi
 
     # Poll once per second until the monitor sees the server as down.
     # The loop has no timeout of its own.
     while :
     do
         pgsql_real_monitor
         rc=$?
         if [ $rc -eq $OCF_NOT_RUNNING ]; then
             # An unnecessary debug log is prevented.
             break;
         fi
         sleep 1
         ocf_log debug "PostgreSQL still hasn't stopped yet. Waiting..."
     done
 
     # Remove postmaster.pid if it exists
     rm -f $PIDFILE
 
     # The lock is removed only for a master whose notify slave list is a
     # single space -- presumably meaning no slave is attached; confirm
     # against the cluster manager's notify variables.
     if  [ "$1" = "master" -a "$OCF_RESKEY_CRM_meta_notify_slave_uname" = " " ]; then
         ocf_log info "Removing $PGSQL_LOCK."
         rm -f $PGSQL_LOCK
     fi
     return $OCF_SUCCESS
 }
 
 # pgsql_replication_stop: stop PostgreSQL in replication mode.
 # Withdraws the master score and xlog location, stops the server with the
 # "slave" escalation delay and records the resulting pgsql-status.
 # Returns $OCF_SUCCESS, or the failing status of pgsql_real_stop.
 pgsql_replication_stop() {
     local rc
 
     exec_with_retry 5 $CRM_MASTER -v $CAN_NOT_PROMOTE
     delete_xlog_location
 
     if pgsql_status; then
         pgsql_real_stop slave
         rc=$?
         if [ $rc -ne $OCF_SUCCESS ]; then
             change_pgsql_status "$NODENAME" "UNKNOWN"
             return $rc
         fi
 
         change_pgsql_status "$NODENAME" "STOP"
         set_async_mode_all
         delete_master_baseline
         return $OCF_SUCCESS
     fi
 
     # Nothing to stop: only the status attribute is updated.
     ocf_log info "PostgreSQL is already stopped."
     change_pgsql_status "$NODENAME" "STOP"
     return $OCF_SUCCESS
 }
 
 # pgsql_stop: entry point for "stop".
 # Dispatches to pgsql_replication_stop in replication mode and to
 # pgsql_real_stop (without a role argument) otherwise; the chosen
 # function's status is returned.
 pgsql_stop() {
     if is_replication; then
         pgsql_replication_stop
     else
         pgsql_real_stop
     fi
     return $?
 }
 
 #
 # pgsql_status: is PostgreSQL up?
 # Succeeds when $PIDFILE exists and "kill -s 0" on the PID in its first
 # line succeeds when run as the database owner. Sets the global PID.
 #
 
 pgsql_status() {
     # No PID file
     [ -f $PIDFILE ] || return 1
 
     PID=$(head -n 1 $PIDFILE)
     runasowner "kill -s 0 $PID >/dev/null 2>&1"
 }
 
 # pgsql_wal_receiver_status: publish the WAL receiver state as a node attribute.
 #   $1  result code of the preceding pgsql_real_monitor call
 # Sets $PGSQL_WAL_RECEIVER_STATUS_ATTR to "normal" when a "wal receiver
 # process" whose parent PID is the postmaster is found in "ps -ef",
 # to "normal (master)" when $1 is $OCF_RUNNING_MASTER, and to "ERROR"
 # otherwise. Returns 0 in the first two cases and 1 in the last.
 pgsql_wal_receiver_status() {
     local PID
     local receiver_parent_pids
     local pgsql_real_monitor_status=$1
 
     PID=$(head -n 1 $PIDFILE)
     # Third column of "ps -ef" after squeezing spaces is taken as the parent PID.
     receiver_parent_pids=$(ps -ef | tr -s " " | grep "[w]al receiver process" | cut -d " " -f 3)
 
     if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then
         attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" -q
         return 0
     elif [ $pgsql_real_monitor_status -eq "$OCF_RUNNING_MASTER" ]; then
         attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal (master)" -q
         return 0
     fi
 
     attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" -q
     ocf_log warn "wal receiver process is not running"
     return 1
 }
 
 #
 # pgsql_real_monitor: probe the local PostgreSQL instance.
 #   $1  log level used when reporting a psql failure (default: err)
 #
 # Returns:
 #   $OCF_NOT_RUNNING     pgsql_status reports the server as down
 #   $OCF_SUCCESS         running (hot standby in replication mode)
 #   $OCF_RUNNING_MASTER  running as primary in replication mode
 #   $OCF_ERR_GENERIC     a query failed or returned an unexpected value
 #
 # The monitor SQL is escaped into a local variable. $OCF_RESKEY_monitor_sql
 # itself is left untouched so that repeated calls (for example from the
 # start/stop wait loops) do not escape an already escaped string again.
 #
 
 pgsql_real_monitor() {
     local loglevel
     local rc
     local output
     local escaped_monitor_sql
 
     # Set the log level of the error message
     loglevel=${1:-err}
 
     if ! pgsql_status
     then
         ocf_log info "PostgreSQL is down"
         return $OCF_NOT_RUNNING
     fi
 
     if is_replication; then
         #Check replication state
         output=`exec_sql "${CHECK_MS_SQL}"`
         rc=$?
 
         if [ $rc -ne  0 ]; then
             report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status."
             return $OCF_ERR_GENERIC
         fi
 
         case "$output" in
             f)  ocf_log debug "PostgreSQL is running as a primary."
                 # With the default monitor SQL nothing more is checked; a
                 # custom monitor SQL falls through and is run below.
                 if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then
                     return $OCF_RUNNING_MASTER
                 fi
                 ;;
 
             t)  ocf_log debug "PostgreSQL is running as a hot standby."
                 return $OCF_SUCCESS;;
 
             *)  ocf_exit_reason "$CHECK_MS_SQL output is $output"
                 return $OCF_ERR_GENERIC;;
         esac
     fi
 
     # The SQL is embedded in single quotes on the psql command line.
     escaped_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"`
     runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \
                   -c '$escaped_monitor_sql'"
     rc=$?
     if [ $rc -ne  0 ]; then
         report_psql_error $rc $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running."
         return $OCF_ERR_GENERIC
     fi
 
     if is_replication; then
         return $OCF_RUNNING_MASTER
     fi
     return $OCF_SUCCESS
 }
 
 # pgsql_replication_monitor: update cluster attributes after a monitor run.
 #   $1  result code of pgsql_real_monitor
 # Any code other than $OCF_SUCCESS / $OCF_RUNNING_MASTER is returned as is.
 # On the master the data/pgsql status attributes are refreshed and the
 # slaves are evaluated through control_slave_status. On a slave the
 # presence of a master is looked up in the print_crm_mon output.
 # Returns $1, or $OCF_ERR_GENERIC when control_slave_status fails.
 pgsql_replication_monitor() {
     local rc
     # Kept local so the lookup below does not overwrite a caller's "output".
     local output
 
     rc=$1
     if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then
         return $rc
     fi
     # If I am Master
     if [ $rc -eq $OCF_RUNNING_MASTER ]; then
         change_data_status "$NODENAME" "LATEST"
         change_pgsql_status "$NODENAME" "PRI"
         control_slave_status || return $OCF_ERR_GENERIC
         # set_sync_mode flips RE_CONTROL_SLAVE to "true" when it switched a
         # node to sync mode; the slaves are then evaluated a second time.
         if [ "$RE_CONTROL_SLAVE" = "true" ]; then
             sleep 2
             ocf_log info "re-controlling slave status."
             RE_CONTROL_SLAVE="none"
             control_slave_status || return $OCF_ERR_GENERIC
         fi
         return $rc
     fi
 
     # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor,
     # so I will get master node name using crm_mon -n
     print_crm_mon | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):].*Master"
     if [ $? -ne 0 ] ; then
         # If I am Slave and Master is not exist
         ocf_log info "Master does not exist."
         change_pgsql_status "$NODENAME" "HS:alone"
         have_master_right
         if [ $? -eq 0 ]; then
             rm -f ${XLOG_NOTE_FILE}.*
         fi
     else
         # A master exists; a slave marked DISCONNECT is shown as standing alone.
         output=`exec_with_retry 0 $CRM_ATTR_FOREVER -N "$NODENAME" \
                 -n "$PGSQL_DATA_STATUS_ATTR" -G -q`
         if [ "$output" = "DISCONNECT" ]; then
             change_pgsql_status "$NODENAME" "HS:alone"
         fi
     fi
     return $rc
 }
 
 # pgsql_monitor: entry point for "monitor".
 # Runs pgsql_real_monitor, optionally publishes the WAL receiver state, and
 # in replication mode hands the result to pgsql_replication_monitor, whose
 # status is then returned. Outside replication mode the raw monitor result
 # is returned.
 pgsql_monitor() {
     local rc
 
     pgsql_real_monitor
     rc=$?
 
     if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
         pgsql_wal_receiver_status $rc
     fi
 
     is_replication || return $rc
 
     pgsql_replication_monitor $rc
     return $?
 }
 
 # pgsql_post_demote: handle the post-demote notification.
 # When another node was demoted and this node is not listed among the
 # masters, the master baseline is recorded and pgsql-status is set to
 # "HS:alone". Sets the global DEMOTE_NODE. Always returns $OCF_SUCCESS.
 #
 # Master names are compared as whole, lower-cased words. A pattern match
 # would treat node "pg1" as present when only "pg10" is a master, and
 # would print the matching line on stdout.
 pgsql_post_demote() {
     local master_uname
     local listed_as_master=false
 
     DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
     ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE"
     if [ "$DEMOTE_NODE" != "$NODENAME" ]; then
         for master_uname in `echo $OCF_RESKEY_CRM_meta_notify_master_uname | tr '[A-Z]' '[a-z]'`; do
             if [ "$master_uname" = "$NODENAME" ]; then
                 listed_as_master=true
                 break
             fi
         done
         if [ "$listed_as_master" = "false" ]; then
             show_master_baseline
             change_pgsql_status "$NODENAME" "HS:alone"
         fi
     fi
     return $OCF_SUCCESS
 }
 
 # pgsql_pre_promote: handle the pre-promote notification.
 # With three or more nodes in "sync" mode, and when another node is about
 # to be promoted, this node compares its own master baseline with that of
 # the promotion candidate. If its own baseline is not the older of the two
 # (plain string sort), the resource's fail count is set to INFINITY and
 # $OCF_ERR_GENERIC is returned. Otherwise returns $OCF_SUCCESS.
 # Sets the global PROMOTE_NODE.
 pgsql_pre_promote() {
     local master_baseline
     local my_master_baseline
     local cmp_location
     local number_of_nodes
 
     # If my data is newer than new master's one, I fail my resource.
     PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
                   sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
     number_of_nodes=`echo $NODE_LIST | wc -w`
     if [ $number_of_nodes -ge 3 -a \
          "$OCF_RESKEY_rep_mode" = "sync" -a \
          "$PROMOTE_NODE" != "$NODENAME" ]; then
         master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \
                          "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null`
         # The comparison is skipped when the candidate has no baseline.
         if [ $? -eq 0 ]; then
             my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \
                                 "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null`
             # get older location
             # The values are passed as printf arguments, never as the format.
             cmp_location=`printf '%s\n%s\n' "$master_baseline" "$my_master_baseline" |\
                           sort | head -1`
             if [ "$cmp_location" != "$my_master_baseline" ]; then
                 ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline"
                 exec_with_retry 0 $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY
                 return $OCF_ERR_GENERIC
             fi
         fi
     fi
     return $OCF_SUCCESS
 }
 
 # pgsql_notify: entry point for "notify".
 # Does nothing outside replication mode. Otherwise dispatches on the
 # notification type and operation:
 #   pre  promote      -> pgsql_pre_promote (its status is returned)
 #   post promote      -> drop the xlog location; drop the master baseline
 #                        unless this node is the promoted one
 #   post demote       -> pgsql_post_demote (its status is returned)
 #   post start|stop   -> on the master, re-evaluate the slaves
 # Every other combination returns $OCF_SUCCESS.
 # Sets the globals PROMOTE_NODE / MASTER_NODE in the respective branches.
 pgsql_notify() {
     local type="${OCF_RESKEY_CRM_meta_notify_type}"
     local op="${OCF_RESKEY_CRM_meta_notify_operation}"
     local rc
 
     is_replication || return $OCF_SUCCESS
 
     ocf_log debug "notify: ${type} for ${op}"
     case "${type}:${op}" in
         pre:promote)
             pgsql_pre_promote
             return $?
             ;;
         post:promote)
             delete_xlog_location
             PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \
                           sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
             if [ "$PROMOTE_NODE" != "$NODENAME" ]; then
                 delete_master_baseline
             fi
             return $OCF_SUCCESS
             ;;
         post:demote)
             pgsql_post_demote
             return $?
             ;;
         post:start|post:stop)
             MASTER_NODE=`echo $OCF_RESKEY_CRM_meta_notify_master_uname | \
                           sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'`
             if [ "$NODENAME" = "$MASTER_NODE" ]; then
                 control_slave_status
             fi
             return $OCF_SUCCESS
             ;;
     esac
     return $OCF_SUCCESS
 }
 
 # control_slave_status: run on the master to classify every other node.
 # Queries $CHECK_REPLICATION_STATE_SQL and, for each node in $NODE_LIST
 # except this one, updates the node's data status, master score and
 # pgsql-status according to its replication state. A node without a row in
 # the query result is treated as "DISCONNECT".
 # Returns 0 on success, 1 when the replication state cannot be queried.
 control_slave_status() {
     local rc
     local data_status
     local target
     local all_data_status
     local tmp_data_status
     local number_of_nodes
 
     all_data_status=`exec_sql "${CHECK_REPLICATION_STATE_SQL}"`
     rc=$?
 
     if [ $rc -eq 0 ]; then
         if [ -n "$all_data_status" ]; then
             all_data_status=`echo $all_data_status | sed "s/\n/ /g"`
         fi
     else
         report_psql_error $rc err "Can't get PostgreSQL replication status."
         return 1
     fi
 
     number_of_nodes=`echo $NODE_LIST | wc -w`
     for target in $NODE_LIST; do
         if [ "$target" = "$NODENAME" ]; then
             continue
         fi
 
         # Each word of the result is matched as "<node>|<field 2>|<field 3>";
         # fields 2 and 3 together form the data status.
         data_status="DISCONNECT"
         if [ -n "$all_data_status" ]; then
             for tmp_data_status in $all_data_status; do
                 if ! echo $tmp_data_status | grep -q "^${target}|"; then
                     continue
                 fi
                 data_status=`echo $tmp_data_status | cut -d "|" -f 2,3`
                 ocf_log debug "node_name and data_status is $tmp_data_status"
                 break
             done
         fi
 
         case "$data_status" in
             "STREAMING|SYNC")
                 change_data_status "$target" "$data_status"
                 change_master_score "$target" "$CAN_PROMOTE"
                 change_pgsql_status "$target" "HS:sync"
                 ;;
             "STREAMING|ASYNC")
                 change_data_status "$target" "$data_status"
                 if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
                     # In sync mode an async standby is switched to sync first;
                     # it is not promotable until that has taken effect.
                     change_master_score "$target" "$CAN_NOT_PROMOTE"
                     set_sync_mode "$target"
                 else
                     if [ $number_of_nodes -le 2 ]; then
                         change_master_score "$target" "$CAN_PROMOTE"
                     else
                         # I can't determine which slave's data is newest in async mode.
                         change_master_score "$target" "$CAN_NOT_PROMOTE"
                     fi
                 fi
                 change_pgsql_status "$target" "HS:async"
                 ;;
             "STREAMING|POTENTIAL")
                 change_data_status "$target" "$data_status"
                 change_master_score "$target" "$CAN_NOT_PROMOTE"
                 change_pgsql_status "$target" "HS:potential"
                 ;;
             "DISCONNECT")
                 # pgsql-status of a disconnected node is left unchanged here.
                 change_data_status "$target" "$data_status"
                 change_master_score "$target" "$CAN_NOT_PROMOTE"
                 if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
                     set_async_mode "$target"
                 fi
                 ;;
             *)
                 # Any other state: connected, but not streaming.
                 change_data_status "$target" "$data_status"
                 change_master_score "$target" "$CAN_NOT_PROMOTE"
                 if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
                     set_async_mode "$target"
                 fi
                 change_pgsql_status "$target" "HS:connected"
                 ;;
         esac
     done
     return 0
 }
 
 # have_master_right: decide whether this slave may ask to be promoted.
 #
 # The node must not hold out-of-date data, and its xlog location must be the
 # newest among all nodes after the collected locations stayed identical for
 # $OCF_RESKEY_xlog_check_count consecutive calls. The per-call snapshots are
 # kept in ${XLOG_NOTE_FILE}.<n>.
 #
 # Returns 0 when the master score was raised to $PROMOTE_ME, 1 otherwise.
 # Exits the whole script with $OCF_ERR_GENERIC when the own xlog location
 # cannot be published.
 have_master_right() {
     local old
     local new
     local output
     local data_status
     local node
     local mylocation
     local count
     local newestXlog
     local oldfile
     local newfile
 
     ocf_log debug "Checking if I have a master right."
 
     # An empty data status is accepted; otherwise it must be one of the
     # values allowed for the configured rep_mode.
     data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \
                  "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null`
     if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
         if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
              "$data_status" != "LATEST" ]; then
             ocf_log warn "My data is out-of-date. status=$data_status"
             return 1
         fi
     else
         if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \
              "$data_status" != "STREAMING|ASYNC" -a \
              "$data_status" != "LATEST" ]; then
             ocf_log warn "My data is out-of-date. status=$data_status"
             return 1
         fi
     fi
     ocf_log info "My data status=$data_status."
 
     show_xlog_location
     if [ $? -ne 0 ]; then
         ocf_exit_reason "Failed to show my xlog location."
         exit $OCF_ERR_GENERIC
     fi
 
     # "old" becomes the highest index n (starting at 1) for which the note
     # files .1 .. .n all exist; it stays 0 when .1 is missing.
     old=0
     for count in `seq $OCF_RESKEY_xlog_check_count`; do
        if [ -f ${XLOG_NOTE_FILE}.$count ]; then
            old=$count
            continue
        fi
        break
     done
     new=`expr $old + 1`
 
     # get xlog locations of all nodes
     for node in ${NODE_LIST}; do
         output=`$CRM_ATTR_REBOOT -N "$node" -n \
                 "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null`
         if [ $? -ne 0 ]; then
             ocf_log warn "Can't get $node xlog location."
             continue
         else
             ocf_log info "$node xlog location : $output"
             echo "$node $output" >> ${XLOG_NOTE_FILE}.${new}
             if [ "$node" = "$NODENAME" ]; then
                 mylocation=$output
             fi
         fi
     done
 
     # Any change between the previous and the current snapshot restarts the
     # count, with the current snapshot stored as index 0.
     oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null`
     newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null`
     if [ "$oldfile" != "$newfile" ]; then
         # reset counter
         rm -f ${XLOG_NOTE_FILE}.*
         printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
         return 1
     fi
 
     if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then
         # Reverse sort on the location column; the first line is the newest.
         newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \
                     head -1 | cut -d " " -f 2`
         if [ "$newestXlog" = "$mylocation" ]; then
             ocf_log info "I have a master right."
             exec_with_retry 5 $CRM_MASTER -v $PROMOTE_ME
             return 0
         fi
         change_data_status "$NODENAME" "DISCONNECT"
         ocf_log info "I don't have correct master data."
         # reset counter
         rm -f ${XLOG_NOTE_FILE}.*
         printf "$newfile\n" > ${XLOG_NOTE_FILE}.0
     fi
 
     return 1
 }
 
 # is_replication: succeed unless rep_mode is "none" or "slave".
 is_replication() {
     case "$OCF_RESKEY_rep_mode" in
         none|slave) return 1 ;;
         *)          return 0 ;;
     esac
 }
 
 # use_replication_slot: succeed when a replication slot name is configured.
 use_replication_slot() {
     [ -n "$OCF_RESKEY_replication_slot_name" ]
 }
 
 # create_replication_slot_name: print the replication slot names this node
 # has to provide, separated by single spaces.
 #
 # With two nodes or fewer there is at most one slave, so the configured
 # name ($OCF_RESKEY_replication_slot_name) is used as is.
 # With three or more nodes every other node gets its own slot, named
 # "<configured name>_<node>", so the names stay unique.
 create_replication_slot_name() {
     local node_count=0
     local peer
     local slot_suffix
     local slot_names=""
 
     [ -z "$NODE_LIST" ] || node_count=$(echo $NODE_LIST | wc -w)
 
     if [ $node_count -gt 2 ]; then
         for peer in $NODE_LIST; do
             [ "$peer" = "$NODENAME" ] && continue
             # Upper case, "-" and "." are not allowed in a slot name: upper
             # case is meant to become lower case, "-" and "." to become "_".
             # NOTE(review): the first set ends in ".-]", which tr may read as
             # a character range -- confirm the mapping for names with digits.
             slot_suffix=$(echo "$peer" | tr '[A-Z.-]' '[a-z__]')
             slot_names="$slot_names ${OCF_RESKEY_replication_slot_name}_${slot_suffix}"
         done
     else
         slot_names="$OCF_RESKEY_replication_slot_name"
     fi
 
     # Unquoted on purpose: drops the leading separator.
     echo $slot_names
 }
 
 # create_replication_slot: (re)create every physical replication slot
 # returned by create_replication_slot_name. A slot that already exists is
 # dropped first so it starts from a clean state.
 # Returns 0 on success, $OCF_ERR_GENERIC when a drop or create query fails.
 create_replication_slot() {
     local replication_slot_name
     local replication_slot_name_list
     local output
     local rc
     local CREATE_REPLICATION_SLOT_sql
     local DELETE_REPLICATION_SLOT_sql
 
     replication_slot_name_list=`create_replication_slot_name`
     ocf_log debug "replication slot names are $replication_slot_name_list."
 
     for replication_slot_name in $replication_slot_name_list
     do
         # If the same name slot is already exists, initialize(delete and create) the slot.
         # The substitution is quoted: when the check prints nothing (for
         # example because the query failed) an unquoted word would vanish
         # and "[" would abort with a syntax error instead of comparing.
         if [ "`check_replication_slot $replication_slot_name`" = "1" ]; then
             DELETE_REPLICATION_SLOT_sql="SELECT pg_drop_replication_slot('$replication_slot_name');"
             output=`exec_sql "$DELETE_REPLICATION_SLOT_sql"`
             rc=$?
 
             if [ $rc -eq 0 ]; then
                 ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)."
             else
                 ocf_exit_reason "$output"
                 return $OCF_ERR_GENERIC
             fi
         fi
 
         CREATE_REPLICATION_SLOT_sql="SELECT pg_create_physical_replication_slot('$replication_slot_name');"
         output=`exec_sql "$CREATE_REPLICATION_SLOT_sql"`
         rc=$?
 
         if [ $rc -eq 0 ]; then
             ocf_log info "PostgreSQL creates the replication slot($replication_slot_name)."
         else
             ocf_exit_reason "$output"
             return $OCF_ERR_GENERIC
         fi
     done
 
     return 0
 }
 
 # check_replication_slot: print how many replication slots carry the given
 # name, as reported by pg_replication_slots.
 #   $1  slot name
 check_replication_slot(){
     local slot=$1
     local count_sql="SELECT count(*) FROM pg_replication_slots WHERE slot_name = '$slot'"
     local slot_count
 
     slot_count=$(exec_sql "$count_sql")
     echo "$slot_count"
 }
 
 # get_my_location: print this node's newest xlog location.
 # Runs $CHECK_XLOG_LOC_SQL, whose output is read as "<replay>|<receive>".
 # Each location of the form "X/Y" is rewritten as X and Y padded to eight
 # characters with leading zeros and concatenated; the greater of the two
 # resulting strings (reverse string sort) is printed.
 # Returns 0 on success, 1 when the query fails.
 get_my_location() {
     local rc
     local output
     local replay_loc
     local receive_loc
     local output1
     local output2
     local log1
     local log2
     local newer_location
 
     output=`exec_sql "$CHECK_XLOG_LOC_SQL"`
     rc=$?
 
     if [ $rc -ne 0 ]; then
         report_psql_error $rc err "Can't get my xlog location."
         return 1
     fi
     replay_loc=`echo $output | cut -d "|" -f 1`
     receive_loc=`echo $output | cut -d "|" -f 2`
 
     # printf pads with spaces here; sed then turns the padding into zeros.
     output1=`echo "$replay_loc" | cut -d "/" -f 1`
     output2=`echo "$replay_loc" | cut -d "/" -f 2`
     log1=`printf "%08s\n" $output1 | sed "s/ /0/g"`
     log2=`printf "%08s\n" $output2 | sed "s/ /0/g"`
     replay_loc="${log1}${log2}"
 
     output1=`echo "$receive_loc" | cut -d "/" -f 1`
     output2=`echo "$receive_loc" | cut -d "/" -f 2`
     log1=`printf "%08s\n" $output1 | sed "s/ /0/g"`
     log2=`printf "%08s\n" $output2 | sed "s/ /0/g"`
     receive_loc="${log1}${log2}"
 
     newer_location=`printf "$replay_loc\n$receive_loc" | sort -r | head -1`
     echo "$newer_location"
     return 0
 }
 
 # show_xlog_location: publish this node's xlog location as the
 # $PGSQL_XLOG_LOC_NAME attribute.
 # Returns 1 when the location cannot be determined, otherwise the status
 # of the attribute update.
 show_xlog_location() {
     local location
 
     if ! location=$(get_my_location); then
         return 1
     fi
     exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location"
 }
 
 # delete_xlog_location: delete this node's $PGSQL_XLOG_LOC_NAME attribute.
 # The status of exec_with_retry is returned.
 delete_xlog_location() {
     exec_with_retry 5 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -D
 }
 
 # show_master_baseline: publish this node's current xlog location as the
 # $PGSQL_MASTER_BASELINE attribute.
 # The result of get_my_location is not checked, so an empty value is
 # written when the location cannot be determined.
 show_master_baseline() {
     local rc
     local location
 
     location=`get_my_location`
     ocf_log info "My master baseline : $location."
     exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location"
 }
 
 # delete_master_baseline: delete this node's $PGSQL_MASTER_BASELINE attribute.
 # The status of exec_with_retry is returned.
 delete_master_baseline() {
     exec_with_retry 5 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -D
 }
 
 # set_async_mode_all: in "sync" rep_mode, overwrite $REP_MODE_CONF with an
 # empty synchronous_standby_names so that no standby is synchronous.
 # Does nothing in any other rep_mode.
 # Returns 0 on success or when nothing had to be done, 1 on write failure.
 set_async_mode_all() {
     if [ "$OCF_RESKEY_rep_mode" != "sync" ]; then
         return 0
     fi
 
     ocf_log info "Set all nodes into async mode."
     if ! runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""; then
         ocf_exit_reason "Can't set all nodes into async mode."
         return 1
     fi
     return 0
 }
 
 # set_async_mode: take one node out of synchronous replication.
 #   $1  node name
 # When $REP_MODE_CONF mentions the node (delimited by a comma, a quote or
 # a space on both sides), the file is overwritten with an empty
 # synchronous_standby_names and the configuration is reloaded; the status
 # of the reload is returned. Otherwise nothing is changed and 0 is returned.
 set_async_mode() {
     if ! cat $REP_MODE_CONF |  grep -q -e "[,' ]$1[,' ]"; then
         ocf_log debug "$1 is already in async mode."
         return 0
     fi
 
     ocf_log info "Setup $1 into async mode."
     runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\""
     exec_with_retry 0 reload_conf
 }
 
 # set_sync_mode: make one node the synchronous standby.
 #   $1  node name
 # Nothing is changed when $REP_MODE_CONF already names a standby between
 # its first pair of single quotes. Otherwise the file is overwritten with
 # synchronous_standby_names = '$1', RE_CONTROL_SLAVE is switched from
 # "false" to "true", and the configuration is reloaded.
 set_sync_mode() {
     local sync_node_in_conf
 
     sync_node_in_conf=$(cat $REP_MODE_CONF | cut -d "'" -f 2)
     if [ -z "$sync_node_in_conf" ]; then
         ocf_log info "Setup $1 into sync mode."
         runasowner -q err "echo \"synchronous_standby_names = '$1'\" > \"$REP_MODE_CONF\""
         [ "$RE_CONTROL_SLAVE" = "false" ] && RE_CONTROL_SLAVE="true"
         exec_with_retry 0 reload_conf
     else
         ocf_log debug "$sync_node_in_conf is already sync mode."
     fi
 }
 
 # reload_conf: ask PostgreSQL to re-read its configuration (pg_ctl reload).
 # Returns 0 on success, 1 on failure.
 reload_conf() {
     # Invoke pg_ctl
     if runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload"; then
         ocf_log info "Reload configuration file."
         return 0
     fi
 
     ocf_exit_reason "Can't reload configuration file."
     return 1
 }
 
 # user_recovery_conf: print the optional recovery.conf settings on stdout.
 # archive_cleanup_command and recovery_end_command are printed only when
 # configured. With a replication slot configured, primary_slot_name is
 # printed as well: the plain slot name for two nodes or fewer, otherwise
 # the slot name suffixed with this node's translated name.
 user_recovery_conf() {
     local number_of_nodes
     local nodename_tmp
 
     # put archive_cleanup_command and recovery_end_command only when defined by user
     [ -z "$OCF_RESKEY_archive_cleanup_command" ] || \
         echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'"
     [ -z "$OCF_RESKEY_recovery_end_command" ] || \
         echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'"
 
     if use_replication_slot; then
         number_of_nodes=$(echo $NODE_LIST | wc -w)
         if [ $number_of_nodes -gt 2 ]; then
             # Same translation as used when the slot names are created.
             nodename_tmp=$(echo "$NODENAME" | tr '[A-Z.-]' '[a-z__]')
             echo "primary_slot_name = '${OCF_RESKEY_replication_slot_name}_$nodename_tmp'"
         else
             echo "primary_slot_name = '${OCF_RESKEY_replication_slot_name}'"
         fi
     fi
 }
 
 # make_recovery_conf: write $RECOVERY_CONF for a standby.
 # The file is first created as the database owner, then filled with the
 # fixed standby settings below, followed by the output of user_recovery_conf.
 # Returns 0 on success, 1 when the file cannot be created.
 make_recovery_conf() {
     # Created through runasowner, i.e. as $OCF_RESKEY_pgdba.
     runasowner "touch $RECOVERY_CONF"
     if [ $? -ne 0 ]; then
         ocf_exit_reason "Can't create recovery.conf."
         return 1
     fi
 
 cat > $RECOVERY_CONF <<END
 standby_mode = 'on'
 primary_conninfo = 'host=${OCF_RESKEY_master_ip} port=${OCF_RESKEY_pgport} user=${OCF_RESKEY_repuser} application_name=${NODENAME} ${OCF_RESKEY_primary_conninfo_opt}'
 restore_command = '${OCF_RESKEY_restore_command}'
 recovery_target_timeline = 'latest'
 END
 
     user_recovery_conf >> $RECOVERY_CONF
     ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}"
     return 0
 }
 
 # change_pgsql_status: set the pgsql-status attribute of a node.
 #   $1  node name
 #   $2  new value
 # Nothing happens when the node is not online or already has that value.
 # A status of STOP or UNKNOWN on another node is never overwritten from
 # here. Always returns 0.
 change_pgsql_status() {
     local output
 
     is_node_online $1 || return 0
 
     output=`$CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null`
     [ "$output" != "$2" ] || return 0
 
     # If slave's disk is broken, RA cannot read PID file
     # and misjudges the PostgreSQL as down while it is running.
     # It causes overwriting of pgsql-status by Master because replication is still connected.
     case "$output" in
         STOP|UNKNOWN)
             if [ "$1" != "$NODENAME" ]; then
                 ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited."
                 return 0
             fi
             ;;
     esac
 
     ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2."
     exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2"
     return 0
 }
 
 # Set the pgsql-data-status attribute ($PGSQL_DATA_STATUS_ATTR) of a node.
 # arg1: node, arg2: new value
 # The attribute is read back after every update and the update is repeated
 # until the stored value equals arg2. Unknown nodes are ignored.
 # Always returns 0.
 change_data_status() {
     local current
 
     node_exist $1 || return 0
 
     current=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null`
     until [ "$current" = "$2" ]; do
         ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $current->$2."
         exec_with_retry 0 exec_with_timeout 0 "$CRM_ATTR_FOREVER" -N $1 -n $PGSQL_DATA_STATUS_ATTR -v "$2"
         current=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null`
     done
     return 0
 }
 
 # Set the master score attribute "master-<resource>" of a node.
 # arg1: node, arg2: score, arg3: resource
 # The attribute is only written when it already has a value and that value
 # differs from arg2. Always returns 0.
 set_master_score() {
     local score_now
 
     score_now=`$CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null`
     if [ -z "$score_now" ] || [ "$score_now" = "$2" ]; then
         return 0
     fi
 
     ocf_log info "Changing $3 master score on $1 : $score_now->$2."
     exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2"
     return 0
 }
 
 # Change the master score of a node for this resource.
 # arg1: node, arg2: score
 # Offline nodes are skipped. Returns 1 when set_master_score fails,
 # 0 otherwise.
 change_master_score() {
     local idx
 
     is_node_online $1 || return 0
 
     case "$OCF_RESOURCE_INSTANCE" in
         *:*)
             # If Pacemaker version is 1.0.x: the instance name carries a
             # clone number, so update every clone instance except our own.
             idx=0
             until [ "$idx" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; do
                 if [ "${RESOURCE_NAME}:${idx}" != "$OCF_RESOURCE_INSTANCE" ]; then
                     set_master_score $1 $2 "${RESOURCE_NAME}:${idx}" || return 1
                 fi
                 idx=`expr $idx + 1`
             done
             ;;
         *)
             # If globally-unique=false and Pacemaker version is 1.1.8 or higher
             # Master/Slave resource has no instance number
             set_master_score $1 $2 ${RESOURCE_NAME} || return 1
             ;;
     esac
     return 0
 }
 
 # Log a failed psql call and explain its exit code.
 # arg1: psql exit code, arg2: log level (default: err), arg3: message
 # Exit codes 1 and 3 set the exit reason; exit code 2 (connection error)
 # is only logged at the requested level.
 report_psql_error()
 {
     local psql_rc=$1
     local level=${2:-err}
     local text="$3"
 
     ocf_log $level "$text rc=$psql_rc"
     case "$psql_rc" in
         1)
             ocf_exit_reason "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command."
             ;;
         2)
             ocf_log $level "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command."
             ;;
         3)
             ocf_exit_reason "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command."
             ;;
     esac
 }
 
 #
 # Run a command in the background and kill it when it takes too long.
 # arg1       : timeout in seconds >= 0 (0 means: use OCF_RESKEY_crm_attr_timeout)
 # arg2..argN : command and its arguments
 # Returns the command's exit status, or 1 after a timeout.
 exec_with_timeout() {
     local child
     local remaining=$OCF_RESKEY_crm_attr_timeout
     local rc
 
     [ "$1" -ne 0 ] && remaining=$1
     shift
 
     $* &
     child=$!
     # give short-lived commands a chance to finish before the first check
     sleep .1
 
     # kill -s 0 only tests whether the child still exists
     while kill -s 0 $child >/dev/null 2>&1; do
         sleep 1
         remaining=`expr $remaining - 1`
         if [ $remaining -le 0 ]; then
             ocf_exit_reason "\"$*\" (pid=$child) timed out."
             kill -s 9 $child >/dev/null 2>&1
             return 1
         fi
         ocf_log info "Waiting($remaining). \"$*\" (pid=$child)."
     done
     # the child has finished: collect its exit status
     wait $child
 }
 
 # Retry a command once per second until it returns 0.
 # arg1       : count >= 0 (if arg1 is 0, it retries command in infinitum(1day))
 # arg2..argN : command and args
 # On success the command's stdout is printed unchanged and 0 is returned;
 # when all attempts fail the last exit status is returned.
 exec_with_retry() {
     local count="86400"
     local output
     local rc
 
     if [ "$1" -ne 0 ]; then
         count=$1
     fi
     shift
 
     while [ $count -gt 0 ]; do
         output=`$*`
         rc=$?
         if [ $rc -ne 0 ]; then
             ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"."
             count=`expr $count - 1`
             sleep 1
         else
             # Never use the command output as the printf format string:
             # '%' or '\' in the output would be interpreted.
             printf '%s' "$output"
             return 0
         fi
     done
 
     ocf_exit_reason "giving up executing \"$*\""
     return $rc
 }
 
 # Succeed when the lower-cased print_crm_mon output has a line starting with
 # "node <arg1> " or "node <arg1>:" that does not contain "offline".
 # arg1: node name (used as part of a grep pattern)
 is_node_online() {
     print_crm_mon | tr '[A-Z]' '[a-z]' | grep -e "^node $1 " -e "^node $1:" | grep -q -v "offline"
 }
 
 # Succeed when the lower-cased print_crm_mon output lists the node arg1.
 # The name must be followed by a blank, a colon or the end of the line, so
 # that e.g. "pg1" does not match a node called "pg10".
 node_exist() {
     print_crm_mon | tr '[A-Z]' '[a-z]' | grep -q -e "^node $1 " -e "^node $1:" -e "^node $1\$"
 }
 
 # Check that the command arg1 is available (via have_binary).
 # Returns 0 when found; otherwise sets the exit reason and returns 1.
 check_binary2() {
     have_binary "$1" && return 0
 
     ocf_exit_reason "Setup problem: couldn't find command: $1"
     return 1
 }
 
 # Check that the configuration file arg1 exists as a regular file.
 # Returns 0 when it exists, 1 when it is missing during a probe,
 # and 2 when it is missing otherwise.
 check_config() {
     [ -f "$1" ] && return 0
 
     if ocf_is_probe; then
        ocf_log info "Configuration file is $1 not readable during probe."
        return 1
     fi
 
     ocf_exit_reason "Configuration file $1 doesn't exist"
     return 2
 }
 
 # Validate most critical parameters
 #
 # Checks, in this order: the pg_ctl/psql binaries, the configuration file,
 # the socket directory setting, the pgdba user, write access to pgdata,
 # the monitor credentials, the replication settings and the replication
 # slot settings. In replication mode it also adds or removes the
 # "include '$REP_MODE_CONF'" directive in $OCF_RESKEY_config and creates
 # $OCF_RESKEY_tmpdir.
 # Returns $OCF_SUCCESS, or the OCF error code of the first failed check.
 pgsql_validate_all() {
     local version
     local check_config_rc
     local rep_mode_string
     local socket_directories
 
     version=`cat $OCF_RESKEY_pgdata/PG_VERSION`
 
     if ! check_binary2 "$OCF_RESKEY_pgctl" || 
        ! check_binary2 "$OCF_RESKEY_psql"; then
         return $OCF_ERR_INSTALLED
     fi
 
     # check_config: 0 = file present, 1 = missing during probe, 2 = missing
     check_config "$OCF_RESKEY_config"
     check_config_rc=$?
     [ $check_config_rc -eq 2 ] && return $OCF_ERR_INSTALLED
     if [ $check_config_rc -eq 0 ]; then
         ocf_version_cmp "$version" "9.3"
         if [ $? -eq 0 ]; then
             : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`}
         else
             # unix_socket_directories is used by PostgreSQL 9.3 or higher.
             socket_directories=`get_pgsql_param unix_socket_directories`
             if [ -n "$socket_directories" ]; then
                 # unix_socket_directories may have multiple socket directories and the pgsql RA can not know which directory is used for psql command.
                 # Therefore, the user must set OCF_RESKEY_socketdir explicitly.
                 if [ -z "$OCF_RESKEY_socketdir" ]; then
                     ocf_exit_reason "In PostgreSQL 9.3 or higher, socketdir can't be empty if you define unix_socket_directories in the postgresql.conf."
                     return $OCF_ERR_CONFIGURED
                 fi
             fi
         fi
     fi
 
     getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1
     if [ ! $? -eq 0 ]; then
         ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist";
         return $OCF_ERR_INSTALLED;
     fi
 
     if ocf_is_probe; then
         ocf_log info "Don't check $OCF_RESKEY_pgdata during probe"
     else
         if ! runasowner "test -w $OCF_RESKEY_pgdata"; then
             ocf_exit_reason "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba"
             return $OCF_ERR_PERM;
         fi
     fi
 
     # monitor_user and monitor_password have to be set together
     if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ]
     then
         ocf_exit_reason "monitor password can't be empty"
         return $OCF_ERR_CONFIGURED
     fi
 
     if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ]
     then
         ocf_exit_reason "monitor_user has to be set if monitor_password is set"
         return $OCF_ERR_CONFIGURED
     fi
 
     if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
         # the smaller of $version and 9.1 must be 9.1
         if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then
             ocf_exit_reason "Replication mode needs PostgreSQL 9.1 or higher."
             return $OCF_ERR_INSTALLED
         fi
         if [ ! -n "$OCF_RESKEY_master_ip" ]; then
             ocf_exit_reason "master_ip can't be empty."
             return $OCF_ERR_CONFIGURED
         fi
     fi
 
     if is_replication; then
         if ! ocf_is_ms; then
             ocf_exit_reason "Replication(rep_mode=async or sync) requires Master/Slave configuration."
             return $OCF_ERR_CONFIGURED
         fi
         if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then
             ocf_exit_reason "Invalid rep_mode : $OCF_RESKEY_rep_mode"
             return $OCF_ERR_CONFIGURED
         fi
         if [ ! -n "$NODE_LIST" ]; then
             ocf_exit_reason "node_list can't be empty."
             return $OCF_ERR_CONFIGURED
         fi
         if [ $check_config_rc -eq 0 ]; then
             # sync mode needs the include directive, any other mode must not have it
             rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA"
             if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then
                 if ! grep -q "$rep_mode_string" $OCF_RESKEY_config; then
                     ocf_log info "adding include directive into $OCF_RESKEY_config"
                     echo "$rep_mode_string" >> $OCF_RESKEY_config
                 fi
             else
                 if grep -q "$rep_mode_string" $OCF_RESKEY_config; then
                     ocf_log info "deleting include directive from $OCF_RESKEY_config"
                     # escape the slashes so the string can be used as a sed address
                     rep_mode_string=`echo $rep_mode_string | sed -e 's|/|\\\\/|g'`
                     sed -i "/$rep_mode_string/d" $OCF_RESKEY_config
                 fi
             fi
         fi
         if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then
             ocf_exit_reason "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba"
             return $OCF_ERR_PERM
         fi
     fi
 
     if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
         if ocf_is_ms; then
             ocf_exit_reason "Replication(rep_mode=slave) does not support Master/Slave configuration."
             return $OCF_ERR_CONFIGURED
         fi
     fi
 
     if use_replication_slot; then
         ocf_version_cmp "$version" "9.4"
         if [ $? -eq 0 -o $? -eq 3 ]; then
             ocf_exit_reason "Replication slot needs PostgreSQL 9.4 or higher."
             return $OCF_ERR_CONFIGURED
         fi
 
         # The pattern must be quoted: unquoted, the shell could expand it to
         # a matching one-character file name in the current directory.
         echo "$OCF_RESKEY_replication_slot_name" | grep -q -e '[^a-z0-9_]'
         if [ $? -eq 0 ]; then
             ocf_exit_reason "Invalid replication_slot_name($OCF_RESKEY_replication_slot_name). only use lower case letters, numbers, and the underscore character."
             return $OCF_ERR_CONFIGURED
         fi
     fi
 
     return $OCF_SUCCESS
 }
 
 
 #
 # Make sure the log file arg1 exists and is writable by $OCF_RESKEY_pgdba.
 # A missing file is created and handed over to pgdba and its primary group.
 # Returns 0 when pgdba can write to the file, 1 otherwise.
 #
 
 check_log_file() {
     local pgdba_group
 
     if [ ! -f "$1" ]; then
         touch $1 > /dev/null 2>&1
         pgdba_group=`getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4`
         chown $OCF_RESKEY_pgdba:$pgdba_group $1
     fi
 
     #Check if $OCF_RESKEY_pgdba can write to the log file
     runasowner "test -w $1" || return 1
     return 0
 }
 
 #
 # Check socket directory
 #
 # An existing $OCF_RESKEY_socketdir must allow $OCF_RESKEY_pgdba to create
 # files; a missing one is created with mode 2775 and owned by pgdba and its
 # primary group. Any failure exits the agent with $OCF_ERR_PERM.
 #
 check_socket_dir() {
     local pgdba_group
 
     if [ -d "$OCF_RESKEY_socketdir" ]; then
         # prove write access by creating and removing a scratch file
         if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then
             ocf_exit_reason "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir"
             exit $OCF_ERR_PERM
         fi
         rm $OCF_RESKEY_socketdir/test.$$
         return
     fi
 
     if ! mkdir "$OCF_RESKEY_socketdir"; then
         ocf_exit_reason "Can't create directory $OCF_RESKEY_socketdir"
         exit $OCF_ERR_PERM
     fi
 
     pgdba_group=`getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4`
     if ! chown $OCF_RESKEY_pgdba:$pgdba_group "$OCF_RESKEY_socketdir"; then
         ocf_exit_reason "Can't change ownership for $OCF_RESKEY_socketdir"
         exit $OCF_ERR_PERM
     fi
 
     if ! chmod 2775 "$OCF_RESKEY_socketdir"; then
         ocf_exit_reason "Can't change permissions for $OCF_RESKEY_socketdir"
         exit $OCF_ERR_PERM
     fi
 }
 
 # Print the output of "crm_mon -n1" followed by a newline.
 # The output is fetched once and cached in CRM_MON_OUTPUT; later calls in
 # the same shell reuse the cached text.
 print_crm_mon() {
     if [ -z "$CRM_MON_OUTPUT" ]; then
         CRM_MON_OUTPUT=`exec_with_retry 0 crm_mon -n1`
     fi
     # Print the text verbatim: as a printf format string, '%' or '\' in the
     # crm_mon output would be interpreted.
     printf '%s\n' "${CRM_MON_OUTPUT}"
 }
 
 #
 #   'main' starts here...
 #
 
 
 # The agent takes exactly one argument: the action to perform.
 if [ $# -ne 1 ]
 then
     usage
     exit $OCF_ERR_GENERIC
 fi
 
 
 # Files below pgdata and names derived from the resource name.
 PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid
 BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label
 # resource name without the ":<instance>" suffix
 RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1`
 PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status"
 RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf
 NODENAME=$(ocf_local_nodename | tr '[A-Z]' '[a-z]')
 
 # Settings that are only needed in replication mode.
 if is_replication; then
     REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf
     PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock
     XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note
 
     CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
     CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
     CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
     CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount"
 
     # master score values
     CAN_NOT_PROMOTE="-INFINITY"
     CAN_PROMOTE="100"
     PROMOTE_ME="1000"
 
     CHECK_MS_SQL="select pg_is_in_recovery()"
     CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()"
     CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication"
 
     PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status"
     PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status"
     PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc"
     PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline"
 
     NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'`
     RE_CONTROL_SLAVE="false"
 fi
 
 # These actions are answered before any validation takes place.
 case "$1" in
     methods)    pgsql_methods
                 exit $?;;
 
     meta-data)  meta_data
                 exit $OCF_SUCCESS;;
 esac
 
 pgsql_validate_all
 rc=$?
 
 [ "$1" = "validate-all" ] && exit $rc
 
 # With a failed validation, stop reports success and monitor/status report
 # "not running"; every other action fails with the validation result.
 if [ $rc -ne 0 ]
 then
     case "$1" in
         stop)    if is_replication; then
                     change_pgsql_status "$NODENAME" "UNKNOWN"
                  fi
                  exit $OCF_SUCCESS;;
         monitor) exit $OCF_NOT_RUNNING;;
         status)  exit $OCF_NOT_RUNNING;;
         *)       exit $rc;;
     esac
 fi
 
 # name of the user running the agent
 US=`id -u -n`
 
 if [ $US != root -a $US != $OCF_RESKEY_pgdba ]
 then
     ocf_exit_reason "$0 must be run as root or $OCF_RESKEY_pgdba"
     exit $OCF_ERR_GENERIC
 fi
 
 # make psql command options
 # (with a monitor user the credentials are passed via PGUSER/PGPASSWORD)
 if [ -n "$OCF_RESKEY_monitor_user" ]; then
     PGUSER=$OCF_RESKEY_monitor_user; export PGUSER
     PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD
     psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb"
 else
     psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb"
 fi
 
 # pghost takes precedence over socketdir for the -h option
 if [ -n "$OCF_RESKEY_pghost" ]; then
     psql_options="$psql_options -h $OCF_RESKEY_pghost"
 else
     if [ -n "$OCF_RESKEY_socketdir" ]; then
         psql_options="$psql_options -h $OCF_RESKEY_socketdir"
     fi
 fi
 
 if [ -n "$OCF_RESKEY_pgport" ]; then
     export PGPORT=$OCF_RESKEY_pgport
 fi
 
 # append pglibs to LD_LIBRARY_PATH
 if [ -n "$OCF_RESKEY_pglibs" ]; then
     if [ -n "$LD_LIBRARY_PATH" ]; then
         export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$OCF_RESKEY_pglibs
     else
         export LD_LIBRARY_PATH=$OCF_RESKEY_pglibs
     fi
 fi
    
 
 # What kind of method was invoked?
 case "$1" in
     status)     if pgsql_status
                 then
                     ocf_log info "PostgreSQL is up"
                     exit $OCF_SUCCESS
                 else
                     ocf_log info "PostgreSQL is down"
                     exit $OCF_NOT_RUNNING
                 fi;;
 
     monitor)    pgsql_monitor
                 exit $?;;
 
     start)      pgsql_start
                 exit $?;;
 
     promote)    pgsql_promote
                 exit $?;;
 
     demote)     pgsql_demote
                 exit $?;;
 
     notify)     pgsql_notify
                 exit $?;;
 
     stop)       pgsql_stop
                 exit $?;;
     *)
                 exit $OCF_ERR_UNIMPLEMENTED;;
 esac
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index cc45f09ad..966dd64d1 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -1,370 +1,465 @@
 #!/bin/sh
 #
 # Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
 #                    All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 #######################################################################
 
 # Locations of the rabbitmq binaries and of the data, pid and log paths
 # used by this agent.
 RMQ_SERVER=/usr/sbin/rabbitmq-server
 RMQ_CTL=/usr/sbin/rabbitmqctl
 RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
 RMQ_PID_DIR="/var/run/rabbitmq"
 RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid"
 RMQ_LOG_DIR="/var/log/rabbitmq"
 NODENAME=$(ocf_local_nodename)
 
+# this attr represents the current active local rmq node name.
+# when rmq stops or the node is fenced, this attr disappears
 RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}"
+# this attr represents the last known active local rmq node name.
+# when rmq stops or the node is fenced, the attr stays forever so
+# we can continue to map an offline pcmk node to its rmq node name
+# equivalent.
+RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}"
 
 # Print the resource agent meta-data (XML) on stdout: the set_policy
 # parameter and the supported actions with their default timeouts.
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="rabbitmq-cluster" version="0.9">
 <version>1.0</version>
 
 <longdesc lang="en">
 Starts cloned rabbitmq cluster instance
 </longdesc>
 <shortdesc lang="en">rabbitmq clustered</shortdesc>
 
 <parameters>
 <parameter name="set_policy" unique="1">
 <longdesc lang="en">
 Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance.
 </longdesc>
 <shortdesc lang="en">rabbitmqctl set_policy args</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"        timeout="100" />
 <action name="stop"         timeout="90" />
 <action name="monitor"      timeout="40" interval="10" depth="0" />
 <action name="meta-data"    timeout="10" />
 <action name="validate-all"   timeout="20" />
 </actions>
 </resource-agent>
 END
 }
 
 #######################################################################
 
 # Print a short usage message listing the supported actions on stdout.
 rmq_usage() {
 	cat <<END
-usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
+usage: $0 {start|stop|monitor|notify|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 # Remove the rabbitmq data directory ($RMQ_DATA_DIR) and everything in it.
 # All output of rm is discarded.
 rmq_wipe_data()
 {
 	rm -rf $RMQ_DATA_DIR >/dev/null 2>&1
 }
 
 # Print the rabbitmq node name of the local instance.
 # The name is taken from the "Status of node ..." line of
 # "rabbitmqctl status"; when that yields nothing, the value of
 # RABBITMQ_NODENAME in /etc/rabbitmq/rabbitmq-env.conf is used instead.
 # Prints an empty line when neither source provides a name.
 rmq_local_node()
 {
 	local rmq_name
 
 	rmq_name=$(rabbitmqctl status 2>&1 |
 		sed -n -e "s/^.*[S|s]tatus of node \(.*\)\s.*$/\1/p" |
 		tr -d "'")
 
 	if [ -z "$rmq_name" ]; then
 		rmq_name=$(grep "\s*RABBITMQ_NODENAME=" /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null |
 			awk -F= '{print $2}')
 	fi
 
 	echo "$rmq_name"
 }
 
 # Print the rabbitmq node names stored in the $RMQ_CRM_ATTR_COOKIE
 # attribute of every node whose crmd state is "online" in the CIB,
 # one name per line.
 rmq_join_list()
 {
 	cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" |
 		grep "$RMQ_CRM_ATTR_COOKIE" |
 		sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p"
 }
 
 # Publish the local rabbitmq node name as node attributes of $NODENAME.
 # Exits with $OCF_ERR_GENERIC when the node name cannot be determined.
 rmq_write_nodename()
 {
 	local node_name=$(rmq_local_node)
 
 	if [ -z "$node_name" ]; then
 		ocf_log err "Failed to determine rabbitmq node name, exiting"
 		exit $OCF_ERR_GENERIC
 	fi
 
-	# store the pcmknode to rmq node mapping as an attribute
+	# store the pcmknode to rmq node mapping as a transient attribute. This allows
+	# us to retrieve the join list with a simple xpath.
 	${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name"
+
+	# the pcmknode to rmq node mapping as a permanent attribute as well. this lets
+	# us continue to map offline nodes to their equivalent rmq node name
+	${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --name "$RMQ_CRM_ATTR_COOKIE_LAST_KNOWN" -v "$node_name"
 }
 
 # Delete the transient ("-l reboot") $RMQ_CRM_ATTR_COOKIE attribute of the
 # local node.
 rmq_delete_nodename()
 {
 	${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot \
 		--name "$RMQ_CRM_ATTR_COOKIE" -D
 }
 
 # Create the directory arg1 when it does not exist yet, owned by
 # rabbitmq:rabbitmq with mode 755. An existing directory is left untouched.
 prepare_dir () {
 	[ -d ${1} ] && return 0
 
 	mkdir -p ${1}
 	chown -R rabbitmq:rabbitmq ${1}
 	chmod 755 ${1}
 }
 
 # Delete the pid file ($RMQ_PID_FILE); a missing file is not an error.
 remove_pid () {
 	rm -f ${RMQ_PID_FILE} >/dev/null 2>&1
 }
 
 # Check the local rabbitmq server with "$RMQ_CTL cluster_status".
 # Exit code 0 refreshes the node name attributes and returns $OCF_SUCCESS.
 # Any other exit code removes the transient node name attribute; the codes
 # listed in the second case branch return $OCF_NOT_RUNNING, everything else
 # returns $OCF_ERR_GENERIC.
 rmq_monitor() {
 	local rc
 
 	$RMQ_CTL cluster_status > /dev/null 2>&1
 	rc=$?
 	case "$rc" in
 	0)
 		ocf_log debug "RabbitMQ server is running normally"
 		rmq_write_nodename
 		
 		return $OCF_SUCCESS
 	;;
-	2)
+	2|68|69|70|75|78)
 		ocf_log info "RabbitMQ server is not running"
 		rmq_delete_nodename
 		return $OCF_NOT_RUNNING
 	;;
 	*)
-		ocf_log err "Unexpected return code from '$RMQ_CTL cluster status' exit code: $rc"
+		ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc"
 		rmq_delete_nodename
 		return $OCF_ERR_GENERIC
 	;;
 	esac
 }
 
 # Start rabbitmq-server in the background and wait until it is up.
 # Prepares the pid and log directories, removes a stale pid file, launches
 # the server in its own session and blocks in "$RMQ_CTL wait".
 # Returns $OCF_ERR_GENERIC when the wait fails, otherwise the result of
 # rmq_monitor.
 rmq_init_and_wait()
 {
 	local wait_rc
 	local dir
 
 	for dir in $RMQ_PID_DIR $RMQ_LOG_DIR; do
 		prepare_dir $dir
 	done
 	remove_pid
 
 	# the server startup script uses this environment variable
 	export RABBITMQ_PID_FILE="$RMQ_PID_FILE"
 
 	setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" &
 
 	ocf_log info "Waiting for server to start"
 	$RMQ_CTL wait $RMQ_PID_FILE
 	wait_rc=$?
 	if [ $wait_rc -ne $OCF_SUCCESS ]; then
 		remove_pid
 		ocf_log info "rabbitmq-server start failed: $wait_rc"
 		return $OCF_ERR_GENERIC
 	fi
 
 	# the monitor result is the result of this function
 	rmq_monitor
 }
 
 # Run "$RMQ_CTL set_policy" with the given arguments, discarding its output.
 # Returns the exit status of rabbitmqctl.
 rmq_set_policy()
 {
 	# "$@" passes every argument on exactly as received; an unquoted $@
 	# would be split and glob-expanded a second time.
 	$RMQ_CTL set_policy "$@" > /dev/null 2>&1
 }
 
 # Bootstrap a new rabbitmq cluster on this node.
 # Wipes the local data, starts the server and, when the set_policy
 # parameter is configured, applies it with rmq_set_policy.
 # Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when the server does not start
 # or the policy cannot be set.
 rmq_start_first()
 {
 	ocf_log info "Bootstrapping rabbitmq cluster"
 	rmq_wipe_data
 	if ! rmq_init_and_wait; then
 		ocf_log info "failed to bootstrap cluster. Check SELINUX policy"
 		return $OCF_ERR_GENERIC
 	fi
 	ocf_log info "cluster bootstrapped"
 
 	[ -n "$OCF_RESKEY_set_policy" ] || return $OCF_SUCCESS
 
 	# do not quote set_policy, we are passing in arguments
 	if rmq_set_policy $OCF_RESKEY_set_policy > /dev/null 2>&1; then
 		ocf_log info "Policy set: $OCF_RESKEY_set_policy"
 		return $OCF_SUCCESS
 	fi
 
 	ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy"
 	return $OCF_ERR_GENERIC
 }
 
+# Succeed when the output of
+# "$RMQ_CTL eval 'rabbit_mnesia:is_clustered().'" contains "true".
+rmq_is_clustered()
+{
+    $RMQ_CTL eval 'rabbit_mnesia:is_clustered().' | grep -q true
+}
+
 # Start the local server and join it to an existing cluster.
 # arg1: list of rabbitmq node names that may be joined
 # When the started server reports that it is already clustered nothing else
 # is done. Otherwise the app is stopped and join_cluster is tried with each
 # listed node until one succeeds, after which the app is started again.
 # Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when the start or join fails.
 rmq_join_existing()
 {
 	local join_list="$1"
 	local rc=$OCF_ERR_GENERIC
 
 	ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes."
 	rmq_init_and_wait
 	if [ $? -ne 0 ]; then
 		return $OCF_ERR_GENERIC
 	fi
 
+        if rmq_is_clustered; then
+            ocf_log info "Successfully re-joined existing rabbitmq cluster automatically"
+            return $OCF_SUCCESS
+        fi
+
 	# unconditionally join the cluster
 	$RMQ_CTL stop_app > /dev/null 2>&1
 	for node in $(echo "$join_list"); do
 		ocf_log info "Attempting to join cluster with target node $node"
 		$RMQ_CTL join_cluster $node
 		if [ $? -eq 0 ]; then
 			ocf_log info "Joined cluster by connecting to node $node, starting app"
 			$RMQ_CTL start_app
 			rc=$?
 			if [ $rc -ne 0 ]; then
 				ocf_log err "'$RMQ_CTL start_app' failed"
 			fi
 			break;
 		fi
 	done
 
 	# rc still holds its initial error value when no node could be joined
 	if [ "$rc" -ne 0 ]; then
 		ocf_log info "Join process incomplete, shutting down."
 		return $OCF_ERR_GENERIC
 	fi
 
 	ocf_log info "Successfully joined existing rabbitmq cluster"
 	return $OCF_SUCCESS
 }
 
+# Ask running cluster members to forget a rabbitmq node.
+# arg1: list of running rabbitmq node names, arg2: node name to forget
+# The members are tried in order; the function returns after the first
+# "forget_cluster_node" call that succeeds. Failures are only logged.
+rmq_forget_cluster_node_remotely() {
+	local running_cluster_nodes="$1"
+	local node_to_forget="$2"
+
+	ocf_log info "Forgetting $node_to_forget via nodes [ $(echo $running_cluster_nodes | tr '\n' ' ') ]."
+	for running_cluster_node in $running_cluster_nodes; do
+		rabbitmqctl -n $running_cluster_node forget_cluster_node $node_to_forget
+		if [ $? = 0 ]; then
+			ocf_log info "Succeeded forgetting $node_to_forget via $running_cluster_node."
+			return
+		else
+			ocf_log err "Failed to forget node $node_to_forget via $running_cluster_node."
+		fi
+	done
+}
+
+# Handle the notify action.
+# Only "post-stop" notifications are acted upon, and only while the local
+# server is running: every pcmk node named in
+# OCF_RESKEY_CRM_meta_notify_stop_uname is mapped to its last known rabbitmq
+# node name, which is then removed with "forget_cluster_node".
+# Always returns $OCF_SUCCESS.
+rmq_notify() {
+	node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}"
+	mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+
+
+	# When notifications are on, this agent is going to "forget" nodes once they
+	# leave the cluster. This is thought to resolve some issues where rabbitmq
+	# blocks trying to sync with an offline node after a fencing action occurs.
+	if ! [ "${mode}" = "post-stop" ]; then
+		return $OCF_SUCCESS
+	fi
+
+	rmq_monitor
+	if [ $? -ne $OCF_SUCCESS ]; then
+		# only run forget when we are for sure active 
+		return $OCF_SUCCESS
+	fi
+
+	# forget the rmq instance of each stopped pcmk node in the provided list.
+	for node in $(echo "$node_list"); do
+		local rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $node -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"
+		if [ -z "$rmq_node" ]; then
+			ocf_log warn "Unable to map pcmk node $node to a known rmq node."
+			continue	
+		fi
+		ocf_log notice "Forgetting stopped node $rmq_node"
+		$RMQ_CTL forget_cluster_node $rmq_node
+		if [ $? -ne 0 ]; then
+			ocf_log warn "Unable to forget offline node $rmq_node."
+		fi
+	done
+	return $OCF_SUCCESS
+}
+
 # Start action.
 # Returns immediately when the server already runs. With an empty join
 # list the cluster is bootstrapped on this node; otherwise the node joins
 # the existing cluster, falling back to a stop, data wipe and second join
 # attempt when the first join fails.
 # Returns $OCF_SUCCESS or $OCF_ERR_GENERIC (or the result of
 # rmq_start_first when bootstrapping).
 rmq_start() {
 	local join_list=""
 	local rc
 
 	rmq_monitor
 	if [ $? -eq $OCF_SUCCESS ]; then
 		return $OCF_SUCCESS
 	fi
 
 	join_list=$(rmq_join_list)
 
 	# No join list means no active instances are up. This instance
 	# is the first, so it needs to bootstrap the rest
 	if [ -z "$join_list" ]; then
 		rmq_start_first
 		rc=$?
 		return $rc
 	fi
 
 	# first try to join without wiping mnesia data
 	rmq_join_existing "$join_list"
 	if [ $? -ne 0 ]; then
 		ocf_log info "node failed to join, wiping data directory and trying again"
+		local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"
+
 		# if the graceful join fails, use the hammer and reset all the data.
 		rmq_stop 
 		rmq_wipe_data
+		rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node"
 		rmq_join_existing "$join_list"
-		if [ $? -ne 0 ]; then
+		rc=$?
+
+                # Restore users (if any)
+                BaseDataDir=`dirname $RMQ_DATA_DIR`
+                if [ -f $BaseDataDir/users.erl ] ; then
+                        rabbitmqctl eval "
+                                {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"),
+                                lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, X) end, Users).
+                        "
+                        rm -f $BaseDataDir/users.erl
+                fi
+
+		if [ $rc -ne 0 ]; then
 			ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
 			return $OCF_ERR_GENERIC
 		fi
 	fi
 
 	return $OCF_SUCCESS
 }
 
 # Stop action.
 # Dumps the rabbit_user records (except "guest") to users.erl next to the
 # data directory, then stops the server with "$RMQ_CTL stop" and polls
 # rmq_monitor once per second until it reports "not running".
 # Returns $OCF_SUCCESS, or the exit code of "$RMQ_CTL stop" when that
 # command fails; exits with $OCF_ERR_GENERIC when monitoring reports an
 # error while waiting.
 rmq_stop() {
+        # Backup users
+        BaseDataDir=`dirname $RMQ_DATA_DIR`
+        rabbitmqctl eval "
+                Users = mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]),
+                file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])).
+        "
+
 	rmq_monitor
 	if [ $? -eq $OCF_NOT_RUNNING ]; then
 		return $OCF_SUCCESS
 	fi
 
 	$RMQ_CTL stop
 	rc=$?
 
 	if [ $rc -ne 0 ]; then
 		ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc"
 		return $rc
 	fi
 
 	#TODO add kill logic
 	stop_wait=1
 	while [ $stop_wait = 1 ]; do
 		rmq_monitor
 		rc=$?
 		if [ "$rc" -eq $OCF_NOT_RUNNING ]; then
 			stop_wait=0
 			break
 		elif [ "$rc" -ne $OCF_SUCCESS ]; then
 			ocf_log info "rabbitmq-server stop failed: $rc"
 			exit $OCF_ERR_GENERIC
 		fi
 		sleep 1
 	done
 
 	remove_pid
 	return $OCF_SUCCESS
 }
 
 # validate-all action: check that the rabbitmq server and control binaries
 # are installed (via check_binary). Returns $OCF_SUCCESS.
 rmq_validate() {
 	local required_binary
 
 	for required_binary in $RMQ_SERVER $RMQ_CTL; do
 		check_binary $required_binary
 	done
 
 	# This resource only makes sense as a clone right now. at some point
 	# we may want to verify the following.
 	#TODO verify cloned
 	#TODO verify ordered=true
 
 	# Given that this resource does the cluster join explicitly,
 	# having a cluster_nodes list in the static config file will
 	# likely conflict with this agent.
 	#TODO verify no cluster list in rabbitmq conf
 	#cat /etc/rabbitmq/rabbitmq.config | grep "cluster_nodes"
 
 	return $OCF_SUCCESS
 }
 
 # Dispatch the requested action. meta-data, usage/help and unknown actions
 # exit inside the case statement; for every other action the function's
 # return value is logged and used as the exit code.
 case $__OCF_ACTION in
 meta-data)	meta_data
 		exit $OCF_SUCCESS
 		;;
 start)		rmq_start;;
 stop)		rmq_stop;;
 monitor)	rmq_monitor;;
 validate-all)	rmq_validate;;
+notify)		rmq_notify;;
 usage|help)	rmq_usage
 		exit $OCF_SUCCESS
 		;;
 *)		rmq_usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 esac
 rc=$?
 ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
 exit $rc
 
diff --git a/heartbeat/sapdb.sh b/heartbeat/sapdb.sh
index 7edb4b88d..03474fc05 100755
--- a/heartbeat/sapdb.sh
+++ b/heartbeat/sapdb.sh
@@ -1,340 +1,367 @@
 #
 # sapdb.sh - for systems having SAPHostAgent installed
 # (sourced by SAPDatabase)
 #
 # Description:	This code is separated from the SAPDatabase agent to
 #               introduce new functions for systems which having
 #               SAPHostAgent installed.
 #               Someday it might be merged back into SAPDatabase agein.
 #
 # Author:       Alexander Krauth, September 2010
 # Support:      linux@sap.com
 # License:      GNU General Public License (GPL)
 # Copyright:    (c) 2010, 2012 Alexander Krauth
 #
 
 
 #
 # background_check_saphostexec : Run a request to saphostexec in a separat task, to be able to react on a hanging process
 #
 background_check_saphostexec() {
   timeout=600
   count=0
 
   $SAPHOSTCTRL -function ListDatabases >/dev/null 2>&1 &
   pid=$!
 
   while kill -0 $pid > /dev/null 2>&1
   do
     sleep 0.1
     count=$(( $count + 1 ))
     if [ $count -ge $timeout ]; then
       kill -9 $pid >/dev/null 2>&1
       ocf_log warn "saphostexec did not respond to the method 'ListDatabases' within 60 seconds"
       return $OCF_ERR_GENERIC                # Timeout
     fi
   done
 
   # child already has finished, now evaluate it's returncode 
   wait $pid
 }
 
 #
 # cleanup_saphostexec : make sure to cleanup the SAPHostAgent in case of any
 #                       misbehavior
 #
 cleanup_saphostexec() {
   pkill -9 -f "$SAPHOSTEXEC"
   pkill -9 -f "$SAPHOSTSRV"
   oscolpid=`pgrep -f "$SAPHOSTOSCOL"`       # we check saposcol pid, because it
                                             # might not run under control of
 					    # saphostexec
 
   # cleanup saposcol shared memory, otherwise it will not start again
   if [ -n "$oscolpid" ];then
     kill -9 $oscolpid
     oscolipc=`ipcs -m | grep "4dbe " | awk '{print $2}'`
     if [ -n "$oscolipc" ]; then
       ipcrm -m $oscolipc
     fi
   fi
 
   # removing the unix domain socket file as it might have wrong permissions or 
   # ownership - it will be recreated by saphostexec during next start
   [ -r /tmp/.sapstream1128 ] && rm -f /tmp/.sapstream1128
 }
 
 #
 # check_saphostexec : Before using saphostctrl we make sure that the
 #                     saphostexec is running on the current node.
 #
 check_saphostexec() {
   chkrc=$OCF_SUCCESS
   running=`pgrep -f "$SAPHOSTEXEC" | wc -l`
 
   if [ $running -gt 0 ]; then
     if background_check_saphostexec; then
       return $OCF_SUCCESS
     else
       ocf_log warn "saphostexec did not respond to the method 'ListDatabases' correctly (rc=$?), it will be killed now"
       running=0
     fi
   fi
 
   if [ $running -eq 0 ]; then
     ocf_log warn "saphostexec is not running on node `hostname`, it will be started now"
     cleanup_saphostexec
     output=`$SAPHOSTEXEC -restart 2>&1`
     
     # now make sure the daemon has been started and is able to respond
     srvrc=1
     while [ $srvrc -ne 0 -a `pgrep -f "$SAPHOSTEXEC" | wc -l` -gt 0 ]
     do
       sleep 1
       background_check_saphostexec
       srvrc=$?
     done
 
     if [ $srvrc -eq 0 ]
     then
       ocf_log info "saphostexec on node `hostname` was restarted !"
       chkrc=$OCF_SUCCESS
     else
       ocf_log error "saphostexec on node `hostname` could not be started! - $output"
       chkrc=$OCF_ERR_GENERIC
     fi
   fi
   
   return $chkrc
 }
 
 
 #
 # sapdatabase_start : Start the SAP database
 #
 sapdatabase_start() {
 
   check_saphostexec
   rc=$?
   
   if [ $rc -eq $OCF_SUCCESS ]
   then
     sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT"
 
     DBINST=""
     if [ -n "$OCF_RESKEY_DBINSTANCE" ]
     then
       DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE "
     fi
     FORCE=""
     if ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER
     then
       FORCE="-force"
     fi
-    output=`$SAPHOSTCTRL -function StartDatabase -dbname $SID -dbtype $DBTYPE $DBINST $FORCE -service`
+    DBOSUSER=""
+    if [ -n "$OCF_RESKEY_DBOSUSER" ]
+    then
+      DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER "
+    fi
+    output=`$SAPHOSTCTRL -function StartDatabase -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER $FORCE -service`
 
     sapdatabase_monitor 1
     rc=$?
 
     if [ $rc -eq 0 ]
     then
       ocf_log info "SAP database $SID started: $output"
       rc=$OCF_SUCCESS
     
       sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT"
     else
       ocf_log err "SAP database $SID start failed: $output"
       rc=$OCF_ERR_GENERIC
     fi
   fi
   
   return $rc
 }
 
 #
 # sapdatabase_stop: Stop the SAP database
 #
 sapdatabase_stop() {
 
   check_saphostexec
   rc=$?
   
   if [ $rc -eq $OCF_SUCCESS ]
   then
     sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"
 
     DBINST=""
     if [ -n "$OCF_RESKEY_DBINSTANCE" ]
     then
       DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE "
     fi
-    output=`$SAPHOSTCTRL -function StopDatabase -dbname $SID -dbtype $DBTYPE $DBINST -force -service`
+    DBOSUSER=""
+    if [ -n "$OCF_RESKEY_DBOSUSER" ]
+    then
+      DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER "
+    fi
+    output=`$SAPHOSTCTRL -function StopDatabase -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER -force -service`
 
     if [ $? -eq 0 ]
     then
       ocf_log info "SAP database $SID stopped: $output"
       rc=$OCF_SUCCESS
     else
       ocf_log err "SAP database $SID stop failed: $output"
       rc=$OCF_ERR_GENERIC
     fi
   fi
 
   sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"
   
   return $rc
 }
 
 
 #
 # sapdatabase_monitor: Can the given database instance do anything useful?
 #
 sapdatabase_monitor() {
   strict=$1
   rc=$OCF_SUCCESS
 
   if ! ocf_is_true $strict
   then
     sapdatabase_status
     rc=$?
   else
     check_saphostexec
     rc=$?
   
     if [ $rc -eq $OCF_SUCCESS ]
     then
       count=0
       
       DBINST=""
       if [ -n "$OCF_RESKEY_DBINSTANCE" ]
       then
         DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE "
       fi
-      output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST`
+      DBOSUSER=""   # reset first (as start/stop do) so no stale value reaches saphostctrl
+      if [ -n "$OCF_RESKEY_DBOSUSER" ]; then
+        DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER "
+      fi
+      output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER`
 
       # we have to parse the output, because the returncode doesn't tell anything about the instance status
       for SERVICE in `echo "$output" | grep -i 'Component[ ]*Name *[:=] [A-Za-z][A-Za-z0-9_]* (' | sed 's/^.*Component[ ]*Name *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'`
       do 
-        COLOR=`echo "$output" | grep -i "Component[ ]*Name *[:=] *$SERVICE (" | sed 's/^.*Status *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'`
+        COLOR=`echo "$output" | grep -i "Component[ ]*Name *[:=] *$SERVICE (" | sed 's/^.*Status *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i' | uniq`
         STATE=0
 
         case $COLOR in
           Running)       STATE=$OCF_SUCCESS;;
           *)             STATE=$OCF_NOT_RUNNING;;
         esac 
 
         SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'`
         if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ]
         then
             if [ $STATE -eq $OCF_NOT_RUNNING ]
             then
               ocf_log err "SAP database service $SERVICE is not running with status $COLOR !"
               rc=$STATE
             fi
             count=1
         fi
       done
 
       if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ]
       then
         ocf_log err "The resource does not run any services which this RA could monitor!"
         rc=$OCF_ERR_ARGS
       fi
       
       if [ $rc -ne $OCF_SUCCESS ]
       then
         ocf_log err "The SAP database $SID is not running: $output"
       fi
     fi
   fi
   return $rc
 }
 
 
 #
 # sapdatabase_status: Are there any database processes on this host ?
 #
 sapdatabase_status() {
+  sid=`echo $SID | tr '[:upper:]' '[:lower:]'`
+
+  SUSER=${OCF_RESKEY_DBOSUSER:-""}
+
   case $DBTYPE in
     ADA) SEARCH="$SID/db/pgm/kernel"
-         SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'`
+         [ -z "$SUSER" ] && SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'`
          SNUM=2
          ;;
-    ORA) SEARCH="ora_[a-z][a-z][a-z][a-z]_"
-         SUSER="ora`echo $SID | tr '[:upper:]' '[:lower:]'`"
-         SNUM=4
+    ORA) # only count processes whose name ends in this instance (default: SID)
+         DBINST=${OCF_RESKEY_DBINSTANCE:-${SID}}
+         SEARCH="ora_[a-z][a-z][a-z][a-z]_$DBINST"
+
+         if [ -z "$SUSER" ]; then
+           id "oracle" > /dev/null 2> /dev/null && SUSER="oracle"
+           id "ora${sid}" > /dev/null 2> /dev/null && SUSER="${SUSER:+${SUSER},}ora${sid}"
+         fi
+
+         SNUM=4
          ;;
     DB6) SEARCH="db2[a-z][a-z][a-z]"
-         SUSER="db2`echo $SID | tr '[:upper:]' '[:lower:]'`"
+         [ -z "$SUSER" ] && SUSER="db2${sid}"
          SNUM=2
          ;;
     SYB) SEARCH="dataserver"
-         SUSER="syb`echo $SID | tr '[:upper:]' '[:lower:]'`"
+         [ -z "$SUSER" ] && SUSER="syb${sid}"
          SNUM=1
 		 ;;
     HDB) SEARCH="hdb[a-z]*server"
-         SUSER="`echo $SID | tr '[:upper:]' '[:lower:]'`adm"
+         [ -z "$SUSER" ] && SUSER="${sid}adm"
          SNUM=1
 		 ;;
   esac
 
-  cnt=`ps -u $SUSER -o args 2> /dev/null | grep -c $SEARCH`
+  [ -z "$SUSER" ] && return $OCF_ERR_INSTALLED
+
+  cnt=`ps -u $SUSER -o args 2> /dev/null | grep -v grep | grep -c "$SEARCH"`
   [ $cnt -ge $SNUM ] && return $OCF_SUCCESS
   return $OCF_NOT_RUNNING
 }
 
 
 #
 # sapdatabase_recover:
 #
 sapdatabase_recover() {
   OCF_RESKEY_AUTOMATIC_RECOVER=1
   sapdatabase_stop
   sapdatabase_start
 }
 
 
 #
 # sapdatabase_validate: Check the symantic of the input parameters 
 #
 sapdatabase_validate() {
   rc=$OCF_SUCCESS
   if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ]
   then
     ocf_log err "Parsing parameter SID: '$SID' is not a valid system ID!"
     rc=$OCF_ERR_ARGS
   fi
 
   case "$DBTYPE" in
    ORA|ADA|DB6|SYB|HDB) ;;
    *) ocf_log err "Parsing parameter DBTYPE: '$DBTYPE' is not a supported database type!"
       rc=$OCF_ERR_ARGS ;;
   esac
 
   return $rc
 }
 
 #
 # sapdatabase_init: initialize global variables at the beginning
 #
 sapdatabase_init() {
 OCF_RESKEY_AUTOMATIC_RECOVER_default=0
 : ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}}
 
 if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ]
 then
   case $DBTYPE in
     ORA) export OCF_RESKEY_MONITOR_SERVICES="Instance|Database|Listener"
          ;;
     ADA) export OCF_RESKEY_MONITOR_SERVICES="Database"
          ;;
     DB6) db2sid="db2`echo $SID | tr '[:upper:]' '[:lower:]'`"
          export OCF_RESKEY_MONITOR_SERVICES="${SID}|${db2sid}"
          ;;
     SYB) export OCF_RESKEY_MONITOR_SERVICES="Server"
          ;;
     HDB) export OCF_RESKEY_MONITOR_SERVICES="hdbindexserver"
          ;;
   esac
 fi
 }
diff --git a/heartbeat/sg_persist b/heartbeat/sg_persist
index 1ce0a64c0..4d518ef0e 100755
--- a/heartbeat/sg_persist
+++ b/heartbeat/sg_persist
@@ -1,673 +1,674 @@
 #!/bin/bash
 #
 #
 #   OCF Resource Agent compliant PERSISTENT SCSI RESERVATION resource script.
 #
 #
 # Copyright (c) 2011 Evgeny Nifontov and lwang@suse.com All Rights Reserved.
 #
 # "Heartbeat drbd OCF Resource Agent: 2007, Lars Marowsky-Bree" was used 
 # as example of multistate OCF Resource Agent.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 #
 # OCF instance parameters
 #    OCF_RESKEY_binary
 #    OCF_RESKEY_devs
 #    OCF_RESKEY_required_devs_nof
 #    OCF_RESKEY_reservation_type
 #    OCF_RESKEY_master_score_base
 #    OCF_RESKEY_master_score_dev_factor
 #    OCF_RESKEY_master_score_delay
 #
 # TODO
 # 
 # 1) PROBLEM: devices which were not accessible during 'start' action, will be never registered/reserved 
 #    TODO:    'Master' and 'Salve' registers new devs in 'monitor' action
 #    TODO:    'Master' reserves new devs in 'monitor' action
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # set default values
     : ${sg_persist_binary="sg_persist"}     # binary name for the resource
     : ${devs=""}                            # device list
     : ${required_devs_nof=1}                # number of required devices
     : ${reservation_type=1}                 # reservation type
     : ${master_score_base=0}                # master score base 
     : ${master_score_dev_factor=100}        # device factor for master score
     : ${master_score_delay=30}              # delay for master score
 
 #######################################################################
 
 
 meta_data() {
     cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="sg_persist">
 <version>1.1</version>
 
 <longdesc lang="en">
 This resource agent manages SCSI PERSISTENT RESERVATIONS.
 "sg_persist" from sg3_utils is used, please see its documentation.
 Should be used as multistate (Master/Slave) resource
 Slave registers its node id ("crm_node -i") as reservation key ( --param-rk ) on each device in the "devs" list.
 Master reservs all devices from "devs" list with reservation "--prout-type" value from "reservation_type" parameter.
 </longdesc>
 <shortdesc lang="en">Manages SCSI PERSISTENT RESERVATIONS</shortdesc>
 
 <parameters>
 <parameter name="binary" unique="0">
 <longdesc lang="en">
 The name of the binary that manages the resource.
 </longdesc>
 <shortdesc>the binary name of the resource</shortdesc>
 <content type="string" default="$sg_persist_binary"/>
 </parameter>
 
 <parameter name="devs" unique="0" required="1">
 <longdesc lang="en">
 Device list. Multiple devices can be listed with blank space as separator. 
 Shell wildcars are allowed.
 </longdesc>
 <shortdesc lang="en">device list</shortdesc>
 <content type="string"/>
 </parameter>
 
 <parameter name="required_devs_nof" unique="0" required="0">
 <longdesc lang="en">
 Minimum number of "working" devices from device list
       1) existing 
       2) "sg_persist --read-keys \$device" works (Return code 0)
 resource actions "start","monitor","promote" and "validate-all" return "\$OCF_ERR_INSTALLED"
 if the actual number of "working" devices is less then "required_devs_nof".
 resource actions "stop" and "demote" tries to remove reservations and registration keys from 
 all working devices, but always return "\$OCF_SUCCESS"
 </longdesc>
 <shortdesc lang="en">minimum number of working devices</shortdesc>
 <content type="string" default="1"/>
 </parameter>
 
 <parameter name="reservation_type" unique="0" required="0">
 <longdesc lang="en">
 reservation type 
 </longdesc>
 <shortdesc lang="en">reservation type</shortdesc>
 <content type="string" default="1" />
 </parameter>
 
 <parameter name="master_score_base" unique="0" required="0">
 <longdesc lang="en">
 master_score_base value
 "master_score_base" value is used in "master_score" calculation:
 master_score = \$master_score_base + \$master_score_dev_factor * \$working_devs  
 if set to bigger value in sg_persist resource configuration on some node, this node will be "preferred" for master role. 
 </longdesc>
 <shortdesc lang="en">base master_score value</shortdesc>
 <content type="string" default="0" />
 </parameter>
 
 <parameter name="master_score_dev_factor" unique="0" required="0">
 <longdesc lang="en">
 Working device factor in master_score calculation
 each "working" device provides additional value to "master_score", 
 so the node that sees more devices will be preferred for the "Master"-role
 Setting it to 0 will disable this behavior. 
 </longdesc>
 <shortdesc lang="en">working device factor in master_score calculation</shortdesc>
 <content type="string" default="100" />
 </parameter>
 
 <parameter name="master_score_delay" unique="0" required="0">
 <longdesc lang="en">
 master/slave decreases/increases its master_score after delay of \$master_score_delay seconds
 so if some device gets inaccessible, the slave decreases its master_score first and the resource will no be watched
 and after this device reappears again the master increases its master_score first
 this can work only if the master_score_delay is bigger then monitor interval on both master and slave
 Setting it to 0 will disable this behavior.
 </longdesc>
 <shortdesc lang="en">master_score decrease/increase delay time</shortdesc>
 <content type="string" default="30" />
 </parameter>
 </parameters>
 
 <actions>
 <action name="start"   timeout="30" />
 <action name="promote"   timeout="30" />
 <action name="demote"   timeout="30" />
 <action name="notify"   timeout="30" />
 <action name="stop"    timeout="30" />
 <action name="monitor" depth="0"  timeout="20" interval="29" role="Slave" />
 <action name="monitor" depth="0"  timeout="20" interval="60" role="Master" />
 <action name="meta-data"  timeout="5" />
 <action name="validate-all"  timeout="30" />
 </actions>
 </resource-agent>
 END
 
     exit $OCF_SUCCESS
 }
 
 sg_persist_init() {
     
     if ! ocf_is_root ; then
         ocf_log err "You must be root to perform this operation."
         exit $OCF_ERR_PERM
     fi 
 
     SG_PERSIST=${OCF_RESKEY_binary:-"$sg_persist_binary"}
     check_binary $SG_PERSIST
     
     ROLE=$OCF_RESKEY_CRM_meta_role
     NOW=$(date +%s)
 
     RESOURCE="${OCF_RESOURCE_INSTANCE}"
-    MASTER_SCORE_VAR_NAME="master-${OCF_RESOURCE_INSTANCE}"
+    MASTER_SCORE_VAR_NAME="master-${OCF_RESOURCE_INSTANCE//:/-}"
     PENDING_VAR_NAME="pending-$MASTER_SCORE_VAR_NAME"
     
     #only works with corocync 
     CRM_NODE="${HA_SBIN_DIR}/crm_node"
     NODE_ID_DEC=$($CRM_NODE -i)
 
-    NODE=$($CRM_NODE -l | $GREP $NODE_ID_DEC)
+    NODE=$($CRM_NODE -l | $GREP -w ^$NODE_ID_DEC)
     NODE=${NODE#$NODE_ID_DEC }
     NODE=${NODE% *}
     
     MASTER_SCORE_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$MASTER_SCORE_VAR_NAME --node=$NODE"
     CRM_MASTER="${HA_SBIN_DIR}/crm_master --lifetime=reboot"
     PENDING_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$PENDING_VAR_NAME --node=$NODE"
 
     NODE_ID_HEX=$(printf '0x%x' $NODE_ID_DEC)
 
     if [ -z "$NODE_ID_HEX" ]; then
         ocf_log err "Couldn't get node id with \"$CRM_NODE\""
         exit $OCF_ERR_INSTALLED
     fi
 
     ocf_log debug "$RESOURCE: NODE:$NODE, ROLE:$ROLE, NODE_ID DEC:$NODE_ID_DEC HEX:$NODE_ID_HEX"
 
     DEVS=${OCF_RESKEY_devs:=$devs}
     REQUIRED_DEVS_NOF=${OCF_RESKEY_required_devs_nof:=$required_devs_nof}
     RESERVATION_TYPE=${OCF_RESKEY_reservation_type:=$reservation_type}
     MASTER_SCORE_BASE=${OCF_RESKEY_master_score_base:=$master_score_base}
     MASTER_SCORE_DEV_FACTOR=${OCF_RESKEY_master_score_dev_factor:=$master_score_dev_factor}
     MASTER_SCORE_DELAY=${OCF_RESKEY_master_score_delay:=$master_score_delay}
 
     ocf_log debug "$RESOURCE: DEVS=$DEVS"
     ocf_log debug "$RESOURCE: REQUIRED_DEVS_NOF=$REQUIRED_DEVS_NOF"
     ocf_log debug "$RESOURCE: RESERVATION_TYPE=$RESERVATION_TYPE"
     ocf_log debug "$RESOURCE: MASTER_SCORE_BASE=$MASTER_SCORE_BASE"
     ocf_log debug "$RESOURCE: MASTER_SCORE_DEV_FACTOR=$MASTER_SCORE_DEV_FACTOR"
     ocf_log debug "$RESOURCE: MASTER_SCORE_DELAY=$MASTER_SCORE_DELAY"
 
     #expand path wildcards
     DEVS=$(echo $DEVS)
 
     if [ -z "$DEVS" ]; then
         ocf_log err "\"devs\" not defined"
         exit $OCF_ERR_INSTALLED
     fi
  
     sg_persist_check_devs
     sg_persist_get_status
 }
 
 sg_persist_action_usage() {
     cat <<END
     usage: $0 {start|stop|monitor|validate-all|promote|demote|notify|meta-data}
 
     Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 sg_persist_get_status() {
 
     unset WORKING_DEVS[*]
     
     for dev in ${EXISTING_DEVS[*]}
     do
         READ_KEYS=`$SG_PERSIST --in --read-keys $dev 2>&1`
+        [ $? -eq 0 ] || continue
+
+        WORKING_DEVS+=($dev)
+
+        echo "$READ_KEYS" | $GREP -qw $NODE_ID_HEX\$
+        [ $? -eq 0 ] || continue
+
+        REGISTERED_DEVS+=($dev)
+
+        READ_RESERVATION=`$SG_PERSIST --in --read-reservation $dev 2>&1`
+        [ $? -eq 0 ] || continue
+
+        echo "$READ_RESERVATION" | $GREP -qw $NODE_ID_HEX\$
         if [ $? -eq 0 ]; then
-            WORKING_DEVS+=($dev)
-            echo $READ_KEYS | $GREP $NODE_ID_HEX >/dev/null
-            if [ $? -eq 0 ]; then 
-                REGISTERED_DEVS+=($dev)
-
-                READ_RESERVATION=`$SG_PERSIST --in --read-reservation $dev 2>&1`
-                if [ $? -eq 0 ]; then
-                    echo $READ_RESERVATION | $GREP $NODE_ID_HEX >/dev/null
-                    if [ $? -eq 0 ]; then 
-                        RESERVED_DEVS+=($dev)
-                    fi
+            RESERVED_DEVS+=($dev)
+        fi
 
-                    reservation_key=`echo $READ_RESERVATION | $GREP -o 'Key=0x[0-9a-f]*' | $GREP -o '0x[0-9a-f]*'`
-                    if [ -n "$reservation_key" ]; then 
-                        DEVS_WITH_RESERVATION+=($dev)
-                        RESERVATION_KEYS+=($reservation_key)
-                    fi
-                fi
-            fi
+        reservation_key=`echo "$READ_RESERVATION" | $GREP -o 'Key=0x[0-9a-f]*' | $GREP -o '0x[0-9a-f]*'`
+        if [ -n "$reservation_key" ]; then
+            DEVS_WITH_RESERVATION+=($dev)
+            RESERVATION_KEYS+=($reservation_key)
         fi
     done
 
     WORKING_DEVS_NOF=${#WORKING_DEVS[*]}
 
     ocf_log debug "$RESOURCE: working devices: `sg_persist_echo_array ${WORKING_DEVS[*]}`"
     ocf_log debug "$RESOURCE: number of working devices: $WORKING_DEVS_NOF"
     
     ocf_log debug "$RESOURCE: registered devices: `sg_persist_echo_array ${REGISTERED_DEVS[*]}`"
     ocf_log debug "$RESOURCE: reserved devices: `sg_persist_echo_array ${RESERVED_DEVS[*]}`"
     ocf_log debug "$RESOURCE: devices with reservation: `sg_persist_echo_array ${DEVS_WITH_RESERVATION[*]}`"
     ocf_log debug "$RESOURCE: reservation keys: `sg_persist_echo_array ${RESERVATION_KEYS[*]}`"
     
     MASTER_SCORE=$(($MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NOF))
     ocf_log debug "$RESOURCE: master_score: $MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NOF = $MASTER_SCORE"
 
 }
 
 sg_persist_check_devs() {
 
     for dev in $DEVS 
     do
         if [ -e "$dev" ]; then
             EXISTING_DEVS+=($dev)
         fi
     done
 
     EXISTING_DEVS_NOF=${#EXISTING_DEVS[*]}
     if [ $EXISTING_DEVS_NOF -lt $REQUIRED_DEVS_NOF ]; then 
         ocf_log err "Number of existing devices=$EXISTING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF"
         exit $OCF_ERR_INSTALLED
     fi
 
 }
 
 sg_persist_is_registered() {
     for registered_dev in ${REGISTERED_DEVS[*]}
     do
         if [ "$registered_dev" == "$1" ]; then
             return 0
         fi
     done
     return 1
 }
 
 sg_persist_get_reservation_key() {
     for array_index in ${!DEVS_WITH_RESERVATION[*]}
     do
         if [ "${DEVS_WITH_RESERVATION[$array_index]}" == "$1" ]; then
             echo ${RESERVATION_KEYS[$array_index]}
             return 0
         fi
     done
     echo ""
 }
 
 sg_persist_echo_array() {
     str_count=0
     arr_str=""
 
     for str in "$@"
     do
         arr_str="$arr_str[$str_count]:$str "    
         str_count=$(($str_count+1))
     done
     echo $arr_str
 }
 
 sg_persist_parse_act_pending() {
     
     ACT_PENDING_TS=0
     ACT_PENDING_SCORE=0
 
     if [ -n "$ACT_PENDING" ]; then
         ACT_PENDING_TS=${ACT_PENDING%%_*}
         ACT_PENDING_SCORE=${ACT_PENDING##*_}
     fi
 }
 
 sg_persist_clear_pending() {
     if [ -n "$ACT_PENDING" ]; then 
         DO_PENDING_UPDATE="YES"
         NEW_PENDING=""
     fi
 }
 
 sg_persist_new_master_score() {
     DO_MASTER_SCORE_UPDATE="YES"
     NEW_MASTER_SCORE=$1
 }
 
 sg_persist_new_pending() {
     DO_PENDING_UPDATE="YES"
     NEW_PENDING=$1
 }
 
 
 # Functions invoked by resource manager actions
 
 sg_persist_action_start() {
 
     ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE
     ocf_run $PENDING_ATTRIBUTE --update=""
 
     if [ $WORKING_DEVS_NOF -lt $REQUIRED_DEVS_NOF ]; then
         ocf_log err "$RESOURCE: Number of working devices=$WORKING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF"
         exit $OCF_ERR_GENERIC
     fi
 
     for dev in ${WORKING_DEVS[*]}
     do
         if sg_persist_is_registered $dev ; then
             : OK
         else
-            ocf_run $SG_PERSIST --out --register --param-rk=0 --param-sark=$NODE_ID_HEX $dev
+            ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=0 --param-sark=$NODE_ID_HEX $dev
             if [ $? -ne $OCF_SUCCESS ]
             then
                 return $OCF_ERR_GENERIC
             fi
         fi
     done
 
     return $OCF_SUCCESS
 }
 
 sg_persist_action_stop() {
 
     if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then
         ocf_log debug "$RESOURCE stop: already no registrations"
     else
         # Clear preference for becoming master
         ocf_run $MASTER_SCORE_ATTRIBUTE --delete
         ocf_run $PENDING_ATTRIBUTE --delete
 
         for dev in ${REGISTERED_DEVS[*]}
         do
-            ocf_run $SG_PERSIST --out --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev
+            ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev
         done
     fi
 
     return $OCF_SUCCESS
 }
 
 sg_persist_action_monitor() {
 
     ACT_MASTER_SCORE=`$MASTER_SCORE_ATTRIBUTE --query --quiet 2>&1`
     ocf_log debug "$RESOURCE monitor: ACT_MASTER_SCORE=$ACT_MASTER_SCORE"
     
     ACT_PENDING=`$PENDING_ATTRIBUTE --query --quiet 2>&1`
     ocf_log debug "$RESOURCE monitor: ACT_PENDING=$ACT_PENDING"
 
     sg_persist_parse_act_pending
     ocf_log debug "$RESOURCE monitor: ACT_PENDING_TS=$ACT_PENDING_TS"
     ocf_log debug "$RESOURCE monitor: ACT_PENDING_VAL=$ACT_PENDING_SCORE"
    
     ocf_log debug "$MASTER_SCORE, $ACT_MASTER_SCORE, $ROLE"
     
     DO_MASTER_SCORE_UPDATE="NO"
     DO_PENDING_UPDATE="NO"
     if [ -n "$ACT_MASTER_SCORE" ] 
     then
         if [ $ACT_MASTER_SCORE -eq $MASTER_SCORE ]; then
             sg_persist_clear_pending
         else
             case $ROLE in
             Master)  
                 if [ $MASTER_SCORE -lt $ACT_MASTER_SCORE ]; then
                     if [ -n "$ACT_PENDING" ] 
                     then
                         if [ $(($NOW-$ACT_PENDING_TS-$MASTER_SCORE_DELAY)) -ge 0 ]; then
                             sg_persist_new_master_score $MASTER_SCORE
                             sg_persist_clear_pending
                         fi
                     else
                         if [ $MASTER_SCORE_DELAY -eq 0 ]; then
                             sg_persist_new_master_score $MASTER_SCORE
                             sg_persist_clear_pending
                         else
                             sg_persist_new_pending "${NOW}_${MASTER_SCORE}"
                         fi
                     fi
                 else
                     sg_persist_new_master_score $MASTER_SCORE
                     sg_persist_clear_pending
                 fi
                 ;;
 
             Slave)
                 if [ $MASTER_SCORE -gt $ACT_MASTER_SCORE ]; then
                     if [ -n "$ACT_PENDING" ]; then 
                         if [ $(($NOW-$ACT_PENDING_TS-$MASTER_SCORE_DELAY)) -ge 0 ]; then
                             sg_persist_new_master_score $MASTER_SCORE
                             sg_persist_clear_pending
                         fi
                     else
                         if [ $MASTER_SCORE_DELAY -eq 0 ]; then
                             sg_persist_new_master_score $MASTER_SCORE
                             sg_persist_clear_pending
                         else
                             sg_persist_new_pending "${NOW}_${MASTER_SCORE}"
                         fi
                     fi
                 else
                     sg_persist_new_master_score $MASTER_SCORE
                     sg_persist_clear_pending
                 fi
                 ;;
 
             *)
                 ;;
 
             esac
         fi
     fi
 
     if [ $DO_MASTER_SCORE_UPDATE == "YES" ]; then
         ocf_run $MASTER_SCORE_ATTRIBUTE --update=$NEW_MASTER_SCORE
     fi
 
     if [ $DO_PENDING_UPDATE == "YES" ]; then
         ocf_run $PENDING_ATTRIBUTE --update=$NEW_PENDING
     fi
 
     if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then
         ocf_log debug "$RESOURCE monitor: no registrations"
         return $OCF_NOT_RUNNING
     fi
 
     if [ ${#RESERVED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then 
         return $OCF_RUNNING_MASTER
     fi
 
     if [ ${#REGISTERED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then 
         if [ $RESERVATION_TYPE -eq 7 ] || [ $RESERVATION_TYPE -eq 8 ]; then
             if [ ${#DEVS_WITH_RESERVATION[*]} -gt 0 ]; then
                 return $OCF_RUNNING_MASTER
              else
                 return $OCF_SUCCESS
             fi
         else
             return $OCF_SUCCESS
         fi
     fi
 
     ocf_log err "$RESOURCE monitor: unexpected state"
     
     return $OCF_ERR_GENERIC
 }
 
 sg_persist_action_promote() {
 
     if [ ${#RESERVED_DEVS[*]} -gt 0 ]; then 
         ocf_log info "$RESOURCE promote: already master"
         return $OCF_SUCCESS
     fi
 
     for dev in ${WORKING_DEVS[*]}
     do
         reservation_key=`sg_persist_get_reservation_key $dev`
         case $RESERVATION_TYPE in
         1|3|5|6)        
             if [ -z "$reservation_key" ]; then
-                ocf_run $SG_PERSIST --out --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
+                ocf_run $SG_PERSIST --out --no-inquiry --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
                 if [ $? -ne $OCF_SUCCESS ]; then
                     return $OCF_ERR_GENERIC
                 fi
             else
-                ocf_run $SG_PERSIST --out --preempt --param-sark=$reservation_key --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
+                ocf_run $SG_PERSIST --out --no-inquiry --preempt --param-sark=$reservation_key --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
                 if [ $? -ne $OCF_SUCCESS ]; then
                     return $OCF_ERR_GENERIC
                 fi
             fi
             ;;
 
         7|8) 
             if [ -z "$reservation_key" ]; then
-                ocf_run $SG_PERSIST --out --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
+                ocf_run $SG_PERSIST --out --no-inquiry --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
                 if [ $? -ne $OCF_SUCCESS ]
                 then
                     return $OCF_ERR_GENERIC
                 fi
             else 
                 ocf_log info "$RESOURCE promote: there already exist an reservation holder, all registrants become reservation holders"
                 return $OCF_SUCCESS
             fi
             ;;
   
         *)
             return $OCF_ERR_ARGS
             ;;
 
         esac
     done
 
     return $OCF_SUCCESS
 }
 
 sg_persist_action_demote() {
     case $RESERVATION_TYPE in
     1|3|5|6)
         if [ ${#RESERVED_DEVS[*]} -eq 0 ]; then 
             ocf_log info "$RESOURCE demote: already slave"
             return $OCF_SUCCESS
         fi
 
         for dev in ${RESERVED_DEVS[*]}
         do
-            ocf_run $SG_PERSIST --out --release --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
+            ocf_run $SG_PERSIST --out --no-inquiry --release --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev
             if [ $? -ne  $OCF_SUCCESS ]; then
                return $OCF_ERR_GENERIC
             fi
         done
         ;;
 
     7|8)  #in case of 7/8, --release won't release the reservation unless unregister the key.    
         if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then 
             ocf_log info "$RESOURCE demote: already slave"
             return $OCF_SUCCESS
         fi
 
         for dev in ${REGISTERED_DEVS[*]}
         do
-            ocf_run $SG_PERSIST --out --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev
+            ocf_run $SG_PERSIST --out --no-inquiry --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev
             if [ $? -ne $OCF_SUCCESS ]; then
                return $OCF_ERR_GENERIC
             fi
         done
         ;;
    
     *)
         return $OCF_ERR_ARGS
         ;;
     esac
 
     return $OCF_SUCCESS
 }
 
 sg_persist_action_notify() {
     local n_type="$OCF_RESKEY_CRM_meta_notify_type"
     local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
     set -- $OCF_RESKEY_CRM_meta_notify_active_resource
     local n_active="$#"
     set -- $OCF_RESKEY_CRM_meta_notify_stop_resource
     local n_stop="$#"
     set -- $OCF_RESKEY_CRM_meta_notify_start_resource
     local n_start="$#"
 
     ocf_log debug "$RESOURCE notify: $n_type for $n_op - counts: active $n_active - starting $n_start - stopping $n_stop"
     
     return $OCF_SUCCESS
 }
 
 sg_persist_action_validate_all () {
 
     if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE"  != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
         ocf_log err "Master options misconfigured."
         exit $OCF_ERR_CONFIGURED
     fi
 
     return $OCF_SUCCESS
 }
 
 if [ $# -ne 1 ]; then
     echo "Incorrect parameter count."
     sg_persist_action_usage
     exit $OCF_ERR_ARGS
 fi
 
 ACTION=$1
 case $ACTION in
     meta-data)    
         meta_data
         ;;
         
     validate-all)    
         sg_persist_init
         sg_persist_action_validate_all
         ;;
 
     start|promote|monitor|stop|demote)
         ocf_log debug "$RESOURCE: starting action \"$ACTION\""
         sg_persist_init
         sg_persist_action_$ACTION
         exit $?
         ;;
         
     notify)    
         sg_persist_action_notify
         exit $?
         ;;
 
     usage|help)    
         sg_persist_action_usage
         exit $OCF_SUCCESS
         ;;
 
     *)  
         sg_persist_action_usage
         exit $OCF_ERR_ARGS
         ;;
 
     esac
diff --git a/heartbeat/shellfuncs.in b/heartbeat/shellfuncs.in
index 7786ec3d1..999162012 100644
--- a/heartbeat/shellfuncs.in
+++ b/heartbeat/shellfuncs.in
@@ -1,96 +1,96 @@
 # Author:       Alan Robertson
 # Support:      linux-ha-dev@lists.tummy.com
 # License:      GNU Lesser General Public License (LGPL)
 #
 #	Set these variables if they're not already set...
 #
 
 # Assign build-time defaults (the @...@ placeholders) only when the variable
 # is unset or empty; ":" is the no-op command that hosts the expansions.
 : ${HA_SBIN_DIR:=@sbindir@}
 : ${HA_NOARCHBIN:=@datadir@/heartbeat}
 : ${OCF_AGENTS:=@OCF_RA_DIR@/heartbeat/}
 
 # Make the heartbeat settings visible to child processes.
 export HA_DIR HA_RCDIR HA_FIFO HA_BIN 
 export HA_DEBUGLOG HA_LOGFILE HA_LOGFACILITY
 export HA_DATEFMT HA_RESOURCEDIR HA_DOCDIR
 export OCF_AGENTS
 
 # Put the heartbeat directories in front of PATH, then tidy the result:
 # drop a leading ":" or ".:" entry and collapse a ":.:" entry to ":".
 # NOTE(review): 's%::%%' deletes both colons of the first "::", which would
 # fuse the two neighbouring entries into one -- confirm this is intended.
 PATH=$HA_BIN:${HA_SBIN_DIR}:${HA_NOARCHBIN}:$PATH
 PATH=`echo $PATH | sed -e 's%::%%' -e 's%:\.:%:%' -e 's%^:%%' -e 's%^\.:%%'`
 export PATH
 
 #	A suitable echo command: a thin wrapper that passes all of its
 #	arguments, individually quoted, to the shell's echo.
 Echo() {
   echo "$@"
 }
 
 # Copy stdin (text) to the FIFO named by $HA_FIFO, framed by a ">>>" line
 # before and a "<<<" line after the copied text.
 # Takes no arguments and produces no result.
 # Notes:
 #	o Using "cat -" rather than "cat" simply for clarity.
 #	o The trailing "| cat -" tries to hold things together as a single
 #	  write (which is probably preferable behaviour in this context).
 #	o The output is appended (">>") to $HA_FIFO.
 ha_clustermsg() {
 	(echo ">>>"; cat -; echo "<<<")	| cat - >> $HA_FIFO
 }
 
 # Print the value of directive $1 from the configuration file $HA_CF.
 #
 # Each line of $HA_CF is normalised by sed (first run of blanks/tabs becomes
 # one space, a leading space is dropped, "#" comments are removed), then
 # lines starting with "$1 " are selected case-insensitively and their first
 # word is stripped, leaving the value.
 # When no value is found, built-in defaults apply:
 #   keepalive -> 2
 #   deadtime  -> 2 * keepalive + 1 (keepalive is looked up recursively)
 # Any other directive yields an empty line.
 # The result is also left in the global variable VALUE.
 ha_parameter() {
   VALUE=`sed -e 's%[ 	][ 	]*% %' -e 's%^ %%' -e 's%#.*%%'   $HA_CF |
   grep -i "^$1 " | sed 's%[^ ]* %%'`
   if
     [ "X$VALUE" = X ]
   then
     
     # Nothing configured: fall back to the built-in default, if any.
     case $1 in
       keepalive)	VALUE=2;;
       deadtime)
 			ka=`ha_parameter keepalive`
 			VALUE=`expr $ka '*' 2 '+' 1`;;
     esac
   fi
   Echo $VALUE
 }
 
 # Report whether the program given as $1 (only its basename is used) is
 # running, printing a one-line status message.
 #
 # Returns:
 #   0  a line of "ps -ao pid,command" output matches the basename
 #   1  no match, but a pid file exists in $HA_VARRUN or /var/run
 #   2  no match, but a lock file exists in $HA_VARLOCK or /var/spool/lock
 # If none of the checks match, nothing is printed and the function falls
 # off the end; an "if" none of whose branches ran yields status 0.
 # NOTE(review): the "grep $base" process carries the basename on its own
 # command line, so it may match itself -- confirm.
 BSD_Status() {
   local base=${1##*/}
   local pid
 
   # Keep only the text before the first space of each matching ps line.
   ret_status=`/bin/ps -ao pid,command | grep $base | sed 's/ .*//'`
 
   if 
     [ "$ret_status" != "" ]
   then
     echo "${base} is running..."
   return 0
   fi
 
   if 
     [ -f $HA_VARRUN/${base}.pid ] 
   then
     echo "${base} dead but pid file exists"
     return 1
   fi
 
   if 
     [ -f /var/run/${base}.pid ] 
   then
     echo "${base} dead but pid file exists"
     return 1
   fi
 
   if 
-    [ -f $HA_VARLOCK/var/lock/subsys/${base}.pid ] 
+    [ -f $HA_VARLOCK/${base}.pid ] 
   then
     echo "${base} dead but lock file exists"
     return 2
   fi
 
   if 
     [ -f /var/spool/lock/${base} ] 
   then
     echo "${base} dead but lock file exists"
     return 2
   fi
 }
 
 # Now get the good stuff
 . @OCF_LIB_DIR@/heartbeat/ocf-shellfuncs
diff --git a/heartbeat/symlink b/heartbeat/symlink
index 1e36a9c74..dbf633efa 100755
--- a/heartbeat/symlink
+++ b/heartbeat/symlink
@@ -1,245 +1,245 @@
 #!/bin/sh
 #
 #
 #   An OCF RA that manages a symlink
 #
 # Copyright (c) 2011 Dominik Klein
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 #######################################################################
 
 # Print the agent's OCF meta-data XML on stdout: the parameters link,
 # target and backup_suffix, and the supported actions with their timeouts.
 meta_data() {
         cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="symlink">
 <version>1.1</version>
 
 <longdesc lang="en">
 This resource agent that manages a symbolic link (symlink).
 
 It is primarily intended to manage configuration files which should be
 enabled or disabled based on where the resource is running, such as
 cron job definitions and the like.
 </longdesc>
 <shortdesc lang="en">Manages a symbolic link</shortdesc>
 <parameters>
 <parameter name="link" required="1">
 <longdesc lang="en">
 Full path of the symbolic link to be managed. This must obviously be
 in a filesystem that supports symbolic links.
 </longdesc>
 <shortdesc lang="en">Full path of the symlink</shortdesc>
 <content type="string"/>
 </parameter>
 <parameter name="target" required="1">
 <longdesc lang="en">
 Full path to the link target (the file or directory which the symlink points to).
 </longdesc>
 <shortdesc lang="en">Full path to the link target</shortdesc>
 <content type="string" />
 </parameter>
 <parameter name="backup_suffix">
 <longdesc lang="en">
 A suffix to append to any files that the resource agent moves out of
 the way because they clash with "link".
 
 If this is unset (the default), then the resource agent will simply
 refuse to create a symlink if it clashes with an existing file.
 </longdesc>
 <shortdesc lang="en">Suffix to append to backup files</shortdesc>
 <content type="string" />
 </parameter>
 </parameters>
 <actions>
 <action name="start"   timeout="15" />
 <action name="stop"    timeout="15" />
 <action name="monitor" depth="0"  timeout="15" interval="60"/>
 <action name="meta-data"  timeout="5" />
 <action name="validate-all"  timeout="10" />
 </actions>
 </resource-agent>
 END
 }
 
 # Determine the state of the managed symlink.
 #
 # Returns $OCF_SUCCESS when the link points to the target and
 # $OCF_NOT_RUNNING when it is absent (or is wrong but a backup suffix is
 # set). Terminates the agent with $OCF_ERR_INSTALLED when the link path
 # holds something else and no backup suffix is set.
 symlink_monitor() {
     # This applies the following logic:
     #
     # * If $OCF_RESKEY_link does not exist, then the resource is
     #   definitely stopped.
     #
     # * If $OCF_RESKEY_link exists and is a symlink that points to
     #   ${OCF_RESKEY_target}, then the resource is definitely started.
     #
     # * If $OCF_RESKEY_link exists, but is anything other than a
     #   symlink to ${OCF_RESKEY_target}, then the status depends on whether
     #   ${OCF_RESKEY_backup_suffix} is set:
     #
     #   - if ${OCF_RESKEY_backup_suffix} is set, then the resource is
     #     simply not running. The existing file will be moved out of
     #     the way, to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix},
     #     when the resource starts.
     #
     #   - if ${OCF_RESKEY_backup_suffix} is not set, then an existing
     #     file ${OCF_RESKEY_link} is an error condition, and the
     #     resource can't start here.
     rc=$OCF_ERR_GENERIC
 
     # Using ls here instead of "test -e", as "test -e" returns false
     # if the file does exist, but is a symlink to a file that doesn't.
     if ! ls "$OCF_RESKEY_link" >/dev/null 2>&1; then
         ocf_log debug "$OCF_RESKEY_link does not exist"
         rc=$OCF_NOT_RUNNING
     elif [ ! -L  "$OCF_RESKEY_link" ]; then
         if [ -z "$OCF_RESKEY_backup_suffix" ]; then
             ocf_exit_reason "$OCF_RESKEY_link exists but is not a symbolic link!"
             exit $OCF_ERR_INSTALLED
         else
             ocf_log debug "$OCF_RESKEY_link exists but is not a symbolic link, will be moved to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} on start"
             rc=$OCF_NOT_RUNNING
         fi
     # The path printed by readlink is matched against the target as an
     # anchored extended regular expression.
     # NOTE(review): regex metacharacters in the target are not escaped -- confirm.
-    elif readlink -f "$OCF_RESKEY_link" | egrep -q "^${OCF_RESKEY_target}$"; then
+    elif readlink -m "$OCF_RESKEY_link" | egrep -q "^${OCF_RESKEY_target}$"; then
         ocf_log debug "$OCF_RESKEY_link exists and is a symbolic link to ${OCF_RESKEY_target}."
         rc=$OCF_SUCCESS
     else
         if [ -z "$OCF_RESKEY_backup_suffix" ]; then
             ocf_exit_reason "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}!"
             exit $OCF_ERR_INSTALLED
         else
             ocf_log debug "$OCF_RESKEY_link does not point to ${OCF_RESKEY_target}, will be moved to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} on start"
             rc=$OCF_NOT_RUNNING
         fi
     fi
     return $rc
 }
 
 # Start the resource: create the symlink unless symlink_monitor already
 # reports success.
 #
 # An existing file at the link path is moved to
 # ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix} when a backup suffix is set;
 # without one the agent exits with $OCF_ERR_GENERIC. A failed move also exits
 # with $OCF_ERR_GENERIC. After "ln -sv" the function returns whatever
 # symlink_monitor reports.
 symlink_start() {
     if ! symlink_monitor; then
         if [ -e "$OCF_RESKEY_link" ]; then
             if [ -z "$OCF_RESKEY_backup_suffix" ]; then
                 # Shouldn't happen, because symlink_monitor should
                 # have errored out. But there is a chance that
                 # something else put that file there after
                 # symlink_monitor ran.
                 ocf_exit_reason "$OCF_RESKEY_link exists and no backup_suffix is set, won't overwrite."
                 exit $OCF_ERR_GENERIC
             else
                 ocf_log debug "Found $OCF_RESKEY_link, moving to ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}"
                 ocf_run mv -v "$OCF_RESKEY_link" "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" \
                     || exit $OCF_ERR_GENERIC
             fi
         fi
         # The result of ln is not checked here; the monitor call that
         # follows decides the return code.
         ocf_run ln -sv "$OCF_RESKEY_target" "$OCF_RESKEY_link"
         symlink_monitor
         return $?
     else
         return $OCF_SUCCESS
     fi
 }
 
 # Stop the resource: remove the symlink when symlink_monitor reports it as
 # started, then restore a backup file if one exists.
 #
 # Returns $OCF_SUCCESS when the link was not started or was removed, and
 # $OCF_ERR_GENERIC when the monitor still succeeds after the removal.
 # Exits with $OCF_ERR_GENERIC when the rm command itself fails.
 symlink_stop() {
     if symlink_monitor; then   
         ocf_run rm -vf "$OCF_RESKEY_link" || exit $OCF_ERR_GENERIC
         if ! symlink_monitor; then
             # With an empty backup_suffix this tests the link path itself.
             if [ -e "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" ]; then
                 ocf_log debug "Found backup ${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}, moving to $OCF_RESKEY_link"
                 # if restoring the backup fails then still return with
                 # $OCF_SUCCESS, but log a warning
                 ocf_run -warn mv "${OCF_RESKEY_link}${OCF_RESKEY_backup_suffix}" "$OCF_RESKEY_link"
             fi
             return $OCF_SUCCESS
         else
             ocf_exit_reason "Removing $OCF_RESKEY_link failed."
             return $OCF_ERR_GENERIC
         fi
     else
         return $OCF_SUCCESS
     fi
 }
 
 # Check the mandatory parameters.
 #
 # Exits with $OCF_ERR_CONFIGURED when OCF_RESKEY_link or OCF_RESKEY_target is
 # empty. A missing target only produces a warning in the log.
 symlink_validate_all() {
     if [ "x${OCF_RESKEY_link}" = "x" ]; then
         ocf_exit_reason "Mandatory parameter link is unset"
         exit $OCF_ERR_CONFIGURED
     fi
     if [ "x${OCF_RESKEY_target}" = "x" ]; then
         ocf_exit_reason "Mandatory parameter target is unset"
         exit $OCF_ERR_CONFIGURED
     fi
 
     # Having a non-existent target is technically not an error, as
     # symlinks are allowed to point to non-existent paths. But it
     # still doesn't hurt to warn people if the target does not exist
     # (but only during non-probes).
     # NOTE(review): no probe check is visible here; the warning below is
     # emitted unconditionally -- confirm.
     if [ ! -e "${OCF_RESKEY_target}" ]; then
         ocf_log warn "${OCF_RESKEY_target} does not exist!"
     fi
 }
 
 # Print a short usage message listing the supported actions on stdout.
 symlink_usage() {
         cat <<EOF
 usage: $0 {start|stop|monitor|validate-all|meta-data}
 Expects to have a fully populated OCF RA-compliant environment set.
 EOF
 }
 
 # Entry point: exactly one argument (the action) is required.
 if [ $# -ne 1 ]; then
         symlink_usage
         exit $OCF_ERR_ARGS
 fi
 
 # meta-data and usage are answered before any parameter validation.
 case $__OCF_ACTION in
 meta-data)
         meta_data
         exit $OCF_SUCCESS
         ;;
 usage)
         symlink_usage
         exit $OCF_SUCCESS
 esac
 
 # Everything except usage and meta-data must pass the validate test
 symlink_validate_all || exit
 
 # Dispatch the remaining actions; validate-all has nothing more to do
 # because the validation above has already run.
 case $__OCF_ACTION in
 start)
         symlink_start
         ;;
 stop)
         symlink_stop
         ;;
 status|monitor)
         symlink_monitor
         ;;
 validate-all)
         ;;
 *)
         symlink_usage
         exit $OCF_ERR_UNIMPLEMENTED
 esac
 # exit code is the exit code (return code) of the last command (shell function)
diff --git a/ldirectord/ldirectord.cf b/ldirectord/ldirectord.cf
index d1e8426a7..9452a5a60 100644
--- a/ldirectord/ldirectord.cf
+++ b/ldirectord/ldirectord.cf
@@ -1,341 +1,343 @@
 #
 # Sample ldirectord configuration file to configure various virtual services.
 #
 # Ldirectord will connect to each real server once per second and request
 # /index.html. If the data returned by the server does not contain the
 # string "Test Page" then the test fails and the real server will be
 # taken out of the available pool. The real server will be added back into
 # the pool once the test succeeds. If all real servers are removed from the
 # pool then localhost:80 is added to the pool as a fallback measure.
 
 # Global Directives
 checktimeout=3
 checkinterval=1
 #fallback=127.0.0.1:80
 #fallback6=[::1]:80
 autoreload=yes
 #logfile="/var/log/ldirectord.log"
 #logfile="local0"
 #emailalert="admin@x.y.z"
 #emailalertfreq=3600
 #emailalertstatus=all
 quiescent=no
 
 # Sample for an http virtual service
 virtual=192.168.6.240:80
+	servicename=Web Site
+	comment=Test load balanced web site
 	real=192.168.6.2:80 gate
 	real=192.168.6.3:80 gate
 	real=192.168.6.6:80 gate
 	fallback=127.0.0.1:80 gate
 	service=http
 	scheduler=rr
 	#persistent=600
 	#netmask=255.255.255.255
 	protocol=tcp
 	checktype=negotiate
 	checkport=80
 	request="index.html"
 	receive="Test Page"
 	virtualhost=www.x.y.z
 
 # Sample configuration for a fwmark based service For an explanation of
 # fwmark see the ipvsadm(8) man page
 #virtual=1
 #	real=192.168.6.2 gate
 #	real=192.168.6.3 gate
 #	real=192.168.6.6 gate
 #	fallback=127.0.0.1:80 gate
 #	service=http
 #	scheduler=rr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=fwm
 #	checktype=negotiate
 #	checkport=80
 #	request="index.html"
 #	receive="Test Page"
 #	virtualhost=x.y.z
 
 # Sample configuration for a service using a range of real servers
 # and a single real server for a virtual service
 #virtual=192.168.6.240:80
 #	real=192.168.6.2->192.168.6.7:80 gate
 #	real=192.168.6.32:80 gate
 #	fallback=127.0.0.1:80 gate
 #	service=http
 #	scheduler=rr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	checkport=80
 #	request="index.html"
 #	receive="Test Page"
 #	virtualhost=x.y.z
 
 #Sample configuration for an https virtual service.
 #Fallback setting overrides global
 #virtual=192.168.6.240:443
 #	real=192.168.16.3:443 masq
 #	real=192.168.16.5:443 masq
 #	fallback=127.0.0.1:443
 #	service=https
 #	scheduler=rr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	checkport=443
 #	request="index.html"
 #	receive="Test Page"
 #	virtualhost=x.y.z
 
 #Sample configuration for an ftp virtual service.
 #Fallback setting overrides global
 #virtual=192.168.6.240:21
 #	real=192.168.16.3:21 masq
 #	real=192.168.16.5:21 masq
 #	fallback=127.0.0.1:21
 #	service=ftp
 #	checkport=21
 #	scheduler=wlc
 #	scheduler=rr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	login="anonymous"
 #	passwd="ldirectord@localhost"
 #	request="welcome.msg"
 #	receive="test"
 
 #Sample configuration for an smtp virtual service.
 #Fallback setting overrides global
 #virtual=192.168.6.240:25
 #	real=192.168.16.3:25 masq
 #	real=192.168.16.5:25 masq
 #	fallback=127.0.0.1:25
 #	service=smtp
 #	scheduler=wlc
 #	protocol=tcp
 #	persistent=600
 #	#netmask=255.255.255.255
 #	checktype=negotiate
 #	checkport=25
 
 #Sample configuration for an submission virtual service.
 #Fallback setting overrides global
 #virtual=192.168.6.240:587
 #	real=192.168.16.3:587 masq
 #	real=192.168.16.5:587 masq
 #	fallback=127.0.0.1:587
 #	service=submission
 #	scheduler=wlc
 #	protocol=tcp
 #	persistent=600
 #	#netmask=255.255.255.255
 #	checktype=negotiate
 #	checkport=587
 
 #Sample configuration for a pop virtual service.
 #Fallback setting overrides global
 #virtual=192.168.6.240:110
 #	real=192.168.16.3:110 masq
 #	real=192.168.16.5:110 masq
 #	fallback=127.0.0.1:110
 #	service=pop
 #	scheduler=wlc
 #	persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	checkport=110
 #	#login="test"
 #	#passwd="test"
 
 ##Sample configuration for an imap virtual service.
 #Fallback setting overrides global
 #virtual=192.168.6.240:143
 #	real=127.0.0.1:143 masq
 #	real=192.168.16.3:143 masq
 #	real=192.168.16.5:143 masq
 #	fallback=127.0.0.1:143
 #	service=imap
 #	scheduler=wlc
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	checkport=143
 #	#login="test"
 #	#passwd="test"
 
 #Sample configuration for an ldap virtual service.
 #Fallback setting overrides global
 #virtual=192.168.84.5:389
 #	real=10.0.1.4:389 masq
 #	real=10.0.1.6:389 masq
 #	fallback=127.0.0.1:389
 #	service=ldap
 #	scheduler=wlc
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	checkport=389
 #	request="dc=upmc, dc=fr"
 #	receive="dc=upmc, dc=fr"
 #	#login="test"
 #	#passwd="test"
 
 #Sample configuration for an nntp virtual service.
 #Fallback setting overrides global
 #virtual=192.168.84.5:119
 #	real=10.0.1.4:119 masq
 #	real=10.0.1.6:119 masq
 #	fallback=127.0.0.1:119
 #	service=nntp
 #	scheduler=wlc
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	checkport=119
 
 #Sample configuration for a UDP DNS virtual service.
 #Fallback setting overrides global
 #virtual=192.168.84.5:53
 #	real=10.0.1.4:53 masq
 #	real=10.0.1.6:53 masq
 #	fallback=127.0.0.1:53
 #	service=dns
 #	scheduler=wlc
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=udp
 #	checktype=negotiate
 #	checkport=53
 #	request="x.y.z"
 #	receive="127.0.0.1"
 
 #Sample configuration for a MySQL virtual service.
 #virtual = 192.168.10.74:3306
 #	real=sql01->sql03:3306 gate 10
 #	fallback=127.0.0.1:3306
 #	service=mysql
 #	scheduler=wrr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	login="readuser"
 #	passwd="genericpassword"
 #	database="portal"
 #	request="SELECT * FROM link"
 
 #Sample configuration for a PostgreSQL virtual service.
 #virtual = 192.168.10.74:5432
 #	real=sql01->sql03:5432 gate 10
 #	fallback=127.0.0.1:5432
 #	service=pgsql
 #	scheduler=wrr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	login="readuser"
 #	passwd="genericpassword"
 #	database="portal"
 #	request="SELECT * FROM link"
 
 #Sample configuration for a Oracle virtual service.
 #virtual = 192.168.10.74:1521
 #	real=sql01->sql03:1521 gate 10
 #	fallback=127.0.0.1:1521
 #	service=oracle
 #	scheduler=wrr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	login="readuser"
 #	passwd="genericpassword"
 #	database="portal"
 #	request="SELECT * FROM link"
 
 #Sample configuration for an unsupported protocol
 #The real servers will just be brought up without checking for availability
 #virtual=192.168.6.240:23
 #	real=192.168.16.3:23 masq
 #	real=192.168.16.5:23 masq
 #	fallback=127.0.0.1:23
 #	service=none
 #	scheduler=wlc
 #	persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=on
 
 # A sample virtual service that uses a ping check.
 # Note that using checktype=connect and protocol=udp
 # will also effect ping checks
 #virtual=192.168.6.240:53
 #	real=192.168.6.2:53 gate
 #	real=192.168.6.3:53 gate
 #	real=192.168.6.6:53 gate
 #	fallback=127.0.0.1:53 gate
 #	scheduler=rr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=udp
 #	checktype=ping
 #	failurecount=3
 
 # A sample virtual service that uses a Radius check on UDP.
 # Note that using checktype=connect and protocol=udp
 # will also effect ping checks
 #virtual=192.168.6.240:1812
 #	real=192.168.6.2:1812 gate
 #	real=192.168.6.3:1812 gate
 #	real=192.168.6.6:1812 gate
 #	fallback=127.0.0.1:1812 gate
 #	scheduler=rr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=udp
 #	checktype=negotiate
 #	service=radius
 #	login="readuser"
 #	passwd="genericpassword"
 #	secret="somesecret"
 #	checktimeout=1
 
 # A sample virtual service that uses a SIP check on UDP.
 # Note that using checktype=connect and protocol=udp
 # will also effect ping checks
 #virtual=192.168.6.240:5060
 #	real=192.168.6.2::5060 gate
 #	real=192.168.6.3::5060 gate
 #	real=192.168.6.6::5060 gate
 #	fallback=127.0.0.1:5060 gate
 #	scheduler=rr
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=udp
 #	checktype=negotiate
 #	service=sip
 #	checktimeout=1
 
 #Sample configuration for an nntp virtual service with IPv6.
 #Fallback setting overrides global
 #virtual6=[2001:db8::5]:119
 #	real6=[2001:db8:0:1::4]:119 masq
 #	real6=[2001:db8:0:1::6]:119 masq
 #	fallback6=[::1]:119
 #	service=nntp
 #	scheduler=wlc
 #	#persistent=600
 #	#netmask=255.255.255.255
 #	protocol=tcp
 #	checktype=negotiate
 #	checkport=119
diff --git a/ldirectord/ldirectord.in b/ldirectord/ldirectord.in
old mode 100644
new mode 100755
index 44b7d6b85..628f1c3e3
--- a/ldirectord/ldirectord.in
+++ b/ldirectord/ldirectord.in
@@ -1,5297 +1,5371 @@
 #!/usr/bin/perl -w
 ######################################################################
 # ldirectord                 http://www.vergenet.net/linux/ldirectord/
 # Linux Director Daemon - run "perldoc ldirectord" for details
 #
 # 1999-2006 (C) Jacob Rief <jacob.rief@tiscover.com>,
 #               Horms <horms@verge.net.au> and others
 #
 # License:      GNU General Public License (GPL)
 #
 # Note: * The original author of this software was Jacob Rief circa 1999
 #       * It was maintained by Jacob Rief and Horms
 #         from November 1999 to July 2003.
 #       * From July 2003 Horms is the maintainer
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License as
 # published by the Free Software Foundation; either version 2 of the
 # License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 # 02111-1307  USA
 #
 ######################################################################
 
 # A Brief history of versions:
 #
 # From oldest to newest
 # 1.1-1.144: ldirectord maintained in CVS HEAD branch
 # 1.145-1.186: ldirectord.in maintained in CVS HEAD BRANCH
 # 1.186-ha-VERSION: ldirectord.in maintained in mercurial
 
 =head1 NAME
 
 ldirectord - Linux Director Daemon
 
 Daemon to monitor remote services and control Linux Virtual Server
 
 
 =head1 SYNOPSIS
 
 B<ldirectord> [B<-d|--debug>] [--] [I<configfile>]
 B<start> | B<stop> | B<restart> | B<try-restart> | B<reload> | B<force-reload> | B<status>
 
 B<ldirectord> [B<-h|-?|--help|-v|--version>]
 
 =head1 DESCRIPTION
 
 B<ldirectord> is a daemon to monitor and administer real servers in a
 cluster of load balanced virtual servers. B<ldirectord> typically is
 started from heartbeat but can also be run from the command line. On
 startup B<ldirectord> reads the file B<@sysconfdir@/ha.d/conf/>I<configuration>.
 After parsing the file, entries for virtual servers are created on the LVS.
 Now at regular intervals the specified real servers are monitored and if
 they are considered alive, added to a list for each virtual server. If a
 real server fails, it is removed from that list. Only one instance of
 B<ldirectord> can be started for each configuration, but more instances of
 B<ldirectord> may be started for different configurations. This helps to
 group clusters of services.  Normally one would put an entry inside
 B<@sysconfdir@/ha.d/haresources>
 
 I<nodename virtual-ip-address ldirectord::configuration>
 
 to start ldirectord from heartbeat.
 
 
 =head1 OPTIONS
 
 I<configuration>:
 This is the name for the configuration as specified in the file
 B<@sysconfdir@/ha.d/conf/>I<configuration>
 
 B<-d|--debug> Don't start as daemon and log verbosely.
 
 B<-h|--help> Print user manual and exit.
 
 B<-v|--version> Print version and exit.
 
 B<start> the daemon for the specified configuration.
 
 B<stop> the daemon for the specified configuration. This is the same as sending
 a TERM signal to the running daemon.
 
 B<restart> the daemon for the specified configuration. The same as stopping and starting.
 
 B<reload> the configuration file. This is only useful for modifications
 inside a virtual server entry. It will have no effect on adding or
 removing a virtual server block. This is the same as sending a HUP signal to
 the running daemon.
 
 B<status> of the running daemon for the specified configuration.
 
 
 =head1 SYNTAX
 
 =head2 Description of how to write configuration files
 
 B<virtual = >I<(ip_address|hostname:portnumber|servicename)|firewall-mark>
 
 Defines a virtual service by IP-address (or hostname) and port (or
 servicename) or firewall-mark.  A firewall-mark is an integer greater than
 zero. The configuration of marking packets is controlled using the C<-m>
 option to B<ipchains>(8).  All real services and flags for a virtual
 service must follow this line immediately and be indented.
 
 B<checktimeout = >I<n>
 
 Timeout in seconds for connect, external, external-perl and ping checks. If the timeout is
 exceeded then the real server is declared dead.
 
 If defined in a virtual server section then the global value is overridden.
 
 If undefined then the value of negotiatetimeout is used.  negotiatetimeout
 is also a global value that may be overridden by a per-virtual setting.
 
 If both checktimeout and negotiatetimeout are unset, the default is used.
 
 Default: 5 seconds
 
 B<negotiatetimeout = >I<n>
 
 Timeout in seconds for negotiate checks.
 
 If defined in a virtual server section then the global value is overridden.
 
 If undefined then the value of checktimeout is used.  checktimeout is
 also a global value that may be overridden by a per-virtual setting.
 
 If both negotiatetimeout and checktimeout are unset, the default is used.
 
 Default: 30 seconds
 
 B<checkinterval = >I<n>
 
 Defines the number of seconds between server checks.
 
 When fork=no this option defines the amount of time ldirectord sleeps
 between running all of the realserver checks in all virtual service pools.
 
 When fork=yes this option defines the amount of time each forked child
 sleeps per virtual service pool after running all realserver checks for
 that pool.
 
 If set in the virtual server section then the global value is overridden,
 but ONLY if using forking mode (B<fork = >I<yes>).
 
 Default: 10 seconds
 
 B<checkcount = >I<n>
 
 This option is deprecated and slated for removal in a future version.
 Please see the 'failurecount' option.
 
 The number of times a check will be attempted before it is considered to
 have failed. Only works with ping checks. Note that the
 checktimeout/negotiatetimeout is additive, so if a connect check is used,
 checkcount is 3 and checktimeout is 2 seconds, then a total of 6 seconds
 worth of timeout will occur before the check fails.
 
 If defined in a virtual server section then the global value is overridden.
 
 Default: 1
 
 B<failurecount = >I<n>
 
 The number of consecutive times a failure will have to be reported by a
 check before the realserver is considered to have failed.  A
 value of 1 will have the realserver considered failed on the first failure.
 A successful check will reset the failure counter to 0.
 
 If defined in a virtual server section then the global value is overridden.
 
 Default: 1
 
 B<autoreload = >B<yes> | B<no>
 
 Defines if B<ldirectord> should continuously check the configuration file
 for modification. If this is set to 'yes' and the configuration file
 changed on disk and its modification time (mtime) is newer than the
 previous version, the configuration is automatically reloaded.
 
 Default: no
 
 B<callback = ">I</path/to/callback>B<">
 
 If this directive is defined, B<ldirectord> automatically calls
 the executable I</path/to/callback> after the configuration
 file has changed on disk. This is useful to update the configuration
 file through B<scp> on the other heartbeated host. The first argument
 to the callback is the name of the configuration.
 
 This directive might also be used to restart B<ldirectord> automatically
 after the configuration file changed on disk. However, if B<autoreload>
 is set to yes, the configuration is reloaded anyway.
 
 B<fallback = >I<ip_address|hostname[:portnumber|servicename]> [B<gate> | B<masq> | B<ipip>]
 
 the server onto which a webservice is redirected if all real
 servers are down. Typically this would be 127.0.0.1 with
 an emergency page.
 
 If defined in a virtual server section then the global value is overridden.
 
 B<fallbackcommand = ">I<path to script>B<">
 
 If this directive is defined, the supplied script is executed whenever all
 real servers for a virtual service are down or when the first real server
 comes up again. In the first case, it is called with "start" as its first
 argument, in the latter with "stop".
 Additional parameters are vserver with vport (vserver:vport) as second param
 and protocol (tcp/udp) as third param to identify the virtual service
 within the fallback script.
 
 If defined in a virtual server section then the global value is overridden.
 
 B<logfile = ">I</path/to/logfile>B<">|syslog_facility
 
 An alternative logfile might be specified with this directive. If the logfile
 does not have a leading '/', it is assumed to be a syslog(3) facility name.
 
 Default: log directly to the file I</var/log/ldirectord.log>.
 
 
 B<emailalert = ">I<emailaddress>[, I<emailaddress>]...B<">
 
 A valid email address for sending alerts about the changed connection status
 to any real server defined in the virtual service.  This option requires
 perl module MailTools to be installed.  Automatically tries to send email
 using any of the built-in methods. See perldoc Mail::Mailer for more info on
 methods.
 
 Multiple addresses may be supplied, comma delimited.
 
 If defined in a virtual server section then the global value is overridden.
 
 
 B<emailalertfrom = >I<emailaddress>
 
 A valid email address to use as the from address of the email alerts.  You
 can use a plain email address or any RFC-compliant string for the From header
 in the body of an email message (such as: "ldirectord Alerts" <alerts@example.com>)
 Do not quote this string unless you want the quotes passed in as part of the
 From header.
 
 Default: unset, take system generated default (probably root@hostname)
 
 
 B<emailalertfreq = >I<n>
 
 Delay in seconds between repeating email alerts while any given real server
 in the virtual service remains inaccessible.  A setting of zero seconds
 will inhibit the repeating alerts. The email timing accuracy of this
 setting is dependent on the number of seconds defined in the checkinterval
 configuration option.
 
 If defined in a virtual server section then the global value is overridden.
 
 Default: 0
 
 
 B<emailalertstatus = >B<all> | B<none> | B<starting> | B<running> | B<stopping> | B<reloading>,...
 
 Comma delimited list of server states in which email alerts should be sent.
 B<all> is a short-hand for
 "B<starting>,B<running>,B<stopping>,B<reloading>".  If B<none> is
 specified, no other option may be specified, otherwise options are ored
 with each other.
 
 If defined in a virtual server section then the global value is overridden.
 
 Default: all
 
 
 B<smtp = >I<ip_address|hostname>
 
 A valid SMTP server address to use for sending email via SMTP.
 
 If defined in a virtual server section then the global value is overridden.
 
 
 B<execute = ">I<configuration>B<">
 
 Use this directive to start an instance of ldirectord for
 the named I<configuration>.
 
 
 B<supervised = >B<yes> | B<no>
 
 If I<yes>, then ldirectord does not go into background mode.
 All log-messages are redirected to stdout instead of a logfile.
 This is useful to run B<ldirectord> supervised from daemontools.
 See http://untroubled.org/rpms/daemontools/ or http://cr.yp.to/daemontools.html
 for details.
 
 Default: I<no>
 
 
 B<fork = >B<yes> | B<no>
 
 If I<yes>, then ldirectord will spawn a child process for every virtual server,
 and run checks against the real servers from them.  This will increase response
 times to changes in real server status in configurations with many virtual
 servers.  This may also use less memory than running many separate instances of
 ldirectord.  Child processes will be automatically restarted if they die.
 
 Default: I<no>
 
 
 B<quiescent = >B<yes> | B<no>
 
 If I<yes>, then when real or failback servers are determined
 to be down, they are not actually removed from the kernel's LVS
 table. Rather, their weight is set to zero which means that no
 new connections will be accepted.
 
 This has the side effect, that if the real server has persistent
 connections, new connections from any existing clients will continue to be
 routed to the real server, until the persistent timeout can expire. See
 L<ipvsadm> for more information on persistent connections.
 
 This side-effect can be avoided by running the following:
 
 echo 1 > /proc/sys/net/ipv4/vs/expire_quiescent_template
 
 If the proc file isn't present this probably means that
 the kernel doesn't have LVS support, LVS support isn't loaded,
 or the kernel is too old to have the proc file. Running
 ipvsadm as root should load LVS into the kernel if it is possible.
 
 If I<no>, then the real or failback servers will be removed
 from the kernel's LVS table. The default is I<no>.
 
 If defined in a virtual server section then the global value is overridden.
 
 Default: I<no>
 
 B<readdquiescent = >B<yes> | B<no>
 
 If I<yes>, then when real or failback servers are determined
 to be down, they are readded to the kernel's LVS table with weight 0 if
 they do not exist in the table. Setting the value to no, allows manually 
 removing the realserver to manually disable all persistent connections.
 
 B<cleanstop = >B<yes> | B<no>
 
 If I<yes>, then when ldirectord exits it will remove all of the virtual
 server pools that it is managing from the kernel's LVS table.
 
 If I<no>, then the virtual server pools it is managing and any real
 or failback servers listed in them at the time ldirectord exits will
 be left as-is.  If you want to be able to stop ldirectord without having
 traffic to your realservers interrupted you will want to set this to I<no>.
 
 If defined in a virtual server section then the global value is overridden.
 
 Default: I<yes>
 
 
 B<maintenancedir = >I<directoryname>
 
 If this option is set ldirectord will look for a special file in the specified
 directory and, if found, force the status of the real server identified by the
 file to down, skipping the normal health check.  This would be useful if you
 wish to force servers down for maintenance without having to modify the actual
 ldirectord configuration file.
 
 For example, given a realserver with IP 172.16.1.2, service on port 4444, and
 a resolvable reverse DNS entry pointing to "realserver2.example.com" ldirectord
 will check for the existence of the following files:
 
 =over
 
 =item 172.16.1.2:4444
 
 =item 172.16.1.2
 
 =item realserver2.example.com:4444
 
 =item realserver2.example.com
 
 =item realserver2:4444
 
 =item realserver2
 
 =back
 
 If any one of those files is found then ldirectord will immediately force the
 status of the server to down as if the check had failed.
 
 Note: Since it checks for the IP/hostname without the port this means you can
 decide to place an entire realserver into maintenance across a large number of
 virtual service pools with a single file (if you were going to reboot the server,
 for instance) or include the port number and put just a particular service into
 maintenance.
 
 This option is not valid in a virtual server section.
 
 Default: disabled
 
 
 =head2 Section virtual
 
 The following commands must follow a B<virtual> entry and must be indented
 with a minimum of 4 spaces or one tab.
 
 B<real => I<ip_address|hostname[-E<gt>ip_address|hostname][:portnumber|servicename]> B<gate> | B<masq> | B<ipip> [I<weight>] [B<">I<request>B<", ">I<receive>B<">]
 
 Defines a real service by IP-address (or hostname) and port (or
 servicename). If the port is omitted then a 0 will be used, this is
 intended primarily for fwmark services where the port for real servers is
 ignored. Optionally a range of IPv4 addresses (or two hostnames) may be
 given, in which case each IPv4 address in the range will be treated as a real
 server using the given port. The second argument defines the forwarding
 method, must be B<gate>, B<ipip> or B<masq>.  The third argument is
 optional and defines the weight for that real server. If omitted then a
 weight of 1 will be used. The last two arguments are also optional. They
 define a request-receive pair to be used to check if a server is alive.
 They override the request-receive pair in the virtual server section. These
 two strings must be quoted. If the request string starts with I<http://...>
 the IP-address and port of the real server is overridden, otherwise the
 IP-address and port of the real server is used.
 
 =head2
 For TCP and UDP (non fwmark) virtual services, unless the forwarding method
 is B<masq> and the IP address of a real server is non-local (not present on
 an interface on the host running ldirectord) then the port of the real
 server will be set to that of its virtual service. That is, port-mapping is
 only available if the real server is another machine and the forwarding
 method is B<masq>.  This is due to the way that the underlying LVS code in
 the kernel functions.
 
 =head2
 More than one of these entries may be inside a virtual section.  The
 checktimeout, negotiatetimeout, checkcount, fallback, emailalert,
 emailalertfreq and quiescent options listed above may also appear inside a
 virtual section, in which case the global setting is overridden.
 
 B<checktype =
 >B<connect> | B<external> | B<external-perl> | B<negotiate> | B<off> | B<on> | B<ping> | B<checktimeout>I<N>
 
 Type of check to perform. Negotiate sends a request and matches a receive
 string. Connect only attempts to make a TCP/IP connection, thus the
 request and receive strings may be omitted.  If checktype is a number then
 negotiate and connect is combined so that after each N connect attempts one
 negotiate attempt is performed. This is useful to check often if a service
 answers and in much longer intervals a negotiating check is done. Ping
 means that ICMP ping will be used to test the availability of real servers.
 Ping is also used as the connect check for UDP services. Off means no
 checking will take place and no real or fallback servers will be activated.
 On means no checking will take place and real servers will always be
 activated. Default is I<negotiate>.
 
 B<service = >B<dns> | B<ftp> | B<http> | B<https> | B<http_proxy> | B<imap> | B<imaps> | B<ldap> | B<mysql> | B<nntp> | B<none> | B<oracle> | B<pgsql> | B<pop> | B<pops> | B<radius> | B<simpletcp> | B<sip> | B<smtp> | B<submission>
 
 The type of service to monitor when using checktype=negotiate. None denotes
 a service that will not be monitored.
 
 simpletcp sends the B<request> string to the server and tests it against
 the B<receive> regexp. The other types of checks connect to the server
 using the specified protocol. Please see the B<request> and B<receive>
 sections for protocol specific information.
 
 Default:
 
 =over 4
 
 =item * Virtual server port is 21: ftp
 
 =item * Virtual server port is 25: smtp
 
 =item * Virtual server port is 53: dns
 
 =item * Virtual server port is 80: http
 
 =item * Virtual server port is 110: pop
 
 =item * Virtual server port is 119: nntp
 
 =item * Virtual server port is 143: imap
 
 =item * Virtual server port is 389: ldap
 
 =item * Virtual server port is 443: https
 
 =item * Virtual server port is 587: submission
 
 =item * Virtual server port is 993: imaps
 
 =item * Virtual server port is 995: pops
 
 =item * Virtual server port is 1521: oracle
 
 =item * Virtual server port is 1812: radius
 
 =item * Virtual server port is 3128: http_proxy
 
 =item * Virtual server port is 3306: mysql
 
 =item * Virtual server port is 5432: pgsql
 
 =item * Virtual server port is 5060: sip
 
 =item * Otherwise: none
 
 =back
 
 
 B<checkcommand = ">I<path to script>B<">
 
 This setting is used if checktype is external or external-perl and is the command to be run
 to check the status of a real server. It should exit with status 0 if
 everything is ok, or non-zero otherwise.
 
 Four parameters are passed to the script:
 
 =over 4
 
 =item * virtual server ip/firewall mark
 
 =item * virtual server port
 
 =item * real server ip
 
 =item * real server port
 
 =back
 
 If the checktype is external-perl then the command is assumed to be a
 Perl script and it is evaluated into an anonymous subroutine which is
 called at check time, avoiding a fork-exec.  The argument signature and
 exit code conventions are identical to checktype external.  That is, an
 external-perl checktype should also work as an external checktype.
 
 Default: /bin/true
 
 B<checkport = >I<n>
 
 Number of port to monitor. Sometimes check port differs from service port.
 
 Default: port specified for each real server
 
 B<request = ">I<uri to requested object>B<">
 
 This object will be requested each checkinterval seconds on each real
 server.  The string must be inside quotes. Note that this string may be
 overridden by an optional per real-server based request-string.
 
 For an HTTP/HTTPS check, this should be a relative URI, while it has to
 be absolute for the 'http_proxy' check type. In the latter case, this
 URI will be requested through the proxy backend that is being checked.
 
 For a DNS check this should be the name of an A record, or the address
 of a PTR record to look up.
 
 For a MySQL, Oracle or PostgreSQL check, this should be an SQL SELECT query.
 The data returned is not checked, only that the
 answer is one or more rows.  This is a required setting.
 
 For a simpletcp check, this string is sent verbatim except any occurrences
 of \n are replaced with a new line character.
 
 B<receive = ">I<regexp to compare>B<">
 
 If the requested result contains this I<regexp to compare>, the real server
 is declared alive. The regexp must be inside quotes. Keep in mind that
 regexps are not plain strings and that you need to escape the special
 characters if they should be treated as literals. Note that this regexp may be
 overridden by an optional per real-server based receive regexp.
 
 For a DNS check this should be any one of the A record's addresses or
 any one of the PTR record's names.
 In case of dynamic DNS answers (different answers on the same question)
 a regex to match multiple addresses or PTR record names could also be defined.
 
 For a MySQL check, the receive setting is not used.
 
 B<httpmethod = GET> | B<HEAD>
 
 Sets the HTTP method which should be used to fetch the URI specified in
 the request-string. GET is the method used by default if the parameter is
 not set. If HEAD is used, the receive-string should be unset.
 
 Default: GET
 
 B<virtualhost = ">I<hostname>B<">
 
 Used when using a negotiate check with HTTP or HTTPS. Sets the host header
 used in the HTTP request.  In the case of HTTPS this generally needs to
 match the common name of the SSL certificate. If not set then the host
 header will be derived from the request url for the real server if present.
 As a last resort the IP address of the real server will be used.
 
 B<login = ">I<username>B<">
 
 For FTP, IMAP, LDAP, MySQL, Oracle, POP and PostgreSQL, the username
 used to log in.
 
-For Radius the username is used for the attribute User-Name.
+For RADIUS the username is used for the attribute User-Name.
 
 For SIP, the username is used as both the to and from address for an
 OPTIONS query.
 
 Default:
 
 =over 4
 
 =item * FTP: Anonymous
 
 =item * MySQL, Oracle, and PostgreSQL: Must be specified in the configuration
 
 =item * SIP: ldirectord\@<hostname>, hostname is derived as per the passwd
 	option below.
 
 =item * Otherwise: empty string, which denotes that
 	authentication will not be attempted.
 
 =back
 
 B<passwd = ">I<password>B<">
 
 Password to use to login to FTP, IMAP, LDAP, MySQL, Oracle, POP, PostgreSQL
 and SIP servers.
 
-For Radius the passwd is used for the attribute User-Password.
+For RADIUS the passwd is used for the attribute User-Password.
 
 Default:
 
 =over 4
 
 =item * FTP: ldirectord\@<hostname>,
 	where hostname is the environment variable HOSTNAME evaluated at
 	run time, or sourced from uname if unset.
 
 =item * Otherwise: empty string.
 	In the case of LDAP, MySQL, Oracle, and PostgreSQL this means
 	that authentication will not be performed.
 
 =back
 
 B<database = ">I<databasename>B<">
 
 Database to use for MySQL, Oracle and PostgreSQL servers, this is the
 database that the query (set by B<request> above) will be performed
 against.  This is a required setting.
 
 B<secret = ">I<radiussecret>B<">
 
-Secret to use for Radius servers, this is the secret used to perform an
+Secret to use for RADIUS servers, this is the secret used to perform an
 Access-Request with the username (set by B<login> above) and passwd (set by
 B<passwd> above).
 
 Default: empty string
 
 B<scheduler => I<scheduler_name>
 
 Scheduler to be used by LVS for loadbalancing.
 For information on the available schedulers please see
 the ipvsadm(8) man page.
 
 Default: "wrr"
 
 B<persistent => I<n>
 
 Number of seconds for persistent client connections.
 
 B<netmask => I<w.x.y.z> | I<prefixlen>
 
 Netmask to be used for granularity of persistent client connections.
 IPv4 netmask should be specified in dotted quad notation.
 IPv6 netmask should be specified as a prefix length between 1 and 128.
 
 B<protocol = tcp> | B<udp> | B<fwm>
 
 Protocol to be used. If the virtual is specified as an IP address and port
 then it must be one of tcp or udp. If a firewall
 mark then the protocol must be fwm.
 
 Default:
 
 =over 4
 
 =item * Virtual is an IP address and port, and the port is not 53: tcp
 
 =item * Virtual is an IP address and port, and the port is 53: udp
 
 =item * Virtual is a firewall mark: fwm
 
 =back
 
 B<monitorfile = ">I</path/to/monitorfile>B<">
 
 File to continuously log the real service checks to for this virtual
 service. This is useful for monitoring when and why real services were down
 or for statistics.
 
 The log format is:
 [timestamp|pid|real_service_id|status|message]
 
 Default: no separate logging of service checks.
 
 B<ops = >B<yes> | B<no>
 
 Specify that a virtual service uses one-packet scheduling. This option
 can be used only for UDP services. If this option is specified, all connections
 are created only to schedule one packet. Option is useful to schedule
 UDP packets from same client port to different real servers.
 
+B<servicename = >I<short name>
+
+A name for this service. This is for the sole purpose of making it easier
+to know which service is affected when e-mail notifications are sent out.
+It will be included in the e-mail subject and body.
+
+B<comment = >I<comment>
+
+Notes about this service to be included in e-mail notifications (for example,
+purpose of the service or relevant administrator to contact).
+
 =head1 IPv6
 
 Directives for IPv6 are virtual6, real6, fallback6.
 IPv6 addresses specified for virtual6, real6, fallback6 and a file
 of maintenance directory should be enclosed by
 brackets ([2001:db8::abcd]:80).
 
 Following checktype and service are supported.
 
 B<checktype: >B<connect> | B<external> | B<external-perl> | B<negotiate> | B<off> | B<on> | B<checktimeout>I<N>
 
 B<service: >B<dns> | B<http> | B<https> | B<nntp> | B<none> | B<simpletcp> | B<sip>
 
 Note: When using a service type with http or https, you need to install perl module perl-Net-INET6Glue.
 
 
 =head1 FILES
 
 B<@sysconfdir@/ha.d/ldirectord.cf>
 
 B</var/log/ldirectord.log>
 
 B</var/run/ldirectord.>I<configuration>B<.pid>
 
 B</etc/services>
 
 =head1 SEE ALSO
 
 L<ipvsadm>, L<heartbeat>
 
 Ldirectord Web Page: http://www.vergenet.net/linux/ldirectord/
 
 
 =head1 AUTHORS
 
 Horms <horms@verge.net.au>
 
 Jacob Rief <jacob.rief@tiscover.com>
 
 =cut
 
 use strict;
 # Set defaults for configuration variables in the "set_defaults" function
 use vars qw(
 	    $VERSION_STR
 	    $AUTOCHECK
 	    $CHECKINTERVAL
 	    $LDIRECTORD
 	    $LDIRLOG
 	    $NEGOTIATETIMEOUT
 	    $DEFAULT_NEGOTIATETIMEOUT
 	    $RUNPID
 	    $CHECKTIMEOUT
 	    $DEFAULT_CHECKTIMEOUT
 	    $CHECKCOUNT
 	    $FAILURECOUNT
 	    $QUIESCENT
 	    $READDQUIESCENT
 	    $FORKING
 	    $EMAILALERT
 	    $EMAILALERTFREQ
 	    $EMAILALERTSTATUS
 	    $EMAILALERTFROM
 	    $SMTP
 	    $CLEANSTOP
 	    $MAINTDIR
 
 	    $CALLBACK
 	    $CFGNAME
 	    $CMD
 	    $CONFIG
 	    $DEBUG
 	    $FALLBACK
 	    $FALLBACK6
 	    $FALLBACKCOMMAND
 	    $SUPERVISED
 	    $IPVSADM
 	    $checksum
 	    $DAEMON_STATUS
 	    $DAEMON_STATUS_STARTING
 	    $DAEMON_STATUS_RUNNING
 	    $DAEMON_STATUS_STOPPING
 	    $DAEMON_STATUS_RELOADING
 	    $DAEMON_STATUS_ALL
 	    $DAEMON_TERM
 	    $DAEMON_HUP
 	    $DAEMON_CHLD
 	    $opt_d
 	    $opt_h
 	    $stattime
 	    %LD_INSTANCE
 	    @OLDVIRTUAL
 	    @REAL
 	    @VIRTUAL
 	    $HOSTNAME
 	    %EMAILSTATUS
 	    %FORK_CHILDREN
 	    $SERVICE_UP
 	    $SERVICE_DOWN
 	    %check_external_perl__funcs
 
 	    $CRLF
 );
 
 # Version banner printed by the --version option and logged at start-up.
 $VERSION_STR = "Linux Director v1.186-ha";
 
 # Bit flags for the daemon's life-cycle state. They are distinct bits so
 # that the emailalertstatus setting can be stored as an OR-ed mask of them
 # (see parse_emailalertstatus).
 $DAEMON_STATUS_STARTING  = 0x1;
 $DAEMON_STATUS_RUNNING   = 0x2;
 $DAEMON_STATUS_STOPPING  = 0x4;
 $DAEMON_STATUS_RELOADING = 0x8;
 $DAEMON_STATUS_ALL       = $DAEMON_STATUS_STARTING |
 			   $DAEMON_STATUS_RUNNING  |
 			   $DAEMON_STATUS_STOPPING |
 			   $DAEMON_STATUS_RELOADING;
 
 # Symbolic values for the state of a service.
 $SERVICE_UP	= 0;
 $SERVICE_DOWN	=1;
 
 # default values
 # $DAEMON_TERM / $DAEMON_HUP are set by the signal handlers and stay
 # undefined until the corresponding signal has been received.
 $DAEMON_TERM      = undef;
 $DAEMON_HUP       = undef;
 # Path of this script; falls back to the configured sbindir location
 # when ld_find_cmd() does not return one.
 $LDIRECTORD       = ld_find_cmd("ldirectord", 1);
 if (! defined $LDIRECTORD) {
 	$LDIRECTORD = "@sbindir@/ldirectord";
 }
 # Prefix of the pid file name; ".<configuration>.pid" is appended to it.
 $RUNPID           = "/var/run/ldirectord";
 
 # Carriage return + line feed.
 $CRLF = "\x0d\x0a";
 
 # Set global configuration default values:
 set_defaults();
 
 use Getopt::Long;
 use Pod::Usage;
 #use English;
 #use Time::HiRes qw( gettimeofday tv_interval );
 use Socket;
 use Socket6 qw(NI_NUMERICHOST NI_NUMERICSERV NI_NAMEREQD getaddrinfo getnameinfo inet_pton inet_ntop);
 # Workaround for warning messages: three "_in6" symbols redefined.
 eval "use Socket6 qw(pack_sockaddr_in6)" unless defined &pack_sockaddr_in6;
 eval "use Socket6 qw(sockaddr_in6)" unless defined &sockaddr_in6;
 eval "use Socket6 qw(unpack_sockaddr_in6)" unless defined &unpack_sockaddr_in6;
 
 use Sys::Hostname;
 use POSIX qw(setsid :sys_wait_h);
 use Sys::Syslog qw(:DEFAULT setlogsock);
 
 # Install a global exit() override at compile time. The wrapper simply
 # forwards to the built-in exit (status 0 when called without arguments).
 BEGIN
 {
 	# wrap exit() to preserve replaceability
 	*CORE::GLOBAL::exit = sub { CORE::exit(@_ ? shift : 0); };
 }
 
 # command line options
 # Keep a copy of the untouched argument list: GetOptions() consumes the
 # options from @ARGV and the copy is needed for the re-exec further down.
 my @OLD_ARGV = @ARGV;
 my $opt_d = '';
 my $opt_h = '';
 my $opt_v = '';
 Getopt::Long::Configure ("bundling", "no_auto_abbrev", "require_order");
 GetOptions("debug|d" => \$opt_d,
 	   "help|h|?" => \$opt_h,
 	   "version|v" => \$opt_v) or usage();
 
 # main code
 # -d switches on debugging at level 3.
 $DEBUG = $opt_d ? 3 : 0;
 
 # --help: hand over to perldoc for this script's POD.
 if ($opt_h) {
 	exec_wrapper("/usr/bin/perldoc -U $LDIRECTORD");
 	# NOTE(review): presumably only reached when the exec did not
 	# replace this process - confirm against exec_wrapper().
 	&ld_exit(127, "Exec failed");
 }
 # --version: print the version banner and licence notice, then exit.
 if ($opt_v) {
 	print("$VERSION_STR\n" .
 	      "1999-2006 Jacob Rief, Horms and others\n" .
 	      "<http://www.vergenet.net/linux/ldirectord/>\n".
 	      "\n" .
 	      "ldirectord comes with ABSOLUTELY NO WARRANTY.\n" .
 	      "This is free software, and you are welcome to redistribute it\n".
 	      "under certain conditions. " .
 		      "See the GNU General Public Licence for details.\n");
 
 	&ld_exit(0, "");
 }
 
 # Locate ipvsadm. In debug mode a file named ipvsadm in the current
 # directory takes precedence over the system locations.
 if ($DEBUG>0 and -f "./ipvsadm") {
 	$IPVSADM="./ipvsadm";
 } else {
 	if (-x "/sbin/ipvsadm") {
 		$IPVSADM="/sbin/ipvsadm";
 	} elsif (-x "/usr/sbin/ipvsadm") {
 		$IPVSADM="/usr/sbin/ipvsadm";
 	} else {
 		die "Can not find ipvsadm";
 	}
 }
 
 # There is a memory leak in perl's socket code when
 # the default IO layer is used. So use "perlio" unless
 # something else has been explicitly set.
 # http://archive.develooper.com/perl5-porters@perl.org/msg85468.html
 unless(defined($ENV{'PERLIO'})) {
 	$ENV{'PERLIO'} = "perlio";
 	# Start over with the original arguments so that the new
 	# PERLIO setting is in the environment of the running perl.
 	exec_wrapper($0, @OLD_ARGV);
 }
 
 # Start-up sequence: initialise, set up and start the services, start
 # the configured child instances, then enter the main loop.
 $DAEMON_STATUS = $DAEMON_STATUS_STARTING;
 ld_init();
 ld_setup();
 ld_start();
 ld_cmd_children("start", %LD_INSTANCE);
 $DAEMON_STATUS = $DAEMON_STATUS_RUNNING;
 ld_main();
 
 # Only reached when ld_main() returns: remove the pid file and exit.
 &ld_rm_file("$RUNPID.$CFGNAME.pid");
 &ld_exit(0, "Reached end of \"main\"");
 
 # functions
 # ld_init
 # One-time initialisation of the daemon:
 #  - installs the signal handlers
 #  - determines $HOSTNAME
 #  - derives $CONFIG, $CFGNAME and $CMD from @ARGV and validates $CMD
 #  - locates and reads the configuration file
 #  - looks for an already running instance via the pid file
 #  - carries out $CMD, exiting on every path except those that
 #    should continue with starting the daemon
 #  - writes the pid file and daemonises unless running supervised or
 #    in debug mode
 # pre: @ARGV holds "[configuration] command"
 # post: returns only when the caller should go on to start the services
 sub ld_init
 {
 	# install signal handlers (this covers TERM)
 	#require Net::LDAP;
 	$SIG{'INT'} = \&ld_handler_term;
 	$SIG{'QUIT'} = \&ld_handler_term;
 	$SIG{'ILL'} = \&ld_handler_term;
 	$SIG{'ABRT'} = \&ld_handler_term;
 	$SIG{'FPE'} = \&ld_handler_term;
 	$SIG{'SEGV'} = \&ld_handler_term;
 	$SIG{'TERM'} = \&ld_handler_term;
 
 	$SIG{'BUS'} = \&ld_handler_term;
 	$SIG{'SYS'} = \&ld_handler_term;
 	$SIG{'XCPU'} = \&ld_handler_term;
 	$SIG{'XFSZ'} = \&ld_handler_term;
 
 	$SIG{'IOT'} = \&ld_handler_term;
 
 
 	# This used to call a signal handler, that logged a message
 	# However, this typically goes to syslog and if syslog
 	# is playing up a loop will occur.
 	$SIG{'PIPE'} = "IGNORE";
 
 	# HUP is actually used
 	$SIG{'HUP'} = \&ld_handler_hup;
 
 	# Reap Children
 	$SIG{'CHLD'} = \&ld_handler_chld;
 
 	# Host name: the HOSTNAME environment variable wins, otherwise
 	# the node name reported by uname is used.
 	if (defined $ENV{HOSTNAME}) {
 		$HOSTNAME = "$ENV{HOSTNAME}";
 	}
 	else {
 		use POSIX "uname";
 		my ($s, $n, $r, $v, $m) = uname;
 		$HOSTNAME = $n;
 	}
 
 	# search for the correct configuration file
 	if ( !defined $ARGV[0] ) {
 		usage();
 	}
 	# Two arguments: "<configuration> <command>"; $CFGNAME is the
 	# last path component of the configuration argument.
 	# One argument: "<command>" with the default configuration.
 	if ( defined $ARGV[0] && defined $ARGV[1] ) {
 		$CONFIG = $ARGV[0];
 		if ($CONFIG =~ /([^\/]+)$/) {
 			$CFGNAME = $1;
 		}
 		$CMD = $ARGV[1];
 	} elsif ( defined $ARGV[0] ) {
 		$CONFIG = "ldirectord.cf";
 		$CFGNAME = "ldirectord";
 		$CMD = $ARGV[0];
 	}
 	if ( $CMD ne "start" and $CMD ne "stop" and $CMD ne "status"
 			and $CMD ne "restart" and $CMD ne "try-restart"
 			and $CMD ne "reload" and $CMD ne "force-reload") {
 		usage();
 	}
 	# Look for the file below the ha.d directories first; only then
 	# is $CONFIG tried as given.
 	if ( -f "@sysconfdir@/ha.d/$CONFIG" ) {
 		$CONFIG = "@sysconfdir@/ha.d/$CONFIG";
 	} elsif ( -f "@sysconfdir@/ha.d/conf/$CONFIG" ) {
 		$CONFIG = "@sysconfdir@/ha.d/conf/$CONFIG";
 	} elsif ( ! -f "$CONFIG" ) {
 		init_error("Config file $CONFIG not found");
 	}
 	read_config();
 	undef @OLDVIRTUAL;
 
 	# Log how the program was invoked.
 	{
 		my $log_str = "Invoking ldirectord invoked as: $0 ";
 		for my $i (@ARGV) {
 			$log_str .= $i . " ";
 		}
 		ld_log($log_str);
 	}
 
 	# $filepid: pid found in the pid file (if any)
 	# $oldpid:  set only when that pid belongs to a live process
 	#           whose command line mentions ldirectord
 	my $oldpid;
 	my $filepid;
 	if (open(FILE, "<$RUNPID.$CFGNAME.pid")) {
 		$_ = <FILE>;
 		chomp;
 		$filepid = $_;
 		close(FILE);
 		# Check to make sure this isn't a stale pid file
 		if (open(FILE, "</proc/$filepid/cmdline")) {
 			$_ = <FILE>;
 			if (/ldirectord/) {
 				$oldpid = $filepid;
 			}
 			close(FILE);
 		}
 	}
 	if (defined $oldpid) {
 		# An instance for this configuration is already running.
 		if ($CMD eq "start") {
 			ld_exit(0, "Exiting from ldirectord $CMD");
 		} elsif ($CMD eq "stop") {
 			# signal 15 is SIGTERM
 			kill 15, $oldpid;
 			ld_exit(0, "Exiting from ldirectord $CMD");
 		} elsif ($CMD eq "restart" or $CMD eq "try-restart") {
 			kill 15, $oldpid;
 			while (-f "$RUNPID.$CFGNAME.pid") {
 				# wait until old pid file is removed
 				sleep 1;
 			}
 			# N.B Fall through
 		} elsif ($CMD eq "reload" or $CMD eq "force-reload") {
 			# signal 1 is SIGHUP
 			kill 1, $oldpid;
 			ld_exit(0, "Exiting from ldirectord $CMD");
 		} else { # status
 			print STDERR "ldirectord for $CONFIG is running with pid: $oldpid\n";
 			ld_cmd_children("status", %LD_INSTANCE);
 			ld_log("ldirectord for $CONFIG is running with pid: $oldpid");
 			ld_log("Exiting from ldirectord $CMD");
 			ld_exit(0, "Exiting from ldirectord $CMD");
 		}
 	} else {
 		# No running instance was found.
 		if ($CMD eq "start" or $CMD eq "restart") {
 			;
 		} elsif ($CMD eq "stop" or $CMD eq "try-restart") {
 			ld_exit(0, "Exiting from ldirectord $CMD");
 		} elsif ($CMD eq "status") {
 			# Exit status 1: a pid file exists but no matching
 			# process was found; 3: no pid file could be read.
 			my $status;
 			if (defined $filepid) {
 				print STDERR "ldirectord stale pid file " .
 					"$RUNPID.$CFGNAME.pid for $CONFIG\n";
 				ld_log("ldirectord stale pid file " .
 					"$RUNPID.$CFGNAME.pid for $CONFIG");
 				$status = 1;
 			} else {
 				$status = 3;
 			}
 			print "ldirectord is stopped for $CONFIG\n";
 			ld_exit($status, "Exiting from ldirectord $CMD");
 		} else {
 			# reload / force-reload without a running instance
 			ld_log("ldirectord is stopped for $CONFIG");
 			ld_exit(1, "Exiting from ldirectord $CMD");
 		}
 	}
 
 	# Run as daemon
 	if ($SUPERVISED eq "yes" || $opt_d) {
 		&ld_log("Starting $VERSION_STR with pid: $$");
 	} else {
 		&ld_log("Starting $VERSION_STR as daemon");
 		# The pid file is opened before ld_daemon() is called but
 		# written afterwards, so the recorded pid is the value of
 		# $$ after ld_daemon() has returned.
 		open(FILE, ">$RUNPID.$CFGNAME.pid") ||
 			init_error("Can not open $RUNPID.$CFGNAME.pid");
 		&ld_daemon();
 		print FILE "$$\n";
 		close(FILE);
 	}
 }
 
 # usage
 # Show the usage text taken from the POD of the script named by
 # $LDIRECTORD, using pod2usage() with an exit value of -1.
 sub usage
 {
 	my %pod_opts = (
 		-input   => $LDIRECTORD,
 		-exitval => -1,
 	);
 	pod2usage(%pod_opts);
 }
 
 # init_error
 # Report a fatal initialisation problem and exit with status 1.
 # The message is logged; it is also written to STDERR unless the
 # debug option (-d) was given.
 # pre: msg: text to report, a trailing newline is removed
 sub init_error
 {
 	my ($text) = @_;
 
 	chomp $text;
 	&ld_log("$text");
 	print STDERR "$text\n" if (!$opt_d);
 	ld_exit(1, "Initialisation Error");
 }
 
 # ld_handler_term
 # Signal handler for the terminating signals. The first signal is only
 # recorded in $DAEMON_TERM and the daemon status is switched to
 # stopping; the actual shutdown is carried out by ld_process_term().
 # If a terminating signal has already been recorded, the handler dies.
 # pre: signal: name of the signal that was received
 sub ld_handler_term
 {
 	my $signame = shift;
 
 	if (defined $DAEMON_TERM) {
 		# Second termination request: disable further handling
 		# and bail out straight away.
 		$SIG{'__DIE__'} = "IGNORE";
 		$SIG{$signame} = "IGNORE";
 		die("Exit Handler Repeatedly Called\n");
 	}
 
 	$DAEMON_TERM = $signame;
 	$DAEMON_STATUS = $DAEMON_STATUS_STOPPING;
 }
 
 # ld_process_term
 # Shut the daemon down after a terminating signal was recorded in
 # $DAEMON_TERM: stop the child instances and the services, log the
 # reason, remove the pid file and exit with status 0.
 sub ld_process_term
 {
 	$DAEMON_STATUS = $DAEMON_STATUS_STOPPING;
 	ld_cmd_children("stop", %LD_INSTANCE);
 	ld_stop();
 
 	my $notice = "Linux Director Daemon terminated on signal: $DAEMON_TERM";
 	&ld_log($notice);
 	&ld_rm_file("$RUNPID.$CFGNAME.pid");
 	&ld_exit(0, $notice);
 }
 
 # ld_handler_hup
 # Signal handler for HUP: only flags the request in $DAEMON_HUP.
 # The reload itself is done by ld_process_hup().
 sub ld_handler_hup
 {
 	$DAEMON_HUP = 1;
 }
 
 # ld_process_hup
 # Act on a pending HUP: log it, clear the $DAEMON_HUP flag and
 # re-read the configuration.
 sub ld_process_hup
 {
 	&ld_log("Reloading Linux Director Daemon config on signal");
 	undef $DAEMON_HUP;
 	&reread_config();
 }
 
 # ld_handler_chld
 # Signal handler for CHLD: only flags in $DAEMON_CHLD that children
 # are waiting to be reaped. Reaping is left to ld_process_chld().
 sub ld_handler_chld
 {
 	# NOTE: calling waitpid here would mess up $?
 	$DAEMON_CHLD = 1;
 }
 
 # ld_process_chld
 # Clear the $DAEMON_CHLD flag and reap every child that has exited,
 # without blocking. A line "child: <n>" is printed per reaped child,
 # counting from 0.
 sub ld_process_chld
 {
 	$DAEMON_CHLD = undef;
 
 	for (my $reaped = 0; waitpid(-1, WNOHANG) > 0; $reaped++) {
 		print "child: $reaped\n";
 	}
 }
 
 # check_signal
 # Run the deferred work for every signal flagged by the handlers.
 # The order is fixed: termination, then reload, then child reaping.
 sub check_signal
 {
 	ld_process_term() if (defined $DAEMON_TERM);
 	ld_process_hup()  if (defined $DAEMON_HUP);
 	ld_process_chld() if (defined $DAEMON_CHLD);
 }
 
 # reread_config
 # Re-read the configuration file while the daemon is running.
 # The previous virtual services and child instances are remembered.
 # After the new configuration has been read, child instances are
 # stopped, reloaded or started depending on whether they appear in the
 # old and/or new configuration, forked check children are sent a TERM
 # signal and the services are set up and started again.
 # If anything inside the eval dies, @VIRTUAL and %LD_INSTANCE are put
 # back to their previous contents.
 sub reread_config
 {
 	# Keep the old state so that it can be restored on failure.
 	@OLDVIRTUAL = @VIRTUAL;
 	@VIRTUAL = ();
 	my %OLD_INSTANCE = %LD_INSTANCE;
 	my %RELOAD;
 	my %STOP;
 	my %START;
 	my $child;
 	$DAEMON_STATUS = $DAEMON_STATUS_RELOADING;
 	eval {
 		&read_config();
 
 		# Instances in the new configuration: already known ones
 		# are reloaded, new ones are started.
 		foreach $child (keys %LD_INSTANCE) {
 			if (defined $OLD_INSTANCE{$child}) {
 				$RELOAD{$child} = 1;
 			}
 			else {
 				$START{$child} = 1;
 			}
 		}
 
 		# Instances that are no longer configured are stopped.
 		foreach $child (keys %OLD_INSTANCE) {
 			if (not defined $LD_INSTANCE{$child}) {
 				$STOP{$child} = 1;
 			}
 		}
 
 		&ld_cmd_children("stop", %STOP);
 		&ld_cmd_children("reload_or_start", %RELOAD);
 		&ld_cmd_children("start", %START);
 
 		# signal 15 is SIGTERM
 		foreach my $vid (keys %FORK_CHILDREN) {
 			&ld_log("Killing child $vid (PID=$FORK_CHILDREN{$vid})");
 			kill 15, $FORK_CHILDREN{$vid};
 		}
 
 		&ld_setup();
 		&ld_start();
 	};
 	if ($@) {
 		# Something died above: go back to the old configuration.
 		@VIRTUAL = @OLDVIRTUAL;
 		%LD_INSTANCE = %OLD_INSTANCE;
 	}
 	$DAEMON_STATUS = $DAEMON_STATUS_RUNNING;
 	undef @OLDVIRTUAL;
 }
 
 # parse_emailalertstatus
 # Translate the value of an emailalertstatus directive into a bit mask
 # of $DAEMON_STATUS_* flags.
 # pre: line: line number of the directive, handed to config_error()
 #      arg: comma separated list of all, none, starting, running,
 #           stopping and reloading
 # return: OR-ed mask of the listed states; 0 if only "none" was listed
 # Unknown words, and any word given together with "none", are reported
 # through config_error().
 sub parse_emailalertstatus
 {
 	my ($line, $arg) = @_;
 
 	my %flag_for = (
 		"starting"  => $DAEMON_STATUS_STARTING,
 		"stopping"  => $DAEMON_STATUS_STOPPING,
 		"running"   => $DAEMON_STATUS_RUNNING,
 		"reloading" => $DAEMON_STATUS_RELOADING,
 	);
 
 	my @words = split(/\s*,\s*/, $arg);
 	# Number of times "none" appears in the list
 	my $none = grep { $_ eq "none" } @words;
 	my $status = 0;
 
 	foreach my $i (@words) {
 		next if ($i eq "none");
 
 		if ($i eq "all") {
 			$status = $DAEMON_STATUS_ALL;
 		} elsif (exists $flag_for{$i}) {
 			$status |= $flag_for{$i};
 		} else {
 			&config_error($line,
 				      "invalid email alert status at: \"$i\"");
 		}
 
 		# "none" must stand alone
 		if ($none > 0) {
 			&config_error($line, "invalid email alert status: " .
 				      "\"$i\" specified with \"none\"");
 		}
 	}
 
 	return $status;
 }
 
 # set_defaults
 # Reset every global configuration variable to its built-in default.
 # Called once at start-up and again each time the configuration file
 # is about to be parsed.
 sub set_defaults
 {
 	# Health checking
 	$AUTOCHECK                = "no";
 	$CHECKCOUNT               = 1;
 	$FAILURECOUNT             = 1;
 	$CHECKINTERVAL            = 10;
 	$CHECKTIMEOUT             = -1;
 	$NEGOTIATETIMEOUT         = -1;
 	$DEFAULT_CHECKTIMEOUT     = 5;
 	$DEFAULT_NEGOTIATETIMEOUT = 30;
 
 	# Daemon behaviour
 	$CLEANSTOP      = "yes";
 	$FORKING        = "no";
 	$QUIESCENT      = "no";
 	$READDQUIESCENT = "no";
 	$SUPERVISED     = "no";
 	$LDIRLOG        = "/var/log/ldirectord.log";
 
 	# Optional settings that are unset by default
 	$CALLBACK        = undef;
 	$FALLBACK        = undef;
 	$FALLBACK6       = undef;
 	$FALLBACKCOMMAND = undef;
 	$MAINTDIR        = undef;
 
 	# Email alerts
 	$EMAILALERT       = "";
 	$EMAILALERTFREQ   = 0;
 	$EMAILALERTSTATUS = $DAEMON_STATUS_ALL;
 	$EMAILALERTFROM   = undef;
 	$SMTP             = undef;
 }
 
 # read_emailalert
 # Clean up the value of an emailalert directive.
 # pre: line: line number of the directive, handed to config_error()
 #      addr: the value as written in the configuration file
 # return: the value with one pair of enclosing double quotes removed
 # An empty value is reported through config_error().
 sub read_emailalert
 {
 	my ($line, $addr) = @_;
 
 	# Strip off enclosing quotes
 	$addr =~ s/^\"([^"]*)\"$/$1/;
 
 	unless ($addr =~ /(.+)/) {
 		&config_error($line, "no email address specified");
 	}
 
 	return $addr;
 }
 
 sub read_config
 {
 	undef @VIRTUAL;
 	undef @REAL;
 	undef $CALLBACK;
 	undef %LD_INSTANCE;
 	undef $checksum;
 	# Reset/set global config variables to defaults before parsing the config file.
 	set_defaults();
 	$stattime = 0;
 	my %virtual_seen;
 	open(CFGFILE, "<$CONFIG") or
 		&config_error(0, "can not open file $CONFIG");
 	my $line = 0;
 	my $linedata;
 	while(<CFGFILE>) {
 		$line++;
 		$linedata = $_;
 		outer_loop:
 		if ($linedata =~ /^virtual(6)?\s*=\s*(.*)/) {
 			my $af = defined($1) ? AF_INET6 : AF_INET;
 			my $vattr = $2;
 			my $ip_port = undef;
 			my $fwm = undef;
 			my $virtual_id;
 			my $virtual_line = $line;
 			my $virtual_port;
 			my $fallback_line;
 			my @rsrv_todo;
 			if ($vattr =~ /^(\d+\.\d+\.\d+\.\d+):([0-9A-Za-z-_]+)/ && $af == AF_INET) {
 				$ip_port = "$1:$2";
 				$virtual_port = $2;
 			} elsif ($vattr =~ /^([0-9A-Za-z._+-]+):([0-9A-Za-z-_]+)/) {
 				$ip_port = "$1:$2";
 				$virtual_port = $2;
 			} elsif ($vattr =~ /^(\d+)/){
 				$fwm = $1;
 			} elsif ($vattr =~ /^\[([0-9A-Fa-f:]+)\]:([0-9A-Za-z-_]+)/ && $af == AF_INET) {
 				&config_error($line, "cannot specify an IPv6 address here. please use \"virtual6\" instead.");
 			} elsif ($vattr =~ /^\[([0-9A-Fa-f:]+)\]:([0-9A-Za-z-_]+)/ && $af == AF_INET6) {
 				my $v6addr = $1;
 				my $v6port = $2;
 				if (!inet_pton(AF_INET6,$v6addr)) {
 					&config_error($line,"invalid ipv6 address for virtual server");
 				}
 				$ip_port = "[$v6addr]:$v6port";
 				$virtual_port = $v6port;
 			} else {
 				&config_error($line,
 					"invalid address for virtual server");
 			}
 
 			my (%vsrv, @rsrv);
 			if ($ip_port) {
 				$vsrv{checktype} = "negotiate";
 				$vsrv{protocol} = "tcp";
 				if ($ip_port =~ /:(53|domain)$/) {
 					$vsrv{protocol} = "udp";
 				}
 				$vsrv{port} = $virtual_port;
 			} else {
 				$vsrv{fwm} = $fwm;
 				$vsrv{checktype} = "negotiate";
 				$vsrv{protocol} = "fwm";
 				$vsrv{service} = "none";
 				$vsrv{port} = "0";
 			}
 			$vsrv{addressfamily} = $af;
 			$vsrv{real} = \@rsrv;
 			$vsrv{scheduler} = "wrr";
 			$vsrv{checkcommand} = "/bin/true";
 			$vsrv{request} = "/";
 			$vsrv{receive} = "";
 			$vsrv{login} = "";
 			$vsrv{passwd} = "";
 			$vsrv{database} = "";
 			$vsrv{checktimeout} = -1;
 			$vsrv{checkcount} = -1;
 			$vsrv{negotiatetimeout} = -1;
 			$vsrv{failurecount} = -1;
 			$vsrv{num_connects} = 0;
 			$vsrv{httpmethod} = "GET";
 			$vsrv{secret} = "";
 			$vsrv{ops} = "no";
 			push(@VIRTUAL, \%vsrv);
 			while(<CFGFILE>) {
 				$line++;
 				$linedata=$_;
 				if(m/^\s*#/) {
 					next;
 				}
 				s/#.*//;
 				s/\t/    /g;
 				unless (/^ {4,}(.+)/) {
 					last;
 				}
 				my $rcmd = $1;
 				if ($rcmd =~ /^(real(6)?)\s*=\s*(.*)/) {
 					if ($af == AF_INET  &&   defined($2) ||
 					    $af == AF_INET6 && ! defined($2)) {
 					    &config_error($line, join("", ("cannot specify \"$1\" here.  please use \"real", ($af == AF_INET) ?  "" : "6", "\" instead")));
 					}
 					push @rsrv_todo, [$3, $line];
 				} elsif ($rcmd =~ /^request\s*=\s*\"(.*)\"/) {
 					$1 =~ /(.+)/ or &config_error($line, "no request string specified");
 					$vsrv{request} = $1;
 					unless($vsrv{request}=~/^\//){
 						$vsrv{request} = "/" . $vsrv{request};
 					}
 
 				} elsif ($rcmd =~ /^receive\s*=\s*\"(.*)\"/) {
 					$1 =~ /(.+)/ or &config_error($line, "invalid receive string");
 					$vsrv{receive} = $1;
 				} elsif ($rcmd =~ /^checktype\s*=\s*(.*)/){
 					if ($1 =~ /(\d+)/ && $1>=0) {
 						$vsrv{num_connects} = $1;
 						$vsrv{checktype} = "combined";
 					} elsif ( $1 =~ /([\w-]+)/ && ($1 eq "connect" || $1 eq "negotiate" || $1 eq "ping" || $1 eq "off" || $1 eq "on" || $1 eq "external" || $1 eq "external-perl") ) {
 						$vsrv{checktype} = $1;
 					} else {
 						&config_error($line, "checktype must be \"connect\", \"negotiate\", \"on\", \"off\", \"ping\", \"external\", \"external-perl\" or a positive number");
 					}
 				} elsif ($rcmd =~ /^checkcommand\s*=\s*\"(.*)\"/ or $rcmd =~ /^checkcommand\s*=\s*(.*)/){
 					$1 =~ /(.+)/ or &config_error($line, "invalid check command");
 					$vsrv{checkcommand} = $1;
 				} elsif ($rcmd =~ /^checktimeout\s*=\s*(.*)/){
 					$1 =~ /(\d+)/ && $1 or &config_error($line, "invalid check timeout");
 					$vsrv{checktimeout} = $1;
 				} elsif ($rcmd =~ /^connecttimeout\s*=\s*(.*)/){
 					&config_error($line,
 						"connecttimeout directive " .
 						"deprecated in favour of " .
 						"negotiatetimeout");
 				} elsif ($rcmd =~ /^negotiatetimeout\s*=\s*(.*)/){
 					$1 =~ /(\d+)/ && $1 or &config_error($line, "invalid negotiate timeout");
 					$vsrv{negotiatetimeout} = $1;
 				} elsif ($rcmd =~ /^checkcount\s*=\s*(.*)/){
 					$1 =~ /(\d+)/ && $1 or &config_error($line, "invalid check count");
 					$vsrv{checkcount} = $1;
 					&config_warn($line, "checkcount option is deprecated and slated for removal.  please see 'failurecount'");
 				} elsif ($rcmd =~ /^failurecount\s*=\s*(.*)/){
 					$1 =~ /(\d+)/ && $1 or &config_error($line, "invalid failure count");
 					$vsrv{failurecount} = $1;
 				} elsif ($rcmd =~ /^checkinterval\s*=\s*(.*)/){
 					$1 =~ /(\d+)/ && $1 or &config_error($line, "invalid checkinterval");
 					$vsrv{checkinterval} = $1
 				} elsif ($rcmd =~ /^checkport\s*=\s*(.*)/){
 					$1 =~ /(\d+)/ or &config_error($line, "invalid port");
 					( $1 > 0 && $1 < 65536 ) or &config_error($line, "checkport must be in range 1..65536");
 					$vsrv{checkport} = $1;
 				} elsif ($rcmd =~ /^login\s*=\s*\"(.*)\"/) {
 					$1 =~ /(.+)/ or &config_error($line, "invalid login string");
 					$vsrv{login} = $1;
 				} elsif ($rcmd =~ /^passwd\s*=\s*\"(.*)\"/) {
 					$1 =~ /(.+)/ or &config_error($line, "invalid password");
 					$vsrv{passwd} = $1;
 				} elsif ($rcmd =~ /^database\s*=\s*\"(.*)\"/) {
 					$1 =~ /(.+)/ or &config_error($line, "invalid database");
 					$vsrv{database} = $1;
 				} elsif ($rcmd =~ /^secret\s*=\s*\"(.*)\"/) {
 					$1 =~ /(.+)/ or &config_error($line, "invalid secret");
 					$vsrv{secret} = $1;
 				} elsif ($rcmd =~ /^load\s*=\s*\"(.*)\"/) {
 					$1 =~ /(\w+)/ or &config_error($line, "invalid string for load testing");
 					$vsrv{load} = $1;
 				} elsif ($rcmd =~ /^scheduler\s*=\s*(.*)/) {
 					# Intentionally omit checking the
 					# scheduler against a list of known
 					# schedulers. This is because from
 					# time to time new schedulers are
 					# added. But ldirectord is
 					# maintained and distributed
 					# independently of this. Thus
 					# ldirectord would need to be manually
 					# updated/upgraded.  So just accept
 					# any scheduler that matches
 					# [a-z]+. I.e. is syntactically
 					# correct (all schedulers so far
 					# match that pattern). Ipvsadm will
 					# report an error if a scheduler
 					# isn't available / doesn't exist.
 					$1 =~ /([a-z]+)/
 					    or &config_error($line, "invalid scheduler, should be only lowercase letters (a-z)");
 					$vsrv{scheduler} = $1;
 				} elsif ($rcmd =~ /^persistent\s*=\s*(.*)/) {
 					$1 =~ /(\d+)/ or &config_error($line, "invalid persistent timeout");
 					$vsrv{persistent} = $1;
 				} elsif ($rcmd =~ /^netmask\s*=\s*(.*)/) {
 					my $val = $1;
 					if ($vsrv{addressfamily} == AF_INET6) {
 						if ($val !~ /^\d+$/ or ($val < 1 || $val > 128)) {
 							&config_error($line, "invalid netmask: a prefix length between 1 and 128 is required");
 						}
 					} else {
 						if ($val !~ /^\d+\.\d+\.\d+\.\d+$/) {
 							&config_error($line, "invalid netmask: dotted quad notation is required");
 						}
 					}
 					$vsrv{netmask} = $val;
 				} elsif ($rcmd =~ /^protocol\s*=\s*(.*)/) {
 					if ( $1 =~ /(\w+)/ ) {
 						if ( $vsrv{protocol} eq "fwm" ) {
 							if ($1 eq "fwm" ) {
 								; #Do nothing, it is already set
 							} else {
 								&config_error($line, "protocol must be fwm if the virtual service is a fwmark (a number)");
 							}
 						} else {    # tcp or udp
 							if ($1 eq "tcp" || $1 eq "udp") {
 								$vsrv{protocol} = $1;
 							} else {
 								&config_error($line, "protocol must be tcp or udp if the virtual service is an address and port");
 							}
 						}
 					} else {
 						&config_error($line, "invalid protocol");
 					}
 				} elsif ($rcmd  =~ /^ops\s*=\s*(.*)/) {
 					if ($1 eq "yes" || $1 eq "no") {
 						$vsrv{ops} = $1;
 					} else {
 						&config_error($line, "ops must be 'yes' or 'no'");
 					}
 				} elsif ($rcmd =~ /^service\s*=\s*(.*)/) {
 					$1 =~ /(\w+)/ && ($1 eq "dns"	||
 							  $1 eq "ftp"	||
 							  $1 eq "http"	||
 							  $1 eq "https"	||
 							  $1 eq "http_proxy"	||
 							  $1 eq "imap"	||
 							  $1 eq "imaps"	||
 							  $1 eq "ldap"	||
 							  $1 eq "nntp"	||
 							  $1 eq "mysql"	||
 							  $1 eq "none"	||
 							  $1 eq "oracle"||
 							  $1 eq "pop"	||
 							  $1 eq "pops"	||
 							  $1 eq "radius"||
 							  $1 eq "pgsql"	||
 							  $1 eq "sip"	||
 							  $1 eq "smtp"	||
 							  $1 eq "submission"	||
 							  $1 eq "simpletcp")
 					    or &config_error($line,
 							     "service must " .
 							     "be dns, ftp, " .
 							     "http, https, " .
 							     "http_proxy, " .
 							     "imap, imaps, " .
 							     "ldap, nntp, "  .
 							     "mysql, none, " .
 							     "oracle, "      .
 							     "pop, pops, "   .
 							     "radius, "      .
 							     "pgsql, "       .
 							     "simpletcp, "   .
 							     "sip, smtp "    .
 							     "or submission");
 					$vsrv{service} = $1;
 					if($vsrv{service} eq "ftp" and
 							$vsrv{login} eq "") {
 						$vsrv{login} = "anonymous";
 					}
 					elsif($vsrv{service} eq "sip" and
 							$vsrv{login} eq "") {
 						$vsrv{login} = "ldirectord\@$HOSTNAME";
 					}
 					if($vsrv{service} eq "ftp" and
 							$vsrv{passwd} eq "") {
 						$vsrv{passwd} = "ldirectord\@$HOSTNAME";
 					}
 				} elsif ($rcmd =~ /^httpmethod\s*=\s*(.*)/) {
 					$1 =~ /(\w+)/ && (uc($1) eq "GET" || uc($1) eq "HEAD")
 					    or &config_error($line, "httpmethod must be GET or HEAD");
 					$vsrv{httpmethod} = uc($1);
 				} elsif ($rcmd =~ /^virtualhost\s*=\s*(.*)/) {
 					$1 =~ /\"?([^\"]*)\"?/ or
 					&config_error($line, "invalid virtualhost");
 					$vsrv{virtualhost} = $1;
 				} elsif ($rcmd =~ /^(fallback(6)?)\s*=\s*(.*)/) {    # Allow specification of a virtual-specific fallback host
 					if ($af == AF_INET  &&   defined($2) ||
 					    $af == AF_INET6 && ! defined($2)) {
 					    &config_error($line, join("", ("cannot specify \"$1\" here.  please use \"fallback", ($af == AF_INET) ?  "" : "6", "\" instead")));
 					}
 					$fallback_line=$line;
 					$vsrv{fallback} =
 						parse_fallback($line, $3,
 							       \%vsrv);
 				} elsif ($rcmd =~
 				/^fallbackcommand\s*=\s*\"(.*)\"/ or $rcmd =~ /^fallbackcommand\s*=\s*(.*)/) {
 					$1 =~ /(.+)/ or &config_error($line, "invalid fallback command");
 					$vsrv{fallbackcommand} = $1;
 				} elsif ($rcmd =~ /^quiescent\s*=\s*(.*)/) {
 					($1 eq "yes" || $1 eq "no")
 						or &config_error($line, "quiescent must be 'yes' or 'no'");
 					$vsrv{quiescent} = $1;
 				} elsif  ($rcmd =~ /^emailalert\s*=\s*(.*)/) {
 					$vsrv{emailalert} =
 						read_emailalert($line, $1);
 				} elsif  ($rcmd =~ /^emailalertfreq\s*=\s*(\d*)/) {
 					$1 =~ /(\d+)/ or &config_error($line, "invalid email alert frequency");
 					$vsrv{emailalertfreq} = $1;
 				} elsif  ($rcmd =~ /^emailalertstatus\s*=\s*(.*)/) {
 					$vsrv{emailalertstatus} = &parse_emailalertstatus($line, $1);
 				} elsif  ($rcmd =~ /^monitorfile\s*=\s*\"(.*)\"/ or
 					  $rcmd =~ /^monitorfile\s*=\s*(.*)/) {
 					my $monitorfile = $1;
 					unless (open(MONITORFILE, ">>$monitorfile") and close(MONITORFILE)) {
 						&config_error($line, "unable to open monitorfile $monitorfile: $!");
 					}
 					$vsrv{monitorfile} = $monitorfile;
 				} elsif  ($rcmd =~ /^cleanstop\s*=\s*(.*)/) {
 					($1 eq "yes" || $1 eq "no")
 						or &config_error($line, "cleanstop must be 'yes' or 'no'");
 					$vsrv{cleanstop} = $1;
 				} elsif  ($rcmd =~ /^smtp\s*=\s*(.*)/) {
 					$1 =~ /(^([0-9A-Za-z._+-]+))/ or &config_error($line, "invalid SMTP server address");
 					$vsrv{smtp} = $1;
+				} elsif  ($rcmd =~ /^servicename\s*=\s*(.*)/) {
+					$vsrv{servicename} = $1;
+				} elsif  ($rcmd =~ /^comment\s*=\s*(.*)/) {
+					$vsrv{comment} = $1;
 				} else {
 					&config_error($line, "Unknown command \"$linedata\"");
 				}
 				undef $linedata;
 			}
 			# As the protocol needs to be known to call
 			# getservbyname() all resolution must be
 			# delayed until the protocol is finalised.
 			# That is after the entire configuration
 			# for a virtual service has been parsed.
 
 			&_ld_read_config_fallback_resolve($fallback_line,
 				$vsrv{protocol}, $vsrv{fallback}, $af);
 			&_ld_read_config_virtual_resolve($virtual_line, \%vsrv,
 				$ip_port, $af);
 			&_ld_read_config_real_resolve(\%vsrv, \@rsrv_todo, $af);
 
 			# Check for duplicate now we have all the
 			# information to generate the id
 			$virtual_id = get_virtual_id_str(\%vsrv);
 			if (defined $virtual_seen{$virtual_id}) {
 				&config_error($line,
 					"duplicate virtual server");
 			}
 			$virtual_seen{$virtual_id} = 1;
 
 			unless(defined($linedata)) {
 				last;
 			}
 			#Arggh a goto :(
 			goto outer_loop;
 		}
 		next if ($linedata =~ /^\s*$/ || $linedata =~ /^\s*#/);
 		if ($linedata  =~ /^checktimeout\s*=\s*(.*)/) {
 			($1 =~ /(\d+)/ && $1 && $1>0) or &config_error($line,
 					"invalid check timeout value");
 			$CHECKTIMEOUT = $1;
 		} elsif ($linedata  =~ /^connecttimeout\s*=\s*(.*)/) {
 			&config_error($line,
 					"connecttimeout directive " .
 					"deprecated in favour of " .
 					"negotiatetimeout");
 		} elsif ($linedata  =~ /^negotiatetimeout\s*=\s*(.*)/) {
 			($1 =~ /(\d+)/ && $1 && $1>0) or &config_error($line,
 					"invalid negotiate timeout value");
 			$NEGOTIATETIMEOUT = $1;
 		} elsif ($linedata  =~ /^checkinterval\s*=\s*(.*)/) {
 			$1 =~ /(\d+)/ && $1 or &config_error($line,
 					"invalid check interval value");
 			$CHECKINTERVAL = $1;
 		} elsif ($linedata  =~ /^checkcount\s*=\s*(.*)/) {
 			$1 =~ /(\d+)/ && $1 or &config_error($line,
 					"invalid check count value");
 			$CHECKCOUNT = $1;
 			&config_warn($line, "checkcount option is deprecated and slated for removal.  please see 'failurecount'");
 		} elsif ($linedata  =~ /^failurecount\s*=\s*(.*)/) {
 			$1 =~ /(\d+)/ && $1 or &config_error($line,
 					"invalid failure count value");
 			$FAILURECOUNT = $1;
 		} elsif ($linedata  =~ /^fallback(6)?\s*=\s*(.*)/) {
 			my $af = defined($1) ? AF_INET6 : AF_INET;
 			my $tcp = parse_fallback($line, $2, undef);
 			my $udp = parse_fallback($line, $2, undef);
 			&_ld_read_config_fallback_resolve($line, "tcp", $tcp, $af);
 			&_ld_read_config_fallback_resolve($line, "udp", $udp, $af);
 			if ($af == AF_INET) {
 				$FALLBACK = { "tcp" => $tcp, "udp" => $udp };
 			} else {
 				$FALLBACK6 = { "tcp" => $tcp, "udp" => $udp };
 			}
 		} elsif ($linedata =~ /^fallbackcommand\s*=\s*(.*)/) {
 			$1 =~ /(.+)/ or &config_error($line, "invalid fallback command");
 			$FALLBACKCOMMAND = $1;
 		} elsif ($linedata  =~ /^autoreload\s*=\s*(.*)/) {
 			($1 eq "yes" || $1 eq "no")
 			    or &config_error($line,
 					"autoreload must be 'yes' or 'no'");
 			$AUTOCHECK = $1;
 		} elsif ($linedata  =~ /^callback\s*=\s*\"(.*)\"/) {
 			$CALLBACK = $1;
 		} elsif ($linedata  =~ /^logfile\s*=\s*\"(.*)\"/) {
 			my $tmpLDIRLOG = $LDIRLOG;
 			$LDIRLOG = $1;
 			if (&ld_openlog()) {
 				$LDIRLOG = $tmpLDIRLOG;
 				&config_error($line,
 						"unable to open logfile: $1");
 			}
 		} elsif ($linedata  =~ /^execute\s*=\s*(.*)/) {
 			$LD_INSTANCE{$1} = 1;
 		} elsif ($linedata  =~ /^fork\s*=\s*(.*)/) {
 			($1 eq "yes" || $1 eq "no")
 			    or &config_error($line, "fork must be 'yes' or 'no'");
 			$FORKING = $1;
 		} elsif ($linedata  =~ /^supervised/) {
 			if (($linedata  =~ /^supervised\s*=\s*(.*)/) and
 			    ($1 eq "yes" || $1 eq "no")) {
 				$SUPERVISED = $1;
 			}
 			elsif ($linedata  =~ /^supervised\s*$/) {
 				$SUPERVISED = "yes";
 				&config_warn($line,
 					"please update your config not to " .
 					"use a bare supervised directive");
 			}
 			else {
 				&config_error($line,
 					"supervised must be 'yes' or 'no'");
 			}
 		} elsif ($linedata  =~ /^quiescent\s*=\s*(.*)/) {
 			($1 eq "yes" || $1 eq "no")
 			    or &config_error($line,
 					"quiescent must be 'yes' or 'no'");
 			$QUIESCENT = $1;
 		} elsif ($linedata  =~ /^readdquiescent\s*=\s*(.*)/) {
 			($1 eq "yes" || $1 eq "no")
 			    or &config_error($line,
 					"readdquiescent must be 'yes' or 'no'");
 			$READDQUIESCENT = $1;
 		} elsif  ($linedata  =~ /^emailalert\s*=\s*(.*)/) {
 			$EMAILALERT = read_emailalert($line, $1);
 		} elsif  ($linedata  =~ /^emailalertfreq\s*=\s*(\d*)/) {
 			$1 =~ /(\d+)/ or &config_error($line,
 					"invalid email alert frequency");
 			$EMAILALERTFREQ = $1;
 		} elsif  ($linedata  =~ /^emailalertstatus\s*=\s*(.*)/) {
 			$EMAILALERTSTATUS = &parse_emailalertstatus($line, $1);
 		} elsif  ($linedata  =~ /^emailalertfrom\s*=\s*(.*)/) {
 			$1 =~ /(.+)/ or &config_error($line,
 					"no email from address specified");
 			$EMAILALERTFROM = $1;
 		} elsif  ($linedata  =~ /^cleanstop\s*=\s*(.*)/) {
 			($1 eq "yes" || $1 eq "no")
 			    or &config_error($line, "cleanstop must be 'yes' or 'no'");
 			$CLEANSTOP = $1;
 		} elsif  ($linedata  =~ /^smtp\s*=\s*(.*)/) {
 			$1 =~ /(^([0-9A-Za-z._+-]+))/ or &config_error($line,
 					"invalid SMTP server address");
 			$SMTP = $1;
 		} elsif  ($linedata  =~ /^maintenancedir\s*=\s*(.*)/) {
 			$1 =~ /(.+)/ or &config_error($line,
 					"maintenance directory not specified");
 			$MAINTDIR = $1;
 			-d $MAINTDIR or &config_warn($line,
 					"maintenance directory does not exist");
 		} else {
 			if ($linedata  =~ /^timeout\s*=\s*(.*)/) {
 				&config_error($line,
 						"timeout directive " .
 						"deprecated in favour of " .
 						"checktimeout and " .
 						"negotiatetimeout");
 			}
 			&config_error($line, "Unknown command $linedata ");
 		}
 	}
 	close(CFGFILE);
 
 	# Check for sensible use of checkinterval, warn if it is used in a virtual
 	# service when fork=no
 	if ($FORKING eq 'no') {
 		foreach my $v (@VIRTUAL) {
 			if (defined($$v{checkinterval})) {
 				config_warn(-1, "checkinterval in virtual service ".
 					get_virtual_id_str($v)." ignored when fork=no");
 			}
 		}
 	}
 
 	return(0);
 }
 
 # _ld_read_config_virtual_resolve
 # Note: Internal helper of the configuration reader. Calling it directly
 #       is not expected, though it is harmless.
 # Fill in the server (ip address) and port of a virtual service
 # pre: line: Line of the configuration file the virtual service was read
 #            from. Only used when reporting an error
 #      vsrv: Virtual Service (hash reference) to fill in
 #      ip_port: server and port in the form
 #               ip_address|hostname:port|service
 #               Nothing is done if this is empty
 #      af: Address family: AF_INET or AF_INET6
 # post: ip_port is passed through ld_gethostservbyname together with
 #       $vsrv->{protocol} and af, and the result is stored in
 #       $vsrv->{server} and $vsrv->{port}.
 #       If $vsrv->{service} is not defined it is set from
 #       ld_port_to_service($vsrv->{port})
 # return: none
 #        config_error is called if the address can not be resolved
 sub _ld_read_config_virtual_resolve
 {
 	my ($line, $vsrv, $ip_port, $af) = @_;
 
 	# No address:port given, so there is nothing to resolve
 	return unless ($ip_port);
 
 	my $resolved = &ld_gethostservbyname($ip_port, $vsrv->{protocol}, $af);
 
 	if ($resolved =~ /(\[[0-9A-Fa-f:]+\]):(\d+)/) {
 		# Bracketed IPv6 literal: the address contains colons
 		# itself, so take the port from after the closing bracket
 		$vsrv->{server} = $1;
 		$vsrv->{port} = $2;
 	}
 	elsif ($resolved) {
 		my ($host, $port) = split(/:/, $resolved);
 		$vsrv->{server} = $host;
 		$vsrv->{port} = $port;
 	}
 	else {
 		&config_error($line, "invalid address for virtual service");
 	}
 
 	# Only guess the service when the configuration did not name one
 	unless (defined($vsrv->{service})) {
 		$vsrv->{service} = ld_port_to_service($vsrv->{port});
 	}
 }
 
 # ld_port_to_service
 # Look up the ldirectord service name that belongs to a port number
 # pre: port: port number of the service
 # return: service name
 #         "none" if no service is known for the port
 sub ld_port_to_service
 {
 	my ($port) = @_;
 
 	my %service_of = (
 		21	=> "ftp",
 		25	=> "smtp",
 		53	=> "dns",
 		80	=> "http",
 		110	=> "pop",
 		119	=> "nntp",
 		143	=> "imap",
 		389	=> "ldap",
 		443	=> "https",
 		587	=> "submission",
 		993	=> "imaps",
 		995	=> "pops",
 		1521	=> "oracle",
 		1812	=> "radius",
 		3128	=> "http_proxy",
 		3306	=> "mysql",
 		5060	=> "sip",
 		5432	=> "pgsql",
 	);
 
 	# Hash keys are strings, so only the plain decimal spelling of a
 	# port number is recognised
 	return exists($service_of{$port}) ? $service_of{$port} : "none";
 }
 
 # ld_service_to_port
 # Look up the port number that belongs to an ldirectord service name
 # pre: service: name of the service
 # return: port number
 #         undef if the service is unknown
 sub ld_service_to_port
 {
 	my ($service) = @_;
 
 	my %port_of = (
 		"ftp"		=> 21,
 		"smtp"		=> 25,
 		"dns"		=> 53,
 		"http"		=> 80,
 		"pop"		=> 110,
 		"nntp"		=> 119,
 		"imap"		=> 143,
 		"ldap"		=> 389,
 		"https"		=> 443,
 		"submission"	=> 587,
 		"imaps"		=> 993,
 		"pops"		=> 995,
 		"oracle"	=> 1521,
 		"radius"	=> 1812,
 		"http_proxy"	=> 3128,
 		"mysql"		=> 3306,
 		"sip"		=> 5060,
 		"pgsql"		=> 5432,
 	);
 
 	# A missing key yields undef, which is the "unknown" result
 	return $port_of{$service};
 }
 
 # ld_checkport
 # Work out which port a service check should connect to
 # Note: Only meant for use inside service checks, as the result is not
 #       necessarily the port of the real server
 # pre: v: virtual service
 #      r: real server
 # return: the virtual service's checkport if one is defined,
 #         otherwise the real server's port if it is greater than zero,
 #         otherwise the port belonging to the virtual service's service
 #         (undef if that service is unknown)
 sub ld_checkport
 {
 	my ($v, $r) = @_;
 
 	return $v->{checkport} if (defined $v->{checkport});
 	return $r->{port} if ($r->{port} > 0);
 	return ld_service_to_port($v->{service});
 }
 
 # _ld_read_config_fallback_resolve
 # Note: Internal helper of the configuration reader. Calling it directly
 #       is not expected, though it is harmless.
 # Resolve the address and port of a fallback server
 # pre: line: Line of configuration file fallback server was read from
 #            Only used when reporting an error
 #      protocol: protocol handed to ld_getservbyname when resolving
 #                the port
 #      fallback: fallback server (hash reference with "server" and
 #                "port" keys). Nothing is done if this is not set
 #      af: Address family: AF_INET or AF_INET6
 # post: $fallback->{server} is replaced by the result of
 #       ld_gethostbyname. If $fallback->{port} is set it is replaced by
 #       the result of ld_getservbyname
 # return: none
 #	config_error is called if the server or port can not be resolved
 sub _ld_read_config_fallback_resolve
 {
 	my ($line, $protocol, $fallback, $af) = @_;
 
 	return unless ($fallback);
 
 	# Name of the address family, only used in the error message
 	my $ipversion;
 	if ($af == AF_INET6) {
 		$ipversion = "IPv6";
 	}
 	elsif ($af == AF_INET) {
 		$ipversion = "IPv4";
 	}
 	else {
 		$ipversion = "IP??($af)";
 	}
 
 	my $ipaddress = &ld_gethostbyname($fallback->{server}, $af);
 	if (!$ipaddress) {
 		&config_error($line, "invalid $ipversion address or could not resolve for fallback server: " .
 			      $fallback->{server});
 	}
 	$fallback->{server} = $ipaddress;
 
 	# A fallback without a port needs no service lookup
 	return unless ($fallback->{"port"});
 
 	my $resolved_port = &ld_getservbyname($fallback->{port}, $protocol);
 	$fallback->{port} = $resolved_port;
 	$resolved_port or
 		&config_error($line, "invalid port for fallback server");
 }
 
 # _ld_read_config_real_resolve
 # Note: Should not need to be called directly, but won't do any damage if
 #       you do.
 # Run through the list of real servers read in the configuration file for a
 # virtual server and parse these entries
 # pre: vsrv: Virtual Service to parse real servers for
 #      rsrv_todo: List of real servers read from config but not parsed.
 #                 List is a list of list references. The first element in
 #                 each list reference is the text read from the
 #                 configuration after "real=". The second element is the
 #                 line number, used for error reporting
 #      af: Address family: AF_INET or AF_INET6
 # post: Each entry of rsrv_todo is split into
 #       host[->host][:port] flags, host and port are resolved, and the
 #       result is handed to add_real_server, or to add_real_server_range
 #       if a second host was given
 # return: none
 #	config_error is called if an entry is malformed or can not be
 #	resolved
 sub _ld_read_config_real_resolve
 {
 	my ($vsrv, $rsrv_todo, $af) = @_;
 
 	# A host is a dotted quad, a host name or a bracketed IPv6 literal.
 	# The IPv6 class is limited to hex digits and colons
 	my $host = qr/\d+\.\d+\.\d+\.\d+|[A-Za-z0-9.-]+|\[[0-9A-Fa-f:]+\]/;
 
 	for my $todo (@$rsrv_todo) {
 		my ($str, $line) = @$todo;
 
 		# Anchored at the start so that an entry with unexpected
 		# leading characters is rejected instead of being parsed
 		# from somewhere in the middle
 		$str =~ /^($host)(->($host))?(:(\d+|[A-Za-z0-9-_]+))?\s+(.*)/
 			or &config_error($line,
 				"invalid address for real server" .
 				" (wrong format)");
 		my ($ip1, $ip2, $port, $flags) = ($1, $3, $5, $6);
 
 		# No port in the configuration is represented as port "0"
 		$port = "0" unless (defined($port));
 
 		my $resolved_ip1 = &ld_gethostbyname($ip1, $af);
 		unless (defined($resolved_ip1)) {
 			&config_error($line,
 				"invalid address ($ip1) for real server" .
 				" (could not resolve host)");
 		}
 
 		my $resolved_port = &ld_getservbyname($port, "");
 		unless (defined($resolved_port)) {
 			&config_error($line,
 				"invalid port ($port) for real server" .
 				" (could not resolve port)");
 		}
 
 		unless (defined($ip2)) {
 			&add_real_server($line, $vsrv, $resolved_ip1,
 				$resolved_port, $flags);
 			next;
 		}
 
 		# first->last notation: add every address in the range
 		my $resolved_ip2 = &ld_gethostbyname($ip2, $af);
 		unless (defined($resolved_ip2)) {
 			&config_error($line,
 				"invalid address ($ip2) for " .
 				"real server" .
 				" (could not resolve end host)");
 		}
 		&add_real_server_range($line, $vsrv, $resolved_ip1,
 			$resolved_ip2, $resolved_port, $flags, $af);
 	}
 }
 
 # add_real_server_range
 # Add a real server for each IP address in a range
 # pre: line: line number real server was read from
 #            Used for debugging information
 #      vsrv: virtual server to add real server to
 #      first: First IP address in range
 #      last: Last IP address in range
 #      port: Port of real servers
 #      flags: Flags for real servers. Should be of the form
 #             gate|masq|ipip [<weight>] ["<request>", "<receive>"]
 #      af: Address family: AF_INET or AF_INET6
 # post: real servers are added to virtual server
 # return: none
 #         Debugging message will be reported and programme will exit
 #         on error.
 sub add_real_server_range
 {
 	my ($line, $vsrv, $first, $last, $port, $flags, $af) = (@_);
 
 	if ($af == AF_INET) {
 		my $first_i = &ip_to_int($first);
 		if ($first_i < 0) {
 			&config_error($line, "Invalid IP address: $first");
 		}
 		my $last_i = &ip_to_int($last);
 		if ($last_i < 0) {
 			&config_error($line, "Invalid IP address: $last");
 		}
 
 		if ($first_i > $last_i) {
 			&config_error($line,
 				"Invalid Range: $first-$last: First value must be " .
 				"less than or equal to the second value");
 		}
 
 		# The bounds are integers and must be compared numerically.
 		# A string comparison ("le") orders "99999999" after
 		# "100000000" and would silently skip such a range.
 		my $i = $first_i;
 		while ($i <= $last_i) {
 			&add_real_server($line, $vsrv, &int_to_ip($i), $port, $flags);
 			$i++;
 		}
 	}
 	elsif ($af == AF_INET6) {
 		# not supported yet
 		&config_error($line, "Address range for IPv6 is not supported yet");
 	}
 	else {
 		die "address family must be AF_INET or AF_INET6\n";
 	}
 }
 
 # add_real_server
 # Add a real server to a virtual
 # pre: line: line number real server was read from
 #            Used for debugging information
 #      vsrv: virtual server to add real server to
 #      ip: IP address of real server
 #      port: Port of real server
 #      flags: Flags for real server. Should be of the form
 #             gate|masq|ipip [<weight>] ["<request>", "<receive>"]
 # post: real server is appended to the list referenced by $vsrv->{"real"}
 #       and the id of the virtual server is recorded against the id of
 #       the real server in @REAL
 # return: none
 #         Debugging message will be reported and programme will exit
 #         on error.
 sub add_real_server
 {
 	my ($line, $vsrv, $ip, $port, $flags) = (@_);
 
 	my $ref;
 	my $realsrv=0;
 	my $new_rsrv;
 	my $rsrv;
 
-	$new_rsrv = {"server"=>$ip, "port"=>$port};
+	$new_rsrv = {"server"=>$ip, "port"=>$port, "failcount"=>0};
 
 	# The first word of flags is the forwarding method. Whatever follows
 	# it is kept in $flags for the optional parts parsed below.
 	$flags =~ /(\w+)(.*)/ && ($1 eq "gate" || $1 eq "masq" || $1 eq "ipip")
 		or &config_error($line,	"forward method must be gate, masq or ipip");
 
 	$new_rsrv->{"forward"} =$1;
 	$flags = $2;
 
 	$rsrv=$vsrv->{"real"};
 
 	# Optional weight. A real server without one gets a weight of 1.
 	if(defined($flags) and $flags =~ /\s+(\d+)(.*)/) {
 		$new_rsrv->{"weight"} = $1;
 		$flags = $2;
 	}
 	else {
 		$new_rsrv->{"weight"} = 1;
 	}
 
 	# Optional request/receive pair: two double-quoted strings separated
 	# by a comma or a space. The request is given a leading "/" if it
 	# does not already start with one.
 	if(defined($flags) and $flags =~ /\s+\"(.*)\"[, ]\s*\"(.*)\"(.*)/) {
 		$new_rsrv->{"request"} = $1;
 		unless ($new_rsrv->{request}=~/^\//) {
 			$new_rsrv->{request} = "/" . $new_rsrv->{request};
 		}
 		$new_rsrv->{"receive"} = $2;
 		$flags = $3;
 	}
 
 	# Anything other than whitespace left over was not understood
 	if (defined($flags) and $flags =~/\S/) {
 		&config_error($line, "Invalid real server line, around "
 			. "\"$flags\"");
 	}
 
 	push(@$rsrv, $new_rsrv);
 
 	# @REAL holds one entry per real server id with the list of virtual
 	# server ids that use it. Extend the existing entry if there is one,
 	# otherwise create it.
 	my $real    = get_real_id_str($new_rsrv, $vsrv);
 	my $virtual = get_virtual_id_str($vsrv);
 	for my $r (@REAL){
 		if($r->{"real"} eq $real){
 			my $ref=$r->{"virtual"};
 			push(@$ref, $virtual);
 			$realsrv=1;
 			last;
 		}
 	}
 	if($realsrv==0){
 		push(@REAL, { "real"=>$real, "virtual"=>[ $virtual ] });
 	}
 }
 
 # parse_fallback
 # Parse a fallback server
 # pre: line: line number fallback server was read from
 #            Only used when reporting an error
 #      fallback: text read from configuration file
 #                Should be of the form
 #                ip_address|hostname[:port|:service_name] [gate|masq|ipip]
 #      vsrv: virtual service the fallback belongs to, or undef.
 #            If given, its port is used when fallback has no port
 # post: fallback is parsed
 # return: Reference to hash of the form
 #         { server => blah, port => blah, forward => blah, weight => 1 }
 #         port may be undef. forward is "gate" unless specified.
 #         config_error is called on error.
 sub parse_fallback
 {
 	my ($line, $fallback, $vsrv) = @_;
 
 	my ($server, $port, $fwd);
 	my $host_port = $fallback;
 
 	# Separate "ip:port" from an optional forwarding method
 	if ($host_port =~ /(\S+)(\s+(\S+))?\s*$/) {
 		($host_port, $fwd) = ($1, $3);
 	}
 
 	# Cut an optional ":port" off the end. What is left is the host
 	if ($host_port =~ s/(:(\d+|[A-Za-z0-9-_]+))?$//) {
 		$port = $2;
 		$server = $host_port;
 	}
 
 	&config_error($line, "invalid fallback server: $fallback")
 		unless (defined($server));
 
 	if (defined($vsrv) and not defined($port)) {
 		$port = $vsrv->{"port"};
 	}
 
 	if (!$fwd) {
 		$fwd = "gate";
 	}
 	elsif ($fwd ne "gate" && $fwd ne "masq" && $fwd ne "ipip") {
 		&config_error($line,
 			"forward method must be gate, masq or ipip");
 	}
 
 	my %parsed = (
 		"server"  => $server,
 		"port"    => $port,
 		"forward" => $fwd,
 		"weight"  => 1,
 	);
 	return(\%parsed);
 }
 
 # __config_log
 # Report a message about the configuration
 # pre: line: line number of the configuration file the message is about.
 #            Left out of the message unless greater than zero
 #      prefix: text to start the message with, e.g. "Warning"
 #      msg: the message. A trailing newline is optional
 # post: The message is printed to STDERR if $opt_d is set or the daemon
 #       is starting. Otherwise it is passed to ld_log
 # return: none
 sub __config_log
 {
 	my ($line, $prefix, $msg) = @_;
 
 	chomp($msg);
 
 	# "<prefix> [<pid>][ reading file <config> at line <n>]: <msg>\n"
 	my $text = "$prefix [$$]";
 	$text .= " reading file $CONFIG at line $line" if ($line > 0);
 	$text .= ": $msg\n";
 
 	if (!$opt_d and $DAEMON_STATUS != $DAEMON_STATUS_STARTING) {
 		&ld_log("$text");
 	}
 	else {
 		print STDERR $text;
 	}
 }
 
 # config_warn
 # Report a warning about the configuration via __config_log
 # pre: line: line number of the configuration file the warning is about
 #      msg: the warning
 # return: none
 sub config_warn
 {
 	my $line = shift;
 	my $msg = shift;
 
 	__config_log($line, "Warning", $msg);
 }
 
 # config_error
 # Report an error in the configuration via __config_log and give up
 # pre: line: line number of the configuration file the error is about
 #      msg: the error
 # post: If the daemon is starting its pid file is removed and ld_exit
 #       is called with status 2. Otherwise die is called
 # return: does not return normally
 sub config_error
 {
 	my ($line, $msg) = @_;
 
 	__config_log($line, "Error", $msg);
 
 	# Once past start-up the caller decides what to do about it
 	die unless ($DAEMON_STATUS == $DAEMON_STATUS_STARTING);
 
 	&ld_rm_file("$RUNPID.$CFGNAME.pid");
 	&ld_exit(2, "config_error: Configuration Error");
 }
 
 # ld_setup
 # Derive the run-time settings of every virtual service in @VIRTUAL, and
 # of each of its real servers, from the values read from the configuration
 # pre: @VIRTUAL has been filled in
 # post: For each virtual service proto and flags are set, and
 #       checktimeout, negotiatetimeout, checkcount and failurecount are
 #       replaced by global values or defaults where they are negative.
 #       For each real server forw, url and num_connects are set, and
 #       request and receive are taken from the virtual service unless
 #       the real server defines both itself
 # return: none
 sub ld_setup
 {
 	for my $v (@VIRTUAL) {
 		# Protocol option: -t for tcp, -u for udp, -f for fwm
 		if ($$v{protocol} eq "tcp") {
 			$$v{proto} = "-t";
 		} elsif ($$v{protocol} eq "udp") {
 			$$v{proto} = "-u";
 		} elsif ($$v{protocol} eq "fwm") {
 			$$v{proto} = "-f";
 		}
 		# Build up the option string for the virtual service
 		# NOTE(review): presumably passed to ipvsadm - confirm
 		$$v{flags} = "$$v{proto} " .  &get_virtual_option($v) . " ";
 		# -o is only added for udp services with ops=yes
 		if ($$v{protocol} eq "udp" && $$v{ops} eq "yes") {
 			$$v{flags} .= "-o ";
 		}
 		$$v{flags} .= "-s $$v{scheduler} " if defined ($$v{scheduler});
 		# A netmask is only passed on for persistent services
 		if (defined $$v{persistent}) {
 			$$v{flags} .= "-p $$v{persistent} ";
 			$$v{flags} .= "-M $$v{netmask} " if defined ($$v{netmask});
 		}
 		my $real = $$v{real};
 		for my $r (@$real) {
 			$$r{forw} = get_forward_flag($$r{forward});
 			my $port=ld_checkport($v, $r);
 
 			# http_proxy checks use plain http URLs
 			my $schema = $$v{service};
 			if ($$v{service} eq 'http_proxy') {
 				$schema = 'http';
 			}
 
 			if (defined $$r{request} && defined $$r{receive}) {
 				# The real server has its own request/receive
 				# pair. A request that already contains
 				# "<schema>://" is used as the URL (minus its
 				# leading "/"), otherwise the URL is built from
 				# the real server's address and the check port
 				my $uri = $$r{request};
 				$uri =~ s/^\///g;
 				if ($$r{request} =~ /$schema:\/\//) {
 					$$r{url} = "$uri";
 				} else {
 					$$r{url} = "$schema:\/\/$$r{server}:$port\/$uri";
 				}
 			} else {
 				# Fall back to the request/receive of the
 				# virtual service
 				my $uri = $$v{request};
 				$uri =~ s/^\///g;
 
 				# For http_proxy the request itself (minus its
 				# leading "/") is the URL
 				if ($$v{service} eq 'http_proxy') {
 					$$r{url} = "$uri";
 				} else {
 					$$r{url} = "$schema:\/\/$$r{server}:$port\/$uri";
 				}
 
 				$$r{request} = $$v{request} unless defined $$r{request};
 				$$r{receive} = $$v{receive};
 			}
 			# NOTE(review): 999999 looks like a start value that
 			# makes the first combined check a negotiate check -
 			# confirm against the check code
 			if ($$v{checktype} eq "combined") {
 				$$r{num_connects} = 999999;
 			} else {
 				$$r{num_connects} = -1;
 			}
 		}
 
 		# checktimeout and negotiate timeout are
 		# mutual defaults for each other, so calculate
 		# checktimeout in a temporary variable so as not
 		# to affect the calculation of negotiatetimeout.
 
 		# A negative value means "not set". Order of preference for
 		# checktimeout: the service's negotiatetimeout, the global
 		# checktimeout, the global negotiatetimeout, the default.
 		my $checktimeout = $$v{checktimeout};
 		if ($checktimeout < 0) {
 			$checktimeout = $$v{negotiatetimeout};
 		}
 		if ($checktimeout < 0) {
 			$checktimeout = $CHECKTIMEOUT;
 		}
 		if ($checktimeout < 0) {
 			$checktimeout = $NEGOTIATETIMEOUT;
 		}
 		if ($checktimeout < 0) {
 			$checktimeout = $DEFAULT_CHECKTIMEOUT;
 		}
 
 		# Order of preference for negotiatetimeout: the service's
 		# checktimeout (as configured, not as calculated above), the
 		# global negotiatetimeout, the global checktimeout, the
 		# default.
 		if ($$v{negotiatetimeout} < 0) {
 			$$v{negotiatetimeout} = $$v{checktimeout};
 		}
 		if ($$v{negotiatetimeout} < 0) {
 			$$v{negotiatetimeout} = $NEGOTIATETIMEOUT;
 		}
 		if ($$v{negotiatetimeout} < 0) {
 			$$v{negotiatetimeout} = $CHECKTIMEOUT;
 		}
 		if ($$v{negotiatetimeout} < 0) {
 			$$v{negotiatetimeout} = $DEFAULT_NEGOTIATETIMEOUT;
 		}
 
 		# Only now is it safe to store the calculated checktimeout
 		$$v{checktimeout} = $checktimeout;
 
 		# Unset counts take the global values
 		if ($$v{checkcount} < 0) {
 			$$v{checkcount} = $CHECKCOUNT;
 		}
 
 		if ($$v{failurecount} < 0) {
 			$$v{failurecount} = $FAILURECOUNT;
 		}
 	}
 }
 
 # ld_readline
 # Read one line from a file handle, even if the handle is being read a
 # whole file at a time
 #
 # Background, as recorded by the original author: Net::FTP seems to set
 # the input record separator ($/) to null, putting IO into slurp (whole
 # file at a time, rather than line at a time) mode. Net::FTP does this
 # using local $/, which should mean that the change doesn't affect code
 # here, but it does. It also seems to be impossible to turn it off, by
 # say setting $/ back to "\n". Perhaps there is more to this than meets
 # the eye. Perhaps it's a perl bug. In any case, this should fix the
 # problem.
 #
 # This should not affect pid or config file parsing as they are called
 # before Net::FTP and as this appears to be a bit of a work around,
 # I'd rather use it in as few places as possible
 #
 # Observed with perl v5.8.8 (Debian's perl 5.8.8-6)
 # -- Horms, 17th July 2005
 #
 # pre: fd: file handle to read from
 #      buf: reference to a list used to hold lines that have been read
 #           but not returned yet. Pass the same, initially empty, list
 #           on every call for a given file handle
 # return: the next line, always terminated by "\n"
 #         undef once there is nothing left to read
 sub ld_readline
 {
 	my ($fd, $buf) = (@_);
 
 	# Uncomment the following line to turn off this work around
 	# return readline($fd);
 
 	unless (@$buf) {
 		my $chunk = readline($fd);
 		return undef unless (defined $chunk);
 
 		# Remove the final line terminator so that it does not
 		# produce an extra empty line when splitting with a
 		# negative limit, which is needed to keep blank lines
 		$chunk =~ s/\n\z//;
 
 		# split() returns no fields at all for an empty string, but
 		# a blank line is still a line. Without this a blank line
 		# would be reported as the end of the input.
 		push @$buf, ($chunk eq "") ? ("") : split(/\n/, $chunk, -1);
 	}
 
 	return shift(@$buf) . "\n";
 }
 
 # ld_read_ipvsadm
 # Parses the output of "ipvsadm -L -n" and puts into a structure of
-# the following from:
+# the following form:
 #
 # {
 #   (vip_address:vport|fwmark) protocol => {
 #     "scheduler" => scheduler,
 #     "persistent" => timeout,     # May be omitted
 #     "netmask" => netmask,        # May be omitted
 #     "real" => {
 #       rip_address:rport => {
 #         "forward" => forwarding_mechanism,
 #         "weight"  => weight
 #       },
 #       ...
 #     }
 #   },
 #   ...
 # }
 #
 # where:
 #   vip_address: IP address of virtual service
 #   vport: Port of virtual service
 #   fwmark: Firewall Mark of virtual service
 #   scheduler: Scheduler for virtual service
 #   timeout: Timeout for persistency. Omitted if service is not persistent.
-#   nemask: Netmask for persistency. Omitted if service is not persistent.
+#   netmask: Netmask for persistency. Omitted if service is not persistent.
 #
 #   rip_address: IP address of real server
 #   rport: Port of real server
 #   forwarding_mechanism: Forwarding mechanism for real server.
 #                         One of: gate, ipip, masq.
 #   weight: Weight of real server
 #
 # pre: none
 # post: ipvsadm -L -n is parsed
-# result: reference to sructure detailed above.
+# result: reference to structure detailed above.
 sub ld_read_ipvsadm
 {
 	my %oldsrv;
 	my $real_service;
 	my $fwd;
 	my $buf = [];
 	my $fh;
 	my $line;
 
 	# read status of current ipvsadm -L -n
 	unless(open($fh, "$IPVSADM -L -n 2>&1|")){
 		&ld_exit(1, "Could not run $IPVSADM -L -n: $!");
 	}
 
 	# Skip the first three lines
 	$line = ld_readline($fh, $buf);
 	$line = ld_readline($fh, $buf);
 	$line = ld_readline($fh, $buf);
 
 	while (1) {
 		$line = ld_readline($fh, $buf);
 		if (not defined $line) {
 			last;
 		}
 		if ($line =~ /^(\w+)\s+(\d+\.\d+\.\d+\.\d+\:\d+|\[[0-9A-Fa-f:]+\]:\d+|\d+)( IPv6)?\s+(\w+)\s+persistent\s+(\d+)\s+mask\s+(.*)/) {
 			$real_service = &gen_real_service_str($2, $1, $3);
 			$oldsrv{"$real_service"} = {"real"=>{}, "scheduler"=>$4, "persistent"=>$5, "netmask"=>$6};
 		} elsif ($line =~ /^(\w+)\s+(\d+\.\d+\.\d+\.\d+\:\d+|\[[0-9A-Fa-f:]+\]:\d+|\d+)( IPv6)?\s+(\w+)\s+persistent\s+(\d+)/) {
 			$real_service = &gen_real_service_str($2, $1, $3);
 			$oldsrv{"$real_service"} = {"real"=>{}, "scheduler"=>$4, "persistent"=>$5};
 		} elsif ($line =~ /^(\w+)\s+(\d+\.\d+\.\d+\.\d+\:\d+|\[[0-9A-Fa-f:]+\]:\d+|\d+)( IPv6)?\s+(\w+)/) {
 			$real_service = &gen_real_service_str($2, $1, $3);
 			$oldsrv{"$real_service"} = {"real"=>{}, "scheduler"=>$4};
 		} elsif ($line =~ /^  ->\s+(\d+\.\d+\.\d+\.\d+\:\d+|\[[0-9A-Fa-f:]+\]:\d+)\s+(\w+)\s+(\d+)/) {
 			if (not defined( $real_service)) {
 				&ld_debug(2, "Real server read from ipvsadm " .
 					  "doesn't seem to be inside a " .
 					  "virtual service: \"$line\"\n");
 				next;
 			}
 			if ($2 eq "Route") {
 				$fwd = "gate";
 			} elsif ($2 eq "Tunnel") {
 				$fwd = "ipip";
 			} elsif ($2 eq "Masq") {
 				$fwd = "masq";
 			}
 			$oldsrv{"$real_service"}->{"real"}->{"$1"} = {"forward"=>$fwd, "weight"=>$3};
 		} else {
 			&ld_debug(2, "Unknown line read from ipvsadm: " .
 				  "\"$line\"\n");
 			next;
 		}
 	}
 	close($fh);
 
 	return(\%oldsrv);
 }
 
 sub gen_real_service_str
 {
 	my ($service_address, $protocol, $v6flag) = @_;
 
 	return "$service_address ".lc($protocol).(defined($v6flag) ? "6" : "");
 }
 
 sub get_real_service_str
 {
 	my ($v) = (@_);
 
 	if ($v->{"protocol"} eq "fwm") {
 		return &get_virtual($v) . " "  . $v->{protocol} . (($v->{addressfamily} == AF_INET6) ? "6" : "");
 	}
 	else {
 		return &get_virtual($v) . " "  . $v->{protocol};
 	}
 }
 
 sub ld_start
 {
 	my $oldsrv;
 	my $real_service;
 	my $nv;
 	my $nr;
 	my $server_down = {};
 
 	# read status of current ipvsadm -L -n
 	$oldsrv=&ld_read_ipvsadm();
 
 	# make sure virtual servers are up to date
 	foreach $nv (@VIRTUAL) {
 		my $real_service = &get_real_service_str($nv);
 
 		if (exists($oldsrv->{"$real_service"})) {
 			# service exists, modify it
 			&system_wrapper("$IPVSADM -E $$nv{flags}");
 			&ld_log("Changed virtual server: " . &get_virtual($nv));
 		}
 		else {
 			# no such service, create a new one
 			&system_wrapper("$IPVSADM -A $$nv{flags}");
 			&ld_log("Added virtual server: " . &get_virtual($nv));
 		}
 	}
 
 	# make sure real servers are up to date
 	foreach $nv (@VIRTUAL) {
 		my $nreal = $nv->{real};
 		my $ov = $oldsrv->{&get_real_service_str($nv)};
 		my $or = $ov->{real};
 		my $fallback = fallback_find($nv);
 
 		if (defined($fallback)) {
 			delete($or->{"$fallback->{server}:$fallback->{port}"});
 		}
 
 		for $nr (@$nreal) {
 			my $real_str = "$nr->{server}:$nr->{port}";
 			if (! defined($or->{$real_str}) or
 					$or->{$real_str}->{weight} == 0) {
 				$server_down->{$real_str} = [$nv, $nr];
 				#service_set($nv, $nr, "down", {force => 1});
 			}
 			else {
 				if (defined $server_down->{$real_str}) {
 					delete($server_down->{$real_str});
 				}
 				service_set($nv, $nr, "up", {force => 1});
 			}
 			delete($or->{$real_str});
 		}
 
 		# remove remaining entries for real servers
 		for my $k (keys %$or) {
 			purge_untracked_service($nv, $k, "start");
 			delete($$or{$k});
 		}
 
 		delete($oldsrv->{&get_real_service_str($nv)});
 		&fallback_on($nv);
 	}
 
 	for my $k (keys (%$server_down)) {
 		my $v = $server_down->{$k};	
 		if ($READDQUIESCENT eq "no") {
 			# Ensure that the server is initially added
 			service_set(@$v[0], @$v[1], "up", {force => 1});
 		}
 		# Remove Server
 		service_set(@$v[0], @$v[1], "down", {force => 1});
 		delete($server_down->{$k});
 		#sleep 5;
 	}
 
 	# remove remaining entries for virtual servers
 	foreach $nv (@OLDVIRTUAL) {
 		if (! defined($oldsrv->{&get_real_service_str($nv)})) {
 			next;
 		}
 		purge_virtual($nv, "start");
 	}
+
+	# initial check of all real servers while we are still starting up so
+	# any e-mail notifications sent out are in 'starting' daemon status.
+	_ld_main_check_all();
 }
 
 sub ld_cmd_children
 {
 	my ($cmd, %children) = (@_);
 	# instantiate other ldirectord, if specified
 	my $child;
 	foreach $child (keys %children) {
 		if ($cmd eq "reload_or_start") {
 			if (&system_wrapper("$LDIRECTORD $child reload")) {
 				&system_wrapper("$LDIRECTORD $child start");
 			}
 		}
 		else {
 			&system_wrapper("$LDIRECTORD $child $cmd");
 		}
 	}
 }
 
 sub ld_stop
 {
 	# Kill children
 	if ($FORKING eq 'yes') {
 		foreach my $virtual_id (keys (%FORK_CHILDREN)) {
 			my $pid = $FORK_CHILDREN{$virtual_id};
 			ld_log("Killing child $virtual_id PID=$pid");
 			kill 15, $pid;
 		}
 	}
 	foreach my $v (@VIRTUAL) {
 		next if ( (! defined($$v{cleanstop}) and $CLEANSTOP eq 'no') or
 			(defined($$v{cleanstop}) and $$v{cleanstop} eq 'no') );
 		my $real = $$v{real};
 		foreach my $r (@$real) {
 			if (defined $$r{virtual_status}) {
 				purge_service($v, $r, "stop");
 			}
 		}
 		purge_virtual($v, "stop");
 	}
 }
 
 sub ld_main
 {
 	# Main failover checking code
 	while (1) {
 		if ($FORKING eq 'yes') {
 			foreach my $v (@VIRTUAL) {
 				my $virtual_id = get_virtual_id_str($v);
 				if (!exists($FORK_CHILDREN{$virtual_id})) {
 					&ld_log("Child not running for $virtual_id, spawning");
 					my $pid = fork;
 					if (!defined($pid)) {
 						&ld_log("fork failed");
 					} elsif ($pid == 0) {
 						run_child($v);
 					} else {
 						$FORK_CHILDREN{get_virtual_id_str($v)} = $pid;
 						&ld_log("Spawned child $virtual_id PID=$pid");
 					}
 				} elsif (waitpid($FORK_CHILDREN{get_virtual_id_str($v)}, WNOHANG)) {
 					delete $FORK_CHILDREN{get_virtual_id_str($v)};
 				}
 			}
 			check_signal();
 			if (!check_cfgfile()) {
 				sleep 1;
 			}
 
 			check_signal();
 
 		} else {
-			my @real_checked;
-			foreach my $v (@VIRTUAL) {
-				my $real = $$v{real};
-				my $virtual_id = get_virtual_id_str($v);
+			_ld_main_check_all();
 
-				REAL: foreach my $r (@$real) {
-					my $real_id = get_real_id_str($r, $v);
-					check_signal();
-					foreach my $tmp_id (@real_checked) {
-						if($real_id eq $tmp_id) {
-							&ld_debug(3, "Already checked: real server=$real_id (virtual=$virtual_id)");
-							next REAL;
-						}
-					}
-					_check_real($v, $r);
-					push(@real_checked, $real_id);
-				}
-			}
 			check_signal();
 			if (!check_cfgfile()) {
 				sleep $CHECKINTERVAL;
 			}
 
 			check_signal();
 			ld_emailalert_resend();
 
 			check_signal();
 		}
 	}
 }
 
 sub run_child
 {
 	my $v = shift;
 	# Just exit on signals
 	$SIG{'INT'} = "DEFAULT";
 	$SIG{'QUIT'} = "DEFAULT";
 	$SIG{'ILL'} = "DEFAULT";
 	$SIG{'ABRT'} = "DEFAULT";
 	$SIG{'FPE'} = "DEFAULT";
 	$SIG{'SEGV'} = "DEFAULT";
 	$SIG{'TERM'} = "DEFAULT";
 
 	$SIG{'BUS'} = "DEFAULT";
 	$SIG{'SYS'} = "DEFAULT";
 	$SIG{'XCPU'} = "DEFAULT";
 	$SIG{'XFSZ'} = "DEFAULT";
 
 	$SIG{'IOT'} = "DEFAULT";
 
 	$SIG{'PIPE'} = "IGNORE";
 	$SIG{'HUP'} = sub { exit 1 };
 
 	my $real = $$v{real};
 	my $virtual_id = get_virtual_id_str($v);
 	my $checkinterval = $$v{checkinterval} || $CHECKINTERVAL;
+
+	# run_child() runs in a child forked by ld_main(), so it starts with a
+	# copy of the parent's %EMAILSTATUS: delete any entries that don't
+	# belong to this child's own real servers.
+	my $virtual_str = &get_virtual($v);
+	my %myservices = ();
+	foreach my $r (@$real) {
+		$myservices{"$r->{server}:$r->{port} ($virtual_str)"} = 1;
+	}
+	foreach my $id (keys %EMAILSTATUS) {
+		delete $EMAILSTATUS{$id} unless exists $myservices{$id};
+	}
 	$0 = "ldirectord $virtual_id";
 	while (1) {
 		foreach my $r (@$real) {
 			$0 = "ldirectord $virtual_id checking $$r{server}";
 			_check_real($v, $r);
 		}
 		$0 = "ldirectord $virtual_id";
 		sleep $checkinterval;
 		ld_emailalert_resend();
 	}
 }
 
+# Run one check pass over every real server of every virtual service.
+# A real server whose get_real_id_str() value has already been seen
+# during this pass is skipped, so it is checked at most once per pass.
+sub _ld_main_check_all
+{
+	my %real_checked;
+
+	foreach my $v (@VIRTUAL) {
+		my $real = $$v{real};
+		my $virtual_id = get_virtual_id_str($v);
+
+		REAL: foreach my $r (@$real) {
+			my $real_id = get_real_id_str($r, $v);
+			check_signal();
+			if ($real_checked{$real_id}) {
+				&ld_debug(3, "Already checked: real server=$real_id (virtual=$virtual_id)");
+				next REAL;
+			}
+			_check_real($v, $r);
+			$real_checked{$real_id} = 1;
+		}
+	}
+}
+
 sub _check_real
 {
 	my $v = shift;
 	my $r = shift;
 
 
 	my $real_id = get_real_id_str($r, $v);
 	my $virtual_id = get_virtual_id_str($v);
 
 	if (_check_real_for_maintenance($r)) {
 		service_set($v, $r, "down", {do_log => 1, force => 1}, "Server in maintenance");
 		return;
 	} elsif ($$v{checktype} eq "negotiate" || $$r{num_connects}>=$$v{num_connects}) {
 		&ld_debug(2, "Checking negotiate: real server=$real_id (virtual=$virtual_id)");
 		if (grep $$v{service} eq $_, ("http", "https", "http_proxy")) {
 			$$r{num_connects} = 0 if (check_http($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "pop") {
 			$$r{num_connects} = 0 if (check_pop($v, $r, 0) == $SERVICE_UP);
 		} elsif ($$v{service} eq "pops") {
 			$$r{num_connects} = 0 if (check_pop($v, $r, 1) == $SERVICE_UP);
 		} elsif ($$v{service} eq "imap") {
 			$$r{num_connects} = 0 if (check_imap($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "imaps") {
 			$$r{num_connects} = 0 if (check_imaps($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "smtp" or $$v{service} eq "submission") {
 			$$r{num_connects} = 0 if (check_smtp($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "ftp") {
 			$$r{num_connects} = 0 if (check_ftp($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "ldap") {
 			$$r{num_connects} = 0 if (check_ldap($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "nntp") {
 			$$r{num_connects} = 0 if (check_nntp($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "dns") {
 			$$r{num_connects} = 0 if (check_dns($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "sip") {
 			$$r{num_connects} = 0 if (check_sip($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "radius") {
 			$$r{num_connects} = 0 if (check_radius($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "mysql") {
 			$$r{num_connects} = 0 if (check_mysql($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "pgsql") {
 			$$r{num_connects} = 0 if (check_pgsql($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "oracle") {
 			$$r{num_connects} = 0 if (check_oracle($v, $r) == $SERVICE_UP);
 		} elsif ($$v{service} eq "simpletcp") {
 			$$r{num_connects} = 0 if (check_simpletcp($v, $r) == $SERVICE_UP);
 		} else {
 			$$r{num_connects} = 0 if (check_none($v, $r) == $SERVICE_UP);
 		}
 	} elsif ($$v{checktype} eq "connect") {
 		if ($$v{protocol} ne "udp") {
 			&ld_debug(2, "Checking connect: real server=$real_id (virtual=$virtual_id)");
 			check_connect($v, $r);
 		}
 		else {
 			&ld_debug(2, "Checking connect (ping): real server=$real_id (virtual=$virtual_id)");
 			check_ping($v, $r);
 		}
 	} elsif ($$v{checktype} eq "ping") {
 		&ld_debug(2, "Checking ping: real server=$real_id (virtual=$virtual_id)");
 		check_ping($v, $r);
 	} elsif ($$v{checktype} eq "external") {
 		&ld_debug(2, "Checking external: real server=$real_id (virtual=$virtual_id)");
 		check_external($v, $r);
 	} elsif ($$v{checktype} eq "external-perl") {
 		&ld_debug(2, "Checking external-perl: real server=$real_id (virtual=$virtual_id)");
 		check_external_perl($v, $r);
 	} elsif ($$v{checktype} eq "off") {
 		&ld_debug(2, "Checking off: No real or fallback servers to be added\n");
 	} elsif ($$v{checktype} eq "on") {
 		&ld_debug(2, "Checking on: Real servers are added without any checks\n");
 		&service_set($v, $r, "up");
 	} elsif ($$v{checktype} eq "combined") {
 		&ld_debug(2, "Checking combined-connect: real server=$real_id (virtual=$virtual_id)");
 		if (check_connect($v, $r) == $SERVICE_UP) {
 			$$r{num_connects}++;
 		} else {
 			$$r{num_connects} = 999999;
 		}
 	}
 }
 
 sub _check_real_for_maintenance
 {
 	my $r = shift;
 
 	return undef if(!$MAINTDIR);
 
 	my $servername = ld_gethostbyaddr($$r{server});
 
 	# Extract just the first component of the full name so we can match short or FQDN names
 	$servername =~ /^([a-z][a-z0-9\-]+)\./;
 	my $servershortname = $1;
 
 	if (-e "$MAINTDIR/$$r{server}:$$r{port}") {
 		&ld_debug(2, "Server maintenance: Found file $$r{server}:$$r{port}");
 		return 1;
 	} elsif (-e "$MAINTDIR/$$r{server}") {
 		&ld_debug(2, "Server maintenance: Found file $$r{server}");
 		return 1;
 	} elsif ($servername && -e "$MAINTDIR/$servername:$$r{port}") {
 		&ld_debug(2, "Server maintenance: Found file $servername:$$r{port}");
 		return 1;
 	} elsif ($servername && -e "$MAINTDIR/$servername") {
 		&ld_debug(2, "Server maintenance: Found file $servername");
 		return 1;
 	} elsif ($servershortname && -e "$MAINTDIR/$servershortname:$$r{port}") {
 		&ld_debug(2, "Server maintenance: Found file $servershortname:$$r{port}");
 		return 1;
 	} elsif ($servershortname && -e "$MAINTDIR/$servershortname") {
 		&ld_debug(2, "Server maintenance: Found file $servershortname");
 		return 1;
 	}
 	return undef;
 }
 
 sub check_http
 {
 	use LWP::UserAgent;
 	use LWP::Debug;
+	use Net::HTTP;
+	use URI;
 	if($DEBUG > 2) {
 		LWP::Debug::level('+');
 	}
 	my ($v, $r) = @_;
 
 	my $host = $$r{server};
 	my $virtualhost = (defined $$v{virtualhost} ? $$v{virtualhost} : $host);
 
 	&ld_debug(2, "check_http: url=\"$$r{url}\" "
 		. "virtualhost=\"$virtualhost\"");
 
 	if (inet_pton(AF_INET6,&ld_strip_brackets($host))) {
-		no warnings 'once';
 		require Net::INET6Glue::INET_is_INET6;
-		# Workaround for Net-HTTP IPv6 Address URLs Broken
+	}
+
+	# Workaround for Net-HTTP IPv6 Address URLs Broken
+	if ($LWP::VERSION < 6.08 || $Net::HTTP::VERSION < 6.07 || $URI::VERSION < 1.64) {
+		no warnings 'once';
 		@LWP::Protocol::http::EXTRA_SOCK_OPTS = (PeerAddr => $host,
 							 PeerHost => &ld_strip_brackets($host),
 							 Host => &ld_strip_brackets($host));
 	}
 
 	my $ua = new LWP::UserAgent(ssl_opts => { verify_hostname => 0 });
 
 	my $h = undef;
 	if ($$v{service} eq "http_proxy") {
 		my $port = ld_checkport($v, $r);
 		$ua->proxy("http", "http://$$r{server}:$port/");
 	} else {
 		$h = new HTTP::Headers("Host" => $virtualhost);
 	}
 
 	my $req = new HTTP::Request("$$v{httpmethod}", "$$r{url}", $h);
 	my $res;
 
 	# LWP does not seem to honour timeouts set using $ua->timeout()
 	# for HTTPS. So use an alarm instead. This also has the advantage
 	# of being cumulative timeout, rather than a per send/receive
 	# timeout.
 	eval {
 		# LWP makes unguarded calls to eval
 		# which throw a fatal exception if they fail
 		# Needless to say, this is completely stupid.
 		# Resetting of $SIG{'__DIE__'} is also
 		# needed now that alarm() is used.
 		local $SIG{'__DIE__'} = "DEFAULT";
 		local $SIG{'ALRM'} = sub { die "Timeout Alarm" };
 		&ld_debug(4, "Timeout is $$v{negotiatetimeout}");
 		&ld_debug(2, "Starting Check");
 		alarm $$v{negotiatetimeout};
 
 		&ld_debug(2, "Starting HTTP/HTTPS");
 		$res = $ua->request($req);
 		&ld_debug(2, "Finished HTTP/HTTPS");
 		alarm 0; # Cancel the alarm
 	};
 
 	if (not defined $res) {
 		&ld_debug(2, "check_http: timeout");
 		goto down;
 	}
 
 	if ($$v{service} eq "https") {
                 &ld_debug(2, "SSL-Cipher: " .
                         ($res->header('Client-SSL-Cipher') || '<not set>'));
                 &ld_debug(2, "SSL-Cert-Subject: " .
                         ($res->header('Client-SSL-Cert-Subject') || '<not set>'));
                 &ld_debug(2, "SSL-Cert-Issuer: " .
                         ($res->header('Client-SSL-Cert-Issuer') || '<not set>'));
 	}
 
 	&ld_debug(2, "Return status: " . $res->status_line);
 
 	my $recstr = $$r{receive};
 	if ($res->is_success && (!($recstr =~ /.+/) ||
 				$res->content =~ /$recstr/)) {
 		service_set($v, $r, "up", {do_log => 1}, $res->status_line);
 		&ld_debug(2, "check_http: $$r{url} is up\n");
 		return $SERVICE_UP;
 	}
 
 	my $log_message = $res->is_success ? $res->content : $res->status_line;
 	service_set($v, $r, "down", {do_log => 1}, $log_message);
 
 	&ld_debug(3, "Headers " .  $res->headers->as_string);
 down:
 	&ld_debug(2, "check_http: $$r{url} is down\n");
 	return $SERVICE_DOWN;
 }
 
 sub check_smtp
 {
 	require Net::SMTP;
 	my ($v, $r) = @_;
 	my $port = ld_checkport($v, $r);
 
 	&ld_debug(2, "Checking $$v{service}: server=$$r{server} port=$port");
 
 	my $smtp = new Net::SMTP($$r{server}, Port => $port,
 			Timeout => $$v{negotiatetimeout});
 	if ($smtp) {
 		$smtp->quit;
 		service_set($v, $r, "up", {do_log => 1});
 		return $SERVICE_UP;
 	} else {
 		service_set($v, $r, "down", {do_log => 1});
 		return $SERVICE_DOWN;
 	}
 }
 
 sub check_pop
 {
 	require Mail::POP3Client;
 	my ($v, $r, $ssl) = @_;
 	my $port = ld_checkport($v, $r);
 
-	&ld_debug(2, "Checking pop server=$$r{server} port=$port ssl=$ssl");
+	&ld_debug(2, "Checking POP3 server=$$r{server} port=$port ssl=$ssl");
 
 	my $pop = new Mail::POP3Client(USER => $$v{login},
 					PASSWORD => $$v{passwd},
 					HOST => $$r{server},
 					USESSL => $ssl,
 					PORT => $port,
 					DEBUG => 0,
 					TIMEOUT => $$v{negotiatetimeout});
 
 	if (!$pop) {
 		service_set($v, $r, "down", {do_log => 1});
 		return $SERVICE_DOWN;
 	}
 
 	if($$v{login} ne "") {
 		my $authres = $pop->login();
 		$pop->close();
 		if (!$authres) {
 			service_set($v, $r, "down", {do_log => 1});
 			return $SERVICE_DOWN;
 		}
 	}
 
 	$pop->close();
 	service_set($v, $r, "up", {do_log => 1});
 	return $SERVICE_UP;
 }
 
 sub check_imap
 {
 	require Net::IMAP::Simple;
 	my ($v, $r) = @_;
 	my $port = ld_checkport($v, $r);
 
-	&ld_debug(2, "Checking imap server=$$r{server} port=$port");
+	&ld_debug(2, "Checking IMAP server=$$r{server} port=$port");
 
 	my $imap = Net::IMAP::Simple->new($$r{server},
 					port => $port,
 					timeout => $$v{negotiatetimeout});
 
 	if (!$imap) {
 		service_set($v, $r, "down", {do_log => 1});
 		return $SERVICE_DOWN;
 	}
 
 	if($$v{login} ne "") {
 		my $authres = $imap->login($$v{login},$$v{passwd});
 		$imap->quit;
 		if (!$authres) {
 			service_set($v, $r, "down", {do_log => 1});
 			return $SERVICE_DOWN;
 		}
 	}
 
 	$imap->quit();
 	service_set($v, $r, "up", {do_log => 1});
 	return $SERVICE_UP;
 }
 
 sub check_imaps
 {
 	require Net::IMAP::Simple::SSL;
 	my ($v, $r) = @_;
 	my $port = ld_checkport($v, $r);
 
-	&ld_debug(2, "Checking imaps server=$$r{server} port=$port");
+	&ld_debug(2, "Checking IMAPS server=$$r{server} port=$port");
 
 	my $imaps = Net::IMAP::Simple::SSL->new($$r{server},
 					port => $port,
 					timeout => $$v{negotiatetimeout});
 	if (!$imaps) {
 		service_set($v, $r, "down", {do_log => 1});
 		return $SERVICE_DOWN;
 	}
 
 	if($$v{login} ne "") {
 		my $authres = $imaps->login($$v{login},$$v{passwd});
 		$imaps->quit;
 		if (!$authres) {
 			service_set($v, $r, "down", {do_log => 1});
 			return $SERVICE_DOWN;
 		}
 	}
 
 	$imaps->quit();
 	service_set($v, $r, "up", {do_log => 1});
 	return $SERVICE_UP;
 }
 
 sub check_ldap
 {
 	my ($v, $r) = @_;
 	require Net::LDAP;
 	my $port = ld_checkport($v, $r);
 
 	my $result;
 	my $recstr = $$r{receive};
 
-	&ld_debug(2, "Checking ldap server=$$r{server} port=$port");
+	&ld_debug(2, "Checking LDAP server=$$r{server} port=$port");
 	eval {
 		local $SIG{'__DIE__'} = "DEFAULT";
 		local $SIG{'ALRM'} = sub { die "Timeout Alarm" };
 		&ld_debug(4, "Timeout is $$v{checktimeout}");
 		&ld_debug(2, "Starting Check");
 		alarm $$v{checktimeout};
 
 		my $ldap = Net::LDAP->new("$$r{server}", port => $port,
 					timeout => $$v{negotiatetimeout});
 		if(!$ldap) {
 		    service_set($v, $r, "down", {do_log => 1}, "Connection failed");
 		    &ld_debug(4, "Connection failed");
 		    alarm 0; # Cancel the alarm
 		    return $SERVICE_DOWN;
 		}
 
 		my $mesg;
 		if ($$v{login} && $$v{passwd}) {
 		    $mesg = $ldap->bind($$v{login}, password=>$$v{passwd}) ;
 		}
 		else {
 		    $mesg = $ldap->bind ;
 		}
 		if ($mesg->is_error) {
 		    service_set($v, $r, "down", {do_log => 1}, "Bind failed");
 		    &ld_debug(4, "Bind failed");
 		    alarm 0; # Cancel the alarm
 		    return $SERVICE_DOWN;
 		}
 
 		&ld_debug(4, "Base : " . substr($$r{request},1));
 		$result = $ldap->search (
 		    base	=> substr($$r{request},1) . "",
 		    scope	=> "base",
 		    filter	=> "(objectClass=*)"
 		    );
 
 		alarm 0; # Cancel the alarm
 	};
 
 	if (!defined($result)) {
 		service_set($v, $r, "down", {do_log => 1}, "No answer received");
                 &ld_debug(2, "check timeout alarm");
                 return $SERVICE_DOWN;
         }
 
 	if($result->count != 1) {
 		service_set($v, $r, "down", {do_log => 1}, "No answer received");
 		&ld_debug(2, "Count failed : " . $result->count);
 		return $SERVICE_DOWN;
 	}
 
 	my $href = $result->as_struct;
 	my @arrayOfDNs  = keys %$href ;
 	if (!($recstr =~ /.+/) || $arrayOfDNs[0] =~ /$recstr/) {
 		service_set($v, $r, "up", {do_log => 1}, "Success");
 		return $SERVICE_UP;
 	} else {
 		service_set($v, $r, "down", {do_log => 1}, "Response mismatch");
 		&ld_debug(4,"Message differs : " . ", " . $$r{receive}
 				. ", " . $arrayOfDNs[0] . ".");
 		return $SERVICE_DOWN;
 	}
 }
 
 sub check_nntp
 {
 	use IO::Socket;
 	use IO::Socket::INET6;
 	use IO::Select;
 	my ($v, $r) = @_;
 	my $sock;
 	my $s;
 	my $buf;
 	my $port = ld_checkport($v, $r);
 	my $status = 1;
 
-	&ld_debug(2, "Checking nntp server=$$r{server} port=$port");
+	&ld_debug(2, "Checking NNTP server=$$r{server} port=$port");
 
 	unless ($sock = IO::Socket::INET6->new(PeerAddr => $$r{server},
 		PeerPort => $port, Proto => 'tcp',
 		TimeOut => $$v{negotiatetimeout})) {
 		service_set($v, $r, "down", {do_log => 1});
 		return $SERVICE_DOWN;
 	}
 	$s = IO::Select->new();
 	$s->add($sock);
 	if (scalar($s->can_read($$v{negotiatetimeout})) == 0) {
 		service_set($v, $r, "down", {do_log => 1});
 	} else {
 		sysread($sock, $buf, 64);
 		if ($buf =~ /^2/) {
 			service_set($v, $r, "up", {do_log => 1});
 			$status = 0;
 		} else {
 			service_set($v, $r, "down", {do_log => 1});
 		}
 	}
 	$s->remove($sock);
 	$sock->close;
 
 	return $status;
 }
 
 sub check_radius
 {
 	require Authen::Radius;
 
 	my ($v, $r) = @_;
 
-	&ld_debug(2, "Checking radius");
+	&ld_debug(2, "Checking RADIUS");
 
 	my $port = ld_checkport($v, $r);
 	my $radius;
 	my $result = "";
 
 	eval {
 		local $SIG{'__DIE__'} = "DEFAULT";
 		local $SIG{'ALRM'} = sub { die "Timeout Alarm" };
 		&ld_debug(4, "Timeout is $$v{checktimeout}");
 		&ld_debug(2, "Starting Check");
 		alarm $$v{checktimeout};
 
-		&ld_debug(2, "Starting Radius");
+		&ld_debug(2, "Starting RADIUS");
 		$radius = new Authen::Radius(Host => "$$r{server}:$port",
 					     Secret=>$$v{secret},
 					     TimeOut=>$$v{negotiatetimeout},
 					     Errmode=>'die');
 		$result = $radius->check_pwd($$v{login}, $$v{passwd});
-		&ld_debug(2, "Finished Radius");
+		&ld_debug(2, "Finished RADIUS");
 		alarm 0; # Cancel the alarm
 	};
 	if ($result eq "") {
 		&service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@");
-		&ld_debug(3, "Radius Error: ".$radius->get_error);
+		&ld_debug(3, "RADIUS Error: ".(defined($radius) ? $radius->get_error : "no Authen::Radius object"));
 		return $SERVICE_DOWN;
 	} else {
 		&service_set($v, $r, "up", {do_log => 1});
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return $SERVICE_UP;
 	}
 }
 
 sub check_mysql
 {
 	return check_sql(@_, "mysql", "database");
 }
 
 sub check_pgsql
 {
 	return check_sql(@_, "Pg", "dbname");
 }
 
 sub check_sql_log_errstr
 {
 	my ($prefix, $errstr) = (@_);
 
 	for $_ (split /\n/, $errstr) {
 		&ld_debug(4, "$prefix $_\n");
 	}
 
 }
 
 sub check_oracle
 {
 	return check_sql(@_, "Oracle", "sid");
 }
 
 sub check_sql
 {
 	require DBI;
 	my ($v, $r, $dbd, $dbname) = @_;
 	my $port = ld_checkport($v, $r);
 	my ($dbh, $sth, $query, $rows, $result);
 	$result = $SERVICE_DOWN;
 	$query = $$r{request};
 	$query =~ s#^/##;
 	unless ($$v{login} && $query) {
 		&ld_log("Error: Must specify a login and request string " .
 			"for MySQL, Oracle and PostgreSQL checks. " .
 			"Not adding $$r{server}.\n");
 		goto err_down;
 	}
 	$result=2;   # Set result flag.  Only ok if ends up at zero.
 	&ld_debug(2, "Checking $$v{server} server=$$r{server} port=$port\n");
 	$dbh = DBI->connect("dbi:$dbd:$dbname=$$v{database};" .
 			    "host=$$r{server};port=$port", $$v{login},
 			    $$v{passwd});
 	unless ($dbh) {
-		&ld_debug(2, "Failed to bind to $$r{server} with DBI->errstr\n");
+		&ld_debug(2, "Failed to bind to $$r{server} with " . DBI->errstr . "\n");
 		check_sql_log_errstr("Failed to bind to $$r{server} with",
 				     DBI->errstr);
 		goto err_down;
 	}
 	$result--;
 	$sth = $dbh->prepare($query);
 	unless ($sth) {
-		&ld_debug(2, "Error preparing statement: $dbh->errstr\n");
+		&ld_debug(2, "Error preparing statement: " . $dbh->errstr . "\n");
 		check_sql_log_errstr("Error preparing statement:",
 				     $dbh->errstr);
-		goto err_disconect;
+		goto err_disconnect;
 	}
 
 	# Test to see if any errors are returned
 	$sth->execute;
 	if ($dbh->err) {
-		&ld_debug(2, "Error executing statement: $dbh->errstr : $dbh->err\n");
+		&ld_debug(2, "Error executing statement: " . $dbh->errstr . " : " . $dbh->err . "\n");
 		check_sql_log_errstr("Error executing statement:",
 				     $dbh->errstr, $dbh->err);
 		goto err_finish;
 	}
 
 	# On error "execute" will return undef.
 	#
 	# Assuming you're using 'SELECT' you will get the number of rows
 	# returned from the db when running execute: the 'rows' method is
 	# only used when doing something that is NOT a select.  I cannot
 	# imagine that you would ever want to insert or update from a
 	# regular polling on this system, so we will assume you are using
 	# SELECT here.
 	#
 	# Ideally you will do something like this: 'select * from
 	# director_slave where enabled=1' This way you can have, say, a
 	# MEMORY table in MySQL where you insert a value into a row
 	# (enabled) that says whether or not you want to actually use this
 	# in the pool from ldirector / ipvs, and disable them without
 	# actually turning off your sql server.
 	
 	$sth->execute;
 	if ($dbd eq "Oracle") { $sth->fetchrow_hashref() }
 	unless ($rows = $sth->rows) {
 		check_sql_log_errstr("Error executing statement:",
 				     $dbh->errstr, $dbh->err);
 		goto err_finish;
 	}
 
 	# Actually look to see if there was data returned in this statement,
 	# else disable node
 	if($rows > 0) {
 		goto out;
 	} else {
 		goto err_finish;
 	}
 
 out:
 	$result = $SERVICE_UP;
 err_finish:
 	$sth->finish();
 err_disconnect:
 	$dbh->disconnect();
 err_down:
 	service_set($v, $r, $result == $SERVICE_UP ? "up" : "down", {do_log => 1});
 	return $result;
 }
 
 sub check_connect
 {
 	my ($v, $r) = @_;
 	my $port = ld_checkport($v, $r);
 
 	eval {
 		local $SIG{'__DIE__'} = "DEFAULT";
 		local $SIG{'ALRM'} = sub { die "Timeout Alarm" };
 		&ld_debug(4, "Timeout is $$v{checktimeout}");
 		alarm $$v{checktimeout};
 		my $sock = &ld_open_socket($$r{server}, $port, $$v{protocol});
 		if ($sock) {
 			close($sock);
 		} else {
 			alarm 0; # Cancel the alarm
 			die("Socket Connect Failed");
 		}
 		&ld_debug(3, "Connected to $$r{server} (port $port)");
 		alarm 0; # Cancel the alarm
 	};
 	if ($@) {
 		&service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@");
 		return $SERVICE_DOWN;
 	} else {
 		&service_set($v, $r, "up", {do_log => 1});
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return $SERVICE_UP;
 	}
 }
 
 sub check_external
 {
 	my ($v, $r) = @_;
 	my $v_server;
 
 	if (defined $$v{server}) {
 		$v_server = $$v{server};
 	} else {
 		$v_server = $$v{fwm};
 	}
 
 	my $result = system_timeout($$v{checktimeout},
 				    $$v{checkcommand}, $v_server, $$v{port},
 				    $$r{server}, $$r{port});
 
 	if ($result) {
 		&service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: " .
 			  "$@ after calling $$v{checkcommand} with result " .
 			  "$result");
 		return 0;
 	} else {
 		&service_set($v, $r, "up", {do_log => 1});
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return 1;
 	}
 }
 
 sub check_external_perl
 {
 	my ($v, $r) = @_;
 	my $result;
 	my $v_server;
 
 	eval {
 		local $SIG{'__DIE__'} = "DEFAULT";
 		local $SIG{'ALRM'} = sub { die "Timeout Alarm" };
 		&ld_debug(4, "Timeout is $$v{checktimeout}");
 		alarm $$v{checktimeout};
 		if (defined $$v{server}) {
 			$v_server = $$v{server};
 		} else {
 			$v_server = $$v{fwm};
 		}
 		my $cmdfunc = $check_external_perl__funcs{$$v{checkcommand}};
 		if (!defined($cmdfunc)) {
 			open(CMDFILE, "<$$v{checkcommand}") || die "cannot open external-perl checkcommand file: $$v{checkcommand}";
 			$cmdfunc = eval("sub { \@ARGV=\@_; " . join("", <CMDFILE>) . " }");
 			close(CMDFILE);
 			$check_external_perl__funcs{$$v{checkcommand}} = $cmdfunc;
 		}
 		no warnings 'redefine';
 		local *CORE::GLOBAL::exit = sub {
 			$result = shift;
 			goto external_exit;
 		};
 		$cmdfunc->($v_server, $$v{port}, $$r{server}, $$r{port});
 		external_exit:
 		alarm 0;
 	};
 	if ($@ or $result != 0) {
 		&service_set($v, $r, "down");
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: " .
 			  "$@ after calling (external-perl) $$v{checkcommand} with result " .
 			  "$result");
 		return 0;
 	} else {
 		&service_set($v, $r, "up");
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return 1;
 	}
 }
 
 sub check_sip
 {
 	my ($v, $r) = @_;
 	my $sip_d_port = ld_checkport($v, $r);
 
-	&ld_debug(2, "Checking sip server=$$r{server} port=$sip_d_port");
+	&ld_debug(2, "Checking SIP server=$$r{server} port=$sip_d_port");
 
 
 	eval {
 		use Socket;
 
 		local $SIG{'__DIE__'} = "DEFAULT";
 		local $SIG{'ALRM'} = sub { die "Timeout Alarm" };
 		&ld_debug(4, "Timeout is $$v{checktimeout}");
 		alarm $$v{negotiatetimeout};
 
 		my $sock = &ld_open_socket($$r{server}, $sip_d_port,
 					$$v{protocol});
 		unless ($sock) {
 			alarm 0;
 			die("Socket Connect Failed");
 		}
 
 		my ($sip_s_addr_str, $sip_s_port) = &ld_get_addrport($sock);
 
 		&ld_debug(3, "Connected from $sip_s_addr_str:$sip_s_port to " .
 			$$r{server} . ":$sip_d_port");
 
 		select $sock;
 		$|=1;
 		select STDOUT;
 
 		my $request =
 		"OPTIONS sip:" . $$v{login} . " SIP/2.0\r\n" .
 		"Via: SIP/2.0/UDP $sip_s_addr_str:$sip_s_port;" .
 			"branch=z9hG4bKhjhs8ass877\r\n" .
 		"Max-Forwards: 70\r\n" .
 		"To: <sip:" . $$v{login} . ">\r\n" .
 		"From: <sip:" . $$v{login} . ">;tag=1928301774\r\n" .
 		"Call-ID: " . (join "", map { unpack "H*", chr(rand(256)) } 1..8) . "\r\n" .
 		"CSeq: 63104 OPTIONS\r\n" .
 		"Contact: <sip:" . $$v{login} . ">\r\n" .
 		"Accept: application/sdp\r\n" .
 		"Content-Length: 0\r\n\r\n";
 
 		print "Request:\n$request";
 		print $sock $request;
 
 		my $ok;
 		my $reply;
 		while (<$sock>) {
 			chomp;
 			$/="\r";
 			chomp;
 			$/="\n";
 
 			last if ($_ eq "");
 
 			if (!defined $ok) {
 				# Check status
 				$ok = $_;
 				if ($ok !~ m/^SIP\/2.0 200 OK/) {
 					alarm 0; # Cancel the alarm
 					close($sock);
 					die "$ok\n";
 				}
 				next;
 			}
 			$reply .= "$_\n";
 
 			# Add more checks here as desired
 		}
 		alarm 0; # Cancel the alarm
 		close($sock);
 
 		if (!defined $ok) {
 			die "No OK\n";
 		}
 
 		print "Reply:\n$ok\n$reply\n";
 	};
 
 	if ($@) {
 		&service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@");
 		return $SERVICE_DOWN;
 	} else {
 		&service_set($v, $r, "up", {do_log => 1});
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return $SERVICE_UP;
 	}
 }
 
 sub check_simpletcp
 {
 	my ($v, $r) = @_;
 	my $d_port = ld_checkport($v, $r);
 
 	&ld_debug(2, "Checking simpletcp server=$$r{server} port=$d_port");
 
 	eval {
 		use Socket;
 
 		local $SIG{'__DIE__'} = "DEFAULT";
 		local $SIG{'ALRM'} = sub { die "Timeout Alarm" };
 		&ld_debug(4, "Timeout is $$v{checktimeout}");
 		alarm $$v{negotiatetimeout};
 
 		my $sock = &ld_open_socket($$r{server}, $d_port,
 					$$v{protocol});
 		unless ($sock) {
 			alarm 0;
 			die("Socket Connect Failed");
 		}
 
 		my ($s_addr_str, $s_port) = &ld_get_addrport($sock);
 
 		&ld_debug(3, "Connected from $s_addr_str:$s_port to " .
 			$$r{server} . ":$d_port");
 
 		select $sock;
 		$|=1;
 		select STDOUT;
 
 		my $request = substr($$r{request}, 1);
 		$request =~ s/\\n/\n/g ;
 
 		&ld_debug(2, "Checking simpletcp server=$$r{server} port=$d_port request:\n$request");
 		print $sock $request;
 		shutdown($sock, SHUT_WR);
 
 		my $ok;
 		my $reply;
 		while (<$sock>) {
 			&ld_debug(2, "Checking simpletcp server=$$r{server} port=$d_port receive=" . $$r{receive} ." got: $_\n");
 			if ( $_ =~ /$$r{receive}/ ) {
 				$ok = 1;
 				last;
 			}
 		}
 		alarm 0; # Cancel the alarm
 		close($sock);
 
 		if (!defined $ok) {
 			die "No OK\n";
 		}
 	};
 
 	if ($@) {
 		&service_set($v, $r, "down", {do_log => 1});
 		&ld_debug(3, "Deactivated service $$r{server}:$$r{port}: $@");
 		return $SERVICE_DOWN;
 	} else {
 		&service_set($v, $r, "up", {do_log => 1});
 		&ld_debug(3, "Activated service $$r{server}:$$r{port}");
 		return $SERVICE_UP;
 	}
 }
 
 sub check_ftp
 {
 	require Net::FTP;
 	my ($v, $r) = @_;
 	my $ftp;
 	my $memory;
 	my $debug = ($DEBUG > 2) ? 1 : 0;
 	my $port = ld_checkport($v, $r);
 
-	&ld_debug(2, "Checking ftp server=$$r{server} port=$port");
+	&ld_debug(2, "Checking FTP server=$$r{server} port=$port");
 	&ld_debug(4, "Timeout is $$v{negotiatetimeout}");
 
 	open(TMP,'+>', undef);
 
 	# In some cases Net::FTP dies if there is a timeout
 	eval {
 		unless ($ftp = Net::FTP->new("$$r{server}:$port",
 				Timeout=>$$v{negotiatetimeout},
 				Debug=>$debug)) {
 			die "Could not connect\n";
 		}
 		$ftp->login($$v{login}, $$v{passwd});
 		$ftp->cwd("/");
 		$ftp->binary();
 		$ftp->pasv();
 		$ftp->get("$$r{request}", *TMP);
 		$ftp->quit();
 	};
 	if ($@) {
 		&ld_debug(2, "Warning: $@");
 	}
 
 	seek TMP, 0, 0;
 	local $/;
 	$memory = <TMP>;
 	close TMP;
 
 	if ($memory =~ /$$r{receive}/) {
 		service_set($v, $r, "up", {do_log => 1});
 		return $SERVICE_UP;
 	}
 
 	service_set($v, $r, "down", {do_log => 1});
 	return $SERVICE_DOWN;
 }
 
 sub check_dns
 {
 	my $res;
 	my $query;
 	my $rr;
 	my $request;
 	my $server;
 	my ($v,$r) = @_;
 	my $port = ld_checkport($v, $r);
 	{
 		# Net::DNS makes unguarded calls to eval
 		# which throw a fatal exception if they fail
 		# Needless to say, this is completely stupid.
 		local $SIG{'__DIE__'} = "DEFAULT";
 		# When fork=yes we need to ignore the child death
 		local $SIG{'CHLD'} = "IGNORE";
 		require Net::DNS;
 	}
 	$res = new Net::DNS::Resolver;
 	if($DEBUG > 2) {
 		$res->debug(1);
 	}
 
 	$$r{"request"} =~ m/^\/?(.*)/;
 	$request=$1;
 	
 	$server = &ld_strip_brackets($$r{server});
 
-	&ld_debug(2, "Checking dns: request=\"$request\" receive=\""
+	&ld_debug(2, "Checking DNS: request=\"$request\" receive=\""
 		. $$r{"receive"} . "\"\n");
 
 	eval {
 		local $SIG{'__DIE__'} = "DEFAULT";
 		local $SIG{'ALRM'} = sub { die "timeout\n"; };
 		alarm($$v{negotiatetimeout});
 		$res->nameservers($server);
 		$res->port($port);
 		if ($$v{"protocol"} eq "tcp") {
 			$res->usevc(1);
 		}
 		$query = $res->search($request);
 		alarm(0);
 	};
 
 	if (@$ eq "timeout\n" or ! $query) {
 		service_set($v, $r, "down", {do_log => 1}, "Connection timed out");
 		return $SERVICE_DOWN;
 	}
 
 	my $recstr = $$r{receive};
 	foreach $rr ($query->answer) {
 		if (($rr->type eq "A" and length($recstr) and $rr->address =~ /$recstr/) or
 		    ($rr->type eq "PTR" and length($recstr) and $rr->ptrdname =~ /$recstr/)) {
 			service_set($v, $r, "up", {do_log => 1}, "Success");
 			return $SERVICE_UP;
 		}
 	}
 
 	service_set($v, $r, "down", {do_log => 1}, "Response mismatch");
 	return $SERVICE_DOWN;
 }
 
 sub check_ping
 {
 	use Net::Ping;
 
 	my ($v,$r) = (@_);
 
 	&ld_debug(2, "Checking ping: " .  "host=\"" .  $$r{server} .
 		"\" checktimeout=\"" . $$v{"checktimeout"} .
 		"\" checkcount=\"" . $$v{"checkcount"} . "\"\n");
 
 	my $p = Net::Ping->new("icmp","1","64");
 	for (my $attempt = 0; $attempt < $$v{"checkcount"}; $attempt++) {
 		if ($p->ping($$r{server}, $$v{"checktimeout"})) {
 			&ld_debug(2, "pong from $$r{server}\n");
 			service_set($v, $r, "up", {do_log => 1});
 			return $SERVICE_UP;
 		}
 		&ld_debug(2, "ping to $$r{server} timed out " .
 					"(attempt " .  ($attempt + 1) . "/" .
 					$$v{"checkcount"} . ")\n");
 	}
 
 	service_set($v, $r, "down");
 	return $SERVICE_DOWN;
 }
 
 # check_none
 # Dummy function to check service if service type is none.
 # Just activates the real server
 sub check_none
 {
 	my ($v, $r) = @_;
 
 	&ld_debug(2, "Checking none");
 
 	service_set($v, $r, "up", {do_log => 1});
 	return $SERVICE_UP;
 }
 
 # service_set
 # Used to bring up and down real servers.
 # This is the function you should call if you want to bring a real
 # server up or down.
 # This function is safe to call regardless of the current state of a
 # real server.
 # Do _not_ call _service_up or _service_down directly.
 # pre: v: virtual that the real service belongs to
 #         Only used to determine the protocol of the service
 #      r: real server to take down
 #      state: up or down
 #             up to bring the real service up
 #             down to bring the real service up
 #      flags: hash with the following (optional) keys:
 #             force => 1  - force setting of the specified state
 #             do_log => 1 - log the state to the monitorfile
 #                           (when called as the result of a check)
 # post: The real server is brought up or down for each virtual service
 #       it belongs to.
 # return: none
 sub service_set
 {
 	my ($v, $r, $state, $flags, $log_msg) = @_;
 
 	my ($real, $virtual, $virt, $now);
 
 	if ($$flags{'do_log'}) {
 		$now = localtime();
 
 		if (!defined($log_msg)) {
 			$log_msg = "-";
 		}
 
 		# URI-escape special log characters ('|' and newlines)
 		$log_msg =~ s/([%|\r\n])/sprintf("%%%.2x", ord($1))/eg;
 	}
 
 	# Find the real server in @REAL
 	foreach $real (@REAL) {
 		if($real->{"real"} eq get_real_id_str($r, $v)) {
 			$virtual = $real->{"virtual"};
 			last;
 		}
 	}
 	return unless (defined($virtual));
 
 	# Check each virtual service for the real server and make
 	# changes as necessary
 	foreach $v (@VIRTUAL){
 		# Use found rather than relying on tmp_id being
 		# set when we leave the foreach loop. There
 		# seems to some weirdness in Perl (5.6.0 on Redhat 7.2)
 		my $found = 0;
 		my $tmp_id;
 		my $virtual_id = get_virtual_id_str($v);
 		my $real_id = get_real_id_str($r, $v);
 		my $log_str = "real server=$real_id" .
 			      " (virtual=$virtual_id)";
 		foreach $tmp_id (@$virtual) {
 			if($virtual_id eq $tmp_id) {
 				$found = 1;
 				last;
 			}
 		}
 		if ($found == 1) {
 			if ($state=~/up/i) {
 				_service_up($v, $r, $$flags{"force"});
 				&ld_debug(2, "Enabled  $log_str");
 			} elsif ($state=~/down/i) {
 				_service_down($v, $r, $$flags{"force"});
 				&ld_debug(2, "Disabled $log_str");
 			}
 
 			if ($$v{"monitorfile"} and $$flags{"do_log"}) {
 				my $real_log_msg = $real_id;
 				$real_log_msg =~ tr/:/ /s;
 				$real_log_msg =~ s/\\//g;
 				unless(
 					open(CHECKLOG, ">>$$v{monitorfile}") and
 					print CHECKLOG "[$now] [$$] $real_log_msg [$state] $log_msg\n" and
 					close(CHECKLOG)
 				) { die("Error writing to monitorfile '$$v{monitorfile}': $!"); }
 			}
 		}
 	}
 }
 
 # _remove_service
 # Remove a real server by either making it quiescent or deleting it
 # Should be called by _service_down or fallback_off
 # I.e. If you want to change the state of a real server call service_set.
 #      If you call this function directly then ldirectord will lose track
 #      of the state of real servers.
 # If the real server exists (which it should) make it quiescent or
 # delete it, depending on the global and per virtual service quiescent flag.
 # If it # doesn't exist, just leave it as it will be added by the
 # _service_up code as appropriate.
 # pre: v: reference to virtual service to with the real server belongs
 #      rservice: service to restore. Of the form server:port for a tcp or
 #                udp service. Of the form fwmark for a fwm service.
 #      rforw: Forwarding mechanism of service. Sould be one of "-g" "-i" or
 #             "-m"
 #      tag: Tag to use for logging. Should be either "real" or "fallback"
 # post: real service is taken up from the respective virtual service
 #       if it is inactive
 # return: none
 sub _remove_service
 {
 	my ($v, $rservice, $rforw, $tag) = (@_);
 
 	my $oldsrv;
 	my $ov;
 	my $or;
 	my $ipvsadm_args;
 	my $log_args;
 	my $virtual_str;
 	my $old_rservice;
 	my $is_quiescent;
 
 	$virtual_str = &get_virtual($v);
 
 	$oldsrv=&ld_read_ipvsadm();
 	$ov=$oldsrv->{&get_real_service_str($v)};
 	if(!defined($ov)){
 		return;
 	}
 
 	if ($tag ne "fallback"
 			and ((defined $$v{quiescent}
 					and $$v{quiescent} eq "yes")
 				or (!defined($$v{quiescent})
 					and $QUIESCENT eq "yes"))){
 		$is_quiescent = "quiescent";
 	}
 
 	$or=$ov->{"real"}->{$rservice};
 
 	# If a virtual service is a IP/port service (not fwmark)
 	# and a real-servers uses a forwarding mechanism other than masq
 	# then the port will always be that of the virtual service.
 	# This includes real-servers that LVS has set to use
 	# the local forwarding mechanism because their IP address
 	# is local. Thus, if $rservice does not exist test
 	# for the same ip address with the virtual servers port.
 	# N.B: This could cause strange things to happen if
 	# there is a clash between two real servers on different ports
 	# that LVS has mapped to being the same thing.
 	if(!defined($or)) {
 		$old_rservice = $rservice;
 		$rservice =~ /(.*):(.*)/;
 		$rservice = $1;
 		$virtual_str =~ /(.*):(.*)/;
 		$rservice .= ":" . $2;
 		$or=$ov->{"real"}->{$rservice};
 		# If this doesn't exist either, use the original service.
 		# Otherwise if masq and quiescence is in use, the
 		# real server is not local, and it has an alternate port to
 		# the virtual server, using the mapped service will
 		# result in a quiescent service being created on the
 		# virtual server's port, which is not wanted.
 		if(!defined($or)) {
 			$rservice = $old_rservice;
 			$old_rservice = undef;
 		}
 	}
 
 	if((!defined($or) and !defined($is_quiescent)) or
 			(defined($is_quiescent) and defined($or) and
 				$or->{"weight"} eq 0 and
 				get_forward_flag($or->{"forward"}) eq $rforw)){
 		return;
 	}
 
 	$ipvsadm_args = "$$v{proto} " . &get_virtual_option($v) . " -r $rservice";
 	$log_args = "$tag server: $rservice ";
 	if(defined($old_rservice)) {
 		$log_args .= "mapped from $old_rservice "
 	}
 	$log_args .= "($virtual_str)";
 
 	my $server_str=$rservice . " " . $virtual_str;
 	my $currenttime=time();
 	if(defined($is_quiescent)) {
 		if (defined($or)) {
 			&system_wrapper("$IPVSADM -e "
 					. "$ipvsadm_args $rforw -w 0");
 			&ld_log("Quiescent $log_args (Weight set to 0)");
 			&ld_emailalert_send("Quiescent $log_args (Weight set to 0)",
 				    $v, $rservice, $currenttime);
 		}
 		elsif ($READDQUIESCENT eq "yes") {
 			&system_wrapper("$IPVSADM -a "
 					. "$ipvsadm_args $rforw -w 0");
 			&ld_log("Readd Quiescent $log_args (Weight set to 0)");
 			&ld_emailalert_send("Quiescent $log_args (Weight set to 0)",
 				    $v, $rservice, $currenttime);
 		}
 	}
 	else {
 		&system_wrapper("$IPVSADM -d $ipvsadm_args");
 		&ld_log("Deleted $log_args");
 		&ld_emailalert_send("Deleted $log_args", $v,
 				    $rservice, $tag eq "fallback" ? 0 : $currenttime);
 	}
 }
 
 # _restore_service
 # Make a retore a real server. The opposite of _quiescent_server.
 # Should be called by _service_up or fallback_on
 # I.e. If you want to change the state of a real server call service_set.
 #      If you call this function directly then ldirectord will lose track
 #      of the state of real servers.
 # If the real server exists (which it should) make it quiescent. If it
 # doesn't exist, just leave it as it will be added by the _service_up code
 # as appropriate.
 # pre: v: reference to virtual service to with the real server belongs
 #      rservice: service to restore. Of the form server:port for a tcp or
 #                udp service. Of the form fwmark for a fwm service.
 #      rforw: Forwarding mechanism of service. Sould be one of "-g" "-i" or
 #             "-m"
 #      rwght: Weight of service. Sold be of the form "<weight>"
 #             e.g. "1"
 #      tag: Tag to use for logging. Should be either "real" or "fallback"
 # post: real service is taken up from the respective virtual service
 #       if it is inactive
 # return: none
 sub _restore_service
 {
 	my ($v, $rservice, $rforw, $rwght, $tag) = (@_);
 
 	my $oldsrv;
 	my $ov;
 	my $or;
 	my $ipvsadm_args;
 	my $log_args;
 
 	$ipvsadm_args = "$$v{proto} " . &get_virtual_option($v)
 			. " -r $rservice $rforw -w $rwght";
 	$log_args = "$tag server: $rservice "
 		    . "(" #. scalar(%{$v->{real_status}})
 		    .  &get_virtual($v) . ")";
 
 	#if the server exists then restore its weight
 	# otherwise add the server
 	$oldsrv=&ld_read_ipvsadm();
 	$ov=$oldsrv->{&get_real_service_str($v)};
 	if(defined($ov)){
 		$or=$ov->{"real"}->{$rservice};
 	}
 	if(defined($or)){
 		unless($or->{"weight"} eq $rwght and
 			get_forward_flag($or->{"forward"}) eq $rforw){
 			&system_wrapper("$IPVSADM -e $ipvsadm_args");
 			&ld_log("Restored $log_args (Weight set to $rwght)");
 			&ld_emailalert_send("Restored $log_args " .
 					    "(Weight set to $rwght)",
 					    $v, $rservice, 0);
 		}
 	}
 	else {
 		&system_wrapper("$IPVSADM -a $ipvsadm_args");
 		&ld_log("Added $log_args (Weight set to $rwght)");
 		&ld_emailalert_send("Added $log_args (Weight set to $rwght)",
 				    $v, $rservice, 0);
 	}
 }
 
 # Check the status of a server
 # Should only be called from _status_up, _status_down,
 # _service_up, or _service_down
 # Returns 1 if the server is up, 0 if down
 sub _status_check
 {
 	my ($v, $r, $is_fallback) = (@_);
 
 	my $virtual_id = get_virtual_id_str($v);
 	my $real_id = get_real_id_str($r, $v);
 
 	if (defined($is_fallback)) {
 		if (defined($v->{real_status}) or
 				(defined($v->{fallback_status}) and
 				$v->{fallback_status}->{"$real_id"})) {
 			return 1;
 		}
 	}
 	else {
 		if (defined ($v->{real_status}) and
 				$v->{real_status}->{"$real_id"}) {
 			return 1;
 		}
 	}
 	return 0;
 }
 
 # Set the status of a server as up
 # Should only be called from _service_up or _ld_start
 sub _status_up
 {
 	my ($v, $r, $is_fallback) = (@_);
 
 	my $virtual_id = get_virtual_id_str($v);
 	my $real_id = get_real_id_str($r, $v);
 
 	return undef if(_status_check($v, $r, $is_fallback));
 
 	$r->{virtual_status}->{"$virtual_id"} = 1;
 	if (defined $is_fallback) {
 		$v->{fallback_status}->{"$real_id"} = 1;
 	}
 	else {
 		$v->{real_status}->{"$real_id"} = 1;
 	}
 
 	return 1;
 }
 
 # Set the status of a server as down
 # Should only be called from _service_down or ld_stop
 sub _status_down
 {
 	my ($v, $r, $is_fallback) = (@_);
 
 	my $virtual_id = get_virtual_id_str($v);
 	my $real_id = get_real_id_str($r, $v);
 
 	return undef if (!_status_check($v, $r, $is_fallback));
 
 	if (defined($is_fallback)) {
 		delete $v->{fallback_status}->{"$real_id"};
 		if (! %{$v->{fallback_status}}) {
 			$v->{fallback_status} = undef;
 		}
 	}
 	else {
 		delete $v->{real_status}->{"$real_id"};
 		if (! %{$v->{real_status}}) {
 			$v->{real_status} = undef;
 		}
 	}
 
 	delete $r->{virtual_status}->{"$virtual_id"};
 	if (! %{$r->{virtual_status}}) {
 		$r->{virtual_status} = undef;
 	}
 
 	return 1;
 }
 
 # _service_up
 # Bring a real service up if it is down
 # Should be called by service_set only
 # I.e. If you want to change the state of a real server call service_set.
 #      If you call this function directly then ldirectord will lose track
 #      of the state of real servers.
 # pre: v: reference to virtual service to with the real server belongs
 #      r: reference to the real server to take down
 # post: real service is taken up from the respective virtual service
 #       if it is inactive
 # return: none
 sub _service_up
 {
 	my ($v, $r, $force) = (@_);
 
 	if ($r->{failcount} > 0) {
 		ld_log("Resetting soft failure count: " . $r->{server} . ":" .
 		       $r->{port} . " (" . get_virtual_id_str($v) . ")");
 	}
 
 	$r->{failcount} = 0;
 
 	if (! _status_up($v, $r) and ! defined($force)) {
 		return;
 	}
 
 	&_restore_service($v, $r->{server} . ":" . $r->{port},
 			  $r->{forw}, $r->{weight}, "real");
 	&fallback_off($v);
 }
 
 # _service_down
 # Bring a real service down if it is up
 # Should be called by service_set only
 # I.e. if you want to change the state of a real server call service_set.
 #      If you call this function directly then ldirectord will lose track
 #      of the state of real servers.
 # pre: v: reference to virtual service to with the real server belongs
 #      r: reference to the real server to take down
 # post: real service is taken down from the respective virtual service
 #       if it is active
 # return: none
 sub _service_down
 {
 	my ($v, $r, $force) = @_;
 
 	if (!_status_check($v, $r) and !defined($force)) {
 		return;
 	}
 
 	$r->{failcount}++;
 
 	if (!defined($force) and _status_check($v, $r) and
 	     ($r->{failcount} < $v->{failurecount})) {
 		ld_log("Soft failure real server: " . $r->{server} . ":" .
 		       $r->{port} . " (" . get_virtual_id_str($v) .
 		       ") failure " . $r->{failcount} . "/" . $v->{failurecount});
 		return;
 	}
 
 	_status_down($v, $r);
 
 	&_remove_service($v, $r->{server} . ":" . $r->{port},
 			 $r->{forw}, "real");
 
 	&fallback_on($v);
 }
 
 # fallback_on
 # Turn on the fallback server for a virtual service if it is inactive
 # pre: v: virtual to turn fallback service on for
 # post: fallback server is turned on if it was inactive
 # return: none
 sub fallback_on
 {
 	my ($v, $force) = (@_);
 
 	my $fallback=&fallback_find($v);
 
 	if (defined($fallback) and (_status_up($v, $fallback, "fallback")
 			or defined($force))) {
 		&_restore_service($v, $fallback->{server} . ":" . $fallback->{port},
 				  get_forward_flag($fallback->{forward}),
 				  "1", "fallback");
 	}
 
 	if (!defined ($v->{real_status})) {
 		&do_fallback_command($v, "start");
 	}
 }
 
 # fallback_off
 # Turn off the fallback server for a virtual service if it is active
 # pre: v: virtual to turn fallback service off for
 # post: fallback server is turned off if it was active
 # return: none
 sub fallback_off
 {
 	my ($v, $force) = (@_);
 
 	my $fallback=&fallback_find($v);
 
 	if (defined($fallback) and (_status_down($v, $fallback, "fallback")
 			or defined($force))) {
 		&_remove_service($v, $fallback->{server} . ":" .  $fallback->{port},
 				 get_forward_flag($fallback->{forward}),
 				 "fallback");
 	}
 
 	if (defined ($v->{real_status})) {
 		&do_fallback_command($v, "stop");
 	}
 }
 
 # fallback_find
 # Determine the fallback for a virtual service
 # pre: virtual: reference to a virtual service
 # post: none
 # return: $virtual->{"fallback"} if defined
 #         else $FALLBACK->{$virtual->{"protocol"}} if defined
 #         else undef
 sub fallback_find
 {
 	my ($virtual) = (@_);
 
 	my($global_fallback_ptr);	# fallback pointer
 	my $ipv6p = ($virtual->{addressfamily} == AF_INET6) ? 1 : 0;
 
 	if( defined $virtual->{"fallback"} ) {
 		return($virtual->{"fallback"});
 	} elsif ( not defined($FALLBACK) and not $ipv6p ) {
 		return undef;
 	} elsif ( not defined($FALLBACK6) and $ipv6p ) {
 		return undef;
 	}
 
 	if ($ipv6p) {	# IPv6
 		$global_fallback_ptr = $FALLBACK6;
 	} else {
 		$global_fallback_ptr = $FALLBACK;
 	}
 
 	# If the global fallback has a port, it can be used as is
 	if (defined($global_fallback_ptr->{$virtual->{"protocol"}}->{"port"})) {
 		return $global_fallback_ptr->{$virtual->{"protocol"}};
 	}
 
 	# Else create an anonymous fallback
 	my %anon_fallback = %{$global_fallback_ptr->{$virtual->{"protocol"}}};
 	$anon_fallback{"port"} = $virtual->{"port"};
 
 	return \%anon_fallback;
 }
 
 # fallback_command
 # Execute the fallback command with the given status if it wasn't executed
 # with this status already for the supplied virtual service.
 sub do_fallback_command
 {
 	my ($v, $status) = (@_);
 
 	if (defined $v->{fallbackcommand_status} and $v->{fallbackcommand_status} eq $status) {
 		return;
 	}
 
 	$v->{fallbackcommand_status} = $status;
 
 	if (defined($v->{fallbackcommand})) {
 		&system_wrapper($v->{fallbackcommand} . " " . $status . " " . $v->{server} . ":" . $v->{port} . " " . $v->{protocol});
 	} elsif (defined($FALLBACKCOMMAND)) {
 		&system_wrapper($FALLBACKCOMMAND . " " . $status . " " . $v->{server} . ":" . $v->{port} . " " . $v->{protocol});
 	}
 }
 
 # Used during stop, start and reload to remove stale real servers from LVS
 sub purge_untracked_service
 {
 	my ($v, $rservice, $tag) = (@_);
 
 	my $log_arg = "Purged real server ($tag): $rservice (" .
 		      &get_virtual($v) . ")";
 
 	&system_wrapper("$IPVSADM -d $v->{proto} " . &get_virtual_option($v) .
 			" -r $rservice");
 	&ld_log($log_arg);
 	&ld_emailalert_send($log_arg, $v, $rservice, 0);
 }
 
 # Used during stop, start and reload to remove stale real servers from LVS
 sub purge_service
 {
 	my ($v, $r, $tag) = (@_);
 
 	purge_untracked_service($v, "$r->{server}:$r->{port}", $tag);
 	_status_down($v, $r);
 }
 
 # Used during stop, start and reload to remove stale virtual services from LVS
 sub purge_virtual
 {
 	my ($v, $tag) = (@_);
 
 	&system_wrapper("$IPVSADM -D $v->{proto} " .  &get_virtual_option($v));
 	&ld_log("Purged virtual server ($tag): " .  &get_virtual($v));
 }
 
 sub check_cfgfile
 {
 	my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
 		$size, $atime, $mtime) = stat($CONFIG);
 	my ($status);
 	return if ($stattime==$mtime);
 	$stattime = $mtime;
 	use Digest::MD5 qw(md5 md5_hex);
 	my $ctx = Digest::MD5->new;
 	unless (open(CFGFILE, "<$CONFIG")) {
 		&config_warn(0, "can not open file $CONFIG for checking");
 		return 0;
 	}
 	$ctx->addfile(*CFGFILE);
 	close(CFGFILE);
 	my $digest = $ctx->hexdigest;
 	if (defined $checksum && $checksum ne $digest) {
 		&ld_log("Configuration file '$CONFIG' has changed on disk");
 		if ($AUTOCHECK eq "yes") {
 			&ld_log(" - reread new configuration");
 			&reread_config();
 		} else {
 			&ld_log(" - ignore new configuration\n");
 		}
 		if (defined($CALLBACK) and -x $CALLBACK) {
 			&system_wrapper("$CALLBACK $CONFIG");
 		}
 		$status = 1;
 	}
 	$checksum = $digest;
 
 	return $status;
 }
 
 # ld_openlog
 # Open logger
 # make log rotation work
 # pre: none
 # post: If logger is a file, it opened and closed again as a test
 #       If logger is syslog, it is opened so it can be used without
 #       needing to be opened again.
 #       Otherwise, nothing is done.
 # return: 0 on success
 #         1 on error
 sub ld_openlog
 {
 	if ($opt_d or $SUPERVISED eq "yes") {
 		# Instantly do nothing
 		return(0);
 	}
 	if( $LDIRLOG =~ /^\/(.*)/ ) {
 		# Open and close the file as a test.
 		# We open the file each time we want to log to it
 		unless (open(LOGFILE, ">>$LDIRLOG") and close(LOGFILE)) {
 			return 1;
 		}
 	}
 	else
 	{
 		# Assume LDIRLOG is a logfacility, log to syslog
 		setlogsock( "unix" );
 		openlog( "ldirectord", "pid", "$LDIRLOG" );
 	}
 	return(0);
 }
 
 # ld_log
 # Log a message.
 # pre: message: Message to write
 # post: message and timetsamp is written to loged
 #       If logger is a file, it is opened and closed again as a
 #       primitive means to make log rotation work
 # return: 0 on success
 #         1 on error
 sub ld_log
 {
 	my ($message) = (@_);
 
 	my $now = localtime();
 
 	&ld_debug(2, $message);
 	chomp $message;
 	if ($opt_d) {
 		print STDERR "$message\n";
 	} elsif ($SUPERVISED eq "yes") {
 		print "[$now] $message\n";
 	} elsif ( $LDIRLOG =~ /^\/(.*)/ ) {
 		unless (open(LOGFILE, ">>$LDIRLOG")
 				and print LOGFILE "[$now|$CFGNAME|$$] $message\n"
 				and close(LOGFILE)) {
 			print STDERR "$message\n";
 			return 1;
 		}
 	}
 	else {
 		# Assume LDIRLOG is a logfacility, log to syslog
 		syslog( "info", "$message" );
 	}
 	return(0);
 }
 
 sub daemon_status_str
 {
 	if ($DAEMON_STATUS == $DAEMON_STATUS_STARTING) {
 		return "starting";
 	}
 	elsif ($DAEMON_STATUS == $DAEMON_STATUS_RUNNING) {
 		return "running";
 	}
 	elsif ($DAEMON_STATUS == $DAEMON_STATUS_STOPPING) {
 		return "stopping";
 	}
 	elsif ($DAEMON_STATUS == $DAEMON_STATUS_RELOADING) {
 		return "reloading";
 	}
 	return "UNKNOWN";
 }
 
 # ld_emailalert_send
 # Send email alerts per virtual server
 # pre: message: Message to email
 # post: message is emailed if emailalert defined for virtualserver
 # return: 0 on success
 #         1 on error
 sub ld_emailalert_send
 {
 	my ($subject, $v, $rserver, $currenttime) = (@_);
 	my $status = 0;
 	my $to_addr;
 	my $frequency;
 	my $virtual_str;
 	my $id;
 	my $statusfilter;
 	my $smtp_server;
+	my $virtual_info;	# extra text for the mail body; may be undef
 
 	$frequency = defined $v->{emailalertfreq} ?  $v->{emailalertfreq} :
 				$EMAILALERTFREQ;
 
 	$virtual_str = &get_virtual($v);
 	$id = "$rserver ($virtual_str)";
 
 	if ($currenttime == 0 or $frequency == 0) {
 		delete $EMAILSTATUS{"$id"};
 	}
 	else {
 		$EMAILSTATUS{$id}->{v} = $v;
 		$EMAILSTATUS{$id}->{alerttime} = $currenttime;
 	}
 
 	$statusfilter = defined $v->{emailalertstatus} ?
 			$v->{emailalertstatus} : $EMAILALERTSTATUS;
 	if (($DAEMON_STATUS & $statusfilter) == 0) {
 		return 0;
 	}
 
 	$to_addr = defined $v->{emailalert} ? $v->{emailalert} : $EMAILALERT;
 	if ($to_addr eq "") {
 		return 0;
 	}
 
 	$smtp_server = defined $v->{smtp} ? $v->{smtp} :
 				$SMTP;
 
 	&ld_log("emailalert: $subject");
+
+	# optional "Service name:"/"Comment:" text to append to the mail body
+	$virtual_info = _ld_virtual_server_details($v);
+
+	# prefix the subject with "[servicename]" if set; this happens after
+	if ($v->{servicename}) {	# ld_log() above, so the log line has no prefix
+		$subject = "[" . $v->{servicename}  ."] $subject";
+	}
+
 	if (defined $smtp_server) {
-		$status = &ld_emailalert_net_smtp($smtp_server, $to_addr, $subject);
+		$status = &ld_emailalert_net_smtp($smtp_server, $to_addr, $subject, $virtual_info);
 	}
 	else {
-		$status = &ld_emailalert_mail_send($to_addr, $subject);
+		$status = &ld_emailalert_mail_send($to_addr, $subject, $virtual_info);
 	}
 
 	return($status);
 }
 
+# build the optional "Service name:" / "Comment:" lines for alert e-mail bodies
+sub _ld_virtual_server_details
+{
+	my ($v) = @_;	# v: reference to the virtual service
+	my $details;	# left undef when neither key below is set
+
+	if ($v->{servicename}) {
+		$details .= "Service name: " . $v->{servicename} . "\n"
+	}
+
+	if ($v->{comment}) {
+		$details .= "Comment: " . $v->{comment} . "\n";
+	}
+
+	return $details;	# newline-terminated line(s), or undef if none
+}
+
+
 # ld_emailalert_net_smtp
 # Send email alerts via SMTP server
 # pre: smtp: SMTP server defined
 # post: message is emailed if SMTP server is valid and working
 # return: 0 on success
 #	  1 on error
 sub ld_emailalert_net_smtp
 {
-	my ($smtp_server, $to_addr, $subject) = (@_);
+	my ($smtp_server, $to_addr, $subject, $extrabody) = (@_);
 	my $status = 0;
 
 	use Net::SMTP;
 	use Sys::Hostname;
 
 	my $hostname = hostname;
 
 	my $smtp = Net::SMTP->new($smtp_server);
 
 	if ($smtp) {
 		my $myusername = getpwuid( $< );
 		$smtp->mail("$myusername\@$hostname");
 		$smtp->to($to_addr);
 		$smtp->data();
 		if($EMAILALERTFROM) {
 			$smtp->datasend("From: $EMAILALERTFROM\n");
 		} else {
 			$smtp->datasend("From: $myusername\@$hostname\n");
 		}
 		$smtp->datasend("To: $to_addr\n");
 		$smtp->datasend("Subject: $subject\n\n");
 		$smtp->datasend("ldirectord host: $hostname\n" .
 				"Log-Message: $subject\n" .
 				"Daemon-Status: " .
 				&daemon_status_str() . "\n");
+		$smtp->datasend("\n$extrabody\n") if ($extrabody);
 		$smtp->dataend();
 		$smtp->quit;
 	} else {
 		&ld_log("failed to send SMTP email message\n");
 		$status = 1;
 	}
 
 	return($status);
 }
 
 # ld_emailalert_mail_send
 # Send email alerts via Mail::Send
 # pre: smtp: SMTP server not defined
 # post: message is emailed if one of the Mail::Send methods works
 # return: 0 on success
 #	  1 on error
 sub ld_emailalert_mail_send
 {
-	my ($to_addr, $subject) = (@_);
+	my ($to_addr, $subject, $extrabody) = (@_);
 	my $emailmsg;
 	my $emailfh;
 	my $status = 0;
 
 	use Mail::Send;
 
 	$emailmsg = new Mail::Send Subject=>$subject, To=>$to_addr;
 	$emailmsg->set('From', $EMAILALERTFROM) if ($EMAILALERTFROM);
 	$emailfh = $emailmsg->open;
 	print $emailfh "ldirectord host: " . hostname() . "\n" .
 		       "Log-Message: $subject\n" .
 		       "Daemon-Status: " . &daemon_status_str() . "\n";
+	print $emailfh "\n$extrabody\n" if ($extrabody);
 	unless ($emailfh->close) {
 		&ld_log("failed to send email message\n");
 		$status = 1;
 	}
 
 	return($status);
 }
 
 # ld_emailalert_resend
 # Resend email alerts as necessary
 # pre: none
 # post: EMAILSTATUS array is updated and alerts are sent as necessary
 # return: none
 sub ld_emailalert_resend
 {
 	my $currenttime = time();
 	my $es;
 	my $id;
 	my $rserver;
 	my $frequency;
 
 	foreach $id (keys %EMAILSTATUS) {
 		$es = $EMAILSTATUS{$id};
 		$frequency = defined $es->{v}->{emailalertfreq} ?
 					$es->{v}->{emailalertfreq} :
 					$EMAILALERTFREQ;
 		$id =~ m/(.*) /;
 		$rserver = $1;
 		if ($currenttime - $es->{alerttime} < $frequency) {
 			next;
 		}
 		&ld_emailalert_send("Inaccessible real server: $id",
 				    $es->{v}, $rserver, $currenttime);
 	}
 }
 
 # ld_debug
 # Log a message to a STDOUT.
 # pre: priority: priority of message
 #      message: Message to write
 # post: message is written to STDOUT if $DEBUG >= priority
 # return: none
 sub ld_debug
 {
 	my ($priority, $message) = (@_);
 
 	if ( $DEBUG >= $priority ) {
 		chomp $message;
 		print STDERR "DEBUG${priority}: $message\n";
 	}
 }
 
 # system_wrapper
 # Wrapper around system() to log errors
 #
 # WARNING: Do not use alarm() together with this function.  A internal
 # pipe will not be reclaimed (at least with Perl 5.8.8).  This can
 # cause ldirectord to run out of file handles.
 #
 # pre: LIST: arguments to pass to system()
 # post: system() is called and if it returns non-zero a failure
 #       message is logged
 # return: return value of system()
 sub system_wrapper
 {
 	my (@args)=(@_);
 
 	my $status;
 
 	&ld_log("Running system(@args)") if $DEBUG>2;
 	$status = system(@args);
 	if($status != 0) {
 		&ld_log("system(@args) failed: $!");
 	}
 
 	return($status)
 }
 
 # system_timeout
 # Emulate system() with timeout via fork(), exec(), and waitpid() and
 # TERMinate the child on timeout.  Set an alarm() for the timeout.
 #
 # This function does not suffer the deficiencies of system_wrapper()
 # of leaving pipes unreclaimed.  Zombies are reaped by ld_handler_chld
 # and the related code.
 #
 # pre: timeout: timeout in seconds
 #      LIST: arguments to pass to exec()
 # return: >= 0 exit status of the child process
 #          127 exec failed
 #           -1 timeout (or any other error raised while waiting)
 #           -2 fork failed
 sub system_timeout
 {
 	my $timeout = shift;
 	my (@args) = (@_);
 	my $status;
 
 	&ld_log("Running system_timeout($timeout, @args)") if $DEBUG>2;
 
 	my $childpid = fork();
 	if (!defined($childpid)) {
 		&ld_log("fork failed: $!");
 		return(-2);
 	}
 	elsif ($childpid) {
 		# parent
 		eval {
 			local $SIG{'ALRM'} = sub { die "timeout\n"; };
 			alarm $timeout;
 			waitpid($childpid, 0);
 			$status = $? >> 8;
 			# Cancel the alarm while the local handler is still
 			# installed.  Once the eval is left the handler is
 			# restored, and a late SIGALRM would be delivered
 			# to whatever handler is in place outside.
 			alarm 0;
 			# When die()-ing in the SIGALRM handler we
 			# will never reach this point.  Child/Zombie
 			# is left behind.  The grim reaper
 			# (ld_handler_chld + ld_process_chld) will
 			# take care of the zombie.
 		};
 		# Also covers the case where the eval was left via die()
 		alarm 0;
 		if ($@) {
 			# timeout
 			if ($@ ne "timeout\n") {
 				# log unexpected errors
 				&ld_log("system_timeout($timeout, @args) " .
 					"unexpected error: $@");
 			}
 			else {
 				&ld_log("system_timeout($timeout, @args) " .
 					"timed out, kill -TERM child");
 			}
 
 			# TERMinate child
 			kill 15, $childpid;
 			return(-1);
 		}
 		else {
 			# did not timeout
 			return($status);
 		}
 	}
 	else {
 		# child
 		exec(@args) or &ld_exit(127, "exec(@args) failed: $!");
 		die "ld_exit() broken?, stopped";
 	}
 }
 
 # exec_wrapper
 # Wrapper around exec() to log errors
 # pre: LIST: arguments to pass to exec()
 # post: exec() is called.  If it fails (returns false) a failure
 #       message including the system error ($!) is logged
 # return: return value of exec() on failure
 #         does not return on success
 sub exec_wrapper
 {
 	my (@args)=(@_);
 
 	my $status;
 
 	&ld_log("Running exec(@args)") if $DEBUG>2;
 	# exec() only ever returns when the command could not be started,
 	# in which case $! holds the reason
 	$status = exec(@args) or &ld_log("exec(@args) failed: $!");
 	return($status)
 }
 
 # ld_rm_file
 # Remove a file, symlink, or anything else that exists and is not a
 # directory
 # pre: filename: file to delete
 # post: If filename is a directory or does not exist nothing is
 #       removed and an error is returned
 #       Else filename is deleted
 #       Problems are reported through ld_debug at priority 2
 # return: 0 on success
 #         -1 on error
 sub ld_rm_file
 {
 	my ($filename)=(@_);
 
 	if (-d "$filename") {
 		&ld_debug(2, "ld_rm_file: $filename is a directory, skipping");
 		return(-1);
 	}
 	unless (-e "$filename") {
 		&ld_debug(2, "ld_rm_file: $filename doesn't exist, skipping");
 		return(-1);
 	}
 
 	# unlink() returns the number of files removed
 	my $removed = unlink($filename);
 	return(0) if ($removed == 1);
 
 	&ld_debug(2, "ld_rm_file: Error deleting: $filename: $!");
 	return(-1);
 }
 
 # is_octet
 # See if a number is an octet, that is >=0 and <=255
 # pre: alleged_octet: the number to test
 # post: none
 # return: 1 if the alleged_octet is an octet
 #         0 otherwise
 sub is_octet
 {
 	my ($alleged_octet)=(@_);
 
 	return 0 if ($alleged_octet < 0 or $alleged_octet > 255);
 
 	return(1);
 }
 
 # is_ip
 # Check that a given string is an IP address
 # pre: alleged_ip: string representing ip address
 # post: A string containing a ':' is validated as IPv6 by inet_pton()
 #       Any other string must consist of four '.' delimited decimal
 #       numbers, each of which is an octet
 # return: 1 if alleged_ip is a valid ip address
 #         0 otherwise
 sub is_ip
 {
 	my ($alleged_ip)=(@_);
 
 	if ($alleged_ip =~ /:/) {
 		return 0 unless (inet_pton(AF_INET6,$alleged_ip));
 		return(1);
 	}
 
 	#If we don't have four, . delimited numbers then we have no hope
 	my @octets = ($alleged_ip =~ m/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/);
 	return 0 unless (@octets);
 
 	#Each octet must be >=0 and <=255
 	foreach my $octet (@octets) {
 		return 0 unless (&is_octet($octet));
 	}
 
 	return(1);
 }
 
 # ip_to_int
 # Turn an IP address given as a dotted quad into an integer
 # pre: ip_address: string representing IP address
 # post: ip_address is converted to an integer, the first octet being
 #       the most significant byte
 # return: -1 if an error occurs (including any address that is not
 #            a dotted quad)
 #         integer representation of IP address otherwise
 sub ip_to_int
 {
 	my ($ip_address)=(@_);
 
 	return(-1) unless (&is_ip($ip_address));
 
 	my @octets = ($ip_address =~ m/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/);
 	return(-1) unless (@octets);
 
 	# Shift the accumulated value up one byte per octet
 	my $value = 0;
 	foreach my $octet (@octets) {
 		$value = ($value << 8) + $octet;
 	}
 
 	return($value);
 }
 
 # int_to_ip
 # Turn an IP address given as an integer into a dotted quad
 # pre: ip_address: integer representation of an IP address,
 #                  as returned by ip_to_int
 # post: Integer is converted to a dotted quad
 # return: string holding the dotted quad, the most significant byte
 #         of ip_address being the first octet
 sub int_to_ip
 {
 	my ($ip_address)=(@_);
 
 	return(sprintf(
 		"%d.%d.%d.%d",
 		($ip_address>>24)&255,
 		($ip_address>>16)&255,
 		($ip_address>>8)&255,
 		$ip_address&255
 	));
 }
 
 # get_virtual
 # Get the service for a virtual
 # pre: nv: virtual to get the service for
 # post: none
 # return: fwmark of service if its protocol is "fwm"
 #         ip_address:port otherwise
 sub get_virtual
 {
 	my ($nv) = (@_);
 
 	return $nv->{"fwm"} if ($nv->{"protocol"} eq "fwm");
 
 	return $nv->{"server"} . ":" . $nv->{"port"};
 }
 
 # get_virtual_option
 # Get the ipvsadm option corresponding to a virtual service
 # pre: nv: virtual to get the service for
 # post: none
 # return: fwmark of service if it is a fwm service
 #         fwmark of service + " -6" if it is a fwm service and the
 #         address family is AF_INET6
 #         ip_address:port otherwise
 sub get_virtual_option
 {
 	my ($nv) = (@_);
 
 	my ($option) = &get_virtual($nv);
 
 	my $is_fwm_v6 = ($nv->{"protocol"} eq "fwm"
 			 && $nv->{addressfamily} == AF_INET6);
 	$option .= " -6" if ($is_fwm_v6);
 
 	return $option;
 }
 
 # get_real_id_str
 # Get an id string for a real server
 # pre: r: Real server (hash reference)
 #      v: Virtual service the real server belongs to (hash reference)
 # post: none
 # return: Id string for the real server.  ':' separated fields:
 #         check, protocol, server, port, virtualhost, checkport,
 #         weight, forward, request, receive
 #         where check is the checktype, followed by ":service" for
 #         negotiate/combined checks or ":checkcommand" for
 #         external/external-perl checks.  request and receive are
 #         taken from the real server if defined there, else from the
 #         virtual, and are passed through quotemeta().
 sub get_real_id_str
 {
 	my ($r, $v) = (@_);
 
 	# Settings on the real server override those of the virtual
 	my $request = defined($r->{"request"}) ? $r->{"request"}
 					       : $v->{"request"};
 	my $receive = defined($r->{"receive"}) ? $r->{"receive"}
 					       : $v->{"receive"};
 
 	my $checktype = $v->{"checktype"};
 	my $check = $checktype;
 	if ($checktype eq "negotiate" or $checktype eq "combined") {
 		$check = $checktype . ":" . $v->{"service"};
 	}
 	elsif ($checktype eq "external" or $checktype eq "external-perl") {
 		$check = $checktype . ":" . $v->{"checkcommand"};
 	}
 
 	my $checkport = defined($v->{"checkport"}) ? $v->{"checkport"} : "";
 	my $virtualhost = defined($v->{"virtualhost"}) ? $v->{"virtualhost"}
 						       : "";
 
 	return join(":",
 		$check, $v->{"protocol"},
 		$r->{"server"}, $r->{"port"},
 		$virtualhost, $checkport,
 		$r->{"weight"}, $r->{"forward"},
 		quotemeta($request), quotemeta($receive));
 }
 
 # get_virtual_id_str
 # Get an id string for a virtual service
 # pre: v: Virtual service
 # post: none
 # return: Id string for the virtual service: the protocol, with "6"
 #         appended for a fwm service whose address family is
 #         AF_INET6, then ":" and the result of get_virtual
 sub get_virtual_id_str
 {
 	my ($v) = (@_);
 
 	my $family_tag = "";
 	if ($v->{"protocol"} eq "fwm" and $v->{addressfamily} == AF_INET6) {
 		$family_tag = "6";
 	}
 
 	return $v->{"protocol"} . $family_tag . ":" .  &get_virtual($v);
 }
 
 # get_forward_flag
 # Get the ipvsadm flag corresponding to a forwarding mechanism
 # pre: forward: Name of forwarding mechanism.
 #               Should be one of ipip, masq or gate
 # post: none
 # return: ipvsadm flag corresponding to the forwarding mechanism
 #         " " if $forward is undefined or unknown
 sub get_forward_flag
 {
 	my ($forward) = (@_);
 
 	my %flag_for = (
 		"masq" => "-m",
 		"gate" => "-g",
 		"ipip" => "-i",
 	);
 
 	return(" ") unless (defined($forward));
 	return($flag_for{$forward}) if (exists($flag_for{$forward}));
 
 	return(" ");
 }
 
 # ld_exit
 # Exit and log a message
 # pre: exit_status: Integer exit status to exit with
 #                   0 will be used if parameter is omitted
 #      message: Message to log when exiting. May be omitted,
 #               in which case the empty string is used
 # post: If exit_status is non-zero or $DEBUG>2 then
 #       message is logged through ld_log.
 #       Programme exits with exit_status
 # return: does not return
 sub ld_exit
 {
 	my ($exit_status, $message)=(@_);
 	$exit_status = 0 unless (defined($exit_status));
 	$message = "" unless (defined($message));
 
 	&ld_log("Exiting with exit_status $exit_status: $message")
 		if ($exit_status!=0 or $DEBUG>2);
 
 	exit($exit_status);
 }
 
 # ld_open_socket
 # Open a socket connection
 # pre: remote: Address of the remote host to connect to.  Brackets
 #              are stripped; an address accepted by
 #              inet_pton(AF_INET6, ...) is treated as IPv6, anything
 #              else is handed to inet_aton()
 #      port: port to connect to
 #      protocol: Protocol to use. "udp" gives a datagram socket,
 #                anything else a stream socket
 # post: A Socket connection is opened to the remote host
 #       die() is called if remote can not be converted to an address
 #       or the socket can not be created
 # return: Open socket
 #         undef if connect() fails
 sub ld_open_socket
 {
 	my ($remote, $port, $protocol) = @_;
 	my ($iaddr, $paddr, $pro, $pf, $socktype);
 	local *SOCK;
 
 	$remote = &ld_strip_brackets($remote);
 
 	# Try IPv6 first, fall back to IPv4
 	$iaddr = inet_pton(AF_INET6,$remote);
 	if ($iaddr) {
 		$paddr = pack_sockaddr_in6($port, $iaddr);
 		$pf = PF_INET6;
 	} else {
 		$iaddr = inet_aton($remote) || die "no host: $remote";
 		$paddr = sockaddr_in($port, $iaddr);
 		$pf = PF_INET;
 	}
 
 	$pro = getprotobyname($protocol);
 	$socktype = ($protocol eq "udp") ? SOCK_DGRAM : SOCK_STREAM;
 	socket(SOCK, $pf, $socktype, $pro) || die "socket: $!";
 
 	return undef unless (connect(SOCK, $paddr));
 
 	return *SOCK;
 }
 
 # ld_daemon
 # Close and fork to become a daemon.
 #
 # Notes from unix programmer faq
 # http://www.landfield.com/faqs/unix-faq/programmer/faq/
 #
 # Almost none of this is necessary (or advisable) if your daemon is being
 # started by `inetd'.  In that case, stdin, stdout and stderr are all set up
 # for you to refer to the network connection, and the `fork()'s and session
 # manipulation should *not* be done (to avoid confusing `inetd').  Only the
 # `chdir()' step remains useful.
 #
 # Gratuitously over documented, because it can be
 #
 # Written by Horms, horms@verge.net.au for an unrelated project while
 # working for Zip World, http://www.zipworld.com.au/, 1997-1999.
 sub ld_daemon
 {
 	# Detach from the invoking shell and terminal: fork, start a new
 	# session, fork again, chdir to "/" and reopen stdin, stdout and
 	# stderr on /dev/null.  Any failing step ends the process through
 	# ld_exit with a non-zero exit status.
 	#
 	# Perl's chdir() and open() report failure with a false value and
 	# POSIX::setsid() with undef; none of them ever returns a negative
 	# number, so failures are detected by testing for false/undef.
 
 	# `fork()' so the parent can exit, this returns control to the command
 	# line or shell invoking your program.  This step is required so that
 	# the new process is guaranteed not to be a process group leader. The
 	# next step, `setsid()', fails if you're a process group leader.
 	&ld_daemon_become_child();
 
 	# setsid()' to become a process group and session group leader. Since a
 	# controlling terminal is associated with a session, and this new
 	# session has not yet acquired a controlling terminal our process now
 	# has no controlling terminal, which is a Good Thing for daemons.
 	my $sid = POSIX::setsid();
 	if(!defined($sid) or $sid<0){
 		&ld_exit(1, "ld_daemon: Could not setsid");
 	}
 
 	# fork()' again so the parent, (the session group leader), can exit.
 	# This means that we, as a non-session group leader, can never regain a
 	# controlling terminal.
 	&ld_daemon_become_child();
 
 	# `chdir("/")' to ensure that our process doesn't keep any directory in
 	# use. Failure to do this could make it so that an administrator
 	# couldn't unmount a filesystem, because it was our current directory.
 	unless(chdir("/")){
 		&ld_exit(1, "ld_daemon: Could not chdir");
 	}
 
 	# `close()' fds 0, 1, and 2. This releases the standard in, out, and
 	# error we inherited from our parent process. We have no way of knowing
 	# where these fds might have been redirected to. Note that many daemons
 	# use `sysconf()' to determine the limit `_SC_OPEN_MAX'.  `_SC_OPEN_MAX'
 	# tells you the maximum open files/process. Then in a loop, the daemon
 	# can close all possible file descriptors. You have to decide if you
 	# need to do this or not.  If you think that there might be
 	# file-descriptors open you should close them, since there's a limit on
 	# number of concurrent file descriptors.
 	close(STDIN);
 	close(STDOUT);
 	close(STDERR);
 
 	# Establish new open descriptors for stdin, stdout and stderr. Even if
 	# you don't plan to use them, it is still a good idea to have them open.
 	# The precise handling of these is a matter of taste; if you have a
 	# logfile, for example, you might wish to open it as stdout or stderr,
 	# and open `/dev/null' as stdin; alternatively, you could open
 	# `/dev/console' as stderr and/or stdout, and `/dev/null' as stdin, or
 	# any other combination that makes sense for your particular daemon.
 	#
 	# This code used to open /dev/console for STDOUT and STDERR,
 	# but that was changed to /dev/null to stop the code hanging in
 	# the case where /dev/console is unavailable for some reason
 	# http://www.osdl.org/developer_bugzilla/show_bug.cgi?id=1180
 	unless(open(STDIN, "</dev/null")){
 		&ld_exit(1, "ld_daemon: Could not open /dev/null");
 	}
 	unless(open(STDOUT, ">>/dev/null")){
 		&ld_exit(-1, "ld_daemon: Could not open /dev/null");
 	}
 	unless(open(STDERR, ">>/dev/null")){
 		&ld_exit(-1, "ld_daemon: Could not open /dev/null");
 	}
 }
 
 # ld_daemon_become_child
 # Fork, kill parent and return child process
 # pre: none
 # post: process forks and parent exits
 #       The process exits with exit status -1 if fork fails
 # return: parent: exits
 #         child: none  (this is the process that returns)
 # Written by Horms, horms@verge.net.au for an unrelated project while
 # working for Zip World, http://www.zipworld.com.au/, 1997-1999.
 sub ld_daemon_become_child
 {
 	my($status);
 
 	$status = fork();
 
 	# Perl's fork() returns undef on failure, never a negative value.
 	# Without this test a failed fork would let the original process
 	# carry on as if it were the child.
 	if (!defined($status)){
 		&ld_exit(-1, "ld_daemon_become_child: Could not fork: $!");
 	}
 	if ($status>0){
 		&ld_exit(0,
 			"ld_daemon_become_child: Parent exiting as it should");
 	}
 }
 
 # ld_gethostbyname
 # Look up the/an IP address of a hostname using getaddrinfo and
 # getnameinfo.  If an IP address is given it is returned.
 # pre: name: Hostname or IP address to lookup.  If it contains a
 #            bracketed part, e.g. "[::1]", only the text between
 #            the brackets is looked up
 #      af: Address Family: AF_INET etc..
 # post: getaddrinfo is called to find an address for $name
 #       This is converted to a numeric string by getnameinfo
 # return: IP address, enclosed in [] if it is an AF_INET6 address
 #         undef on error
 sub ld_gethostbyname
 {
 	my ($name, $af)=(@_);
 
 	$name = $1 if ($name =~ /\[(.*)\]/);
 
 	# NOTE(review): the flat list indexed as [0] = family and
 	# [3] = sockaddr looks like the Socket6 getaddrinfo interface
 	# - confirm against the module imported at the top of the file
 	my @addrinfo = getaddrinfo($name, 0, $af);
 	return undef unless (defined($addrinfo[3]));
 
 	my @nameinfo = getnameinfo($addrinfo[3],
 				   NI_NUMERICHOST | NI_NUMERICSERV);
 
 	return "[$nameinfo[0]]" if ($addrinfo[0] == AF_INET6);
 
 	return $nameinfo[0];
 }
 
 # ld_gethostbyaddr
 # Look up the hostname from an IP address using getaddrinfo and
 # getnameinfo.
 # If no reverse DNS record is found, return undef
 # pre: ip: IP address of host to lookup, brackets are stripped
 # post: getnameinfo is called with NI_NAMEREQD to find a hostname
 #       for IP $ip
 # return: hostname
 #         undef on error
 sub ld_gethostbyaddr
 {
 	my ($ip)=(@_);
 
 	my @addrinfo = getaddrinfo(&ld_strip_brackets($ip),0);
 	return undef unless (defined($addrinfo[3]));
 
 	# A successful lookup yields exactly two elements; anything else
 	# is treated as failure
 	my @nameinfo = getnameinfo($addrinfo[3], NI_NAMEREQD);
 	return undef if (scalar(@nameinfo) != 2);
 
 	return $nameinfo[0];
 }
 
 # ld_getservbyname
 # Wrapper for getservbyname. Look up the port for a service name
 # If a port is given it is returned.
 # pre: name: Port or Service name to look up
 #      protocol: Protocol passed to getservbyname
 # post: if $name consists only of digits
 #         if 0<=$name<65536 $name is returned
 #         else undef is returned
 #       else getservbyname is called to look up the port for the service
 # return: Port
 #         undef on error
 sub ld_getservbyname
 {
 	my ($name, $protocol)=(@_);
 
 	if ($name =~ /^[0-9]+$/) {
 		return undef unless ($name>=0 and $name<65536);
 		return $name;
 	}
 
 	# Element 2 of the list returned by getservbyname is the port
 	my @serv = getservbyname($name, $protocol);
 	return undef unless (@serv and defined($serv[2]));
 
 	return $serv[2];
 }
 
 # ld_gethostservbyname
 # Wrapper for ld_gethostbyname and ld_getservbyname. Given a server of the
 # form ip_address|hostname[:port|servicename] return ip_address[:port]
 # pre: hostserv: Server of the form ip_address|hostname[:port|servicename]
 #      protocol: Protocol for service. Should be either "tcp" or "udp"
 #      af: Address Family: AF_INET etc..
 # post: lookups performed as per ld_getservbyname and ld_gethostbyname
 # return: ip_address[:port]
 #         undef on error
 sub ld_gethostservbyname{
 	my ($hostserv, $protocol, $af) = (@_);
 
 	my $ip = $hostserv;
 	my $port;
 
 	# Split a trailing ":port" or ":servicename" off the host part
 	if ($ip =~ s/:(\d+|[A-Za-z0-9-_]+)$//) {
 		$port = $1;
 	}
 
 	$ip=&ld_gethostbyname($ip, $af)  or return(undef);
 
 	# No port given: the address alone is the result
 	return($ip) unless (defined($port));
 
 	$port=&ld_getservbyname($port, $protocol);
 	return(undef) unless (defined($port));
 
 	return("$ip:$port");
 }
 
 # ld_find_cmd_path
 # Find executable in path
 # pre: cmd: command to find
 #      path: ':' delimited paths to check
 #      relative: if set, allow cmd to be a relative path,
 #                which is checked first (it only has to be a plain
 #                file for this check)
 # post: none
 # return: path to command
 #         undef if not found, which includes any cmd containing a '/'
 #         that was not accepted as relative or absolute path
 sub ld_find_cmd_path
 {
 	my ($cmd, $path, $relative) = (@_);
 
 	return $cmd if (defined $relative  and $relative and -f "$cmd");
 	return $cmd if ($cmd =~ /^\// and -x "$cmd");
 
 	# Anything else containing a slash is never searched for in path
 	return undef if ($cmd =~ /\//);
 
 	foreach my $dir (split /:/, $path) {
 		my $candidate = "$dir/$cmd";
 		return $candidate if (-x $candidate);
 	}
 
 	return undef;
 }
 
 # ld_find_cmd
 # Find executable in $ENV{'PATH'}
 # pre: cmd: command to find
 #      relative: if set, allow cmd to be a relative path,
 #                which is checked first
 # post: none
 # return: path to command, as returned by ld_find_cmd_path
 #         undef if not found
 sub ld_find_cmd
 {
 	my ($cmd, $relative) = (@_);
 
 	return ld_find_cmd_path($cmd, $ENV{'PATH'}, $relative);
 }
 
 # ld_get_addrport
 # Get address string and port number from a given socket.
 # pre: sock: socket to query with getsockname()
 # post: die() is called if the socket address is neither 28 bytes
 #       (handled as IPv6) nor 16 bytes (handled as IPv4) long
 # return: (address, port)
 #         An IPv6 address is enclosed in []
 sub ld_get_addrport
 {
 	my($sock) = @_;
 
 	my $sockaddr = getsockname($sock);
 	my $len = length($sockaddr);
 
 	# The address family is told apart by the size of the packed
 	# socket address
 	if ($len == 28) {	# IPv6
 		my ($port, $addr) = unpack_sockaddr_in6($sockaddr);
 		return ("[" . inet_ntop(AF_INET6, $addr) . "]", $port);
 	}
 	if ($len == 16) {	# IPv4
 		my ($port, $addr) = unpack_sockaddr_in($sockaddr);
 		return (inet_ntop(AF_INET, $addr), $port);
 	}
 
 	die "unexpected length of sockaddr\n";
 }
 
 # ld_strip_brackets
 # Remove every '[' and ']' from a string
 # pre: str: string to process
 # post: none, the caller's string is not modified
 # return: copy of the string without any brackets
 sub ld_strip_brackets
 {
 	my($str) = @_;
 
 	$str =~ tr/[]//d;
 
 	return $str;
 }
diff --git a/rgmanager/src/resources/clusterfs.sh b/rgmanager/src/resources/clusterfs.sh
index 07fd73b10..ab2c292d5 100755
--- a/rgmanager/src/resources/clusterfs.sh
+++ b/rgmanager/src/resources/clusterfs.sh
@@ -1,341 +1,342 @@
 #!/bin/bash
 
 #
 # Cluster File System mount/umount/fsck/etc. agent
 #
 # Copyright (C) 2000 Mission Critical Linux
 # Copyright (C) 2002-2011 Red Hat, Inc.  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 #
 
 # Load the shared file system helpers that live next to this agent.
 # Quoted so that an installation path containing whitespace still works.
 . "$(dirname "$0")/utils/fs-lib.sh"
 
 #
 # do_metadata - print the resource agent meta-data (XML) for the
 # clusterfs agent on standard output.  The text is static; nothing
 # inside the here-document is expanded.
 #
 do_metadata()
 {
 	cat <<EOT
 <?xml version="1.0" ?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd">
 <resource-agent name="clusterfs" version="rgmanager 2.0">
     <version>1.0</version>
 
     <longdesc lang="en">
         This defines a cluster file system mount (i.e. GFS)
     </longdesc>
     <shortdesc lang="en">
         Defines a cluster file system mount.
     </shortdesc>
 
     <parameters>
         <parameter name="name" primary="1">
 	    <longdesc lang="en">
 	        Symbolic name for this file system.
 	    </longdesc>
             <shortdesc lang="en">
                 File System Name
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="mountpoint" unique="1" required="1">
 	    <longdesc lang="en">
 	        Path in file system hierarchy to mount this file system.
 	    </longdesc>
             <shortdesc lang="en">
                 Mount Point
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="device" unique="1" required="1">
 	    <longdesc lang="en">
 	        Block device, file system label, or UUID of file system.
 	    </longdesc>
             <shortdesc lang="en">
                 Device or Label
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="fstype">
 	    <longdesc lang="en">
 	        File system type.  If not specified, mount(8) will attempt to
 		determine the file system type.
 	    </longdesc>
             <shortdesc lang="en">
                 File system type
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="force_unmount">
             <longdesc lang="en">
                 If set, the cluster will kill all processes using
                 this file system when the resource group is
                 stopped.  Otherwise, the unmount will fail, and
                 the resource group will be restarted.
             </longdesc>
             <shortdesc lang="en">
                 Force Unmount
             </shortdesc>
 	    <content type="boolean"/>
         </parameter>
 
 	<parameter name="self_fence">
 	    <longdesc lang="en">
 	        If set and unmounting the file system fails, the node will
 		immediately reboot.  Generally, this is used in conjunction
 		with force_unmount support, but it is not required.
 	    </longdesc>
 	    <shortdesc lang="en">
 	        Seppuku Unmount
 	    </shortdesc>
 	    <content type="boolean"/>
 	</parameter>
 
 	<parameter name="fsid">
 	    <longdesc lang="en">
 	    	File system ID for NFS exports.  This can be overridden
 		in individual nfsclient entries.
 	    </longdesc>
 	    <shortdesc lang="en">
 	    	NFS File system ID
 	    </shortdesc>
 	    <content type="string"/>
 	</parameter>
 
 	<parameter name="nfslock" inherit="service%nfslock">
 	    <longdesc lang="en">
 	        If set, the node will try to kill lockd and issue
 		reclaims across all remaining network interface cards.
 		This always happens, regardless of whether unmounting failed.
 	    </longdesc>
 	    <shortdesc lang="en">
 	        Enable NFS lock workarounds
 	    </shortdesc>
 	    <content type="boolean"/>
 	</parameter>
 
 	<parameter name="nfsrestart">
 	    <longdesc lang="en">
 		If set and unmounting the file system fails, the node will
 		try to restart nfs daemon and nfs lockd to drop all filesystem
 		references. Use this option as a last resort.
 		This option requires force_unmount to be set and it is not
 		compatible with nfsserver resource.
 	    </longdesc>
 	    <shortdesc lang="en">
 		Enable NFS daemon and lockd workaround
 	    </shortdesc>
 	    <content type="boolean"/>
 	</parameter>
 
         <parameter name="options">
             <longdesc lang="en">
                 Options used when the file system is mounted.  These
                 are often file-system specific.  See mount(8) and/or
                 mount.gfs2(8) for supported mount options.
             </longdesc>
             <shortdesc lang="en">
                 Mount Options
             </shortdesc>
             <content type="string"/>
         </parameter>
 
         <parameter name="use_findmnt">
             <longdesc lang="en">
         Use findmnt to determine if and where a filesystem is mounted.
         Disabling this uses the fallback method (should be used if autofs
         maps are located on network storage, i.e. nfs, iscsi, etc.).
             </longdesc>
             <shortdesc lang="en">
         Utilize findmnt to detect if and where filesystems are mounted
             </shortdesc>
             <content type="boolean"/>
         </parameter>
 
     </parameters>
 
     <actions>
         <action name="start" timeout="900"/>
 	<action name="stop" timeout="30"/>
 	<!-- Recovery isn't possible; we don't know if resources are using
 	     the file system. -->
 
 	<!-- Checks to see if it's mounted in the right place -->
 	<action name="status" interval="1m" timeout="10"/>
 	<action name="monitor" interval="1m" timeout="10"/>
 
 	<!-- Checks to see if we can read from the mountpoint -->
 	<action name="status" depth="10" timeout="30" interval="5m"/>
 	<action name="monitor" depth="10" timeout="30" interval="5m"/>
 
 	<!-- Checks to see if we can write to the mountpoint (if !ROFS) -->
 	<action name="status" depth="20" timeout="30" interval="10m"/>
 	<action name="monitor" depth="20" timeout="30" interval="10m"/>
 
 	<action name="meta-data" timeout="5"/>
 	<action name="validate-all" timeout="5"/>
     </actions>
 
     <special tag="rgmanager">
     	<child type="fs" start="1" stop="3"/>
     	<child type="clusterfs" start="1" stop="3"/>
         <child type="nfsexport" start="3" stop="1"/>
     </special>
 </resource-agent>
 EOT
 }
 
 
 #
 # verify_fstype - check OCF_RESKEY_fstype.
 # An empty value (auto detect), "gfs" and "gfs2" are accepted and yield
 # $OCF_SUCCESS; anything else is logged and yields $OCF_ERR_ARGS.
 #
 verify_fstype()
 {
 	case "$OCF_RESKEY_fstype" in
 	""|gfs|gfs2)
 		return $OCF_SUCCESS
 		;;
 	esac
 
 	ocf_log err "File system type $OCF_RESKEY_fstype not supported"
 	return $OCF_ERR_ARGS
 }
 
 
 #
 # verify_options - check every entry of the comma separated list in
 # OCF_RESKEY_options.
 # Generic mount(8) options are always accepted.  For fstype "gfs" a
 # fixed list of gfs options is accepted as well; for "gfs2" every
 # option is accepted.  Each remaining option is logged as unsupported.
 # Returns $OCF_SUCCESS if no option was rejected, else $OCF_ERR_ARGS.
 #
 verify_options()
 {
 	declare -i ret=$OCF_SUCCESS
 
 	for o in $(echo $OCF_RESKEY_options | sed -e 's/,/ /g'); do
 		#
 		# From mount(8)
 		#
 		case $o in
 		async|atime|auto|defaults|dev|exec|_netdev|noatime|\
 		noauto|nodev|noexec|nosuid|nouser|ro|rw|suid|sync|\
 		dirsync|user|users)
 			continue
 			;;
 		esac
 
 		if [ "$OCF_RESKEY_fstype" = "gfs2" ]; then
 			# XXX
 			continue
 		fi
 
 		if [ "$OCF_RESKEY_fstype" = "gfs" ]; then
 			case $o in
 			lockproto=*|locktable=*|hostdata=*|\
 			localcaching|localflocks|ignore_local_fs|\
 			num_glockd|acl|suiddir)
 				continue
 				;;
 			esac
 		fi
 
 		ocf_log err "Option $o not supported for $OCF_RESKEY_fstype"
 		ret=$OCF_ERR_ARGS
 	done
 
 	return $ret
 }
 
 
 #
 # do_verify - run all parameter checks in order: name, fstype, device,
 # mountpoint, options.  The first failing check stops the sequence and
 # $OCF_ERR_ARGS is returned, whatever status the check itself had.
 #
 do_verify()
 {
 	verify_name && \
 	verify_fstype && \
 	verify_device && \
 	verify_mountpoint && \
 	verify_options || return $OCF_ERR_ARGS
 }
 
 
 #
 # do_pre_unmount - preparation before a cluster file system is unmounted.
 # Uses $force_umount, $dev and $mp, which are not set in this function.
 # Returns 2 when the unmount is not to be attempted (still referenced
 # by other services, or force_umount is empty) and 0 once the NFS lock
 # workaround and the buffer flush have been run.
 # NOTE(review): how the caller acts on 2 is not visible here - confirm
 # in fs-lib.sh.
 #
 do_pre_unmount() {
 	#
 	# Check the rgmanager-supplied reference count if one exists.
 	# Only proceed if no other service still references this
 	# resource, i.e. the count is not greater than 0.
 	#
 	if [ -n "$OCF_RESKEY_RGMANAGER_meta_refcnt" ]; then
 		refs=$OCF_RESKEY_RGMANAGER_meta_refcnt
 		if [ $refs -gt 0 ]; then
 			ocf_log debug "Not unmounting $OCF_RESOURCE_INSTANCE - still in use by $refs other service(s)"
 			return 2
 		fi
 	fi
 
 	if [ -z "$force_umount" ]; then
 		ocf_log debug "Not umounting $dev (clustered file system)"
 		return 2
 	fi
 
 	#
 	# Always do this hackery on clustered file systems.
 	#
 	if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
 	   [ "$OCF_RESKEY_nfslock" = "1" ]; then
 		ocf_log warning "Dropping node-wide NFS locks"
 		mkdir -p $mp/.clumanager/statd
+		chown rpcuser:rpcuser $mp/.clumanager/statd
 		pkill -KILL -x lockd
 		# Copy out the notify list; our 
 		# IPs are already torn down
 		if notify_list_store $mp/.clumanager/statd; then
 			notify_list_broadcast $mp/.clumanager/statd
 		fi
 	fi
 
 	# Always invalidate buffers on clusterfs resources
 	clubufflush -f $dev
 
 	return 0
 }
 
 #
 # do_force_unmount - NFS workaround used when forcing an unmount.
 # If OCF_RESKEY_nfsrestart is "yes" or "1" the current export table is
 # saved, nfslock and nfs are stopped and started again, and every saved
 # export is re-established with exportfs.
 # Always returns 1.
 # NOTE(review): what the caller does with the return value of 1 is not
 # visible here - confirm in fs-lib.sh.
 #
 do_force_unmount() {
 	if [ "$OCF_RESKEY_nfsrestart" = "yes" ] || \
 	   [ "$OCF_RESKEY_nfsrestart" = "1" ]; then
 		ocf_log warning "Restarting nfsd/nfslock"
 		# Remember the exports before the services are stopped
 		nfsexports=$(cat /var/lib/nfs/etab)
 		service nfslock stop
 		service nfs stop
 		service nfs start
 		service nfslock start
 		# Re-export every saved entry.  Per line: field 1 is the
 		# exported path, field 2 is the client followed by the
 		# options in parentheses.
 		echo "$nfsexports" | { while read line; do
 			nfsexp=$(echo $line | awk '{print $1}')
 			# Text between the parentheses
 			nfsopts=$(echo $line | sed -e 's#.*(##g' -e 's#).*##g')
 			# Field 2 up to the opening parenthesis
 			nfsacl=$(echo $line | awk '{print $2}' | sed -e 's#(.*##g')
 			if [ -n "$nfsopts" ]; then
 				exportfs -i -o "$nfsopts" "$nfsacl":$nfsexp
 			else
 				exportfs -i "$nfsacl":$nfsexp
 			fi
 		done; }
 	fi
 	return 1
 }
 
 # Hand the command line on unchanged; "$@" keeps every argument intact
 # where an unquoted $* would re-split and glob-expand it.
 main "$@"
diff --git a/rgmanager/src/resources/fs.sh.in b/rgmanager/src/resources/fs.sh.in
index 2924fa7d9..6d99f9561 100644
--- a/rgmanager/src/resources/fs.sh.in
+++ b/rgmanager/src/resources/fs.sh.in
@@ -1,502 +1,504 @@
 #!/bin/bash
 
 #
 # File system (normal) mount/umount/fsck/etc. agent
 #
 #
 # Copyright (C) 1997-2003 Sistina Software, Inc.  All rights reserved.
 # Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 #
 
 . $(dirname $0)/utils/fs-lib.sh
 
 do_metadata()
 {
 	cat <<EOT
 <?xml version="1.0" encoding="ISO-8859-1" ?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd">
 <resource-agent name="fs" version="rgmanager 2.0">
     <version>1.0</version>
 
     <longdesc lang="en">
         This defines a standard file system mount (= not a clustered
 	or otherwise shared file system).
     </longdesc>
     <shortdesc lang="en">
         Defines a file system mount.
     </shortdesc>
 
     <parameters>
         <parameter name="name" primary="1">
 	    <longdesc lang="en">
 	        Symbolic name for this file system.
 	    </longdesc>
             <shortdesc lang="en">
                 File System Name
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="mountpoint" unique="1" required="1">
 	    <longdesc lang="en">
 	        Path in file system heirarchy to mount this file system.
 	    </longdesc>
             <shortdesc lang="en">
                 Mount Point
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="device" unique="1" required="1">
 	    <longdesc lang="en">
 	        Block device, file system label, or UUID of file system.
 	    </longdesc>
             <shortdesc lang="en">
                 Device or Label
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="fstype">
 	    <longdesc lang="en">
 	        File system type.  If not specified, mount(8) will attempt to
 		determine the file system type.
 	    </longdesc>
             <shortdesc lang="en">
                 File system type
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="force_unmount">
             <longdesc lang="en">
                 If set, the cluster will kill all processes using 
                 this file system when the resource group is 
                 stopped.  Otherwise, the unmount will fail, and
                 the resource group will be restarted.
             </longdesc>
             <shortdesc lang="en">
                 Force Unmount
             </shortdesc>
 	    <content type="boolean"/>
         </parameter>
 
         <parameter name="quick_status">
             <longdesc lang="en">
 		Use quick status checks.  When set to 0 (the default), this
 		agent behaves normally.  When set to 1, this agent will not
 		log errors incurred or perform the file system accessibility
 		check (e.g. it will not try to read from/write to the file
 		system).  You should only set this to 1 if you have lots of
 		file systems on your cluster or you are seeing very high load
 		spikes as a direct result of this agent.
             </longdesc>
             <shortdesc lang="en">
 	    	Quick/brief status checks.
             </shortdesc>
 	    <content type="boolean"/>
         </parameter>
 
 	<parameter name="self_fence">
 	    <longdesc lang="en">
 	        If set and unmounting the file system fails, the node will
 		immediately reboot.  Generally, this is used in conjunction
 		with force_unmount support, but it is not required.
 	    </longdesc>
 	    <shortdesc lang="en">
 	        Seppuku Unmount
 	    </shortdesc>
 	    <content type="boolean"/>
 	</parameter>
 
 	<parameter name="nfslock" inherit="nfslock">
 	    <longdesc lang="en">
 	        If set and unmounting the file system fails, the node will
 		try to kill lockd and issue reclaims across all remaining
 		network interface cards.
 	    </longdesc>
 	    <shortdesc lang="en">
 	        Enable NFS lock workarounds
 	    </shortdesc>
 	    <content type="boolean"/>
 	</parameter>
 
 	<parameter name="nfsrestart">
 	    <longdesc lang="en">
 		If set and unmounting the file system fails, the node will
 		try to restart nfs daemon and nfs lockd to drop all filesystem
 		references. Use this option as last resource.
 		This option requires force_unmount to be set and it is not
 		compatible with nfsserver resource.
 	    </longdesc>
 	    <shortdesc lang="en">
 		Enable NFS daemon and lockd workaround
 	    </shortdesc>
 	    <content type="boolean"/>
 	</parameter>
 
 	<parameter name="fsid">
 	    <longdesc lang="en">
 	    	File system ID for NFS exports.  This can be overridden
 		in individual nfsclient entries.
 	    </longdesc>
 	    <shortdesc lang="en">
 	    	NFS File system ID
 	    </shortdesc>
 	    <content type="string"/>
 	</parameter>
 
         <parameter name="force_fsck">
             <longdesc lang="en">
                 If set, the file system will be checked (even if
                 it is a journalled file system).  This option is
                 ignored for non-journalled file systems such as
                 ext2.
             </longdesc>
             <shortdesc lang="en">
                 Force fsck support
             </shortdesc>
 	    <content type="boolean"/>
         </parameter>
 
         <parameter name="options">
             <longdesc lang="en">
 	    	Options used when the file system is mounted.  These
 		are often file-system specific.  See mount(8) for supported
 		mount options.
             </longdesc>
             <shortdesc lang="en">
                 Mount Options
             </shortdesc>
 	    <content type="string"/>
         </parameter>
 
         <parameter name="use_findmnt">
             <longdesc lang="en">
         Use findmnt to determine if and where a filesystem is mounted.
         Disabling this uses the failback method (should be used if autofs
         maps are located on network storage (ie. nfs, iscsi, etc).
             </longdesc>
             <shortdesc lang="en">
         Utilize findmnt to detect if and where filesystems are mounted
             </shortdesc>
             <content type="boolean"/>
         </parameter>
 
     </parameters>
 
     <actions>
         <action name="start" timeout="900"/>
 	<action name="stop" timeout="30"/>
 	<!-- Recovery isn't possible; we don't know if resources are using
 	     the file system. -->
 
 	<!-- Checks to see if it's mounted in the right place -->
 	<action name="status" interval="1m" timeout="10"/>
 	<action name="monitor" interval="1m" timeout="10"/>
 
 	<!-- Note: active monitoring is constant and supplants all
 	     check depths -->
 	<!-- Checks to see if we can read from the mountpoint -->
 	<action name="status" depth="10" timeout="30" interval="30"/>
 	<action name="monitor" depth="10" timeout="30" interval="30"/>
 
 	<!-- Checks to see if we can write to the mountpoint (if !ROFS) -->
 	<action name="status" depth="20" timeout="30" interval="1m"/>
 	<action name="monitor" depth="20" timeout="30" interval="1m"/>
 
 	<action name="meta-data" timeout="5"/>
 	<action name="validate-all" timeout="5"/>
     </actions>
 
     <special tag="rgmanager">
 	<attributes maxinstances="1"/>
         <child type="fs" start="1" stop="3"/>
         <child type="clusterfs" start="1" stop="3"/>
         <child type="nfsexport" start="3" stop="1"/>
     </special>
 </resource-agent>
 EOT
 }
 
 
 verify_fstype()
 {
 	# Auto detect?
 	[ -z "$OCF_RESKEY_fstype" ] && return 0
 
 	case $OCF_RESKEY_fstype in
 	ext2|ext3|ext4|btrfs|jfs|xfs|reiserfs|vfat|vxfs)
 		return 0
 		;;
 	*)
 		echo "File system type $OCF_RESKEY_fstype not supported"
 		return $OCF_ERR_ARGS
 		;;
 	esac
 }
 
 
 verify_options()
 {
 	declare -i ret=$OCF_SUCCESS
 	declare o
 
 	#
 	# From mount(8)
 	#
 	for o in `echo $OCF_RESKEY_options | sed -e s/,/\ /g`; do
 		case $o in
 		async|atime|auto|defaults|dev|exec|_netdev|noatime)
 			continue
 			;;
 		noauto|nodev|noexec|nosuid|nouser|ro|rw|suid|sync)
 			continue
 			;;
 		dirsync|user|users)
 			continue
 			;;
 		esac
 
 		do_verify_option $OCF_RESKEY_fstype "$o"
 
 		case $OCF_RESKEY_fstype in
 		ext2|ext3|ext4)
 			case $o in
 			bsddf|minixdf|check|check=*|nocheck|debug)
 				continue
 				;;
 			errors=*|grpid|bsdgroups|nogrpid|sysvgroups)
 				continue
 				;;
 			resgid=*|resuid=*|sb=*|grpquota|noquota)
 				continue
 				;;
 			quota|usrquota|nouid32)
 				continue
 				;;
 			esac
 
 			if [ "$OCF_RESKEY_fstype" = "ext3" ] ||
 			   [ "$OCF_RESKEY_fstype" = "ext4" ]; then
 				case $o in
 				noload|data=*)
 					continue
 					;;
 				esac
 			fi
 			;;
 		vfat)
 			case $o in
 			blocksize=512|blocksize=1024|blocksize=2048)
 				continue
 				;;
 			uid=*|gid=*|umask=*|dmask=*|fmask=*)
 				continue
 				;;
 			check=r*|check=n*|check=s*|codepage=*)
 				continue
 				;;
 			conv=b*|conv=t*|conv=a*|cvf_format=*)
 				continue
 				;;
 			cvf_option=*|debug|fat=12|fat=16|fat=32)
 				continue
 				;;
 			iocharset=*|quiet)
 				continue
 				;;
 			esac
 			;;
 
 		jfs)
 			case $o in
 			conv|hash=rupasov|hash=tea|hash=r5|hash=detect)
 				continue
 				;;
 			hashed_relocation|no_unhashed_relocation)
 				continue
 				;;
 			noborder|nolog|notail|resize=*)
 				continue
 				;;
 			esac
 			;;
 
 		xfs)
 			case $o in
 			biosize=*|dmapi|xdsm|logbufs=*|logbsize=*)
 				continue
 				;;
 			logdev=*|rtdev=*|noalign|noatime)
 				continue
 				;;
 			norecovery|osyncisdsync|quota|userquota)
 				continue
 				;;
 			uqnoenforce|grpquota|gqnoenforce)
 				continue
 				;;
 			sunit=*|swidth=*)
 				continue
 				;;
 			esac
 			;;
 
 		btrfs)
 			# tbd
 			continue
 			;;
 		esac
 
 		echo Option $o not supported for $OCF_RESKEY_fstype
 		ret=$OCF_ERR_ARGS
 	done
 
 	return $ret
 }
 
 
 do_validate()
 {
 	verify_name || return $OCF_ERR_ARGS
 	verify_fstype || return $OCF_ERR_ARGS
 	verify_device || return $OCF_ERR_ARGS
 	verify_mountpoint || return $OCF_ERR_ARGS
 	verify_options || return $OCF_ERR_ARGS
 }
 
 
 do_pre_mount()
 {
 	declare fstype="$OCF_RESKEY_fstype"
 
 	#
 	# Check to determine if we need to fsck the filesystem.
 	#
 	# Note: this code should not indicate in any manner suggested
 	# file systems to use in the cluster.  Known filesystems are
 	# listed here for correct operation.
 	#
         case "$fstype" in
         reiserfs) typeset fsck_needed="" ;;
         ext3)     typeset fsck_needed="" ;;
         ext4)     typeset fsck_needed="" ;;
         btrfs)    typeset fsck_needed="" ;;
         jfs)      typeset fsck_needed="" ;;
         xfs)      typeset fsck_needed="" ;;
 	vxfs)	  typeset fsck_needed="" ;;
         ext2)     typeset fsck_needed=yes ;;
         minix)    typeset fsck_needed=yes ;;
         vfat)     typeset fsck_needed=yes ;;
         msdos)    typeset fsck_needed=yes ;;
 	"")       typeset fsck_needed=yes ;;		# assume fsck
 	*)
 		typeset fsck_needed=yes 		# assume fsck
 	     	ocf_log warn "\
 Unknown file system type '$fstype' for device $dev.  Assuming fsck is required."
 		;;
 	esac
 
 
 	#
 	# Fsck the device, if needed.
 	#
 	if [ -n "$fsck_needed" ] || [ "${OCF_RESKEY_force_fsck}" = "yes" ] ||\
 	   [ "${OCF_RESKEY_force_fsck}" = "1" ]; then
 		typeset fsck_log=@LOGDIR@/$(basename $dev).fsck.log
 		ocf_log debug "Running fsck on $dev"
 		fsck -p $dev >> $fsck_log 2>&1
 		ret_val=$?
 		if [ $ret_val -gt 1 ]; then
 			ocf_log err "\
 'fsck -p $dev' failed, error=$ret_val; check $fsck_log for errors"
 			ocf_log debug "Invalidating buffers for $dev"
 			$INVALIDATEBUFFERS -f $dev
 			return $OCF_ERR_GENERIC
 		fi
 		rm -f $fsck_log
 	fi
 
 	return 0
 }
 
 do_post_mount() {
 	#
 	# Create this for the NFS NLM broadcast bit
 	#
 	if [ $NFS_TRICKS -eq 0 ]; then
 		if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
 	   	   [ "$OCF_RESKEY_nfslock" = "1" ]; then
 			mkdir -p "$mp"/.clumanager/statd
+			chown rpcuser:rpcuser "$mp"/.clumanager/statd	# presumably so statd (rpcuser) can write here
 			notify_list_merge "$mp"/.clumanager/statd
 		fi
 	fi
 
 	return 0
 }
 
 
 do_force_unmount() {
 	if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
 	   [ "$OCF_RESKEY_nfslock" = "1" ]; then
 		ocf_log warning "Dropping node-wide NFS locks"
 		pkill -KILL -x lockd
 		mkdir -p "$mp"/.clumanager/statd
+		chown rpcuser:rpcuser "$mp"/.clumanager/statd	# presumably so statd (rpcuser) can write here
 		# Copy out the notify list; our
 		# IPs are already torn down
 		notify_list_store "$mp"/.clumanager/statd
 
 		# Save for post-umount phase
 		export nfslock_reclaim=1
 	fi
 
 	if [ "$OCF_RESKEY_nfsrestart" = "yes" ] || \
 	   [ "$OCF_RESKEY_nfsrestart" = "1" ]; then
 		ocf_log warning "Restarting nfsd/nfslock"
 		nfsexports=$(cat /var/lib/nfs/etab)
 		service nfslock stop
 		service nfs stop
 		service nfs start
 		service nfslock start
 		echo "$nfsexports" | { while read line; do
 			nfsexp=$(echo $line | awk '{print $1}')
 			nfsopts=$(echo $line | sed -e 's#.*(##g' -e 's#).*##g')
 			nfsacl=$(echo $line | awk '{print $2}' | sed -e 's#(.*##g')
 			if [ -n "$nfsopts" ]; then
 				exportfs -i -o "$nfsopts" "$nfsacl":$nfsexp
 			else
 				exportfs -i "$nfsacl":$nfsexp
 			fi
 		done; }
 	fi
 
 	# Proceed with fuser -kvm...
 	return 1
 }
 
 
 do_post_unmount() {
 	if [ "$nfslock_reclaim" = "1" ]; then
 		# If we have this flag set, do a full reclaim broadcast
 		notify_list_broadcast "$mp"/.clumanager/statd
 	fi
 
 	return 0
 }
 
 main $*
diff --git a/tools/ocf-tester.in b/tools/ocf-tester.in
index ae2c4a9ff..10822a5a0 100755
--- a/tools/ocf-tester.in
+++ b/tools/ocf-tester.in
@@ -1,432 +1,432 @@
 #!/bin/sh
 #
 #	$Id: ocf-tester,v 1.2 2006/08/14 09:38:20 andrew Exp $
 #
 # Copyright (c) 2006 Novell Inc, Andrew Beekhof
 #                    All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 LRMD=@libdir@/heartbeat/lrmd
 LRMADMIN=@sbindir@/lrmadmin
 DATADIR=@datadir@
 METADATA_LINT="xmllint --noout --valid -"
 
 # set some common meta attributes, which are expected to be
 # present by resource agents
 export OCF_RESKEY_CRM_meta_timeout=20000  # 20 seconds timeout
 export OCF_RESKEY_CRM_meta_interval=10000  # reset this for probes
 
 num_errors=0
 
 info() {
     [ "$quiet" -eq 1 ] && return
     echo "$*"
 }
 debug() {
     [ "$verbose" -eq 0 ] && return
     echo "$*"
 }
 usage() {
     # make sure to output errors on stderr
     [ "x$1" = "x0" ] || exec >&2
 
     echo "Tool for testing if a cluster resource is OCF compliant"
     echo ""
     echo "Usage: ocf-tester [-LhvqdX] -n resource_name [-o name=value]* /full/path/to/resource/agent"
     echo ""
     echo "Options:"
     echo "  -h       		This text"
     echo "  -v       		Be verbose while testing"
     echo "  -q       		Be quiet while testing"
     echo "  -d       		Turn on RA debugging"
     echo "  -X       		Turn on RA tracing (expect large output)"
     echo "  -n name		Name of the resource"	
     echo "  -o name=value		Name and value of any parameters required by the agent"
     echo "  -L			Use lrmadmin/lrmd for tests"
     exit $1
 }
 
 assert() {
     rc=$1; shift
     target=$1; shift
     msg=$1; shift
     local targetrc matched
 
     if [ $# = 0 ]; then
 	exit_code=0
     else
 	exit_code=$1; shift
     fi
 
     for targetrc in `echo $target | tr ':' ' '`; do
         [ $rc -eq $targetrc ] && matched=1
     done
     if [ "$matched" != 1 ]; then
 	num_errors=`expr $num_errors + 1`
 	echo "* rc=$rc: $msg"
 	if [ $exit_code != 0 ]; then
 	    [ -n "$command_output" ] && cat<<EOF
 $command_output
 EOF
 	    echo "Aborting tests"
 	    exit $exit_code
 	fi
     fi
     command_output=""
 }
 
 done=0
 ra_args=""
 verbose=0
 quiet=0
 while test "$done" = "0"; do
     case "$1" in
 	-n) OCF_RESOURCE_INSTANCE=$2; ra_args="$ra_args OCF_RESOURCE_INSTANCE=$2"; shift; shift;;
 	-o) name=${2%%=*}; value=${2#*=}; 
 		lrm_ra_args="$lrm_ra_args $2";
 		ra_args="$ra_args OCF_RESKEY_$name='$value'"; shift; shift;;
 	-L) use_lrmd=1; shift;;
 	-v) verbose=1; shift;;
 	-d) export HA_debug=1; shift;;
 	-X) export OCF_TRACE_RA=1; verbose=1; shift;;
 	-q) quiet=1; shift;;
 	-?|--help) usage 0;;
 	--version) echo "@PACKAGE_VERSION@"; exit 0;;
 	-*) echo "unknown option: $1" >&2; usage 1;;
 	*) done=1;;
     esac
 done
 
 if [ "x" = "x$OCF_ROOT" ]; then
     if [ -d /usr/lib/ocf ]; then
 	export OCF_ROOT=/usr/lib/ocf
     else
 	echo "You must supply the location of OCF_ROOT (common location is /usr/lib/ocf)" >&2
 	usage 1
     fi
 fi
 
 if [ "x" = "x$OCF_RESOURCE_INSTANCE" ]; then
     echo "You must give your resource a name, set OCF_RESOURCE_INSTANCE" >&2
     usage 1
 fi
 
 agent=$1
 if [ ! -e $agent ]; then
     echo "You must provide the full path to your resource agent" >&2
     usage 1
 fi
 installed_rc=5
 stopped_rc=7
 has_demote=1
 has_promote=1
 
 start_lrmd() {
 	lrmd_timeout=0
 	lrmd_interval=0
 	lrmd_target_rc=EVERYTIME
 	lrmd_started=""
 	$LRMD -s 2>/dev/null
 	rc=$?
 	if [ $rc -eq 3 ]; then
 		lrmd_started=1
 		$LRMD &
 		sleep 1
 		$LRMD -s 2>/dev/null
 	else
 		return $rc
 	fi
 }
 add_resource() {
 	$LRMADMIN -A $OCF_RESOURCE_INSTANCE \
 		ocf \
 		`basename $agent` \
 		$(basename `dirname $agent`) \
 		$lrm_ra_args > /dev/null
 }
 del_resource() {
 	$LRMADMIN -D $OCF_RESOURCE_INSTANCE
 }
 parse_lrmadmin_output() {
 	awk '
 BEGIN{ rc=1; }
 /Waiting for lrmd to callback.../ { n=1; next; }
 n==1 && /----------------operation--------------/ { n++; next; }
 n==2 && /return code:/ { rc=$0; sub("return code: *","",rc); next }
 n==2 && /---------------------------------------/ {
         n++;
         next;
 }
 END{
 	if( n!=3 ) exit 1;
 	else exit rc;
 }
 '
 }
 exec_resource() {
 	op="$1"
 	args="$2"
 	$LRMADMIN -E $OCF_RESOURCE_INSTANCE \
 		$op $lrmd_timeout $lrmd_interval \
 		$lrmd_target_rc \
 		$args | parse_lrmadmin_output
 }
 
 if [ "$use_lrmd" = 1 ]; then
 	echo "Using lrmd/lrmadmin for all tests"
 	start_lrmd || {
 		echo "could not start lrmd" >&2
 		exit 1
 	}
 	trap '
 		[ "$lrmd_started" = 1 ] && $LRMD -k
 	' EXIT
 	add_resource || {
 		echo "failed to add resource to lrmd" >&2
 		exit 1
 	}
 fi
 
 lrm_test_command() {
 	action="$1"
 	msg="$2"
 	debug "$msg"
 	exec_resource $action "$lrm_ra_args"
 }
 
 test_permissions() {
     action=meta-data
     debug ${1:-"Testing permissions with uid nobody"}
-    su nobody -s /bin/sh $agent $action > /dev/null
+    su nobody -s /bin/sh -c "$agent $action" > /dev/null
 }
 
 test_metadata() {
     action=meta-data
     msg=${1:-"Testing: $action"}
     debug $msg
     $agent $action | (cd $DATADIR/resource-agents && $METADATA_LINT)
     rc=$?
     #echo rc: $rc
     return $rc
 }
 
 test_command() {
     action=$1; shift
     export __OCF_ACTION=$action
     msg=${1:-"Testing: $action"}
     if [ "$use_lrmd" = 1 ]; then
     	lrm_test_command $action "$msg"
     	return $?
     fi
     #echo Running: "export $ra_args; $agent $action 2>&1 > /dev/null"
     if [ $verbose -eq 0 ]; then
 	command_output=`$agent $action 2>&1`
     else
     	debug $msg
 	$agent $action
     fi
     rc=$?
     #echo rc: $rc
     return $rc
 }
 
 # Begin tests
 info "Beginning tests for $agent..."
 
 if [ ! -f $agent ]; then
     assert 7 0 "Could not find file: $agent"
 fi
 
 if [ `id -u` = 0 ]; then
 	test_permissions
 	assert $? 0 "Your agent has too restrictive permissions: should be 755"
 else
 	echo "WARN: Can't check agent's permissions because we're not root; they should be 755"
 fi
 
 test_metadata
 assert $? 0 "Your agent produces meta-data which does not conform to ra-api-1.dtd"
 
 OCF_TESTER_FAIL_HAVE_BINARY=1
 export OCF_TESTER_FAIL_HAVE_BINARY
 test_command meta-data
 rc=$?
 if [ $rc -eq 3 ]; then
     assert $rc 0 "Your agent does not support the meta-data action"
 else
     assert $rc 0 "The meta-data action cannot fail and must return 0"
 fi
 unset OCF_TESTER_FAIL_HAVE_BINARY
 
 ra_args="export $ra_args"
 eval $ra_args
 test_command validate-all
 rc=$?
 if [ $rc -eq 3 ]; then
     assert $rc 0 "Your agent does not support the validate-all action"
 elif [ $rc -ne 0 ]; then
     assert $rc 0 "Validation failed.  Did you supply enough options with -o ?" 1
     usage $rc
 fi
 
 test_command monitor "Checking current state"
 rc=$?
 if [ $rc -eq 3 ]; then
     assert $rc 7 "Your agent does not support the monitor action" 1
 
 elif [ $rc -eq 8 ]; then
     test_command demote "Cleanup, demote"
     assert $? 0 "Your agent was a master and could not be demoted" 1
 
     test_command stop "Cleanup, stop"
     assert $? 0 "Your agent was a master and could not be stopped" 1
 
 elif [ $rc -ne 7 ]; then
     test_command stop
     assert $? 0 "Your agent was active and could not be stopped" 1
 fi
 
 test_command monitor
 assert $? $stopped_rc "Monitoring a stopped resource should return $stopped_rc"
 
 OCF_TESTER_FAIL_HAVE_BINARY=1
 export OCF_TESTER_FAIL_HAVE_BINARY
 OCF_RESKEY_CRM_meta_interval=0
 test_command monitor
 assert $? $stopped_rc:$installed_rc "The initial probe for a stopped resource should return $stopped_rc or $installed_rc even if all binaries are missing"
 unset OCF_TESTER_FAIL_HAVE_BINARY
 OCF_RESKEY_CRM_meta_interval=20000
 
 test_command start
 assert $? 0 "Start failed.  Did you supply enough options with -o ?" 1
 
 test_command monitor
 assert $? 0 "Monitoring an active resource should return 0"
 
 OCF_RESKEY_CRM_meta_interval=0
 test_command monitor
 assert $? 0 "Probing an active resource should return 0"
 OCF_RESKEY_CRM_meta_interval=20000
 
 test_command notify
 rc=$?
 if [ $rc -eq 3 ]; then
     info "* Your agent does not support the notify action (optional)"
 else
     assert $rc 0 "The notify action cannot fail and must return 0"
 fi
 
 test_command demote "Checking for demote action"
 if [ $? -eq 3 ]; then
     has_demote=0
     info "* Your agent does not support the demote action (optional)"
 fi
 
 test_command promote "Checking for promote action"
 if [ $? -eq 3 ]; then
     has_promote=0
     info "* Your agent does not support the promote action (optional)"
 fi
 
 if [ $has_promote -eq 1 -a $has_demote -eq 1 ]; then
     test_command demote "Testing: demotion of started resource"
     assert $? 0 "Demoting a start resource should not fail"
 
     test_command promote
     assert $? 0 "Promote failed"
 
     test_command demote
     assert $? 0 "Demote failed" 1
 
     test_command demote "Testing: demotion of demoted resource"
     assert $? 0 "Demoting a demoted resource should not fail"
 
     test_command promote "Promoting resource"
     assert $? 0 "Promote failed" 1
 
     test_command promote "Testing: promotion of promoted resource"
     assert $? 0 "Promoting a promoted resource should not fail"
 
     test_command demote "Demoting resource"
     assert $? 0 "Demote failed" 1
 
 elif [ $has_promote -eq 0 -a $has_demote -eq 0 ]; then
     info "* Your agent does not support master/slave (optional)"
 
 else
     echo "* Your agent partially supports master/slave"
     num_errors=`expr $num_errors + 1`
 fi
 
 test_command stop
 assert $? 0 "Stop failed" 1
 
 test_command monitor
 assert $? $stopped_rc "Monitoring a stopped resource should return $stopped_rc"
 
 test_command start "Restarting resource..."
 assert $? 0 "Start failed" 1
 
 test_command monitor
 assert $? 0 "Monitoring an active resource should return 0"
 
 test_command start "Testing: starting a started resource"
 assert $? 0 "Starting a running resource is required to succeed"
 
 test_command monitor
 assert $? 0 "Monitoring an active resource should return 0"
 
 test_command stop "Stopping resource"
 assert $? 0 "Stop could not clean up after multiple starts" 1
 
 test_command monitor
 assert $? $stopped_rc "Monitoring a stopped resource should return $stopped_rc"
 
 test_command stop "Testing: stopping a stopped resource"
 assert $? 0 "Stopping a stopped resource is required to succeed"
 
 test_command monitor
 assert $? $stopped_rc "Monitoring a stopped resource should return $stopped_rc"
 
 test_command migrate_to "Checking for migrate_to action"
 rc=$?
 if [ $rc -ne 3 ]; then
     test_command migrate_from "Checking for migrate_from action"
 fi
 if [ $? -eq 3 ]; then
     info "* Your agent does not support the migrate action (optional)"
 fi
 
 test_command reload "Checking for reload action"
 if [ $? -eq 3 ]; then
     info "* Your agent does not support the reload action (optional)"
 fi
 
 if [ $num_errors -gt 0 ]; then
     echo "Tests failed: $agent failed $num_errors tests" >&2
     exit 1
 else 
     echo $agent passed all tests
     exit 0
 fi
 
 # vim:et:ts=8:sw=4
diff --git a/tools/ocft/IPaddr2 b/tools/ocft/IPaddr2
index 1cf81bf73..04698a056 100644
--- a/tools/ocft/IPaddr2
+++ b/tools/ocft/IPaddr2
@@ -1,137 +1,137 @@
 # IPaddr2
 
 CONFIG
 	Agent IPaddr2
 	AgentRoot /usr/lib/ocf/resource.d/heartbeat
 	HangTimeout 20
 
 SETUP-AGENT
 	ip addr add 192.168.144.1/24 dev eth0 brd 192.168.144.255
 
 CLEANUP-AGENT
 	ip addr del 192.168.144.1/24 dev eth0
 
 CASE-BLOCK required_args
 	Env OCF_RESKEY_ip=192.168.144.2
 
 CASE-BLOCK check_iflabel_assigned
 	Bash ip -4 -o addr show eth0 | grep -w 192.168.144.2/24 | grep -w eth0:iflabel >/dev/null # checking iflabel was assigned correctly
 
 CASE-BLOCK check_iflabel_removed
 	Bash ! ip -4 -o addr show eth0 | grep -w 192.168.144.2/24 | grep -w eth0:iflabel >/dev/null # checking iflabel was removed correctly
 
 CASE-BLOCK default_status
 	AgentRun stop
 
 CASE-BLOCK prepare
 	Include required_args
 	Include default_status
 
 CASE "check base env"
 	Include prepare
 	AgentRun start OCF_SUCCESS
 
 CASE "check base env: unset 'OCF_RESKEY_ip'"
 	Include prepare
 	Unenv OCF_RESKEY_ip
 	AgentRun start OCF_ERR_CONFIGURED
 
 CASE "check base env: set invalid 'OCF_RESKEY_ip'"
 	Include prepare
 	Env OCF_RESKEY_ip=not_ip_address
 	AgentRun start OCF_ERR_CONFIGURED
 
 CASE "check base env: set 'OCF_RESKEY_cidr_netmask'"
 	Include prepare
 	Env OCF_RESKEY_cidr_netmask=24
 	AgentRun start OCF_SUCCESS
 
 CASE "check base env: set invalid 'OCF_RESKEY_cidr_netmask'"
 	Include prepare
 	Env OCF_RESKEY_cidr_netmask=not_netmask
 	AgentRun start OCF_ERR_CONFIGURED
 
 CASE "check base env: set 'OCF_RESKEY_broadcast'"
 	Include prepare
 	Env OCF_RESKEY_broadcast=192.168.144.255
 	AgentRun start OCF_SUCCESS
 
 CASE "check base env: set invalid 'OCF_RESKEY_broadcast'"
 	Include prepare
 	Env OCF_RESKEY_broadcast=not_broadcast
 	AgentRun start OCF_ERR_CONFIGURED
 
 CASE "check base env: set 'OCF_RESKEY_nic'"
 	Include prepare
 	Env OCF_RESKEY_nic=eth0
 	AgentRun start OCF_SUCCESS
 
 CASE "check base env: set invalid 'OCF_RESKEY_nic'"
 	Include prepare
 	Env OCF_RESKEY_nic=not_nic
 	AgentRun start OCF_ERR_CONFIGURED
 	AgentRun validate-all OCF_ERR_CONFIGURED
 
 CASE "normal start"
 	Include prepare
 	AgentRun start OCF_SUCCESS
 
 CASE "normal stop"
 	Include prepare
 	AgentRun start
 	AgentRun stop OCF_SUCCESS
 
 CASE "double start"
 	Include prepare
 	AgentRun start
 	AgentRun start OCF_SUCCESS
 
 CASE "double stop"
 	Include prepare
 	AgentRun stop OCF_SUCCESS
 
 CASE "monitor with running"
 	Include prepare
 	AgentRun start
 	AgentRun monitor OCF_SUCCESS
 
 CASE "monitor with not running"
 	Include prepare
 	AgentRun monitor OCF_NOT_RUNNING
 
 CASE "unimplemented command"
 	Include prepare
 	AgentRun no_cmd OCF_ERR_UNIMPLEMENTED
 
 CASE "Attachment to loopback interface"
 	Env OCF_RESKEY_ip=127.0.0.3
 	AgentRun start OCF_SUCCESS
 	AgentRun monitor OCF_SUCCESS
 	AgentRun stop OCF_SUCCESS
 
 CASE "check additional env: set 'OCF_RESKEY_iflabel'"
 	Include prepare
 	Env OCF_RESKEY_nic=eth0
 	Env OCF_RESKEY_iflabel=iflabel
 	AgentRun start OCF_SUCCESS
 	Include check_iflabel_assigned
 	AgentRun stop OCF_SUCCESS
 	Include check_iflabel_removed
 
 # This is deprecated but still supported for the compatibility.
 CASE "check additional env: specify iflabel in 'OCF_RESKEY_nic'"
 	Include prepare
 	Env OCF_RESKEY_nic=eth0:iflabel
 	AgentRun start OCF_SUCCESS
 	Include check_iflabel_assigned
 	AgentRun stop OCF_SUCCESS
 	Include check_iflabel_removed
 
 # monitor should return OCF_ERR_GENERIC rather than OCF_ERR_CONFIGURED
-# when the specified OCF_RESKEY_nic is disappeard by a failure.
+# when the specified OCF_RESKEY_nic has vanished because of a failure.
 # This has been changed as of 3.9.6.
-CASE "monitor failure when 'OCF_RESKEY_nic' is disappeared"
+CASE "monitor failure when 'OCF_RESKEY_nic' is vanished"
 	Include prepare
-	Env OCF_RESKEY_nic=ethDisappear
+	Env OCF_RESKEY_nic=ethVanished
 	Env OCF_RESKEY_CRM_meta_interval=10 # not in probe
 	AgentRun monitor OCF_ERR_GENERIC
diff --git a/tools/ocft/Makefile.am b/tools/ocft/Makefile.am
index 8191c11d1..69c59eeec 100644
--- a/tools/ocft/Makefile.am
+++ b/tools/ocft/Makefile.am
@@ -1,63 +1,63 @@
 # Author: John Shi
 # jshi@suse.de
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 # 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 # 
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 #
 
 MAINTAINERCLEANFILES = Makefile.in
 
-EXTRA_DIST		= $(ocftcfgs_DATA) $(ocft_DATA)
+EXTRA_DIST		= $(ocftcfgs_DATA) $(ocft_DATA) $(ocft_SCRIPTS)
 
 sbin_SCRIPTS		= ocft
 
 ocftcfgsdir		= $(datadir)/$(PACKAGE_NAME)/ocft/configs
 ocftcfgs_DATA      =  apache  	\
 			 IPaddr2	\
 			 IPaddr2v4	\
 			 IPaddr2v6	\
 			 IPv6addr	\
 			 Filesystem	\
 			 LVM	\
 		       	 Raid1	\
 			 IPsrcaddr  	\
 			 MailTo		\
 			 jboss	\
 		       	 mysql		\
 		       	 mysql-proxy		\
 		       	 pgsql		\
 		       	 db2		\
 		       	 oracle		\
 		       	 drbd.linbit		\
 		       	 exportfs	\
 		       	 exportfs-multidir	\
 		       	 nfsserver	\
 		       	 portblock	\
 		       	 iscsi	\
 		       	 named	\
 		       	 postfix	\
 			 sg_persist \
 			 tomcat	\
 		       	 Xinetd	\
 		       	 Xen	\
 		       	 VirtualDomain	\
 			 SendArp
 
 ocftdir			= $(datadir)/$(PACKAGE_NAME)/ocft
 ocft_DATA		= README	\
 			  README.zh_CN	\
 			  caselib	\
 			  helpers.sh	\
 			  runocft.prereq
 ocft_SCRIPTS	= runocft
 
diff --git a/tools/ocft/README.in b/tools/ocft/README.in
index c837fcc35..1c4ae1287 100644
--- a/tools/ocft/README.in
+++ b/tools/ocft/README.in
@@ -1,147 +1,147 @@
 INTRODUCTION & DESIGN
 ~~~~~~~~~~~~~~~~~~~~~
 
   - Ocft is a testing tool for resource agents. Instead of the policy of HA,
-    it mainly concerns whether resource agents run correct locally. It can 
-    design types of complicated environments to test the reliability of 
-    resource agents. Precisely, it is to display whether resource agents can 
-    return to correct or expected value. The advantage of the tool provides 
-    us with competence to design conditions which can be recorded or reproduced. 
+    it mainly concerns whether resource agents run correctly locally. It can
+    set up various complicated environments to test the reliability of
+    resource agents. Precisely, it shows whether resource agents
+    return the correct or expected value. The advantage of the tool is that it
+    lets us design conditions which can be recorded or reproduced.
     Hence it is useful to debuggers.
 
 * Components
     ** Test case generator (@sbindir@/ocft)
       - Turning configuration files of test case to executable scripts.
 
     ** Configuration file  (@datadir@/@PACKAGE_NAME@/ocft/configs/)
-      - Every configuration file directs only one resource agent and share the same 
+      - Every configuration file targets only one resource agent and shares the same
         name with resource agent but contains more test cases.
 
     ** The testing script  (/var/lib/@PACKAGE_NAME@/ocft/cases/)
-      - After the generator reads configuration files and generates many testing 
+      - After the generator reads configuration files and generates many testing
         scripts and the script is underway, the test begins.
 
 * How to customize the environment of testing
-  - Ocft designs the running conditions through two ways, one is changing the 
-    environment variables of resource agents (it is the interface left by OCF itself), 
-    the other is modifying the OS environment of resource agents, such as altering 
+  - Ocft sets up the running conditions in two ways: one is changing the
+    environment variables of resource agents (the interface provided by OCF itself),
+    the other is modifying the OS environment of resource agents, such as altering
     the permission of some key file or IP address of the machine.
 
 * How to test
-  - Firstly, you need to sketch the all complex and uncommon environments against 
-    a certain resource agent and keep in mind what consequences may be caused by 
-    these uncommon environments. 
-    Secondly, write the designed conditions and foreknown consequences into 
-    configuration files, and then run the generator to translate the test case to 
-    executable scripts. 
-    Finally, you need running these scripts to observe the output and learn 
-    the running status of each test case, which will compares the predicated result 
-    with the actual one. If they differ, you will be able to find the bugs of the 
+  - Firstly, you need to sketch all the complex and uncommon environments for
+    a certain resource agent and keep in mind what consequences may be caused by
+    these uncommon environments.
+    Secondly, write the designed conditions and expected consequences into
+    configuration files, and then run the generator to translate the test cases to
+    executable scripts.
+    Finally, you need to run these scripts to observe the output and learn
+    the running status of each test case, which compares the predicted result
+    with the actual one. If they differ, you will be able to find the bugs of the
     resource agent.
   - All of the output with test will be recorded into the log files, you can find them
     in /var/lib/@PACKAGE_NAME@/ocft/cases/logs.
 
 
 HOW TO WRITE CONFIGURATION FILE
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-  - There are only 6 top level options that are all spelled by capital letters and "-". 
+  - There are only 6 top level options, all spelled with capital letters and "-".
     Every top level option contains sub-options that they are initials.
 
 * 'CONFIG' (top level option)
   - Grammar: CONFIG
   - The design in this option is global and influences every test case.
 
     ** 'Agent' (sub-option)
       - Grammar: Agent AGENT_NAME
       - The agent name you want to test.
 
     ** 'AgentRoot' (sub-option)
       - Grammar: AgentRoot /usr/lib/ocf/resource.d/xxx
       - A few agents will go to "linbit" or "pacemaker" directory, if you define this option,
         ocft will use it to replace the default directory "heartbeat".
 
     ** 'InstallPackage' (sub-option)
       - Grammar: InstallPackage package [package2 [...]]
-      - It will test whether the system have installed the service of the resource agent. 
+      - It will test whether the system has installed the service of the resource agent.
         If not, it will download from Internet and have it installed automatically.
 
     ** 'HangTimeout' (sub-option)
       - Grammar: HangTimeout secs
-      - If you alter some key options, some resource agents will get puzzled and stop, 
-        which will influence the running of the following test case.  Hence timeout setting is 
+      - If you alter some key options, some resource agents may get confused and hang,
+        which will affect the running of the following test cases.  Hence a timeout setting is
         needed, if the resource agent stops timeout, the scripts will kill this resource agent.
 
 * 'VARIABLE' (top level option)
-  - Garmmar: 
+  - Grammar:
       VARIABLE
           VAR1=value1
           VAR2=value2
           ...
   - Define the global variable here, the variables can be visited everywhere, they can be referenced
     using $VAR_NAME. Note, the variables in VARIABLE are different from 'Env VAR1=value1', 'Env' can
     affect the activity of agent, but the variables in VARIABLE just be shared with top level option.
 
 * 'SETUP-AGENT' (top level option)
-  - Grammar: 
+  - Grammar:
       SETUP-AGENT
           bash scripts...
 	  ...
-  - Some of Agents may need to be initialized before testing, you can do it here with bash script. 
+  - Some agents may need to be initialized before testing; you can do it here with a bash script.
 
 * 'CLEANUP-AGENT' (top level option)
-  - Garmmar:
+  - Grammar:
       CLEANUP-AGENT
           bash scripts...
           ...
   - If SETUP-AGENT set, usually you might be use this option do some cleaning work after test.
 
 * 'CASE' & 'CASE-BLOCK' (top level option)
   - Grammar: CASE "description" & CASE-BLOCK macro_name
-  - Usually, the conditions you designed are more than one and a few 'CASE "..."' will 
-    appear in configuration file. It is worth noting that the following sub-options 
+  - Usually, you design more than one condition, so several 'CASE "..."' entries will
+    appear in the configuration file. It is worth noting that the following sub-options
     have 2 spellings:
-    One is general, where shell affects the local environment; the other is special, 
+    One is general, where shell affects the local environment; the other is special,
     where each options added "@ipaddr". It can remotely execute shell codes. In other words,
     it is to execute the shell codes from a remote host, which is meaningful when a resource
     agent needs 2 hosts. This remote shell is not a remote execution only through "ssh", but
     running a remote shell in the background while the test case is running. The remote shell
     runs in the background till the end and saves the results during the process. That is to
-    say, you can alternatively carry out local and remote shell code segments. 
-    The "CASE-BLOCK" option is a macro definer, the statements in "CASE-BLOCK" will be inserted 
+    say, you can alternately carry out local and remote shell code segments.
+    The "CASE-BLOCK" option is a macro definition; the statements in "CASE-BLOCK" will be inserted
     into "CASE" if you "Include" the "macro_name".
 
     ** 'Env' (sub-option)
       - Grammar: Env VARIABLE=value
-      - It is to set up an environment variable of the resource agent. They usually appear to 
+      - It is to set up an environment variable of the resource agent. They usually appear to
         be OCF_RESKEY_xxx. One point is to be noted is there is no blank by both sides of "=".
 
     ** 'Unenv' (sub-option)
-      - Grammer: Unenv VARIABLE [VARIABLE2 [...]]
+      - Grammar: Unenv VARIABLE [VARIABLE2 [...]]
       - Remove the environment variable.
 
     ** 'Include' (sub-option)
-      - Garmmer: Include macro_name
-      - It will be replaced by statements in 'macro_name', of course, you should define the 
+      - Grammar: Include macro_name
+      - It will be replaced by the statements in 'macro_name'; of course, you should define the
         content of 'macro_name' with 'CASE-BLOCK' first.
 
     ** 'Bash' (sub-option)
       - Grammar: Bash bash_codes
-      - This option is to set up the environment of OS, where you can insert BASH code to 
-        customize the system randomly. Note, do not cause unrecoverable consequences to the 
+      - This option sets up the OS environment; you can insert BASH code here to
+        customize the system arbitrarily. Note, do not cause unrecoverable consequences to the
         system.
 
     ** 'BashAtExit' (sub-option)
       - Grammar: BashAtExit bash_codes
-      - This option is to recover the OS environment in order to run another test case 
-        correctly. Of cause you can use 'Bash' option to recover it. However, if mistakes occur 
-        in the process, the script will quit directly instead of running your recovery codes. 
-        If it happens, you ought to use BashAtExit which can restore the system environment 
+      - This option is to recover the OS environment in order to run another test case
+        correctly. Of course you can use the 'Bash' option to recover it. However, if errors occur
+        in the process, the script will quit directly instead of running your recovery code.
+        For that case, you ought to use BashAtExit, which can restore the system environment
         before you quit.
 
     ** 'AgentRun' (sub-option)
       - Grammar: AgentRun cmd [ret_value]
-      - This option is to run resource agent. "cmd" is the parameter of the resource agent, 
-        such as "start, status, stop ...". The second parameter is optional. It will compare the 
-        actual returned value with the expected value when the script has run recourse agent. 
+      - This option runs the resource agent. "cmd" is the parameter of the resource agent,
+        such as "start, status, stop ...". The second parameter is optional. It will compare the
+        actual returned value with the expected value when the script has run the resource agent.
         If differs, bugs will be found.
diff --git a/tools/ocft/runocft b/tools/ocft/runocft
index f66b6a462..d269a6bba 100755
--- a/tools/ocft/runocft
+++ b/tools/ocft/runocft
@@ -1,37 +1,38 @@
+#!/bin/sh
 OCFTDIR=/usr/share/resource-agents/ocft
 CONFDIR=$OCFTDIR/configs
 
 prereq_run() {
 	eval "$@"
 }
 prereq_prog() {
 	which $@
 }
 
 test_prereq() {
 	local tp arg
 	tp=`echo $prereq|sed 's/:.*//'`
 	arg=`echo $prereq|sed 's/[a-z]*://'`
 	prereq_$tp $arg >/dev/null 2>&1
 }
 
 rm -f ocft.FAILED
 rc=0
 while read f prereq; do
 	if [ -n "$prereq" ] && ! test_prereq; then
 		echo "$f: prerequisite not fulfilled, skipping"
 		continue
 	fi
 	ocft make $f
 	if ! ocft test $f; then
 		echo $f >> ocft.FAILED
 		rc=1
 	fi
 done < $OCFTDIR/runocft.prereq
 
 if [ -f ocft.FAILED ]; then
 	echo "The following ocft tests failed:"
 	cat ocft.FAILED
 fi
 
 exit $rc
diff --git a/tools/tickle_tcp.c b/tools/tickle_tcp.c
index cf0bdcb39..7c5a53713 100644
--- a/tools/tickle_tcp.c
+++ b/tools/tickle_tcp.c
@@ -1,379 +1,379 @@
 /* 
    Tickle TCP connections tool
 
    Author:	Jiaju Zhang
    Based on the code in CTDB http://ctdb.samba.org/ written by
    Andrew Tridgell and Ronnie Sahlberg
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 
 #define discard_const(ptr) ((void *)((intptr_t)(ptr)))
 
 typedef union {
 	struct sockaddr     sa;
 	struct sockaddr_in  ip;
 	struct sockaddr_in6 ip6;
 } sock_addr;
 
 uint32_t uint16_checksum(uint16_t *data, size_t n);
 void set_nonblocking(int fd);
 void set_close_on_exec(int fd);
 static int parse_ipv4(const char *s, unsigned port, struct sockaddr_in *sin);
 static int parse_ipv6(const char *s, const char *iface, unsigned port, sock_addr *saddr);
 int parse_ip(const char *addr, const char *iface, unsigned port, sock_addr *saddr);
 int parse_ip_port(const char *addr, sock_addr *saddr);
 int send_tickle_ack(const sock_addr *dst, 
 		    const sock_addr *src, 
 		    uint32_t seq, uint32_t ack, int rst);
 static void usage(void);
 
 uint32_t uint16_checksum(uint16_t *data, size_t n)
 {
 	uint32_t sum=0;
 	while (n >= 2) {
 		sum += (uint32_t)ntohs(*data);
 		data++;        
 		n -= 2;
 	}                      
 	if (n == 1) {
 		sum += (uint32_t)ntohs(*(uint8_t *)data);
 	}
 	return sum;
 }       
 
 static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
 {
 	uint32_t sum = uint16_checksum(data, n);
 	uint16_t sum2;
 	sum += uint16_checksum((uint16_t *)(void *)&ip->saddr,
 				sizeof(ip->saddr));
 	sum += uint16_checksum((uint16_t *)(void *)&ip->daddr,
 				sizeof(ip->daddr));
 	sum += ip->protocol + n;
 	sum = (sum & 0xFFFF) + (sum >> 16);
 	sum = (sum & 0xFFFF) + (sum >> 16);
 	sum2 = htons(sum);
 	sum2 = ~sum2;
 	if (sum2 == 0) {
 		return 0xFFFF;
 	}
 	return sum2;
 }
 
 static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
 {
 	uint32_t phdr[2];
 	uint32_t sum = 0;
 	uint16_t sum2;
 
 	sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
 	sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
 
 	phdr[0] = htonl(n);
 	phdr[1] = htonl(ip6->ip6_nxt);
 	sum += uint16_checksum((uint16_t *)phdr, 8);
 
 	sum += uint16_checksum(data, n);
 
 	sum = (sum & 0xFFFF) + (sum >> 16);
 	sum = (sum & 0xFFFF) + (sum >> 16);
 	sum2 = htons(sum);
 	sum2 = ~sum2;
 	if (sum2 == 0) {
 		return 0xFFFF;
 	}
 	return sum2;
 }
 
 void set_nonblocking(int fd)
 {
 	unsigned v;
 	v = fcntl(fd, F_GETFL, 0);
 	fcntl(fd, F_SETFL, v | O_NONBLOCK);
 }
 
 void set_close_on_exec(int fd) 
 {               
 	unsigned v;
 	v = fcntl(fd, F_GETFD, 0);
 	fcntl(fd, F_SETFD, v | FD_CLOEXEC);
 }
 
 static int parse_ipv4(const char *s, unsigned port, struct sockaddr_in *sin)
 {
 	sin->sin_family = AF_INET;
 	sin->sin_port   = htons(port);
 
 	if (inet_pton(AF_INET, s, &sin->sin_addr) != 1) {
 		fprintf(stderr, "Failed to translate %s into sin_addr\n", s);
 		return -1;
 	}
 
 	return 0;
 }
 
 static int parse_ipv6(const char *s, const char *iface, unsigned port, sock_addr *saddr)
 {
 	saddr->ip6.sin6_family   = AF_INET6;
 	saddr->ip6.sin6_port     = htons(port);
 	saddr->ip6.sin6_flowinfo = 0;
 	saddr->ip6.sin6_scope_id = 0;
 
 	if (inet_pton(AF_INET6, s, &saddr->ip6.sin6_addr) != 1) {
 		fprintf(stderr, "Failed to translate %s into sin6_addr\n", s);
 		return -1;
 	}
 
 	if (iface && IN6_IS_ADDR_LINKLOCAL(&saddr->ip6.sin6_addr)) {
 		saddr->ip6.sin6_scope_id = if_nametoindex(iface);
 	}
 
         return 0;
 }
 
 int parse_ip(const char *addr, const char *iface, unsigned port, sock_addr *saddr)
 {
 	char *p;
 	int ret;
 
 	p = index(addr, ':');
 	if (!p)
 		ret = parse_ipv4(addr, port, &saddr->ip);
 	else
 		ret = parse_ipv6(addr, iface, port, saddr);
 
 	return ret;
 }
 
 int parse_ip_port(const char *addr, sock_addr *saddr)
 {
 	char *s, *p;
 	unsigned port;
 	char *endp = NULL;
 	int ret;
 
 	s = strdup(addr);
 	if (!s) {
 		fprintf(stderr, "Failed strdup()\n");
 		return -1;
 	}
 
 	p = rindex(s, ':');
 	if (!p) {
 		fprintf(stderr, "This addr: %s does not contain a port number\n", s);
 		free(s);
 		return -1;
 	}
 	
 	port = strtoul(p+1, &endp, 10);
 	if (!endp || *endp != 0) {
 		fprintf(stderr, "Trailing garbage after the port in %s\n", s);
 		free(s);
 		return -1;
 	}
 	*p = 0;
 
 	ret = parse_ip(s, NULL, port, saddr);
 	free(s);
 	return ret;
 }
 
 int send_tickle_ack(const sock_addr *dst, 
 		    const sock_addr *src, 
 		    uint32_t seq, uint32_t ack, int rst)
 {
 	int s;
 	int ret;
 	uint32_t one = 1;
 	uint16_t tmpport;
 	sock_addr *tmpdest;
 	struct {
 		struct iphdr ip;
 		struct tcphdr tcp;
 	} ip4pkt;
 	struct {
 		struct ip6_hdr ip6;
 		struct tcphdr tcp;
 	} ip6pkt;
 
 	switch (src->ip.sin_family) {
 	case AF_INET:
 		memset(&ip4pkt, 0, sizeof(ip4pkt));
 		ip4pkt.ip.version  = 4;
 		ip4pkt.ip.ihl      = sizeof(ip4pkt.ip)/4;
 		ip4pkt.ip.tot_len  = htons(sizeof(ip4pkt));
 		ip4pkt.ip.ttl      = 255;
 		ip4pkt.ip.protocol = IPPROTO_TCP;
 		ip4pkt.ip.saddr    = src->ip.sin_addr.s_addr;
 		ip4pkt.ip.daddr    = dst->ip.sin_addr.s_addr;
 		ip4pkt.ip.check    = 0;
 
 		ip4pkt.tcp.source  = src->ip.sin_port;
 		ip4pkt.tcp.dest    = dst->ip.sin_port;
 		ip4pkt.tcp.seq     = seq;
 		ip4pkt.tcp.ack_seq = ack;
 		ip4pkt.tcp.ack     = 1;
 		if (rst)
 			ip4pkt.tcp.rst = 1;
 		ip4pkt.tcp.doff    = sizeof(ip4pkt.tcp)/4;
 		ip4pkt.tcp.window   = htons(1234);
 		ip4pkt.tcp.check    = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
 
-		s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW));
+		s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 		if (s == -1) {
 			fprintf(stderr, "Failed to open raw socket (%s)\n", strerror(errno));
 			return -1;
 		}
 
 		ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
 		if (ret != 0) {
 			fprintf(stderr, "Failed to setup IP headers (%s)\n", strerror(errno));
 			close(s);
 			return -1;
 		}
 
 		set_nonblocking(s);
 		set_close_on_exec(s);
 
 		ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0, 
 			     (const struct sockaddr *)&dst->ip, sizeof(dst->ip));
 		close(s);
 		if (ret != sizeof(ip4pkt)) {
 			fprintf(stderr, "Failed sendto (%s)\n", strerror(errno));
 			return -1;
 		}
 		break;
 
         case AF_INET6:
 		memset(&ip6pkt, 0, sizeof(ip6pkt));
 		ip6pkt.ip6.ip6_vfc  = 0x60;
 		ip6pkt.ip6.ip6_plen = htons(20);
 		ip6pkt.ip6.ip6_nxt  = IPPROTO_TCP;
 		ip6pkt.ip6.ip6_hlim = 64;
 		ip6pkt.ip6.ip6_src  = src->ip6.sin6_addr;
 		ip6pkt.ip6.ip6_dst  = dst->ip6.sin6_addr;
 
 		ip6pkt.tcp.source   = src->ip6.sin6_port;
 		ip6pkt.tcp.dest     = dst->ip6.sin6_port;
 		ip6pkt.tcp.seq      = seq;
 		ip6pkt.tcp.ack_seq  = ack;
 		ip6pkt.tcp.ack      = 1;
 		if (rst)
 			ip6pkt.tcp.rst      = 1;
 		ip6pkt.tcp.doff     = sizeof(ip6pkt.tcp)/4;
 		ip6pkt.tcp.window   = htons(1234);
 		ip6pkt.tcp.check    = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
 
 		s = socket(PF_INET6, SOCK_RAW, IPPROTO_RAW);
 		if (s == -1) {
 			fprintf(stderr, "Failed to open sending socket\n");
 			return -1;
                 }
 
 		tmpdest = discard_const(dst);
 		tmpport = tmpdest->ip6.sin6_port;
 
 		tmpdest->ip6.sin6_port = 0;
 		ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0, (const struct sockaddr *)&dst->ip6, sizeof(dst->ip6));
 		tmpdest->ip6.sin6_port = tmpport;
 		close(s);
 
 		if (ret != sizeof(ip6pkt)) {
 			fprintf(stderr, "Failed sendto (%s)\n", strerror(errno));
 			return -1;
 		}
 		break;
 
 	default:
 		fprintf(stderr, "Not an ipv4/v6 address\n");
 		return -1;
 	}
 
 	return 0;
 }
 
 static void usage(void)
 {
 	printf("Usage: /usr/lib/heartbeat/tickle_tcp [ -n num ]\n");
 	printf("Please note that this program need to read the list of\n");
 	printf("{local_ip:port remote_ip:port} from stdin.\n");
 	exit(1);
 }
 
 #define OPTION_STRING "n:h"
 
 int main(int argc, char *argv[])
 {
 	int optchar, i, num = 1, cont = 1;
 	sock_addr src, dst;
 	char addrline[128], addr1[64], addr2[64];
 
 	while(cont) {
 		optchar = getopt(argc, argv, OPTION_STRING);
 		switch(optchar) {
 		case 'n':
 			num = atoi(optarg);
 			break;
 		case 'h':
 			usage();
 			exit(EXIT_SUCCESS);
 			break;
 		case EOF:
 			cont = 0;
 			break;
 		default:
 			fprintf(stderr, "unknown option, please use '-h' for usage.\n");
 			exit(EXIT_FAILURE);
 			break;
 		};
 	}
 
 	while(fgets(addrline, sizeof(addrline), stdin)) {
 		sscanf(addrline, "%s %s", addr1, addr2);
 
 		if (parse_ip_port(addr1, &src)) {
 			fprintf(stderr, "Bad IP:port '%s'\n", addr1);
 			return -1;
 		}
 		if (parse_ip_port(addr2, &dst)) {
 			fprintf(stderr, "Bad IP:port '%s'\n", addr2);
 			return -1;
 		}
 	
 		for (i = 1; i <= num; i++) {
 			if (send_tickle_ack(&dst, &src, 0, 0, 0)) {
 				fprintf(stderr, "Error while sending tickle ack from '%s' to '%s'\n",
 					addr1, addr2);
 				return -1;
 			}
 		}
 
 	}
 	return 0;
 }