diff --git a/heartbeat/db2 b/heartbeat/db2
index 83020fc70..82f2f82c3 100755
--- a/heartbeat/db2
+++ b/heartbeat/db2
@@ -1,1084 +1,1084 @@
 #!/bin/sh
 #
 # db2
 #
 # Resource agent that manages a DB2 LUW database in Standard role 
 # or HADR configuration in promotable configuration.
 # Multi partition is supported as well.
 #
 # Copyright (c) 2011 Holger Teutsch <holger.teutsch@web.de>
 #
 # This agent incorporates code of a previous release created by
 # Alan Robertson and the community.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 
 #######################################################################
 # Initialization:
 
 # Load the OCF shell function library. OCF_FUNCTIONS_DIR is only given a
 # value here when it is not already set.
 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 
 # Use runuser if available for SELinux.
 # SU is the command runasdb2 and runasdb2_session use to switch user.
 if [ -x "/sbin/runuser" ]; then
     SU="runuser"
 else
     SU="su"
 fi
 
 # Parameter defaults
 
 OCF_RESKEY_instance_default=""
 OCF_RESKEY_skip_basic_sql_health_check_default="false"
 OCF_RESKEY_monitor_retries_default="1"
-OCF_RESKEY_monitor_sleep_default="1"
+OCF_RESKEY_monitor_retries_sleep_default="1"
 OCF_RESKEY_monitor_retry_all_errors_default="false"
 OCF_RESKEY_admin_default=""
 OCF_RESKEY_dbpartitionnum_default="0"
 
 # Apply the defaults to every parameter that is unset.
 : ${OCF_RESKEY_instance=${OCF_RESKEY_instance_default}}
 : ${OCF_RESKEY_skip_basic_sql_health_check=${OCF_RESKEY_skip_basic_sql_health_check_default}}
 : ${OCF_RESKEY_monitor_retries=${OCF_RESKEY_monitor_retries_default}}
-: ${OCF_RESKEY_monitor_sleep=${OCF_RESKEY_monitor_sleep_default}}
+: ${OCF_RESKEY_monitor_retries_sleep=${OCF_RESKEY_monitor_retries_sleep_default}}
 : ${OCF_RESKEY_monitor_retry_all_errors=${OCF_RESKEY_monitor_retry_all_errors_default}}
 : ${OCF_RESKEY_admin=${OCF_RESKEY_admin_default}}
 : ${OCF_RESKEY_dbpartitionnum=${OCF_RESKEY_dbpartitionnum_default}}
 
 # Locale that runasdb2_session exports as LC_ALL and LANG.
 POSIX_UNICODE_LOCALE="C.UTF-8"
 #######################################################################
 
 
 db2_usage() {
     # Print the list of supported actions as a single line on stdout.
     printf '%s\n' "db2 start|stop|monitor|promote|demote|validate-all|meta-data"
 }
 
 db2_meta_data() {
 # Print the resource agent meta-data XML on stdout.
 # The here-document delimiter is unquoted, so the ${OCF_RESKEY_*_default}
 # references below are expanded into the default="..." attributes.
 cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="db2" version="1.0">
 <version>1.0</version>
 <longdesc lang="en">
 Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles in promotable configuration. Multiple partitions are supported.
 
 Standard mode:
 
 An instance including all or selected databases is made highly available.
 Configure each partition as a separate primitive resource.
 
 HADR mode:
 
 A single database in HADR configuration is made highly available by automating takeover operations.
 Configure a promotable resource with notifications enabled and an
 additional monitoring operation with role "Promoted".
 
 In case of HADR be very deliberate in specifying intervals/timeouts. The detection of a failure including promote must complete within HADR_PEER_WINDOW.
 
 In addition to honoring requirements for crash recovery etc. for your specific database use the following relations as guidance:
 
 "monitor interval" &lt; HADR_PEER_WINDOW - (appr 30 sec)
 
 "promote timeout" &lt; HADR_PEER_WINDOW + (appr 20 sec)
 
 For further information and examples consult http://www.linux-ha.org/wiki/db2_(resource_agent)
 </longdesc>
 <shortdesc lang="en">Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles as promotable configuration. Multiple partitions are supported.</shortdesc>
 
 <parameters>
 <parameter name="instance" unique="1" required="1">
 <longdesc lang="en">
 The instance of the database(s).
 </longdesc>
 <shortdesc lang="en">instance</shortdesc>
 <content type="string" default="${OCF_RESKEY_instance_default}" />
 </parameter>
 <parameter name="dblist" unique="0" required="0">
 <longdesc lang="en">
 List of databases to be managed, e.g "db1 db2".
 Defaults to all databases in the instance. Specify one db for HADR mode.
 </longdesc>
 <shortdesc lang="en">List of databases to be managed</shortdesc>
 <content type="string"/>
 </parameter>
 <parameter name="skip_basic_sql_health_check" unique="0" required="0">
 <longdesc lang="en">
 Skip basic health check SQL query.
 
 Only set to "true" when the "monitor_retries" and "monitor_retry_all_errors" parameters arent
 enough to avoid issues under high load.
 </longdesc>
 <shortdesc lang="en">Skip basic health check SQL query</shortdesc>
 <content type="boolean" default="${OCF_RESKEY_skip_basic_sql_health_check_default}" />
 </parameter>
 <parameter name="monitor_retries" unique="0" required="0">
 <longdesc lang="en">
 Monitor retries before failing.
 </longdesc>
 <shortdesc lang="en">Monitor retries</shortdesc>
 <content type="string" default="${OCF_RESKEY_monitor_retries_default}" />
 </parameter>
 <parameter name="monitor_retries_sleep" unique="0" required="0">
 <longdesc lang="en">
 Monitor sleep between tries.
 </longdesc>
 <shortdesc lang="en">Monitor sleep</shortdesc>
-<content type="string" default="${OCF_RESKEY_monitor_sleep_default}" />
+<content type="string" default="${OCF_RESKEY_monitor_retries_sleep_default}" />
 </parameter>
 <parameter name="monitor_retry_all_errors" unique="0" required="0">
 <longdesc lang="en">
 Set to true to retry monitor-action for all errors instead of the default "db2pd" race conditions.
 </longdesc>
 <shortdesc lang="en">Retry monitor for all errors</shortdesc>
 <content type="string" default="${OCF_RESKEY_monitor_retry_all_errors_default}" />
 </parameter>
 <parameter name="admin" unique="0" required="0">
 <longdesc lang="en">
 DEPRECATED: The admin user of the instance.
 </longdesc>
 <shortdesc lang="en">DEPRECATED: admin</shortdesc>
 <content type="string" default="${OCF_RESKEY_admin_default}" />
 </parameter>
 <parameter name="dbpartitionnum" unique="0" required="0">
 <longdesc lang="en">
 The number of the partition (DBPARTITIONNUM) to be managed.
 </longdesc>
 <shortdesc lang="en">database partition number (DBPARTITIONNUM)</shortdesc>
 <content type="string" default="${OCF_RESKEY_dbpartitionnum_default}" />
 </parameter>
 </parameters>
 
 <actions>
 <action name="start" timeout="120s"/>
 <action name="stop" timeout="120s"/>
 <action name="promote" timeout="120s"/>
 <action name="demote" timeout="120s"/>
 <action name="monitor" depth="0" timeout="60s" interval="20s"/>
 <action name="monitor" depth="0" timeout="60s" role="Promoted" interval="22s"/>
 <action name="validate-all" timeout="5s"/>
 <action name="meta-data" timeout="5s"/>
 </actions>
 </resource-agent>
 END
 }
 
 #
 # validate
 # .. and set global variables
 #
 # exit on error
 #
 db2_validate() {
     # Validates the resource configuration and sets the globals the other
     # actions rely on: instance, db2node, db2sql, db2profile, db2bin and
     # STATE_FILE, plus dblist for every action except stop.
     #
     # Returns OCF_ERR_CONFIGURED when the instance parameter is empty and
     # OCF_SUCCESS when all checks pass; every other failure exits the agent.
     #
     # db2sql is deliberately NOT declared local: db2_start and db2_stop_bg
     # test for "$db2sql/db2nodes.cfg" after this function has returned, and
     # a local variable would be empty by then.
     local db2home db2instance
 
     # db2 uses korn shell
     check_binary "ksh"
 
     # check required instance vars
     if [ -z "$OCF_RESKEY_instance" ]
     then
         ocf_log err "DB2 required parameter instance is not set!"
         return $OCF_ERR_CONFIGURED
     fi
 
     instance=$OCF_RESKEY_instance
     # the deprecated admin parameter, when set, overrides instance
     if [ -n "$OCF_RESKEY_admin" ]
     then
         ocf_log warn "DB2 deprecated parameter admin is set, using $OCF_RESKEY_admin as instance."
         instance=$OCF_RESKEY_admin
     fi
 
     db2node=${OCF_RESKEY_dbpartitionnum:-0}
 
     # let a shell expand ~user to find the home directory of the instance user
     db2home=$(sh -c "echo ~$instance")
     db2sql=$db2home/sqllib
     db2profile=$db2sql/db2profile
     db2bin=$db2sql/bin
 
     STATE_FILE=${HA_RSCTMP}/db2-${OCF_RESOURCE_INSTANCE}.state
 
     #	Let's make sure a few important things are there...
     if ! [ -d "$db2sql" -a  -d "$db2bin" -a -f "$db2profile" -a \
            -x "$db2profile" -a -x "$db2bin/db2" ]
     then
         # a probe reports "not running" instead of an installation error
         ocf_is_probe && exit $OCF_NOT_RUNNING
         ocf_log err "DB2 required directories and/or files not found"
         exit $OCF_ERR_INSTALLED
     fi
 
     # DB2INSTANCE as seen after sourcing db2profile must match the parameter
     db2instance=$(runasdb2 'echo $DB2INSTANCE')
     if [ "$db2instance" != "$instance" ]
     then
         ocf_is_probe && exit $OCF_NOT_RUNNING
         ocf_log err "DB2 parameter instance \"$instance\" != DB2INSTANCE \"$db2instance\""
         exit $OCF_ERR_CONFIGURED
     fi
 
     # enough checking for stop to succeed
     [ "$__OCF_ACTION" = stop ] && return $OCF_SUCCESS
 
     dblist=$OCF_RESKEY_dblist
     if [ -n "$dblist" ]
     then
         # support , as separator as well
         dblist=$(echo "$dblist" | sed -e 's/[,]/ /g')
     else
         # no explicit list: manage every database in the directory
         if ! dblist=$(db2_dblist)
         then
             ocf_log err "DB2 $instance($db2node): cannot retrieve db directory"
             exit $OCF_ERR_INSTALLED
         fi
     fi
 
     # check requirements for the HADR case
     if ocf_is_ms
     then
         # count the names in dblist through the positional parameters
         set -- $dblist
         if [ $# != 1 ]
         then
             ocf_log err "DB2 resource $OCF_RESOURCE_INSTANCE must have exactly one name in dblist"
             exit $OCF_ERR_CONFIGURED
         fi
 
         if [ $db2node != 0 ]
         then
             ocf_log err "DB2 resource $OCF_RESOURCE_INSTANCE must have dbpartitionnum=0"
             exit $OCF_ERR_CONFIGURED
         fi
     fi
 
     return $OCF_SUCCESS
 }
 
 master_score()
 {
     # Thin wrapper around crm_master: forwards all arguments unchanged.
     # Does nothing and returns 0 when crm_master is not available.
     if ! have_binary "crm_master"; then
         return 0
     fi
 
     # "$@" keeps each argument intact (no word splitting or globbing)
     crm_master "$@"
 }
 
 #
 # Run the given command as db2 instance user
 #
 runasdb2() {
     # Source db2profile, then run all arguments joined into one command
     # line, as user $instance via $SU.
     local shell_cmd
 
     shell_cmd=". $db2profile; $*"
     $SU $instance -c "$shell_cmd"
 }
 
 #
 # Run the given command as db2 instance user using $SU
 # We run this function as opposed to runasdb2 whenever we have to issue commands
 # that leave processes running on the system, such as db2start
 # We do not want these processes to hog the resources as they were run with elevated privileges
 #
 runasdb2_session() {
    # Runs the given command as user $instance inside "ksh -c", after sourcing
    # db2profile and exporting LC_ALL/LANG as $POSIX_UNICODE_LOCALE.
    #
    # Override db2profile with unicode locale is required to maintain compatibility with unicode CODEPAGE
    #
    # The inner double quotes are escaped so that they reach ksh as part of
    # the command string; unescaped they would close the outer string and
    # leave the locale expansion unquoted in this shell.
    $SU "$instance" -c "ksh -c '. $db2profile; export LC_ALL=\"$POSIX_UNICODE_LOCALE\"; export LANG=\"$POSIX_UNICODE_LOCALE\"; $*'"
 }
 
 #
 # Run a command as the DB2 admin, and log the output
 #
 logasdb2() {
     # Run the arguments through runasdb2 and log the captured output:
     # level "info" on success, level "err" on failure.
     # Returns the exit status of runasdb2.
     local captured result
 
     if captured=$(runasdb2 $*)
     then
         result=0
         ocf_log info "$captured"
     else
         # still the status of the failed command substitution
         result=$?
         ocf_log err "$captured"
     fi
 
     return $result
 }
 
 
 #
 # unfortunately a first connect after a crash may need several minutes
 # for some internal cleanup stuff in DB2.
 # We run a connect in background so other connects (i.e. monitoring!) may proceed.
 #
 db2_run_connect() {
     # Connect to database $1 and terminate again; output goes to the log
     # through logasdb2.
     local target_db="$1"
 
     logasdb2 "db2 connect to ${target_db}; db2 terminate"
 }
 
 #
 # get some data from the database config
 # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW
 #
 db2_get_cfg() {
     # Reads "db2 get db cfg" for database $1 and stores the last field of
     # the matching lines in the globals HADR_ROLE, HADR_TIMEOUT,
     # FIRST_ACTIVE_LOG and HADR_PEER_WINDOW.
     # Returns OCF_ERR_GENERIC when the command fails or when HADR_ROLE or
     # HADR_TIMEOUT comes back empty, OCF_SUCCESS otherwise.
     local db=$1
     local cfg summary
 
     cfg=$(runasdb2 db2 get db cfg for $db) || return $OCF_ERR_GENERIC
 
     # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW 
     HADR_PEER_WINDOW=$(echo "$cfg" | awk '/HADR_PEER_WINDOW/ {print $NF;}')
     FIRST_ACTIVE_LOG=$(echo "$cfg" | awk '/First active log file/ {print $NF;}')
     HADR_TIMEOUT=$(echo "$cfg" | awk '/HADR_TIMEOUT/ {print $NF;}')
     HADR_ROLE=$(echo "$cfg" | awk '/HADR database role/ {print $NF;}')
 
     # HADR_PEER_WINDOW comes with V9 and is checked later
     if [ -n "$HADR_ROLE" ] && [ -n "$HADR_TIMEOUT" ]
     then
         return $OCF_SUCCESS
     fi
 
     # the one-line summary is only needed for the error message
     summary=$(echo "$cfg" |
         awk '/HADR database role/ {printf "HADR_ROLE='%s'; ", $NF;}
             /HADR_TIMEOUT/ {printf "HADR_TIMEOUT='%s'; ", $NF;}
             /First active log file/ {printf "FIRST_ACTIVE_LOG='%s'\n", $NF;}
             /HADR_PEER_WINDOW/ {printf "HADR_PEER_WINDOW='%s'\n", $NF;}')
 
     ocf_log error "DB2 cfg values invalid for $instance($db2node)/$db: $summary"
     return $OCF_ERR_GENERIC
 }
 
 #
 # return the list of databases in the instance
 #
 db2_dblist() {
     # Print the database names found in "db2 list database directory",
     # one per line. Returns OCF_ERR_GENERIC when the listing fails.
     local listing
 
     if ! listing=$(runasdb2 db2 list database directory)
     then
         return $OCF_ERR_GENERIC
     fi
 
     # keep the "Database name = X" lines and strip everything up to "= "
     echo "$listing" | grep -i 'Database name.*=' | sed 's%.*= *%%'
 }
 
 #
 # Delayed check of the compatibility of DB2 instance and pacemaker
 # config.
 # Logically this belongs to validate but certain parameters can only
 # be retrieved once the instance is started.
 #
 db2_check_config_compatibility() {
     # Cross-checks HADR_ROLE against the kind of resource reported by
     # ocf_is_ms for database $1. Exits with OCF_ERR_INSTALLED when a
     # promotable resource manages a STANDARD database or when
     # HADR_PEER_WINDOW is empty; only logs an error when a HADR database is
     # managed by a resource that is not promotable.
     local db=$1
     local ms_rc
 
     ocf_is_ms
     ms_rc=$?
 
     case "$ms_rc" in
         0)
         # promotable resource
         if [ "$HADR_ROLE" = "STANDARD" ]
         then
             ocf_log err "DB2 database $instance/$db is not in a HADR configuration but I am a M/S resource"
             exit $OCF_ERR_INSTALLED
         fi
 
         if [ -z "$HADR_PEER_WINDOW" ]
         then
             ocf_log err "DB2 database $instance: release to old, need HADR_PEER_WINDOW (>=V9)"
             exit $OCF_ERR_INSTALLED
         fi
         ;;
 
         1)
         # primitive resource: fine for STANDARD, an error for any HADR role
         if [ "$HADR_ROLE" != "STANDARD" ]
         then
             ocf_log err "DB2 database $instance/$db is in a HADR configuration but I must be a M/S resource"
         fi
         ;;
     esac
 
 }
 
 #
 # Start HADR as standby.
 #
 # Parameters
 #     1 - Database name
 #     2 - Calling function
 #     3 - Calling functions line number
 #
 # Globals read: inst1, inst2, local_host, instance, db2node
 #
 # Return codes:
 #     0 - Start as standby successful (or HADR already started, SQL1777N)
 #     1 - Start as standby failed
 #
 reintegrateAsStandby() {
    # All working variables are local so that the caller's own db, output
    # and rc are not overwritten.
    local db=$1
    local reint_attr output rc
 
    reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
    ocf_log info "$__OCF_ACTION: $LINENO: reintegrateAsStandby called by $2 at $3. Attempting to reintegrate $db as standby."
    if output=$(runasdb2_session "db2 start hadr on db $db as standby"); then
       rc=0
       ocf_log info "$__OCF_ACTION: $LINENO: Db2 database $instance($db2node)/$db started/activated"
    else
       case $output in
       SQL1777N*)
          # SQL1777N: HADR is already started in given state.
          ocf_log info "$__OCF_ACTION: $LINENO: $output"
          rc=0
          ;;
 
       *)
          rc=1
          ocf_log err "$__OCF_ACTION: $LINENO: Unable to reintegrate Db2 database $instance($db2node)/$db. Please reintegrate manually: $output, return with rc=$rc"
          ;;
       esac
    fi
    # The reintegration attribute is reset to 0 whether or not the start
    # succeeded.
    crm_attribute -n "$reint_attr" -N "$local_host" -v "0" -l forever
    return $rc
 }
 
 #
 # Start instance and DB.
 # Standard mode is through "db2 activate" in order to start in previous
 # mode (Standby/Primary).
 # If the database is configured as Primary AND the CIB attribute
 # db2hadr-<inst1>_<inst2>_<db>_reint is "1" for the local host, the
 # database is treated as outdated and is started as Standby instead.
 #
 # Other cases: danger of split brain, log error and do nothing.
 #
 # Returns OCF_SUCCESS, OCF_ERR_GENERIC or the status of db2_get_cfg.
 #
 db2_start() {
     local output start_cmd db
     local start_opts="dbpartitionnum $db2node"
 
     # If we detect that db partitions are not in use, and no
     # partition is explicitly specified, activate without
     # partition information. This allows db2 instances without
     # partition support to be managed. 
     if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -e "$db2sql/db2nodes.cfg" ]; then
         start_opts=""
     fi
 
     # start the instance; "already running" (SQL1026N) is not an error
     if output=$(runasdb2 db2start $start_opts)
     then
         ocf_log info "DB2 instance $instance($db2node) started: $output"
     else
         case $output in
             *SQL1026N*)
             ocf_log info "DB2 instance $instance($db2node) already running: $output"
             ;;
 
             *)
             ocf_log err "$output"
             return $OCF_ERR_GENERIC
         esac
     fi
 
     if ! db2_instance_status
     then
         ocf_log err "DB2 instance $instance($db2node) is not active!"
         return $OCF_ERR_GENERIC
     fi
 
     [ $db2node = 0 ] || return $OCF_SUCCESS
     # activate DB only on node 0
 
     for db in $dblist
     do
         # name of the per-database reintegration attribute in the CIB
         reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
 
         # sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW FIRST_ACTIVE_LOG
         db2_get_cfg $db || return $?
 
         # Better late than never: can only check this when the instance is already up
         db2_check_config_compatibility $db
 
         start_cmd="db2 activate db $db"
 
         if [ $HADR_ROLE = PRIMARY ]
         then
             # a reintegration attribute of "1" marks this primary as outdated
             cib_value=$(crm_attribute -n "$reint_attr" -N "$local_host" -G | awk -v FS=' value=' '{print $2}')
             ocf_log info "$__OCF_ACTION: $LINENO: CIB attribute $reint_attr is set to '$cib_value'"
             if [ "$cib_value" = "1" ]; then
                 ocf_log info "DB2 database $instance($db2node)/$db is Primary and outdated, starting as secondary"
                 start_cmd="db2 start hadr on db $db as standby"
                 HADR_ROLE=STANDBY
                 # remembered so the attribute is reset after the loop
                 standby_reintegration=1
             fi
         fi
 
         if output=$(runasdb2 $start_cmd)
         then
             ocf_log info "DB2 database $instance($db2node)/$db started/activated"
             # the trailing & puts the whole AND-list into the background,
             # so neither the test nor the connect delays the start action
             [ $HADR_ROLE != STANDBY ] && db2_run_connect $db &
         else
             case $output in
             SQL1490W* | SQL1494W* | SQL1497W* | SQL1777N*)
                 # SQL1490W  Activate database is successful, however, the database has already been activated on one or more nodes.
                 # SQL1494W  Activate database is successful, however, there is already a connection to the database.
                 # SQL1497W  Activate/Deactivate database was successful, however, an error occurred on some nodes.
                 # SQL1777N  HADR is already started.
 
                 ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database is already activated: $output"
                 ;;
 
             SQL1768N*"Reason code = \"7\""*)
                 rc="$OCF_ERR_GENERIC"
 
                 ocf_log err "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database is a Primary and the Standby is down"
                 ocf_log err "Possible split brain! Manual intervention required."
                 ocf_log err "If this DB is outdated use \"db2 start hadr on db $db as standby\""
                 ocf_log err "If this DB is the surviving primary use \"db2 start hadr on db $db as primary by force\".  db2_start() exit with rc=$rc."
 
                 # let pacemaker give it another try and we will succeed then
                 return "$rc"
                 ;;
 
             SQL1776N*"Reason code = \"6\""*)
                 # SQL1776N  The command cannot be issued on an HADR database.
                 # Reason code 6:
                 #  This database is an old primary database. It cannot be started
                 #  because the standby has become the new primary through forced
                 #  takeover.
 
                 rc="$OCF_ERR_GENERIC"
                 ocf_log err "$__OCF_ACTION: $LINENO: Db2 database $instance($db2node)/$db didn't start: $output, return with rc=$rc"
                 ocf_log err "$__OCF_ACTION: $LINENO: This database is an old primary database. Trying start again as standby"
 
                 # second attempt: start as standby; whatever the outcome,
                 # the function returns from inside this branch
                 start_cmd="db2 start hadr on db $db as standby"
                 if output=$(runasdb2_session "$start_cmd"); then
                     rc="$OCF_SUCCESS"
                     ocf_log info "$__OCF_ACTION: $LINENO: Db2 database $instance($db2node)/$db started/activated"
                 else
                     case $output in
                     SQL1777N*)
                         # SQL1777N: HADR is already started.
                         ocf_log info "$__OCF_ACTION: $LINENO: $output"
                         rc="$OCF_SUCCESS"
                         ;;
 
                     *)
                         rc="$OCF_ERR_GENERIC"
                         ocf_log err "$__OCF_ACTION: $LINENO: Unable to reintegrate Db2 database $instance($db2node)/$db. Please reintegrate manually: $output, return with rc=$rc"
                         ;;
                     esac
                 fi
 
                 return "$rc"
                 ;;
 
             *)
                 rc="$OCF_ERR_GENERIC"
                 ocf_log err "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database didn't start: $output, db2_start() exit with rc=$rc."
                 return "$rc"
                 ;;
             esac
         fi
     done
 
     # come here with success
     # Even if we are a db2 Primary pacemaker requires start to end up in slave mode
     echo SLAVE > $STATE_FILE
 
     # Unset primary failover attribute as host was successfully reintegrated as standby
     if [ "$standby_reintegration" = "1" ]; then
         for db in $dblist; do
             reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
             crm_attribute -n "$reint_attr" -N "$local_host" -v "0" -l forever
         done
     fi
 
     return $OCF_SUCCESS
 }
 
 #
 # helper function to be spawned
 # so we can detect a hang of the db2stop command
 #
 db2_stop_bg() {
     # Runs "db2stop force" for the instance and logs the outcome.
     # Returns OCF_SUCCESS when the stop worked or the instance was never
     # started (SQL1032N), OCF_ERR_GENERIC for any other failure.
     local result=$OCF_SUCCESS
     local msg
     local partition_args="dbpartitionnum $db2node"
 
     # no partition given and no db2nodes.cfg: stop without partition options
     if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -e "$db2sql/db2nodes.cfg" ]; then
         partition_args=""
     fi
 
     if msg=$(runasdb2 db2stop force $partition_args)
     then
         ocf_log info "DB2 instance $instance($db2node) stopped: $msg"
         return $result
     fi
 
     case $msg in
         *SQL1032N*)
         #SQL1032N  No start database manager command was issued
         ocf_log info "$msg"
         ;;
 
         *)
         ocf_log err "DB2 instance $instance($db2node) stop failed: $msg"
         result=$OCF_ERR_GENERIC
         ;;
     esac
 
     return $result
 }
 
 #
 # Stop the given db2 database instance
 #
 db2_stop() {
     # Stops the instance: removes the master score and the state file, runs
     # db2_stop_bg in the background for a limited time, and falls back to
     # db2nkill/db2_kill when that fails or processes remain.
     # Always returns OCF_SUCCESS; the final wait loop only ends once
     # db2_instance_dead reports that no processes remain.
     # NOTE(review): stoprc is not in the local list, so it is set globally.
     local stop_timeout grace_timeout stop_bg_pid i must_kill
 
     # remove master score
     master_score -D -l reboot
 
     # be very early here in order to avoid stale data
     rm -f $STATE_FILE
 
     db2_instance_status
     if [ $? -eq $OCF_NOT_RUNNING ]; then
         ocf_log info "DB2 instance $instance already stopped"
         return $OCF_SUCCESS
     fi
 
     # action timeout in ms; 20000 when the meta attribute is unset or empty
     stop_timeout=${OCF_RESKEY_CRM_meta_timeout:-20000}
 
     # grace_time is 4/5 (unit is ms)
     # dividing the ms value by 1250 yields 4/5 of the timeout in seconds,
     # which matches the one-second steps of the wait loop below
     grace_timeout=$((stop_timeout/1250))
 
     # start db2stop in background as this may hang
     db2_stop_bg &
     stop_bg_pid=$!
 
     # wait for grace_timeout
     i=0
     while [ $i -lt $grace_timeout ]
     do
         # kill -0 only tests whether the background job still exists
         kill -0 $stop_bg_pid 2>/dev/null || break;
         sleep 1
         i=$((i+1))
     done
 
     # collect exit status but don't hang
     if kill -0 $stop_bg_pid 2>/dev/null
     then
         # still running after the grace period: treat as failed and kill it
         stoprc=1
         kill -9 $stop_bg_pid 2>/dev/null
     else
         wait $stop_bg_pid
         stoprc=$?
     fi
 
     must_kill=0
 
     if [ $stoprc -ne 0 ]
     then
         ocf_log warn "DB2 instance $instance($db2node): db2stop failed, using db2nkill"
         must_kill=1
     elif ! db2_instance_dead
     then
         ocf_log warn "DB2 instance $instance($db2node): db2stop indicated success but there a still processes, using db2nkill"
         must_kill=1
     fi
 
     if [ $must_kill -eq 1 ]
     then
         # db2nkill kills *all* partitions on the node
         if [ -x $db2bin/db2nkill ]
         then
             logasdb2 $db2bin/db2nkill $db2node
         elif [ -x $db2bin/db2_kill ]
         then
             logasdb2 $db2bin/db2_kill
         fi
 
         # loop forever (or lrmd kills us due to timeout) until the
         # instance is dead
         while ! db2_instance_dead
         do
             ocf_log info "DB2 instance $instance($db2node): waiting for processes to exit"
             sleep 1
         done
 
         ocf_log info "DB2 instance $instance($db2node) is now dead"
     fi
 
     return $OCF_SUCCESS
 }
 
 #
 # check whether `enough´ processes for a healthy instance are up
 # 
 db2_instance_status() {
     # Counts the db2* processes listed by db2nps for partition $db2node.
     # Returns OCF_SUCCESS for four or more, OCF_ERR_GENERIC for one to
     # three and OCF_NOT_RUNNING for none.
     local nprocs
 
     nprocs=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- | grep -c ' db2[^ ]')
 
     if [ $nprocs -lt 1 ]; then
         return $OCF_NOT_RUNNING
     fi
     if [ $nprocs -lt 4 ]; then
         return $OCF_ERR_GENERIC
     fi
     return $OCF_SUCCESS
 }
 
 #
 # is the given db2 instance dead?
 # 
 db2_instance_dead() {
     # Succeeds when db2nps lists no db2* process for partition $db2node.
     local remaining
 
     remaining=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- | grep -c ' db2[^ ]')
     [ $remaining -eq 0 ]
 }
 
 #
 # return the status of the db as "Role/Status"
 # e.g. Primary/Peer, Standby/RemoteCatchupPending
 #
 # If not in HADR configuration return "Standard/Standalone"
 #
 db2_hadr_status() {
     # Prints the HADR status of database $1 as parsed from "db2pd -hadr".
     #
     # Returns 1 (after printing "Down/Off") when db2pd itself fails,
     # otherwise the exit status of the awk parser, which is 255 for the
     # three transient db2pd messages listed at the end of the script.
     local db=$1
     local output rc
 
     output=$(runasdb2 db2pd -hadr -db $db)
     # Save the status of db2pd right away: the ocf_log call below would
     # otherwise replace $? with its own exit status.
     rc=$?
     ocf_log debug "db2_hadr_status: $output"
     if [ $rc != 0 ]
     then
         echo "Down/Off"
         return 1 
     fi
 
     # Lines 1-2: HADR_ROLE/HADR_STATE/HADR_CONNECT_STATUS layout, printed as
     #            ROLE/STATE/CONNECT_STATUS.
     # Line 3:    HADR not active.
     # Line 4:    "Role State" table layout, values taken from the next line.
     # Lines 5-7: known transient db2pd messages, reported as exit code 255.
     echo "$output" |
     awk '/^\s+HADR_(ROLE|STATE) =/ {printf $3"/"}
          /^\s+HADR_CONNECT_STATUS =/ {print $3; exit; }
          /^HADR is not active/ {print "Standard/Standalone"; exit; }
          /^Role *State */ {getline; printf "%s/%s\n", $1, $2; exit; }
          /^Option -hadr requires -db <database> or -alldbs option and active database./ { exit 255 }
          /^Another possibility of this failure is the Virtual Address Space Randomization is currently enabled on this system./ { exit 255 }
          /^Changing data structure forced command termination./ { exit 255 }'
 }
 
 db2_monitor_retry() {
     # Runs db2_monitor up to OCF_RESKEY_monitor_retries + 1 times and
     # returns its last result, with 255 mapped to OCF_ERR_GENERIC.
     # NOTE(review): rc and try are not declared local.
     local tries=$(($OCF_RESKEY_monitor_retries + 1))
 
     for try in $(seq $tries); do
         ocf_log debug "monitor try $try of $tries"
         db2_monitor
         rc=$?
         # warn about every result that is not success, master or not running
         [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_RUNNING_MASTER ] && [ $rc -ne $OCF_NOT_RUNNING ] && ocf_log warn "Monitor failed with rc $rc."
         # Stop retrying on a definite result. Other errors are only retried
         # when rc is 255 or monitor_retry_all_errors is true.
         if [ $rc -eq $OCF_SUCCESS ] || [ $rc -eq $OCF_RUNNING_MASTER ] || [ $rc -eq $OCF_NOT_RUNNING ] || { [ $rc -ne 255 ] && ! ocf_is_true "$OCF_RESKEY_monitor_retry_all_errors" ;} ;then
             break
         fi
         # no sleep after the final attempt
-        [ $try -lt $tries ] && sleep $OCF_RESKEY_monitor_sleep
+        [ $try -lt $tries ] && sleep $OCF_RESKEY_monitor_retries_sleep
     done
 
     # 255 is only an internal retry marker; report a generic error instead
     [ $rc -eq 255 ] && rc=$OCF_ERR_GENERIC
 
     if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_RUNNING_MASTER ]; then
         # instance is dead remove master score
         master_score -D -l reboot
     fi
 
     return $rc
 }
 
 #
 # Monitor the db
 # And as side effect set crm_master
 #
 db2_monitor() {
     # Checks the instance and, on partition 0, every database in dblist,
     # adjusting the master score according to the HADR status.
     # Returns OCF_RUNNING_MASTER when the state file contains MASTER,
     # OCF_SUCCESS otherwise, or an error code from one of the checks
     # (255 is passed through only when the action is "monitor").
     local CMD output hadr db
     local rc
 
     db2_instance_status
     rc=$?
     if [ $rc -ne $OCF_SUCCESS ]; then
         return $rc
     fi
 
     [ $db2node = 0 ] || return 0
     # monitoring only for partition 0
 
     for db in $dblist
     do
         reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
 
         #Check for the reintegration file, then set the flag if it exists and delete the file
         # NOTE(review): the flag file lives under a fixed name in /tmp;
         # confirm who creates it and that the location is trusted.
         if [ -e "/tmp/$reint_attr" ] && [ -n "$remote_host" ]; then
             #The file exist, try to set the reintegration attribute
             crm_attribute -n "$reint_attr" -N "$remote_host" -v "1" -l forever
             # read the attribute back to verify that it was stored
             cib_value=$(crm_attribute -n "$reint_attr" -N "$remote_host" -G | awk -v FS=' value=' '{print $2}')
 
             if [ "$cib_value" = "1" ]; then
                 ocf_log info "$__OCF_ACTION: $LINENO: CIB attribute $reint_attr is set to '$cib_value', reintegration flag file will now be deleted."
                 rm -f "/tmp/$reint_attr"
             else
                 ocf_log err "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The reintegration flag file exists, but its attribute failed to set."
             fi
         fi
 
         hadr=$(db2_hadr_status $db)
         rc=$?
         ocf_log debug "Monitor: DB2 database $instance($db2node)/$db has HADR status $hadr"
         # 255 is handed to the caller only for the monitor action
         if [ "$rc" -eq 255 ]; then
             if [ "$__OCF_ACTION" = "monitor" ]; then
                 return $rc
             else
                 return $OCF_ERR_GENERIC
             fi
         elif [ "$rc" -ne 0 ]; then
             return $OCF_ERR_GENERIC
         fi
 
         # set master preference accordingly
         case "$hadr" in
             PRIMARY/*|Primary/*|Standard/*)
             if ! ocf_is_true "$OCF_RESKEY_skip_basic_sql_health_check"; then
                 # perform  a basic health check
                 # the script text is passed to runasdb2 and run as the
                 # instance user; its exit status is that of the connect or
                 # the select
                 CMD="if db2 connect to $db;
                 then
                     db2 select \* from sysibm.sysversions ; rc=\$?;
                     db2 terminate;
                 else
                     rc=\$?;
                 fi;
                 exit \$rc"
 
                 if ! output=$(runasdb2 $CMD)
                 then
                     case "$output" in
                         SQL1776N*)
                         # can't connect/select on standby, may be spurious during takeover
                         ;;
 
                         *)
                         ocf_log err "DB2 database $instance($db2node)/$db is not working"
                         ocf_log err "DB2 message: $output"
 
                         # dead primary, remove master score
                         master_score -D -l reboot
                         return $OCF_ERR_GENERIC
                     esac
                 fi
             fi
 
             ocf_log debug "DB2 database $instance($db2node)/$db appears to be working"
             ocf_is_ms && master_score -v 10000 -l reboot
             ;;
 
             STANDBY/*PEER/*|Standby/*Peer)
             # If db is in standby peer, then it has already reintegrated.
             # If the reintegrate flag is still set, remove it
             cib_value=$(crm_attribute -n "$reint_attr" -N "$local_host" -G | awk -v FS=' value=' '{print $2}')
             if [ "$cib_value" = "1" ]; then
                ocf_log info "$__OCF_ACTION: $LINENO: Reintegrate flag detected for $db, but it has already reintegrated as standby. Removing reintegration flag."
                crm_attribute -n "$reint_attr" -N "$local_host" -v "0" -l forever
             fi
 
             master_score -v 8000 -l reboot
             ;;
 
             STANDBY/*|Standby/*)
             ocf_log warn "DB2 database $instance($db2node)/$db in status $hadr can never be promoted"
             master_score -D -l reboot
             ;;
 
             Down/Off)
             # NOTE(review): db2_hadr_status prints "Down/Off" only together
             # with a non-zero return, which the rc check above already turns
             # into OCF_ERR_GENERIC - confirm that this branch is reachable.
             # If db is a deactivated primary and it has a reintegration flag, then reintegrate as standby.
             cib_value=$(crm_attribute -n "$reint_attr" -N "$local_host" -G | awk -v FS=' value=' '{print $2}')
             if [ "$cib_value" = "1" ]; then
                 output=$(runasdb2 "db2 get db cfg for $db" | grep 'HADR database role' | awk '{print $5}')
                 if [ "PRIMARY" = "$output" ]; then
                    ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: Database is deactivated with Primary role and the reintegration flag is set. Role: $output, Reintegration flag: $reint_attr = $cib_value"
                    # Reintegrate as the standby database.
                    if reintegrateAsStandby "$db" 'db2_monitor' $LINENO; then
                       ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database reintegration succeeded."
                       # Setting slave state here will cause rc to be OCF_SUCCESS below.
                       ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: Echoing SLAVE into $STATE_FILE"
                       echo SLAVE >"$STATE_FILE"
                       # Update master score to reflect standby state.
                       master_score -v 8000 -l reboot
                    else
                       ocf_log err "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database reintegration failed."
                       return "$OCF_ERR_GENERIC"
                    fi
                 fi
             else
                 rc="$OCF_NOT_RUNNING"
                 ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database has HADR status $hadr."
                 ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: db2_monitor() exit with rc=$rc."
                 return "$rc"
             fi
             ;;
 
             *)
             # any other status string is treated as an error
             return $OCF_ERR_GENERIC
         esac
     done
 
     # everything OK, return if running as slave
     grep MASTER $STATE_FILE >/dev/null 2>&1 || return $OCF_SUCCESS
 
     return $OCF_RUNNING_MASTER
 }
 
 #
 # Promote db to Primary
 #
 # Promotes the single database named by $dblist.  At most two attempts are
 # made; each attempt re-reads the HADR status and picks the matching form
 # of "db2 takeover hadr".
 #
 # Globals:  dblist, instance, db2node (read)
 #           STATE_FILE (MASTER is written on success)
 # Returns:  $OCF_SUCCESS when the database is Standard/Standalone, is
 #           already primary, or the takeover succeeded;
 #           $OCF_ERR_GENERIC otherwise.
 #
 db2_promote() {
     # validate ensured that dblist contains only one entry
     local db=$dblist
     local i hadr output force
 
     # we run this twice as after a crash of the other node
     # within HADR_TIMEOUT the status may be still reported as Peer
     # although a connection no longer exists
 
     for i in 1 2
     do
         # Every attempt starts without a force clause.  "local" alone does
         # not guarantee an empty value in every /bin/sh (dash keeps a value
         # inherited from the surrounding scope), and a clause selected on
         # the first attempt must not leak into the second one when the
         # status has changed in between.
         force=""
 
         hadr=$(db2_hadr_status "$db") || return $OCF_ERR_GENERIC
         ocf_log info "DB2 database $instance($db2node)/$db has HADR status $hadr and will be promoted"
 
         case "$hadr" in
             Standard/Standalone)
             # this case only to keep ocf-tester happy
             return $OCF_SUCCESS
             ;;
 
             PRIMARY/PEER/*|PRIMARY/REMOTE_CATCHUP/*|PRIMARY/REMOTE_CATCHUP_PENDING/CONNECTED|Primary/Peer)
             # nothing to do, only update pacemaker's view
             echo MASTER > "$STATE_FILE"
             return $OCF_SUCCESS
             ;;
 
             STANDBY/PEER/CONNECTED|Standby/Peer)
             # must take over
             ;;
 
             STANDBY/*PEER/DISCONNECTED|Standby/DisconnectedPeer)
             # must take over by force peer window only
             force="by force peer window only"
             ;;
 
             # must take over by force
             STANDBY/REMOTE_CATCHUP_PENDING/DISCONNECTED)
             force="by force"
             ;;
 
             *)
             ocf_log err "DB2 database $instance($db2node)/$db in HADR status $hadr cannot be promoted"
             return $OCF_ERR_GENERIC
             ;;
         esac
 
         # $force is deliberately left unquoted: an empty clause must not
         # add an (empty) argument to the takeover command.
         if output=$(runasdb2 db2 takeover hadr on db $db $force)
         then
             # update pacemaker's view
             echo MASTER > "$STATE_FILE"
 
             return $OCF_SUCCESS
         fi
 
         case "$output" in
             SQL1770N*"Reason code = \"7\""*)
             # expected, HADR_TIMEOUT is now expired
             # go for the second try
             continue
             ;;
 
             *)
             ocf_log err "DB2 database $instance($db2node)/$db promote failed: $output"
             return $OCF_ERR_GENERIC
             ;;
         esac
     done
 
     # both attempts ended with SQL1770N reason code 7
     ocf_log err "DB2 database $instance($db2node)/$db promote failed after 2 attempts: $output"
     return $OCF_ERR_GENERIC
 }
 
 #
 # Demote db to standby
 #
 # Marks the resource as SLAVE in $STATE_FILE, logs the current HADR status
 # and then hands over to db2_monitor, whose return code becomes the result
 # of the demote.
 #
 # Globals:  dblist, instance, db2node (read); STATE_FILE (written)
 # Returns:  $OCF_ERR_GENERIC if the HADR status cannot be read, otherwise
 #           the return code of db2_monitor.
 #
 db2_demote() {
     # validate ensured that dblist contains only one entry
     local db=$dblist
     local hadr
 
     # house keeping: pacemaker's view becomes slave before anything else
     echo SLAVE > $STATE_FILE
 
     if ! hadr=$(db2_hadr_status $dblist)
     then
         return $OCF_ERR_GENERIC
     fi
     ocf_log info "DB2 database $instance($db2node)/$db has HADR status $hadr and will be demoted"
 
     # last command of the function: its status is what the caller sees
     db2_monitor
 }
 
 ########
 # Main #
 ########
 # meta-data and usage are answered right away, before any cluster query
 # or parameter validation is performed.
 case "$__OCF_ACTION" in
     meta-data) db2_meta_data; exit $OCF_SUCCESS ;;
     usage)     db2_usage;     exit $OCF_SUCCESS ;;
 esac
 
 # Globals describing this node and the configured instance name(s):
 # inst1/inst2 are the first and second comma-separated fields of
 # OCF_RESKEY_instance.
 local_host=$(ocf_local_nodename)
 inst1=$(echo "$OCF_RESKEY_instance" | cut -d"," -f1)
 inst2=$(echo "$OCF_RESKEY_instance" | cut -d"," -f2)
 
 # host1 is the second column of the first line of the sorted
 # "crm_node -l" listing.
 host1=$(crm_node -l | sort | awk 'NR == 1 {print $2}')
 
 # remote_host is host1 unless host1 is this node; in that case the second
 # line of the same sorted listing is used instead.
 remote_host="$host1"
 if [ "$local_host" = "$host1" ]; then
    remote_host=$(crm_node -l | sort | awk 'NR == 2 {print $2}')
 fi
 
 # Validation always runs; its result is only reported by validate-all.
 db2_validate
 validate_rc=$?
 
 # Dispatch the requested action.  Arms that do not exit themselves leave
 # their status in $? for the final exit below.
 case "$__OCF_ACTION" in
     start)
         db2_start || exit $?
         db2_monitor
         ;;
     stop)         db2_stop ;;
     promote)      db2_promote ;;
     demote)       db2_demote ;;
     monitor)      db2_monitor_retry ;;
     validate-all) exit $validate_rc ;;
     notify)
         ocf_log debug "notify-action has been DEPRECATED, and should be removed"
         ;;
     *)
         db2_usage
         exit $OCF_ERR_UNIMPLEMENTED
         ;;
 esac
 
 exit $?