diff --git a/cts/CM_lha.py b/cts/CM_lha.py
index 376aa4058a..410c8c7147 100755
--- a/cts/CM_lha.py
+++ b/cts/CM_lha.py
@@ -1,601 +1,631 @@
'''CTS: Cluster Testing System: LinuxHA v2 dependent modules...
'''

__copyright__='''
Author: Huang Zhen
Copyright (C) 2004 International Business Machines

Additional Audits, Revised Start action, Default Configuration:
    Copyright (C) 2004 Andrew Beekhof
'''

#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

import os, sys, warnings
from cts import CTS
from cts.CTSvars import *
from cts.CTS import *
from cts.CIB import *
from cts.CTStests import AuditResource

try:
    from xml.dom.minidom import *
except ImportError:
    sys.__stdout__.write("Python module xml.dom.minidom not found\n")
    sys.__stdout__.write("Please install python-xml or similar before continuing\n")
    sys.__stdout__.flush()
    sys.exit(1)

#######################################################################
#
#  LinuxHA v2 dependent modules
#
#######################################################################

class crm_lha(ClusterManager):
    '''
    The linux-ha version 2 cluster manager class.
    It implements the things we need to talk to and manipulate
    linux-ha version 2 clusters
    '''
    def __init__(self, Environment, randseed=None):
        ClusterManager.__init__(self, Environment, randseed=randseed)
        #HeartbeatCM.__init__(self, Environment, randseed=randseed)

        self.fastfail = 0
        self.clear_cache = 0
        self.cib_installed = 0
        self.config = None
        self.cluster_monitor = 0
        self.use_short_names = 1

        self.update({
            "Name" : "crm-lha",
            "DeadTime" : 300,
            "StartTime" : 300,        # Max time to start up
            "StableTime" : 30,
            "StartCmd" : "service heartbeat start > /dev/null 2>&1",
            "StopCmd" : "service heartbeat stop > /dev/null 2>&1",
            "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null",
            "EpocheCmd" : "crm_node -H -e",
            "QuorumCmd" : "crm_node -H -q",
            "ParitionCmd" : "crm_node -H -p",
            "CibQuery" : "cibadmin -Ql",
+           "CibAddXml" : "cibadmin --modify -c --xml-text %s",
+           "CibDelXpath" : "cibadmin --delete --xpath %s",
            # 300,000 == 5 minutes
            "RscRunning" : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s",
            "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
            "TmpDir" : "/tmp",
            "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
            "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",

            # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
            # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
            # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
            # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
            "ReduceCommCmd" : "",
            "RestoreCommCmd" : "tc qdisc del dev lo root",

            "LogFileName" : Environment["LogFileName"],
            "UUIDQueryCmd" : "crmadmin -N",
+
+           "MaintenanceModeOn" : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-maintenance-mode-setting\" name=\"maintenance-mode\" value=\"true\"/></cluster_property_set>'",
+
"MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", + "StandbyCmd" : "crm_attribute -VQ -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -QG -U %s -n standby -l forever -d off 2>/dev/null", # Patterns to look for in the log files for various occasions... "Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions "Pat:Local_started" : "%s .*The local CRM is operational", "Pat:Slave_started" : "%s .*State transition.*-> S_NOT_DC", "Pat:Master_started" : "%s .* State transition.*-> S_IDLE", "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s logd:.*Exiting write process", "Pat:They_stopped" : "%s .*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:ChildKilled" : "%s heartbeat.*%s.*killed by signal 9", "Pat:ChildRespawn" : "%s heartbeat.*Respawning client.*%s", "Pat:ChildExit" : "(ERROR|error): Client .* exited with return code", "Pat:Fencing_start" : "Initiating remote operation .* for %s", "Pat:Fencing_start_offset" : " for ", "Pat:Fencing_ok" : "stonith.* log_operation: Operation .* for host '%s' with device .* returned: 0", "Pat:Fencing_ok_offset" : "for host '", # Bad news Regexes. Should never occur. "BadRegexes" : ( r" trace:", r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r"global_timer_callback:", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", ), }) if self.Env["DoBSC"]: del self["Pat:They_stopped"] del self["Pat:Logd_stopped"] self.Env["use_logd"] = 0 self._finalConditions() self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} self.CibFactory = ConfigFactory(self) self.cib = self.CibFactory.createConfig(self.Env["Schema"]) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "(ERROR|error): crm_abort: crm_glib_handler: ", "(ERROR|error): Message hist queue is filling up", "stonithd.*CRIT: external_hostlist: 'vmware gethosts' returned an empty hostlist", "stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.", "pengine.*Preventing .* from re-starting", ] return [] def install_config(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("Node %s is not up." 
% node) return None if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Only install the CIB on the first node, all the other ones will pick it up from there if self.cib_installed == 1: return None self.cib_installed = 1 if self.Env["CIBfilename"] == None: self.log("Installing Generated CIB on node %s" %(node)) self.cib.install(node) else: self.log("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node)) if 0 != self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s %d"%(node)) self.rsh(node, "chown "+CTSvars.CRM_DAEMON_USER+" "+CTSvars.CRM_CONFIG_DIR+"/cib.xml") def prepare(self): '''Finish the Initialization process. Prepare to test...''' self.partitions_expected = 1 for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.unisolate_node(node) self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)") watchpats.append(self["Pat:Slave_started"]%node) watchpats.append(self["Pat:Master_started"]%node) idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterIdle") idle_watch.setwatch() out = self.rsh(node, self["StatusCmd"]%node, 1) self.debug("Node %s status: '%s'" %(node, out)) if not out or string.find(out, 'ok') < 0: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" %(node, "down", self.ShouldBeStatus[node])) self.ShouldBeStatus[node]="down" return 0 if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s: %s" %(node, "up", self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node]="up" # check the output first - because syslog-ng looses messages if string.find(out, 'S_NOT_DC') != -1: # Up and stable return 2 if string.find(out, 'S_IDLE') != -1: # Up and stable return 2 # fall back to syslog-ng and wait if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" %(node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... 
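    # test_node_CM() returns one of three values, which the two wrappers
    # around it rely on:
    #   0 - the CM did not answer, or did not report 'ok'      ("down")
    #   1 - the CM answered but is not yet in S_IDLE/S_NOT_DC  ("up", unstable)
    #   2 - the CM answered and reports a stable state         ("up", stable)
    # StataCM() above treats any value > 0 as up; node_stable() below
    # only accepts 2.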
def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" %(node)) return None def partition_stable(self, nodes, timeout=None): watchpats = [ ] watchpats.append("Current ping state: S_IDLE") watchpats.append(self["Pat:DC_IDLE"]) self.debug("Waiting for cluster stability...") if timeout == None: timeout = self["DeadTime"] idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterStable", timeout) idle_watch.setwatch() any_up = 0 for node in self.Env["nodes"]: # have each node dump its current state if self.ShouldBeStatus[node] == "up": self.rsh(node, self["StatusCmd"] %node, 1) any_up = 1 if any_up == 0: self.debug("Cluster is inactive") return 1 ret = idle_watch.look() while ret: self.debug(ret) for node in nodes: if re.search(node, ret): return 1 ret = idle_watch.look() self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout)) return None def cluster_stable(self, timeout=None, double_check=False): partitions = self.find_partitions() for partition in partitions: if not self.partition_stable(partition, timeout): return None if double_check: # Make sure we are really stable and that all resources, # including those that depend on transient node attributes, # are started if they were going to be time.sleep(5) for partition in partitions: if not self.partition_stable(partition, timeout): return None return 1 def is_node_dc(self, node, status_line=None): rc = 0 if not status_line: status_line = self.rsh(node, self["StatusCmd"]%node, 1) if not status_line: rc = 0 elif string.find(status_line, 'S_IDLE') != -1: rc = 1 elif string.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif string.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif string.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 return rc def active_resources(self, node): # [SM].* {node} matches Started, Slave, Master # Stopped wont be matched as it wont include {node} (rc, output) = self.rsh(node, """crm_resource -c""", None) resources = [] for line in output: if re.search("^Resource", line): tmp = AuditResource(self, line) if tmp.type == "primitive" and tmp.host == node: resources.append(tmp.id) return resources def ResourceLocation(self, rid): ResourceNodes = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": cmd = self["RscRunning"] % (rid) (rc, lines) = self.rsh(node, cmd, None) if rc == 127: self.log("Command '%s' failed. Binary or pacemaker-cts package not installed?" % cmd) for line in lines: self.log("Output: "+line) elif rc == 0: ResourceNodes.append(node) return ResourceNodes def find_partitions(self): ccm_partitions = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": partition = self.rsh(node, self["ParitionCmd"], 1) if not partition: self.log("no partition details for %s" %node) elif len(partition) > 2: partition = partition[:-1] found=0 for a_partition in ccm_partitions: if partition == a_partition: found = 1 if found == 0: self.debug("Adding partition from %s: %s" %(node, partition)) ccm_partitions.append(partition) else: self.debug("Partition '%s' from %s is consistent with existing entries" %(partition, node)) else: self.log("bad partition details for %s" %node) else: self.debug("Node %s is down... skipping" %node) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. 
# So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == "up": quorum = self.rsh(node, self["QuorumCmd"], 1) if string.find(quorum, "1") != -1: return 1 elif string.find(quorum, "0") != -1: return 0 else: self.debug("WARN: Unexpected quorum test result from "+ node +":"+ quorum) return 0 def Components(self): complist = [] common_ignore = [ "Pending action:", "(ERROR|error): crm_log_message_adv:", "(ERROR|error): MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "Action A_RECOVER .* not supported", "(ERROR|error): stonithd_op_result_ready: not signed on", "pingd.*(ERROR|error): send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", ] stonith_ignore = [ "(ERROR|error): stonithd_signon: ", "update_failcount: Updating failcount for child_DoFencing", "(ERROR|error): te_connect_stonith: Sign-in failed: triggered a retry", "lrmd.*(ERROR|error): cl_get_value: wrong argument (reply)", "lrmd.*(ERROR|error): is_expected_msg:.* null message", "lrmd.*(ERROR|error): stonithd_receive_ops_result failed.", ] stonith_ignore.extend(common_ignore) ccm_ignore = [ "(ERROR|error): get_channel_token: No reply message - disconnected" ] ccm_ignore.extend(common_ignore) ccm = Process(self, "ccm", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "CCM connection appears to have failed", "crmd.*Action A_RECOVER .* not supported", "crmd.*Input I_TERMINATE from do_recover", "Exiting to recover from CCM connection failure", "crmd.*do_exit: Could not recover from internal error", "crmd.*I_ERROR.*(ccm_dispatch|crmd_cib_connection_destroy)", "crmd.*exited with return code 2.", "attrd.*exited with return code 1.", "cib.*exited with return code 2.", # Not if it was fenced # "A new node joined the cluster", # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", # "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE", # "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = ccm_ignore) cib = Process(self, "cib", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "Lost connection to the CIB service", "Connection to the CIB terminated...", "crmd.*Input I_TERMINATE from do_recover", "crmd.*I_ERROR.*crmd_cib_connection_destroy", "crmd.*do_exit: Could not recover from internal error", "crmd.*exited with return code 2.", "attrd.*exited with return code 1.", ], badnews_ignore = common_ignore) lrmd = Process(self, "lrmd", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "LRM Connection failed", "crmd.*I_ERROR.*lrm_connection_destroy", "State transition S_STARTING -> S_PENDING", "crmd.*Input I_TERMINATE from do_recover", "crmd.*do_exit: Could not recover from internal error", "crmd.*exited with return code 2.", ], badnews_ignore = common_ignore) crmd = Process(self, "crmd", 
                triggersreboot=self.fastfail, pats = [
#                   "WARN: determine_online_status: Node .* is unclean",
#                   "Scheduling Node .* for STONITH",
#                   "Executing .* fencing operation",
#                   "tengine_stonith_callback: .*result=0",
                    "State transition .* S_IDLE",
                    "State transition S_STARTING -> S_PENDING",
                ], badnews_ignore = common_ignore)

        pengine = Process(self, "pengine", triggersreboot=self.fastfail,
                pats = [
                    "State transition .* S_RECOVERY",
                    "crmd.*exited with return code 2.",
                    "crmd.*Input I_TERMINATE from do_recover",
                    "crmd.*do_exit: Could not recover from internal error",
                    "crmd.*CRIT: pe_connection_destroy: Connection to the Policy Engine failed",
                    "crmd.*I_ERROR.*save_cib_contents",
                    "crmd.*exited with return code 2.",
                ], badnews_ignore = common_ignore, dc_only=1)

        if self.Env["DoFencing"] == 1 :
            complist.append(Process(self, "stoniths", triggersreboot=self.fastfail,
                dc_pats = [
                    "crmd.*CRIT: tengine_stonith_connection_destroy: Fencing daemon connection failed",
                    "Attempting connection to fencing daemon",
                    "te_connect_stonith: Connected",
                ], badnews_ignore = stonith_ignore))

        if self.fastfail == 0:
            ccm.pats.extend([
                "attrd .* exited with return code 1",
                "(ERROR|error): Respawning client .*attrd",
                "cib.* exited with return code 2",
                "(ERROR|error): Respawning client .*cib",
                "crmd.* exited with return code 2",
                "(ERROR|error): Respawning client .*crmd"
            ])
            cib.pats.extend([
                "attrd.* exited with return code 1",
                "(ERROR|error): Respawning client .*attrd",
                "crmd.* exited with return code 2",
                "(ERROR|error): Respawning client .*crmd"
            ])
            lrmd.pats.extend([
                "crmd.* exited with return code 2",
                "(ERROR|error): Respawning client .*crmd"
            ])
            pengine.pats.extend([
                "(ERROR|error): Respawning client .*crmd"
            ])

        complist.append(ccm)
        complist.append(cib)
        complist.append(lrmd)
        complist.append(crmd)
        complist.append(pengine)

        return complist

    def NodeUUID(self, node):
        lines = self.rsh(node, self["UUIDQueryCmd"], 1)
        for line in lines:
            self.debug("UUIDLine:"+ line)
            m = re.search(r'%s.+\((.+)\)' % node, line)
            if m:
                return m.group(1)
        return ""

    def StandbyStatus(self, node):
        out=self.rsh(node, self["StandbyQueryCmd"]%node, 1)
        if not out:
            return "off"
        out = out[:-1]
        self.debug("Standby result: "+out)
        return out

    # status == "on" : Enter Standby mode
    # status == "off": Enter Active mode
    def SetStandbyMode(self, node, status):
        current_status = self.StandbyStatus(node)
        cmd = self["StandbyCmd"] % (node, status)
        ret = self.rsh(node, cmd)
        return True

+   def AddDummyRsc(self, node, rid):
+       rsc_xml = """ '<resources>
+               <primitive class=\"ocf\" id=\"%s\" provider=\"pacemaker\" type=\"Dummy\">
+                   <operations>
+                       <op id=\"%s-interval-10s\" interval=\"10s\" name=\"monitor\"/>
+                   </operations>
+               </primitive>
+           </resources>'""" % (rid, rid)
+       constraint_xml = """ '<constraints>
+               <rsc_location id=\"location-%s-%s\" node=\"%s\" rsc=\"%s\" score=\"INFINITY\"/>
+           </constraints>'
+           """ % (rid, node, node, rid)
+
+       self.rsh(node, self['CibAddXml'] % (rsc_xml))
+       self.rsh(node, self['CibAddXml'] % (constraint_xml))
+
+   def RemoveDummyRsc(self, node, rid):
+       constraint = "\"//rsc_location[@rsc='%s']\"" % (rid)
+       rsc = "\"//primitive[@id='%s']\"" % (rid)
+
+       self.rsh(node, self['CibDelXpath'] % constraint)
+       self.rsh(node, self['CibDelXpath'] % rsc)
+

#######################################################################
#
#   A little test code...
#
#   Which you are advised to completely ignore...
#
#######################################################################
if __name__ == '__main__':
    pass
diff --git a/cts/CTStests.py b/cts/CTStests.py
index 237b647ee3..876d26cd73 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -1,2296 +1,2476 @@
'''CTS: Cluster Testing System: Tests module

There are a few things we want to do here:
'''

__copyright__='''
Copyright (C) 2000, 2001 Alan Robertson
Licensed under the GNU GPL.
Add RecourceRecover testcase Zhao Kai ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import time, os, re, types, string, tempfile, sys from stat import * from cts import CTS from cts.CTSaudits import * AllTestClasses = [ ] class CTSTest: ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. ''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.Audits = [] self.timeout=120 self.passed = 1 self.is_loop = 0 self.is_unsafe = 0 self.is_experimental = 0 self.is_valgrind = 0 self.benchmark = 0 # which tests to benchmark self.timer = {} # timers def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def log_mark(self, msg): self.CM.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) return def get_timer(self,key = "test"): try: return self.timer[key] except: return 0 def set_timer(self,key = "test"): self.timer[key] = time.time() return self.timer[key] def log_timer(self,key = "test"): elapsed = 0 if key in self.timer: elapsed = time.time() - self.timer[key] s = key == "test" and self.name or "%s:%s" %(self.name,key) self.CM.debug("%s runtime: %.2f" % (s, elapsed)) del self.timer[key] return elapsed def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 # Reset the test passed boolean if name == "calls": self.passed = 1 def failure(self, reason="none"): '''Increment the failure count''' self.passed = 0 self.incr("failure") self.CM.log(("Test %s" % self.name).ljust(35) +" FAILED: %s" % reason) return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def audit(self): passed = 1 if len(self.Audits) > 0: for audit in self.Audits: if not audit(): self.CM.log("Internal %s Audit %s FAILED." 
% (self.name, audit.name())) self.incr("auditfail") passed = 0 return passed def setup(self, node): '''Setup the given test''' return self.success() def teardown(self, node): '''Tear down the given test''' return self.success() def create_watch(self, patterns, timeout, name=None): if not name: name = self.name return CTS.LogWatcher(self.CM.Env, self.CM["LogFileName"], patterns, name, timeout) def local_badnews(self, prefix, watch, local_ignore=[]): errcount = 0 if not prefix: prefix = "LocalBadNews:" ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append(prefix) ignorelist.extend(local_ignore) while errcount < 100: match=watch.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.CM.log(prefix + " " + match) errcount=errcount+1 else: break else: self.CM.log("Too many errors!") return errcount def is_applicable(self): return self.is_applicable_common() def is_applicable_common(self): '''Return TRUE if we are applicable in the current test configuration''' #raise ValueError("Abstract Class member (is_applicable)") if self.is_loop and not self.CM.Env["loop-tests"]: return 0 elif self.is_unsafe and not self.CM.Env["unsafe-tests"]: return 0 elif self.is_valgrind and not self.CM.Env["valgrind-tests"]: return 0 elif self.is_experimental and not self.CM.Env["experimental-tests"]: return 0 elif self.CM.Env["benchmark"] and self.benchmark == 0: return 0 return 1 def find_ocfs2_resources(self, node): self.r_o2cb = None self.r_ocfs2 = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "o2cb" and r.parent != "NA": self.CM.debug("Found o2cb: %s" % self.r_o2cb) self.r_o2cb = r.parent if re.search("^Constraint", line): c = AuditConstraint(self.CM, line) if c.type == "rsc_colocation" and c.target == self.r_o2cb: self.r_ocfs2.append(c.rsc) self.CM.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2)) return len(self.r_ocfs2) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] ################################################################### class StopTest(CTSTest): ################################################################### '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stop" def __call__(self, node): '''Perform the 'stop' test. 
''' self.incr("calls") if self.CM.ShouldBeStatus[node] != "up": return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # patterns.append(self.CM["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == "up" and other != node: patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) #self.debug("Checking %s will notice %s left"%(other, node)) watch = self.create_watch(patterns, self.CM["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason=None UnmatchedList = "||" if watch.unmatched: (rc, output) = self.CM.rsh(node, "/bin/ps axf", None) for line in output: self.CM.debug(line) (rc, output) = self.CM.rsh(node, "/usr/sbin/dlm_tool dump", None) for line in output: self.CM.debug(line) for regex in watch.unmatched: self.CM.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason="Missing shutdown pattern" self.CM.cluster_stable(self.CM["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() if len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... # ################################################################### class StartTest(CTSTest): ################################################################### '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name="start" self.debug = debug def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != "down": return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" %(self.CM["Name"], node)) # # We don't register StartTest because it's better when called by # another test... # ################################################################### class FlipTest(CTSTest): ################################################################### '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == "up": self.incr("stopped") ret = self.stop(node) type="up->down" # Give the cluster time to recognize it's gone... 
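            # ("StableTime" defaults to 30 seconds in crm_lha's configuration)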
time.sleep(self.CM["StableTime"]) elif self.CM.ShouldBeStatus[node] == "down": self.incr("started") ret = self.start(node) type="down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) ################################################################### class RestartTest(CTSTest): ################################################################### '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Restart" self.start = StartTest(cm) self.stop = StopTest(cm) self.benchmark = 1 def __call__(self, node): '''Perform the 'restart' test. ''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.set_timer() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) ################################################################### class StonithdTest(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stonithd" self.startall = SimulStartLite(cm) self.benchmark = 1 def __call__(self, node): self.incr("calls") if len(self.CM.Env["nodes"]) < 2: return self.skipped() ret = self.startall(None) if not ret: return self.failure("Setup failed") is_dc = self.CM.is_node_dc(node) watchpats = [] watchpats.append("log_operation: Operation .* for host '%s' with device .* returned: 0" % node) watchpats.append("tengine_stonith_notify: Peer %s was terminated .*: OK" % node) if self.CM.Env["at-boot"] == 0: self.CM.debug("Expecting %s to stay down" % node) self.CM.ShouldBeStatus[node]="down" else: self.CM.debug("Expecting %s to come up again %d" % (node, self.CM.Env["at-boot"])) watchpats.append("%s .*do_state_transition: .* S_STARTING -> S_PENDING" % node) watchpats.append("%s .*do_state_transition: .* S_PENDING -> S_NOT_DC" % node) watch = self.create_watch(watchpats, 30 + self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() origin = self.CM.Env.RandomGen.choice(self.CM.Env["nodes"]) rc = self.CM.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node) if rc == 194: # 194 - 256 = -62 = Timer expired # # Look for the patterns, usually this means the required # device was running on the node to be fenced - or that # the required devices were in the process of being loaded # and/or moved # # Effectively the node committed suicide so there will be # no confirmation, but pacemaker should be watching and # fence the node again self.CM.log("Fencing command on %s to fence %s timed out" % (origin, node)) elif origin != node and rc != 0: self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc)) elif origin == node and rc != 255: # 255 == broken pipe, ie. 
the node was fenced as epxected self.CM.log("Logcally originated fencing returned %d" % rc) self.set_timer("fence") matched = watch.lookforall() self.log_timer("fence") self.set_timer("reform") if watch.unmatched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") self.log_timer("reform") return self.success() def errorstoignore(self): return [ self.CM["Pat:Fencing_start"] % ".*", self.CM["Pat:Fencing_ok"] % ".*", "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery", "error: remote_op_done: Operation reboot of .*by .* for stonith_admin.*: Timer expired", ] def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env.has_key("DoFencing"): return self.CM.Env["DoFencing"] return 1 AllTestClasses.append(StonithdTest) ################################################################### class StartOnebyOne(CTSTest): ################################################################### '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns=CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed=[] self.set_timer() for node in self.CM.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) ################################################################### class SimulStart(CTSTest): ################################################################### '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) ################################################################### class SimulStop(CTSTest): ################################################################### '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... 
ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) ################################################################### class StopOnebyOne(CTSTest): ################################################################### '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed=[] self.set_timer() for node in self.CM.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) self.CM.clear_all_caches() return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) ################################################################### class RestartOnebyOne(CTSTest): ################################################################### '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail=[] self.set_timer() self.restart = RestartTest(self.CM) for node in self.CM.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" %(len(did_fail), repr(did_fail))) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) ################################################################### class PartialStart(CTSTest): ################################################################### '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.stop = StopTest(cm) #self.is_unsafe = 1 def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") # FIXME! 
This should use the CM class to get the pattern # then it would be applicable in general watchpats = [] watchpats.append("crmd.*Connecting to cluster infrastructure") watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stop(node) if not ret: return self.failure("%s did not stop in time" % node) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # We might do some fencing in the 2-node case if we make it up far enough return [ """Executing reboot fencing operation""" ] # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) ####################################################################### class StandbyTest(CTSTest): ####################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Standby" self.benchmark = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) def __call__(self, node): self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") self.CM.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Getting resources running on node %s" % node) rsc_on_node = self.CM.active_resources(node) watchpats = [] watchpats.append("do_state_transition:.*-> S_POLICY_ENGINE") watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.CM.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) self.set_timer("on") ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.SetStandbyMode(node, "off") return self.failure("cluster didn't react to standby change on %s" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.log_timer("on") self.CM.debug("Checking resources") bad_run = self.CM.active_resources(node) if len(bad_run) > 0: rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run))) self.CM.debug("Setting node %s to active mode" % node) self.CM.SetStandbyMode(node, "off") return rc self.CM.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.set_timer("off") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.log_timer("off") return self.success() AllTestClasses.append(StandbyTest) ####################################################################### class ValgrindTest(CTSTest): 
####################################################################### '''Check for memory leaks''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Valgrind" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_valgrind = 1 self.is_loop = 1 def setup(self, node): self.incr("calls") ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") # Enable valgrind self.logPat = "/tmp/%s-*.valgrind" % self.name self.CM.Env["valgrind-prefix"] = self.name self.CM.rsh(node, "rm -f %s" % self.logPat, None) ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") for node in self.CM.Env["nodes"]: (rc, output) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) for line in output: self.CM.debug(line) return self.success() def teardown(self, node): # Disable valgrind self.CM.Env["valgrind-prefix"] = None # Return all nodes to normal ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") return self.success() def find_leaks(self): # Check for leaks leaked = [] self.stop = StopTest(self.CM) for node in self.CM.Env["nodes"]: (rc, ps_out) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) rc = self.stop(node) if not rc: self.failure("Couldn't shut down %s" % node) rc = self.CM.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logPat, 0) if rc != 1: leaked.append(node) self.failure("Valgrind errors detected on %s" % node) for line in ps_out: self.CM.log(line) (rc, output) = self.CM.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logPat, None) for line in output: self.CM.log(line) (rc, output) = self.CM.rsh(node, "cat %s" % self.logPat, None) for line in output: self.CM.debug(line) self.CM.rsh(node, "rm -f %s" % self.logPat, None) return leaked def __call__(self, node): leaked = self.find_leaks() if len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """cib:.*readCibXmlFile:""", """HA_VALGRIND_ENABLED""" ] ####################################################################### class StandbyLoopTest(ValgrindTest): ####################################################################### '''Check for memory leaks by putting a node in and out of standby for an hour''' def __init__(self, cm): ValgrindTest.__init__(self,cm) self.name="StandbyLoop" def __call__(self, node): lpc = 0 delay = 2 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 while time.time() <= done and not failed: lpc = lpc + 1 time.sleep(delay) if not self.CM.SetStandbyMode(node, "on"): self.failure("can't set node %s to standby mode" % node) failed = lpc time.sleep(delay) if not self.CM.SetStandbyMode(node, "off"): self.failure("can't set node %s to active mode" % node) failed = lpc leaked = self.find_leaks() if failed: return self.failure("Iteration %d failed" % failed) elif len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() AllTestClasses.append(StandbyLoopTest) ############################################################################## class BandwidthTest(CTSTest): ############################################################################## # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. 
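    # For instance, instead of invoking a heartbeat-specific command
    # directly, a test would normally go through the ClusterManager, e.g.
    # (hypothetical sketch, not part of this test):
    #
    #     out = self.CM.rsh(node, self.CM["StatusCmd"] % node, 1)
    #
    # so the same test keeps working with any CM class that defines the
    # "StatusCmd" entry.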
'''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) self.tempfile = tempfile.mktemp(".cts") self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount()<1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.CM.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.CM.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.CM.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.CM.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.CM.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" % rc) def countbandwidth(self, file): fp = open(file, "r") fp.seek(0) count = 0 sum = 0 while 1: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count=count+1 linesplit = string.split(line," ") for j in range(len(linesplit)-1): if linesplit[j]=="udp": break if linesplit[j]=="length:": break try: sum = sum + int(linesplit[j+1]) except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T1 = linesplit[0] timesplit = string.split(T1,":") time2split = string.split(timesplit[2],".") time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 break while count < 100: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count+1 linessplit = string.split(line," ") for j in range(len(linessplit)-1): if linessplit[j] =="udp": break if linesplit[j]=="length:": break try: sum=int(linessplit[j+1])+sum except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T2 = linessplit[0] timesplit = string.split(T2,":") time2split = string.split(timesplit[2],".") time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 time = time2-time1 if (time <= 0): return 0 return (sum*8)/time def is_applicable(self): '''BandwidthTest never applicable''' return 0 AllTestClasses.append(BandwidthTest) + +################################################################### +class MaintenanceMode(CTSTest): +################################################################### + def __init__(self, cm): + CTSTest.__init__(self,cm) + self.name="MaintenanceMode" + self.start = StartTest(cm) + self.startall = SimulStartLite(cm) + self.max=30 + #self.is_unsafe = 1 + self.benchmark = 1 + self.action = "asyncmon" + self.interval = 0 + self.rid="maintenanceDummy" + + def toggleMaintenanceMode(self, node, action): + pats = [] + pats.append(self.CM["Pat:DC_IDLE"]) + 
+ # fail the resource right after turning Maintenance mode on + # verify it is not recovered until maintenance mode is turned off + if action == "On": + pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) + else: + pats.append("process_lrm_event: LRM operation %s_stop_0.*confirmed.*ok" % self.rid) + pats.append("process_lrm_event: LRM operation %s_start_0.*confirmed.*ok" % self.rid) + + watch = self.create_watch(pats, 60) + watch.setwatch() + + self.CM.debug("Turning maintenance mode %s" % action) + self.CM.rsh(node, self.CM["MaintenanceMode%s" % (action)]) + if (action == "On"): + self.CM.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) + + self.set_timer("recover%s" % (action)) + watch.lookforall() + self.log_timer("recover%s" % (action)) + if watch.unmatched: + self.CM.debug("Failed to find patterns when turning maintenance mode %s" % action) + return repr(watch.unmatched) + + return "" + + def insertMaintenanceDummy(self, node): + pats = [] + pats.append(".*%s.*process_lrm_event: LRM operation %s_start_0.*confirmed.*ok" % (node, self.rid)) + + watch = self.create_watch(pats, 60) + watch.setwatch() + + self.CM.AddDummyRsc(node, self.rid) + + self.set_timer("addDummy") + watch.lookforall() + self.log_timer("addDummy") + + if watch.unmatched: + self.CM.debug("Failed to find patterns when adding maintenance dummy resource") + return repr(watch.unmatched) + return "" + + def removeMaintenanceDummy(self, node): + pats = [] + pats.append("process_lrm_event: LRM operation %s_stop_0.*confirmed.*ok" % self.rid) + + watch = self.create_watch(pats, 60) + watch.setwatch() + self.CM.RemoveDummyRsc(node, self.rid) + + self.set_timer("removeDummy") + watch.lookforall() + self.log_timer("removeDummy") + + if watch.unmatched: + self.CM.debug("Failed to find patterns when removing maintenance dummy resource") + return repr(watch.unmatched) + return "" + + def managedRscList(self, node): + rscList = [] + (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) + for line in lines: + if re.search("^Resource", line): + tmp = AuditResource(self.CM, line) + if tmp.managed(): + rscList.append(tmp.id) + + return rscList + + def verifyResources(self, node, rscList, managed): + managedList = list(rscList) + managed_str = "managed" + if not managed: + managed_str = "unmanaged" + + (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) + for line in lines: + if re.search("^Resource", line): + tmp = AuditResource(self.CM, line) + if managed and not tmp.managed(): + continue + elif not managed and tmp.managed(): + continue + elif managedList.count(tmp.id): + managedList.remove(tmp.id) + + if len(managedList) == 0: + self.CM.debug("Found all %s resources on %s" % (managed_str, node)) + return True + + self.CM.log("Could not find all %s resources on %s. %s" % (managed_str, node, managedList)) + return False + + def __call__(self, node): + '''Perform the 'MaintenanceMode' test. ''' + self.incr("calls") + verify_managed = False + verify_unmanaged = False + failPat = "" + + ret = self.startall(None) + if not ret: + return self.failure("Setup failed") + + # get a list of all the managed resources. We use this list + # after enabling maintenance mode to verify all managed resources + # become un-managed. After maintenance mode is turned off, we use + # this list to verify all the resources become managed again. 
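+        # While maintenance-mode=true the cluster will not start, stop or
+        # recover any resource, so crm_resource reports everything as
+        # unmanaged and the failed dummy must stay failed until the mode
+        # is switched off again.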
+ managedResources = self.managedRscList(node) + if len(managedResources) == 0: + self.CM.log("No managed resources on %s" % node) + return self.skipped() + + # insert a fake resource we can fail during maintenance mode + # so we can verify recovery does not take place until after maintenance + # mode is disabled. + failPat = failPat + self.insertMaintenanceDummy(node) + + # toggle maintenance mode ON, then fail dummy resource. + failPat = failPat + self.toggleMaintenanceMode(node, "On") + + # verify all the resources are now unmanaged + if self.verifyResources(node, managedResources, False): + verify_unmanaged = True + + # Toggle maintenance mode OFF, verify dummy is recovered. + failPat = failPat + self.toggleMaintenanceMode(node, "Off") + + # verify all the resources are now managed again + if self.verifyResources(node, managedResources, True): + verify_managed = True + + # Remove our maintenance dummy resource. + failPat = failPat + self.removeMaintenanceDummy(node) + + self.CM.cluster_stable() + + if failPat != "": + return self.failure("Unmatched patterns: %s" % (failPat)) + elif verify_unmanaged is False: + return self.failure("Failed to verify resources became unmanaged during maintenance mode") + elif verify_managed is False: + return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode") + + return self.success() + + def errorstoignore(self): + '''Return list of errors which should be ignored''' + return [ """Updating failcount for %s""" % self.rid, + """LogActions: Recover %s""" % self.rid, + """Unknown operation: fail""", + """(ERROR|error): sending stonithRA op to stonithd failed.""", + """(ERROR|error): process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval), + """(ERROR|error): process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), + ] + +AllTestClasses.append(MaintenanceMode) + ################################################################### class ResourceRecover(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ResourceRecover" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max=30 self.rid=None self.rid_alt=None #self.is_unsafe = 1 self.benchmark = 1 # these are the values used for the new LRM API call self.action = "asyncmon" self.interval = 0 def __call__(self, node): '''Perform the 'ResourceRecover' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") resourcelist = self.CM.active_resources(node) # if there are no resourcelist, return directly if len(resourcelist)==0: self.CM.log("No active resources on %s" % node) return self.skipped() self.rid = self.CM.Env.RandomGen.choice(resourcelist) self.rid_alt = self.rid rsc = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.id == self.rid: rsc = tmp # Handle anonymous clones that get renamed self.rid = rsc.clone_id break if not rsc: return self.failure("Could not find %s in the resource list" % self.rid) self.CM.debug("Shooting %s aka. 
%s" % (rsc.clone_id, rsc.id)) pats = [] pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) if rsc.managed(): pats.append("process_lrm_event: LRM operation %s_stop_0.*confirmed.*ok" % self.rid) if rsc.unique(): pats.append("process_lrm_event: LRM operation %s_start_0.*confirmed.*ok" % self.rid) else: # Anonymous clones may get restarted with a different clone number pats.append("process_lrm_event: LRM operation .*_start_0.*confirmed.*ok") watch = self.create_watch(pats, 60) watch.setwatch() self.CM.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover") watch.lookforall() self.log_timer("recover") self.CM.cluster_stable() recovered=self.CM.ResourceLocation(self.rid) if watch.unmatched: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif rsc.unique() and len(recovered) > 1: return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered))) elif len(recovered) > 0: self.CM.debug("%s is running on: %s" %(self.rid, repr(recovered))) elif rsc.managed(): return self.failure("%s was not recovered and is inactive" % self.rid) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """LogActions: Recover %s""" % self.rid, """LogActions: Recover %s""" % self.rid_alt, """Unknown operation: fail""", """(ERROR|error): sending stonithRA op to stonithd failed.""", """(ERROR|error): process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval), """(ERROR|error): process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) ################################################################### class ComponentFail(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ComponentFail" self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] self.is_unsafe = 1 def __call__(self, node): '''Perform the 'ComponentFail' test. ''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.CM["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.CM.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.CM.Env.RandomGen.choice(self.complist) self.CM.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) if chosen.name != "aisexec": if self.CM["Name"] != "crm-lha" or chosen.name != "pengine": self.patterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.patterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.patterns.extend(chosen.pats) if node_is_dc: self.patterns.extend(chosen.dc_pats) # In an ideal world, this next stuff should be in the "chosen" object as a member function if self.CM["Name"] == "crm-lha" and chosen.triggersreboot: # Make sure the node goes down and then comes back up if it should reboot... 
for other in self.CM.Env["nodes"]: if other != node: self.patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) self.patterns.append(self.CM["Pat:Slave_started"] % node) self.patterns.append(self.CM["Pat:Local_started"] % node) if chosen.dc_only: # Sometimes these will be in the log, and sometimes they won't... self.okerrpatterns.append("%s .*Process %s:.* exited" %(node, chosen.name)) self.okerrpatterns.append("%s .*I_ERROR.*crmdManagedChildDied" %node) self.okerrpatterns.append("%s .*The %s subsystem terminated unexpectedly" %(node, chosen.name)) self.okerrpatterns.append("(ERROR|error): Client .* exited with return code") else: # Sometimes this won't be in the log... self.okerrpatterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildExit"]) # supply a copy so self.patterns doesnt end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonithPats = [] stonithPats.append(self.CM["Pat:Fencing_ok"] % node) stonith = self.create_watch(stonithPats, 0) stonith.setwatch() # set the watch for stable watch = self.create_watch( tmpPats, self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting for any STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") self.CM.cluster_stable(self.CM["StartTime"]) self.CM.debug("Checking if %s was shot" % node) shot = stonith.look(60) if shot: self.CM.debug("Found: "+ repr(shot)) self.okerrpatterns.append(self.CM["Pat:Fencing_start"] % node) if self.CM.Env["at-boot"] == 0: self.CM.ShouldBeStatus[node]="down" # If fencing occurred, chances are many (if not all) the expected logs # will not be sent - or will be lost when the node reboots return self.success() # check for logs indicating a graceful recovery matched = watch.lookforall(allow_multiple_matches=1) if watch.unmatched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) #################################################################### class SplitBrainTest(CTSTest): #################################################################### '''It is used to test split-brain. 
when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "SplitBrain" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.is_experimental = 1 def isolate_partition(self, partition): other_nodes = [] other_nodes.extend(self.CM.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"]) + " from " +repr(partition)) if len(other_nodes) == 0: return 1 self.CM.debug("Creating partition: " + repr(partition)) self.CM.debug("Everyone else: " + repr(other_nodes)) for node in partition: if not self.CM.isolate_node(node, other_nodes): self.CM.log("Could not isolate %s" % node) return 0 return 1 def heal_partition(self, partition): other_nodes = [] other_nodes.extend(self.CM.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"])) if len(other_nodes) == 0: return 1 self.CM.debug("Healing partition: " + repr(partition)) self.CM.debug("Everyone else: " + repr(other_nodes)) for node in partition: self.CM.unisolate_node(node, other_nodes) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") self.passed = 1 partitions = {} ret = self.startall(None) if not ret: return self.failure("Setup failed") while 1: # Retry until we get multiple partitions partitions = {} p_max = len(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: p = self.CM.Env.RandomGen.randint(1, p_max) if not partitions.has_key(p): partitions[p]= [] partitions[p].append(node) p_max = len(partitions.keys()) if p_max > 1: break # else, try again self.CM.debug("Created %d partitions" % p_max) for key in partitions.keys(): self.CM.debug("Partition["+str(key)+"]:\t"+repr(partitions[key])) # Disabling STONITH to reduce test complexity for now self.CM.rsh(node, "crm_attribute -V -n stonith-enabled -v false") for key in partitions.keys(): self.isolate_partition(partitions[key]) count = 30 while count > 0: if len(self.CM.find_partitions()) != p_max: time.sleep(10) else: break else: self.failure("Expected partitions were not created") # Target number of partitions formed - wait for stability if not self.CM.cluster_stable(): self.failure("Partitioned cluster not stable") # Now audit the cluster state self.CM.partitions_expected = p_max if not self.audit(): self.failure("Audits failed") self.CM.partitions_expected = 1 # And heal them again for key in partitions.keys(): self.heal_partition(partitions[key]) # Wait for a single partition to form count = 30 while count > 0: if len(self.CM.find_partitions()) != 1: time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not reform") # Wait for it to have the right number of members count = 30 while count > 0: members = [] partitions = self.CM.find_partitions() if len(partitions) > 0: members = partitions[0].split() if len(members) != len(self.CM.Env["nodes"]): time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not completely reform") # Wait up to 20 minutes - the delay is more preferable than # trying to continue with in a messed up state if not self.CM.cluster_stable(1200): self.failure("Reformed cluster not stable") answer = raw_input('Continue? 
[nY]') if answer and answer == "n": raise ValueError("Reformed cluster not stable") # Turn fencing back on if self.CM.Env["DoFencing"]: self.CM.rsh(node, "crm_attribute -V -D -n stonith-enabled") self.CM.cluster_stable() if self.passed: return self.success() return self.failure("See previous errors") def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "Another DC detected:", "(ERROR|error): attrd_cib_callback: .*Application of an update diff failed", "crmd_ha_msg_callback:.*not in our membership list", "CRIT:.*node.*returning after partition", ] def is_applicable(self): if not self.is_applicable_common(): return 0 return len(self.CM.Env["nodes"]) > 2 AllTestClasses.append(SplitBrainTest) #################################################################### class Reattach(CTSTest): #################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Reattach" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() def setup(self, node): attempt=0 if not self.startall(None): return None # Make sure we are really _really_ stable and that all # resources, including those that depend on transient node # attributes, are started while not self.CM.cluster_stable(double_check=True): if attempt < 5: attempt += 1 self.CM.debug("Not stable yet, re-testing") else: self.CM.log("Cluster is not stable") return None return 1 def teardown(self, node): # Make sure 'node' is up start = StartTest(self.CM) start(node) is_managed = self.CM.rsh(node, "crm_attribute -Q -G -t crm_config -n is-managed-default -d true", 1) is_managed = is_managed[:-1] # Strip off the newline if is_managed != "true": self.CM.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed)) managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.rsh(node, "crm_attribute -V -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) self.CM.log("Could not re-enable resource management") return 0 return 1 def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' if self.find_ocfs2_resources(node): self.CM.log("Detach/Reattach scenarios are not possible with OCFS2 services present") return 0 return 1 def __call__(self, node): self.incr("calls") pats = [] managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.debug("Disable resource management") self.CM.rsh(node, "crm_attribute -V -n is-managed-default -v false") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not disabled") pats = [] pats.append("process_lrm_event: .*_stop") pats.append("process_lrm_event: .*_start") pats.append("process_lrm_event: .*_promote") pats.append("process_lrm_event: .*_demote") pats.append("process_lrm_event: .*_migrate") watch = self.create_watch(pats, 60, "ShutdownActivity") watch.setwatch() self.CM.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -V -D -n is-managed-default") return self.failure("Couldn't shut down the cluster") self.CM.debug("Bringing the cluster back up") ret = self.startall(None) time.sleep(5) # allow ping to update the CIB if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, 
"crm_attribute -V -D -n is-managed-default") return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -V -D -n is-managed-default") return self.failure("Resources stopped or started during cluster restart") watch = self.create_watch(pats, 60, "StartupActivity") watch.setwatch() managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -V -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not enabled") self.CM.cluster_stable() # Ignore actions for STONITH resources ignore = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.CM.debug("Ignoring start actions for %s" % r.id) ignore.append("process_lrm_event: LRM operation %s_start_0.*confirmed.*ok" % r.id) if self.local_badnews("ResourceActivity:", watch, ignore): return self.failure("Resources stopped or started after resource management was re-enabled") return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [ "You may ignore this error if it is unmanaged.", "pingd: .*(ERROR|error): send_ipc_message:", "pingd: .*(ERROR|error): send_update:", "lrmd: .*(ERROR|error): notify_client:", ] def is_applicable(self): if self.CM["Name"] == "crm-lha": return None return 1 AllTestClasses.append(Reattach) #################################################################### class SpecialTest1(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... 
ret = self.stopall(None) if not ret: return self.failure("Could not stop all nodes") # Start the selected node ret = self.restart1(node) if not ret: return self.failure("Could not start "+node) # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Could not start the remaining nodes") return self.success() AllTestClasses.append(SpecialTest1) #################################################################### class HAETest(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="HAETest" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_loop = 1 def setup(self, node): # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") return self.success() def wait_on_state(self, node, resource, expected_clones, attempts=240): while attempts > 0: active=0 (rc, lines) = self.CM.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None) # Hack until crm_resource does the right thing if rc == 0 and lines: active = len(lines) if len(lines) == expected_clones: return 1 elif rc == 1: self.CM.debug("Resource %s is still inactive" % resource) elif rc == 234: self.CM.log("Unknown resource %s" % resource) return 0 elif rc == 246: self.CM.log("Cluster is inactive") return 0 elif rc != 0: self.CM.log("Call to crm_resource failed, rc=%d" % rc) return 0 else: self.CM.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones)) attempts -= 1 time.sleep(1) return 0 def find_dlm(self, node): self.r_dlm = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "controld" and r.parent != "NA": self.CM.debug("Found dlm: %s" % self.r_dlm) self.r_dlm = r.parent return 1 return 0 def find_hae_resources(self, node): self.r_dlm = None self.r_o2cb = None self.r_ocfs2 = [] if self.find_dlm(node): self.find_ocfs2_resources(node) def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env["Schema"] == "hae": return 1 return None #################################################################### class HAERoleTest(HAETest): #################################################################### def __init__(self, cm): '''Lars' mount/unmount test for the HA extension. 
''' HAETest.__init__(self,cm) self.name="HAERoleTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_resource -V -r %s -p target-role -v %s --meta" % (resource, target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 delay = 2 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "Stopped") if not self.wait_on_state(node, self.r_dlm, 0): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "Started") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAERoleTest) #################################################################### class HAEStandbyTest(HAETest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): HAETest.__init__(self,cm) self.name="HAEStandbyTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_standby -V -l reboot -v %s" % (target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "true") if not self.wait_on_state(node, self.r_dlm, clone_max-1): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "false") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAEStandbyTest) ################################################################### class NearQuorumPointTest(CTSTest): ################################################################### ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. 
''' self.incr("calls") startset = [] stopset = [] stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint") #decide what to do with each node for node in self.CM.Env["nodes"]: action = self.CM.Env.RandomGen.choice(["start","stop"]) #action = self.CM.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.CM.debug("start nodes:" + repr(startset)) self.CM.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == "up": watchpats.append(self.CM["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": #watchpats.append(self.CM["Pat:Slave_started"] % node) watchpats.append(self.CM["Pat:Local_started"] % node) else: for stopping in stopset: if self.CM.ShouldBeStatus[stopping] == "up": watchpats.append(self.CM["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping))) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.CM["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() self.CM.fencing_cleanup("NearQuorumPoint", stonith) return self.success() self.CM.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) self.CM.fencing_cleanup("NearQuorumPoint", stonith) if upnodes == [] and downnodes == []: self.CM.cluster_stable() # Make sure they're completely down with no residue for node in stopset: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() if len(upnodes) > 0: self.CM.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.CM.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() def is_applicable(self): if self.CM["Name"] == "crm-cman": return None return 1 AllTestClasses.append(NearQuorumPointTest) ################################################################### class RollingUpgradeTest(CTSTest): ################################################################### '''Perform a rolling upgrade of the cluster''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RollingUpgrade" self.start = StartTest(cm) self.stop = StopTest(cm) self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def setup(self, node): # Stop all nodes, downgrade them, then start them all again ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.downgrade(node, None): return self.failure("Couldn't downgrade %s" % node) ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.upgrade(node, None): return self.failure("Couldn't upgrade %s" % node) return self.success() def install(self, node, version, start=1, flags="--force"): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.CM.Env["rpm-dir"], version) self.CM.log("Installing %s on %s with
%s" % (version, node, flags)) if not self.stop(node): return self.failure("stop failure: "+node) rc = self.CM.rsh(node, "mkdir -p %s" % target_dir) rc = self.CM.rsh(node, "rm -f %s/*.rpm" % target_dir) (rc, lines) = self.CM.rsh(node, "ls -1 %s/*.rpm" % src_dir, None) for line in lines: line = line[:-1] rc = self.CM.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir)) rc = self.CM.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir)) if start and not self.start(node): return self.failure("start failure: "+node) return self.success() def upgrade(self, node, start=1): return self.install(node, self.CM.Env["current-version"], start) def downgrade(self, node, start=1): return self.install(node, self.CM.Env["previous-version"], start, "--force --nodeps") def __call__(self, node): '''Perform the 'Rolling Upgrade' test. ''' self.incr("calls") for node in self.CM.Env["nodes"]: if self.upgrade(node): return self.failure("Couldn't upgrade %s" % node) self.CM.cluster_stable() return self.success() def is_applicable(self): if not self.is_applicable_common(): return None if not self.CM.Env.has_key("rpm-dir"): return None if not self.CM.Env.has_key("current-version"): return None if not self.CM.Env.has_key("previous-version"): return None return 1 # Register RestartTest as a good test to run AllTestClasses.append(RollingUpgradeTest) ################################################################### class BSC_AddResource(CTSTest): ################################################################### '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="AddResource" self.resource_offset = 0 self.cib_cmd="""cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.incr("calls") self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*confirmed.*ok" patterns = [] patterns.append(start_pat % r_id) watch = self.create_watch(patterns, self.CM["DeadTime"]) watch.setwatch() fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"] = ip if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.CM["DeadTime"]): return self.failure("Unstable cluster") return self.success() def make_ip_resource(self, node, id, rclass, type, ip): self.CM.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint=""" """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("constraints", node_constraint), None) if rc != 0: self.CM.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("resources", rsc_xml), None) if rc != 0: self.CM.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.CM.Env["DoBSC"]: return 1 return None AllTestClasses.append(BSC_AddResource) class SimulStopLite(CTSTest): ################################################################### '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup 
work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.incr("WasStarted") watchpats.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # watchpats.append(self.CM["Pat:Logd_stopped"] % node) if len(watchpats) == 0: self.CM.clear_all_caches() return self.success() # Stop all the nodes - at about the same time... watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.set_timer() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) if watch.lookforall(): self.CM.clear_all_caches() # Make sure they're completely down with no residue for node in self.CM.Env["nodes"]: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() did_fail=0 up_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail=1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.CM.log("Warn: All nodes stopped but CTS didn't detect: " + repr(watch.unmatched)) self.CM.clear_all_caches() return self.failure("Missing log message: "+repr(watch.unmatched)) def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 ################################################################### class SimulStartLite(CTSTest): ################################################################### '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartLite' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... node_list = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.incr("WasStopped") node_list.append(node) self.set_timer() while len(node_list) > 0: watchpats = [ ] uppat = self.CM["Pat:Slave_started"] if self.CM.upcount() == 0: uppat = self.CM["Pat:Local_started"] watchpats.append(self.CM["Pat:DC_IDLE"]) for node in node_list: watchpats.append(uppat % node) watchpats.append(self.CM["Pat:InfraUp"] % node) watchpats.append(self.CM["Pat:PacemakerUp"] % node) # Start all the nodes - at about the same time...
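# fencing_cleanup() below returns any nodes that were shot while coming up; they become the new node_list, so the enclosing while loop waits for their startup patterns again.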
watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() stonith = self.CM.prepare_fencing_watcher(self.name) for node in node_list: self.CM.StartaCMnoBlock(node) watch.lookforall() node_list = self.CM.fencing_cleanup(self.name, stonith) # Remove node_list messages from watch.unmatched for node in node_list: if watch.unmatched: watch.unmatched.remove(uppat % node) if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Startup pattern not found: %s" %(regex)) if not self.CM.cluster_stable(): return self.failure("Cluster did not stabilize") did_fail=0 unstable = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.CM.Env["nodes"]: if not self.CM.node_stable(node): did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) return self.success() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 def TestList(cm, audits): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) return result # vim:ts=4:sw=4:et: diff --git a/fencing/commands.c b/fencing/commands.c index c2951210bc..d3f0e5c318 100644 --- a/fencing/commands.c +++ b/fencing/commands.c @@ -1,1918 +1,1918 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; static int active_children = 0; struct device_search_s { char *host; char *action; int per_device_timeout; int replies_needed; int replies_received; void *user_data; void (*callback) (GList *devices, void *user_data); GListPtr capable; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data); static void stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer, const char *client_id); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; int timeout; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; uint32_t victim_nodeid; char *action; char *device; char *mode; GListPtr device_list; GListPtr device_next; void *internal_user_data; void (*done_cb)(GPid pid, int rc, const char *output, gpointer user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; } async_command_t; static xmlNode * stonith_construct_async_reply(async_command_t *cmd, const char *output, xmlNode *data, int rc); static int get_action_timeout(stonith_device_t *device, const char *action, int default_timeout) { char buffer[512] = { 0, }; char *value = NULL; CRM_CHECK(action != NULL, return default_timeout); if (!device->params) { return default_timeout; } snprintf(buffer, sizeof(buffer) - 1, "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (!value) { return default_timeout; } return atoi(value); } static void free_async_command(async_command_t *cmd) { if (!cmd) { return; } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } static async_command_t *create_async_command(xmlNode *msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@"F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = 
crm_element_value_copy(op, F_STONITH_TARGET); cmd->mode = crm_element_value_copy(op, F_STONITH_MODE); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd->done_cb = st_child_done; cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int stonith_manual_ack(xmlNode *msg, remote_fencing_op_t *op) { async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); if(cmd == NULL) { return -EINVAL; } cmd->device = strdup("manual_ack"); cmd->remote_op_id = strdup(op->id); crm_notice("Injecting manual confirmation that %s is safely off/down", crm_element_value(dev, F_STONITH_TARGET)); cmd->done_cb(0, 0, NULL, cmd); return pcmk_ok; } static gboolean stonith_device_execute(stonith_device_t *device) { int exec_rc = 0; async_command_t *cmd = NULL; stonith_action_t *action = NULL; CRM_CHECK(device != NULL, return FALSE); if(device->active_pid) { crm_trace("%s is still active with pid %u", device->id, device->active_pid); return TRUE; } if(device->pending_ops) { GList *first = device->pending_ops; device->pending_ops = g_list_remove_link(device->pending_ops, first); cmd = first->data; g_list_free_1(first); } if(cmd == NULL) { crm_trace("Nothing further to do for %s", device->id); return TRUE; } action = stonith_action_create(device->agent, cmd->action, cmd->victim, cmd->victim_nodeid, cmd->timeout, device->params, device->aliases); /* for async exec, exec_rc is pid if positive and error code if negative/zero */ exec_rc = stonith_action_execute_async(action, (void *) cmd, cmd->done_cb); if(exec_rc > 0) { crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%dms", cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"", device->id, exec_rc, cmd->timeout); device->active_pid = exec_rc; } else { crm_warn("Operation %s%s%s on %s failed: %s (%d)", cmd->action, cmd->victim?" 
for node ":"", cmd->victim?cmd->victim:"", device->id, pcmk_strerror(exec_rc), exec_rc); cmd->done_cb(0, exec_rc, NULL, cmd); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static void schedule_stonith_command(async_command_t *cmd, stonith_device_t *device) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && cmd->victim) { crm_node_t *node = crm_get_peer(0, cmd->victim); cmd->victim_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling %s on %s for remote peer %s with op id (%s) (timeout=%ds)", cmd->action, device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling %s on %s for %s (timeout=%ds)", cmd->action, device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); } void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for(gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action); cmd->done_cb(0, -ENODEV, NULL, cmd); free_async_command(cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); free_xml(device->agent_metadata); free(device->namespace); free(device->on_target_actions); free(device->agent); free(device->id); free(device); } static GHashTable *build_port_aliases(const char *hostmap, GListPtr *targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(hostmap == NULL) { return aliases; } max = strlen(hostmap); for(; lpc <= max; lpc++) { switch(hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if(lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if(name) { char *value = NULL; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if(targets) { *targets = g_list_append(*targets, strdup(value)); } value=NULL; name=NULL; added++; } else if(lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc-last, hostmap+last); } last = lpc + 1; break; } if(hostmap[lpc] == 0) { break; } } if(added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } static void parse_host_line(const char *line, GListPtr *output) { int lpc = 0; int max = 0; int last = 0; if(line) { max = strlen(line); } else { return; } /* Check for any complaints about additional parameters that the device doesn't understand */ if(strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping: %s", line); return; } crm_trace("Processing: %s", line); /* Skip initial whitespace */ for(lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { last = lpc+1; } /* Now the actual content */ for(lpc = 0; lpc <= max; lpc++) { gboolean a_space = isspace(line[lpc]); if(a_space && lpc 
< max && isspace(line[lpc+1])) { /* fast-forward to the end of the spaces */ } else if(a_space || line[lpc] == ',' || line[lpc] == 0) { int rc = 1; char *entry = NULL; if(lpc != last) { entry = calloc(1, 1 + lpc - last); rc = sscanf(line+last, "%[a-zA-Z0-9_-.]", entry); } if(entry == NULL) { /* Skip */ } else if(rc != 1) { crm_warn("Could not parse (%d %d): %s", last, lpc, line+last); } else if(safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) { crm_trace("Adding '%s'", entry); *output = g_list_append(*output, entry); entry = NULL; } free(entry); last = lpc + 1; } } } static GListPtr parse_host_list(const char *hosts) { int lpc = 0; int max = 0; int last = 0; GListPtr output = NULL; if(hosts == NULL) { return output; } max = strlen(hosts); for(lpc = 0; lpc <= max; lpc++) { if(hosts[lpc] == '\n' || hosts[lpc] == 0) { char *line = NULL; line = calloc(1, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", hosts+last); parse_host_line(line, &output); free(line); last = lpc + 1; } } return output; } static xmlNode *get_agent_metadata(const char *agent) { stonith_t *st = stonith_api_new(); xmlNode *xml = NULL; char *buffer = NULL; int rc = 0; rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return NULL; } xml = string2xml(buffer); free(buffer); stonith_api_delete(st); return xml; } static gboolean is_nodeid_required(xmlNode *xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (!xpath || xpath->nodesetval->nodeNr <= 0) { return FALSE; } return TRUE; } static char * get_on_target_actions(xmlNode *xml) { char *actions = NULL; xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (!xml) { return NULL; } xpath = xpath_search(xml, "//action"); if (!xpath || !xpath->nodesetval) { return NULL; } max = xpath->nodesetval->nodeNr; actions = calloc(1, 512); for (lpc = 0; lpc < max; lpc++) { const char *on_target = NULL; const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_CHECK(match != NULL, continue); on_target = crm_element_value(match, "on_target"); action = crm_element_value(match, "name"); if (action && crm_is_true(on_target)) { if (strlen(actions)) { g_strlcat(actions, " ", 512); } g_strlcat(actions, action, 512); } } if (!strlen(actions)) { free(actions); actions = NULL; } return actions; } static stonith_device_t *build_device_from_xml(xmlNode *msg) { const char *value = NULL; xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; device = calloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = crm_element_value_copy(dev, "agent"); device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2list(dev); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if(value) { device->targets = parse_host_list(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); device->agent_metadata = get_agent_metadata(device->agent); device->on_target_actions = get_on_target_actions(device->agent_metadata); value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } if (device->on_target_actions) { crm_info("The fencing device '%s' requires actions (%s) to be 
executed on the target node", device->id, device->on_target_actions); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static const char * target_list_type(stonith_device_t *dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if(check_type == NULL) { if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) { check_type = "static-list"; } else { check_type = "dynamic-list"; } } return check_type; } static void schedule_internal_command(stonith_device_t *device, const char *action, const char *victim, int timeout, void *internal_user_data, void (*done_cb)(GPid pid, int rc, const char *output, gpointer user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); cmd->victim = victim ? strdup(victim) : NULL; cmd->device = strdup(device->id); cmd->origin = strdup("st_internal_cmd"); cmd->client = strdup("st_internal_client"); cmd->client_name = strdup("st_internal_client_name"); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; schedule_stonith_command(cmd, device); } static gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for(lpc = 0; lpc < max; lpc ++) { const char *value = g_list_nth_data(list, lpc); if(safe_str_eq(item, value)) { return TRUE; } } return FALSE; } static void status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can = FALSE; if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } dev->active_pid = 0; mainloop_set_trigger(dev->work); if(rc == 1 /* unkown */) { crm_trace("Host %s is not known by %s", search->host, dev->id); } else if(rc == 0 /* active */ || rc == 2 /* inactive */) { can = TRUE; } else { crm_notice("Unkown result when testing if %s can fence %s: rc=%d", dev->id, search->host, rc); } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can_fence = FALSE; /* Host/alias must be in the list output to be eligable to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } dev->active_pid = 0; mainloop_set_trigger(dev->work); /* If we successfully got the targets earlier, don't disable. 
*/ if (rc != 0 && !dev->targets) { crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output); /* Fall back to status */ g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status")); g_list_free_full(dev->targets, free); dev->targets = NULL; } else if (!rc) { crm_info("Refreshing port list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = parse_host_list(output); dev->targets_age = time(NULL); } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (string_in_list(dev->targets, alias)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t *device) { char *key = NULL; char *value = NULL; GHashTableIter gIter; stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup || safe_str_neq(dup->agent, device->agent)) { return NULL; } g_hash_table_iter_init(&gIter, device->params); while (g_hash_table_iter_next(&gIter, (void **) &key, (void **) &value)) { char *other_value = g_hash_table_lookup(dup->params, key); if (!other_value || safe_str_neq(other_value, value)) { return NULL; } } return dup; } int stonith_device_register(xmlNode *msg, const char **desc, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); if ((dup = device_has_duplicate(device))) { crm_notice("Device '%s' already existed in device list (%d active devices)", device->id, g_hash_table_size(device_list)); free_device(device); device = dup; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. 
* Otherwise the pending ops will be reported as failures */ device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); } if(desc) { *desc = device->id; } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } int stonith_device_remove(const char *id, gboolean from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); if (!device) { crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); return pcmk_ok; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } return pcmk_ok; } static int count_active_levels(stonith_topology_t *tp) { int lpc = 0; int count = 0; for(lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if(tp->levels[lpc] != NULL) { count++; } } return count; } void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for(lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if(tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->node); free(tp); } int stonith_level_register(xmlNode *msg, char **desc) { int id = 0; int rc = pcmk_ok; xmlNode *child = NULL; xmlNode *level = get_xpath_object("//"F_STONITH_LEVEL, msg, LOG_ERR); const char *node = crm_element_value(level, F_STONITH_TARGET); stonith_topology_t *tp = g_hash_table_lookup(topology, node); crm_element_value_int(level, XML_ATTR_ID, &id); if(desc) { *desc = g_strdup_printf("%s[%d]", node, id); } if(id <= 0 || id >= ST_LEVEL_MAX) { return -EINVAL; } if(tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->node = strdup(node); g_hash_table_replace(topology, tp->node, tp); crm_trace("Added %s to the topology (%d active entries)", node, g_hash_table_size(topology)); } if(tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry (%d active entries)", node, id, count_active_levels(tp)); } for (child = __xml_first_child(level); child != NULL; child = __xml_next(child)) { const char *device = ID(child); crm_trace("Adding device '%s' for %s (%d)", device, node, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } crm_info("Node %s has %d active fencing levels", node, count_active_levels(tp)); return rc; } int stonith_level_remove(xmlNode *msg, char **desc) { int id = 0; xmlNode *level = get_xpath_object("//"F_STONITH_LEVEL, msg, LOG_ERR); const char *node = crm_element_value(level, F_STONITH_TARGET); stonith_topology_t *tp = g_hash_table_lookup(topology, node); if(desc) { *desc = g_strdup_printf("%s[%d]", node, id); } crm_element_value_int(level, XML_ATTR_ID, &id); if(tp == NULL) { crm_info("Node %s not found (%d active entries)", node, g_hash_table_size(topology)); return pcmk_ok; } else if(id < 0 || id >= ST_LEVEL_MAX) { return -EINVAL; } if(id == 0 && g_hash_table_remove(topology, node)) { crm_info("Removed all %s related entries from the topology (%d active entries)", node, g_hash_table_size(topology)); } else if(id > 0 && tp->levels[id] != NULL) { g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; crm_info("Removed 
entry '%d' from %s's topology (%d active entries remaining)", id, node, count_active_levels(tp)); } return pcmk_ok; } static int stonith_device_action(xmlNode *msg, char **output) { int rc = pcmk_ok; xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if(id) { crm_trace("Looking for '%s'", id); device = g_hash_table_lookup(device_list, id); } if(device) { cmd = create_async_command(msg); if(cmd == NULL) { free_device(device); return -EPROTO; } schedule_stonith_command(cmd, device); rc = -EINPROGRESS; } else { crm_info("Device %s not found", id?id:""); rc = -ENODEV; } return rc; } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { crm_debug("Finished Search. %d devices can perform action (%s) on node %s", g_list_length(search->capable), search->action ? search->action : "", search->host ? search->host : ""); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = NULL; const char *host = search->host; const char *alias = host; CRM_LOG_ASSERT(dev != NULL); if(dev == NULL) { goto search_report_results; } else if(host == NULL) { can = TRUE; goto search_report_results; } if (dev->on_target_actions && search->action && strstr(dev->on_target_actions, search->action) && safe_str_neq(host, stonith_our_uname)) { /* this device can only execute this action on the target node */ goto search_report_results; } if(g_hash_table_lookup(dev->aliases, host)) { alias = g_hash_table_lookup(dev->aliases, host); } check_type = target_list_type(dev); if(safe_str_eq(check_type, "none")) { can = TRUE; } else if(safe_str_eq(check_type, "static-list")) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if(string_in_list(dev->targets, host)) { can = TRUE; } else if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if(safe_str_eq(check_type, "dynamic-list")) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { schedule_internal_command(dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (string_in_list(dev->targets, alias)) { can = TRUE; } } else if(safe_str_eq(check_type, "status")) { schedule_internal_command(dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Unknown check type: %s", check_type); } if(safe_str_eq(host, alias)) { crm_info("%s can%s fence %s: %s", dev->id, can?"":" not", host, check_type); } else { crm_info("%s can%s fence %s (aka. 
'%s'): %s", dev->id, can?"":" not", host, alias, check_type); } search_report_results: search_devices_record_result(search, dev?dev->id:NULL, can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, void *user_data, void (*callback)(GList *devices, void *user_data)) { struct device_search_s *search; int per_device_timeout = DEFAULT_QUERY_TIMEOUT; int devices_needing_async_query = 0; char *key = NULL; const char *check_type = NULL; GHashTableIter gIter; stonith_device_t *device = NULL; if (!g_hash_table_size(device_list)) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { callback(NULL, user_data); return; } g_hash_table_iter_init(&gIter, device_list); while (g_hash_table_iter_next(&gIter, (void **) &key, (void **) &device)) { check_type = target_list_type(device); if (safe_str_eq(check_type, "status") || safe_str_eq(check_type, "dynamic-list")) { devices_needing_async_query++; } } /* If we have devices that require an async event in order to know what * nodes they can fence, we have to give the events a timeout. The total * query timeout is divided among those events. */ if (devices_needing_async_query) { per_device_timeout = timeout / devices_needing_async_query; if (!per_device_timeout) { crm_err("stonith-timeout duration %d is too low, raise the duration to %d seconds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); per_device_timeout = DEFAULT_QUERY_TIMEOUT; } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) { crm_notice("stonith-timeout duration %d is low for the current configuration. Consider raising it to %d seconds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); } } search->host = host ? strdup(host) : NULL; search->action = action ? strdup(action) : NULL; search->per_device_timeout = per_device_timeout; /* We are guaranteed this many replies. Even if a device gets * unregistered some how during the async search, we will get * the correct number of replies. */ search->replies_needed = g_hash_table_size(device_list); search->callback = callback; search->user_data = user_data; /* kick off the search */ crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s", search->replies_needed, search->action ? search->action : "", search->host ? 
search->host : ""); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; static void stonith_query_capable_device_cb(GList *devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GListPtr lpc = NULL; /* Pack the results into data */ list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(list, F_STONITH_TARGET, query->target); for(lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); int action_specific_timeout; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; action_specific_timeout = get_action_timeout(device, query->action, 0); dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); if (action_specific_timeout) { crm_xml_add_int(dev, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, "st-available-devices", available_devices); if (query->target) { crm_debug("Found %d matching devices for '%s'", available_devices, query->target); } else { crm_debug("%d devices installed", available_devices); } if (list != NULL) { crm_trace("Attaching query list output"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id); free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } static void stonith_query(xmlNode *msg, const char *remote_peer, const char *client_id, int call_options) { struct st_query_data *query = NULL; const char *action = NULL; const char *target = NULL; int timeout = 0; xmlNode *dev = get_xpath_object("//@"F_STONITH_ACTION, msg, LOG_DEBUG_3); + crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout); if(dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); - crm_element_value_int(dev, F_STONITH_TIMEOUT, &timeout); if(device && safe_str_eq(device, "manual_ack")) { /* No query or reply necessary */ return; } } crm_log_xml_debug(msg, "Query"); query = calloc(1, sizeof(struct st_query_data)); query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok); query->remote_peer = remote_peer ? strdup(remote_peer) : NULL; query->client_id = client_id ? strdup(client_id) : NULL; query->target = target ? strdup(target) : NULL; query->action = action ? 
strdup(action) : NULL; query->call_options = call_options; get_capable_devices(target, action, timeout, query, stonith_query_capable_device_cb); } #define ST_LOG_OUTPUT_MAX 512 static void log_operation(async_command_t *cmd, int rc, int pid, const char *next, const char *output) { if(rc == 0) { next = NULL; } if(cmd->victim != NULL) { do_crm_log(rc==0?LOG_NOTICE:LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, rc, pcmk_strerror(rc), next?". Trying: ":"", next?next:""); } else { do_crm_log_unlikely(rc==0?LOG_DEBUG:LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->device, rc, pcmk_strerror(rc), next?". Trying: ":"", next?next:""); } if(output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = g_strdup_printf("%s:%d", cmd->device, pid); crm_log_output(rc==0?LOG_INFO:LOG_WARNING, prefix, output); g_free(prefix); } } static void stonith_send_async_reply(async_command_t *cmd, const char *output, int rc, GPid pid) { xmlNode *reply = NULL; gboolean bcast = FALSE; reply = stonith_construct_async_reply(cmd, output, NULL, rc); if(safe_str_eq(cmd->action, "metadata")) { /* Too verbose to log */ crm_trace("Metadata query for %s", cmd->device); output = NULL; } else if(crm_str_eq(cmd->action, "monitor", TRUE) || crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) { crm_trace("Never broadcast %s replies", cmd->action); } else if(!stand_alone && safe_str_eq(cmd->origin, cmd->victim)) { crm_trace("Broadcast %s reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output); crm_log_xml_trace(reply, "Reply"); if(bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if(cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call)?"":"a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if(stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); } free_xml(reply); } static void cancel_stonith_command(async_command_t *cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled %s on %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } #define READ_MAX 500 static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data) { stonith_device_t *device = NULL; async_command_t *cmd = user_data; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(cmd != NULL, return); active_children--; /* The device is ready to do something 
else now */ device = g_hash_table_lookup(device_list, cmd->device); if(device) { device->active_pid = 0; if (rc == pcmk_ok && (safe_str_eq(cmd->action, "list") || safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } crm_trace("Operation %s on %s completed with rc=%d (%d remaining)", cmd->action, cmd->device, rc, g_list_length(cmd->device_next)); if(rc != 0 && cmd->device_next) { stonith_device_t *dev = g_hash_table_lookup(device_list, cmd->device_next->data); if (dev) { log_operation(cmd, rc, pid, dev->id, output); cmd->device_next = cmd->device_next->next; schedule_stonith_command(cmd, dev); /* Prevent cmd from being freed */ cmd = NULL; goto done; } } if(rc > 0) { rc = -pcmk_err_generic; } stonith_send_async_reply(cmd, output, rc, pid); if(rc != 0) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if(cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if(safe_str_eq(cmd->client, cmd_other->client) || safe_str_neq(cmd->victim, cmd_other->victim) || safe_str_neq(cmd->action, cmd_other->action) || safe_str_neq(cmd->device, cmd_other->device)) { continue; } crm_notice("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); stonith_send_async_reply(cmd_other, output, rc, pid); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = a; if(dev_a->priority > dev_b->priority) { return -1; } else if(dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList *devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim); if (g_list_length(devices) > 0) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); if (device) { cmd->device_list = devices; cmd->device_next = devices->next; devices = NULL; /* list owned by cmd now */ } } /* we have a device, schedule it for fencing. */ if (device) { schedule_stonith_command(cmd, device); /* in progress */ return; } /* no device found! 
*/ stonith_send_async_reply(cmd, NULL, -EHOSTUNREACH, 0); free_async_command(cmd); g_list_free_full(devices, free); } static int stonith_fence(xmlNode *msg) { const char *device_id = NULL; int rc = -EHOSTUNREACH; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); if(cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if(device_id) { device = g_hash_table_lookup(device_list, device_id); if(device == NULL) { crm_err("Requested device '%s' is not available", device_id); } else { schedule_stonith_command(cmd, device); rc = -EINPROGRESS; } } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if(cmd->options & st_opt_cs_nodeid) { int nodeid = crm_atoi(host, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); if(node) { host = node->uname; } } get_capable_devices(host, cmd->action, cmd->default_timeout, cmd, stonith_fence_get_devices_cb); rc = -EINPROGRESS; } return rc; } xmlNode *stonith_construct_reply(xmlNode *request, const char *output, xmlNode *data, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); for(lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if(data != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } static xmlNode * stonith_construct_async_reply(async_command_t *cmd, const char *output, xmlNode *data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if(data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } /*! * \internal * \brief Determine if we need to use an alternate node to * fence the target. 
If so return that node's uname * * \retval NULL, no alternate host * \retval uname, uname of alternate host to use */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; if(g_hash_table_lookup(topology, target) && safe_str_eq(target, stonith_our_uname)) { GHashTableIter gIter; crm_node_t *entry = NULL; int membership = crm_proc_plugin | crm_proc_heartbeat | crm_proc_cpg; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if(entry->uname && (entry->processes & membership) && safe_str_neq(entry->uname, target)) { alternate_host = entry->uname; break; } } if(alternate_host == NULL) { crm_err("No alternate host available to handle complex self fencing request"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static void stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer, const char *client_id) { if (remote_peer) { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, client_id, call_options & st_opt_sync_call, remote_peer!=NULL); } } static int handle_request(stonith_client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; xmlNode *data = NULL; xmlNode *reply = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if(is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if(crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); crm_ipcs_send(client->channel, id, reply, FALSE); client->request_id = 0; free_xml(reply); return 0; } else if(crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { rc = stonith_device_action(request, &output); } else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if(crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } stonith_query(request, remote_peer, client_id, call_options); return 0; } else if(crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if(flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->flags |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if(flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->flags |= get_stonith_flag(flag_name); } if(flags & crm_ipc_client_response) { crm_ipcs_send_ack(client->channel, id, "ack", 
__FUNCTION__, __LINE__); client->request_id = 0; } return 0; } else if(crm_str_eq(op, STONITH_OP_RELAY, TRUE)) { xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s", stonith_our_uname, client ? client->name : remote_peer, crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if(initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if(crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { if(remote_peer || stand_alone) { rc = stonith_fence(request); } else if(call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if(client) { int tolerance = 0; crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'", client->name, client->id, action, target, device?device:"(any)"); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if(stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device?device:"(any)"); } alternate_host = check_alternate_host(target); if(alternate_host && client) { crm_notice("Forwarding complex self fencing request to peer %s", alternate_host); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if(initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) { rc = stonith_fence_history(request, &data); } else if(crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { const char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_device_register(request, &id, FALSE); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(device_list)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if(crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, request, LOG_ERR); const char *id = crm_element_value(dev, XML_ATTR_ID); xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_device_remove(id, FALSE); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(device_list)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if(crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) { char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_level_register(request, &id); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if(crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) { char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_level_remove(request, &id); crm_xml_add(notify_data, 
F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if(crm_str_eq(op, STONITH_OP_CONFIRM, TRUE)) { async_command_t *cmd = create_async_command(request); xmlNode *reply = stonith_construct_async_reply(cmd, NULL, NULL, 0); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_notice("Broadcasting manual fencing confirmation for node %s", cmd->victim); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); free_async_command(cmd); free_xml(reply); } else { crm_err("Unknown %s from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } done: /* Always reply unles the request is in process still. * If in progress, a reply will happen async after the request * processing is finished */ if (rc != -EINPROGRESS) { reply = stonith_construct_reply(request, output, data, rc); stonith_send_reply(reply, call_options, remote_peer, client_id); } free(output); free_xml(data); free_xml(reply); return rc; } static void handle_reply(stonith_client_t *client, xmlNode *request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if(crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { process_remote_stonith_query(request); } else if(crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { process_remote_stonith_exec(request); } else if(crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(stonith_client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; const char *op = crm_element_value(request, F_STONITH_OPERATION); if(get_xpath_object("//"T_STONITH_REPLY, request, LOG_DEBUG_3)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s from %s (%16x)", op, is_reply?" reply":"", client?client->name:remote_peer, call_options); if(is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } do_crm_log_unlikely(rc>0?LOG_DEBUG:LOG_INFO,"Processed %s%s from %s: %s (%d)", op, is_reply?" reply":"", client?client->name:remote_peer, rc>0?"":pcmk_strerror(rc), rc); } diff --git a/fencing/regression.py.in b/fencing/regression.py.in index 835b6ec27e..851ae17a74 100644 --- a/fencing/regression.py.in +++ b/fencing/regression.py.in @@ -1,1029 +1,1029 @@ #!/usr/bin/python # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
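# --- Editor's note (not part of the patch) ---------------------------------
# A minimal Python model of the duplicate-request merge performed in
# st_child_done() above, which the cpg_custom_merge_* tests further below
# exercise.  Names (PendingCmd, can_merge) are illustrative only; the real
# daemon walks cmd_list and compares the client, victim, action and device
# fields of each pending command against the one that just completed.

from collections import namedtuple

PendingCmd = namedtuple("PendingCmd", "client victim action device")

def can_merge(finished, pending):
    """A pending command can reuse the finished command's result when it
    came from a different client but requests the same action on the same
    victim with the same device."""
    return (finished.client != pending.client
            and finished.victim == pending.victim
            and finished.action == pending.action
            and finished.device == pending.device)

if __name__ == "__main__":
    done = PendingCmd("crmd.node1", "node3", "off", "true1")
    dup = PendingCmd("stonith_admin.4242", "node3", "off", "true1")
    assert can_merge(done, dup)        # merged: one execution, two replies
    assert not can_merge(done, done)   # same client connection: not merged
# ---------------------------------------------------------------------------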
import os import sys import subprocess import shlex import time def output_from_command(command): test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) test.wait() return test.communicate()[0].split("\n") class Test: def __init__(self, name, description, verbose = 0, with_cpg = 0): self.name = name self.description = description self.cmds = [] self.verbose = verbose self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = 0; self.stonith_options = "-s" self.enable_corosync = 0 if with_cpg: self.stonith_options = "-c" self.enable_corosync = 1 self.stonith_process = None self.stonith_output = "" self.stonith_patterns = [] self.negative_stonith_patterns = [] self.executed = 0 rsc_classes = output_from_command("crm_resource --list-standards") def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None): self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, } ) def stop_pacemaker(self): cmd = shlex.split("killall -9 -q pacemakerd") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() def start_environment(self): ### make sure we are in full control here ### self.stop_pacemaker() cmd = shlex.split("killall -9 -q stonithd") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() if self.verbose: print "Starting stonithd with %s" % self.stonith_options self.stonith_process = subprocess.Popen( shlex.split("@CRM_DAEMON_DIR@/stonithd %s -V" % self.stonith_options), stdout=subprocess.PIPE, stderr=subprocess.PIPE) time.sleep(1) def clean_environment(self): if self.stonith_process: self.stonith_process.terminate() self.stonith_output = self.stonith_process.communicate()[1] self.stonith_process = None if self.verbose: print self.stonith_output def add_stonith_log_pattern(self, pattern): self.stonith_patterns.append(pattern) def add_stonith_negative_log_pattern(self, pattern): self.negative_stonith_patterns.append(pattern) def add_cmd(self, cmd, args): self.__new_cmd(cmd, args, 0, "") def add_cmd_no_wait(self, cmd, args): self.__new_cmd(cmd, args, 0, "", 1) def add_cmd_check_stdout(self, cmd, args, match, no_match = ""): self.__new_cmd(cmd, args, 0, match, 0, no_match) def add_expected_fail_cmd(self, cmd, args, exitcode = 255): self.__new_cmd(cmd, args, exitcode, "") def get_exitcode(self): return self.result_exitcode def print_result(self, filler): print "%s%s" % (filler, self.result_txt) def run_cmd(self, args): cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print "\n\nRunning: "+" ".join(cmd) test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if args['kill']: if self.verbose: print "Also running: "+args['kill'] subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return 0 output_res = test.communicate() output = output_res[0] + output_res[1] if self.verbose: print output if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: test.returncode = -2 print "STDOUT string '%s' was not found in cmd output: %s" % (args['stdout_match'], output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: test.returncode = -2 print "STDOUT string '%s' was found in cmd output: %s" % (args['stdout_negative_match'], output) return test.returncode; def count_negative_matches(self, outline): count = 0 for line in 
self.negative_stonith_patterns: if outline.count(line): count = 1 if self.verbose: print "This pattern should not have matched = '%s" % (line) return count def match_stonith_patterns(self): negative_matches = 0 cur = 0 pats = self.stonith_patterns total_patterns = len(self.stonith_patterns) if len(self.stonith_patterns) == 0: return for line in self.stonith_output.split("\n"): negative_matches = negative_matches + self.count_negative_matches(line) if len(pats) == 0: continue cur = -1 for p in pats: cur = cur + 1 if line.count(pats[cur]): del pats[cur] break if len(pats) > 0 or negative_matches: if self.verbose: for p in pats: print "Pattern Not Matched = '%s'" % p self.result_txt = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." % (self.name, len(pats), total_patterns, negative_matches) self.result_exitcode = -1 def run(self): res = 0 i = 1 self.start_environment() if self.verbose: print "\n--- START TEST - %s" % self.name self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = 0 for cmd in self.cmds: res = self.run_cmd(cmd) if res != cmd['expected_exitcode']: print "Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode']) self.result_txt = "FAILURE - '%s' failed at step %d. Command: %s %s" % (self.name, i, cmd['cmd'], cmd['args']) self.result_exitcode = -1 break else: if self.verbose: print "Step %d SUCCESS" % (i) i = i + 1 self.clean_environment() if self.result_exitcode == 0: self.match_stonith_patterns() print self.result_txt if self.verbose: print "--- END TEST - %s\n" % self.name self.executed = 1 return res class Tests: def __init__(self, verbose = 0): self.tests = [] self.verbose = verbose self.autogen_corosync_cfg = 0 if not os.path.exists("/etc/corosync/corosync.conf"): self.autogen_corosync_cfg = 1 def new_test(self, name, description, with_cpg = 0): test = Test(name, description, self.verbose, with_cpg) self.tests.append(test) return test def print_list(self): print "\n==== %d TESTS FOUND ====" % (len(self.tests)) print "%35s - %s" % ("TEST NAME", "TEST DESCRIPTION") print "%35s - %s" % ("--------------------", "--------------------") for test in self.tests: print "%35s - %s" % (test.name, test.description) print "==== END OF LIST ====\n" def start_corosync(self): if self.verbose: print "Starting corosync" test = subprocess.Popen("corosync", stdout=subprocess.PIPE) test.wait() time.sleep(10) def stop_corosync(self): cmd = shlex.split("killall -9 -q corosync") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() def run_single(self, name): for test in self.tests: if test.name == name: test.run() break; def run_tests_matching(self, pattern): for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_cpg_only(self): for test in self.tests: if test.enable_corosync: test.run() def run_no_cpg(self): for test in self.tests: if not test.enable_corosync: test.run() def run_tests(self): for test in self.tests: test.run() def exit(self): for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: sys.exit(-1) sys.exit(0) def print_results(self): failures = 0; success = 0; print "\n\n======= FINAL RESULTS ==========" print "\n--- FAILURE RESULTS:" for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print " None" print "\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures) def build_api_sanity_tests(self): verbose_arg = "" if 
self.verbose: verbose_arg = "-V" test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-t %s" % (verbose_arg)) test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-m %s" % (verbose_arg)) def build_custom_timeout_tests(self): # custom timeout without topology test = self.new_test("cpg_custom_timeout_1", "Verify per device timeouts work as expected without using topology.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4\"") test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4 = 7 test.add_stonith_log_pattern("remote op timeout set to 7") # custom timeout _WITH_ topology test = self.new_test("cpg_custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4000\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2") test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4000 = 4003 test.add_stonith_log_pattern("remote op timeout set to 4003") def build_fence_merge_tests(self): ### Simple test that overlapping fencing operations get merged test = self.new_test("cpg_custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### one merger will happen test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur test = self.new_test("cpg_custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") 
test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur with topologies used test = self.new_test("cpg_custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test = self.new_test("cpg_custom_no_merge", "Verify differing fencing operations are not merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3 node2\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") test.add_stonith_negative_log_pattern("Merging stonith action off for node node3 originating from client") def build_standalone_tests(self): test_types = [ { "prefix" : "standalone" , "use_cpg" : 0, }, { "prefix" : "cpg" , "use_cpg" : 1, }, ] # test what happens when all devices timeout for test_type in test_types: test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a 
fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false3 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 194) if test_type["use_cpg"] == 1: test.add_stonith_log_pattern("remote op timeout set to 6") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -62") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -62") test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: -62") # test what happens when multiple devices can fence a node, but the first device fails. for test_type in test_types: test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2") if test_type["use_cpg"] == 1: test.add_stonith_log_pattern("remote op timeout set to 6") # simple topology test for one device for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_simple" % test_type["prefix"], "Verify all fencing devices at a level are used.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("remote op timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level has multiple devices. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_device_fails" % test_type["prefix"], "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("remote op timeout set to 4") test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -62") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level fails. 
for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("remote op timeout set to 12") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -62") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -62") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # test what happens when the first fencing level had devices that no one has registered for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], "Verify topology can continue with missing devices.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true2 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 2") # Test what happens if multiple fencing levels are defined, and then the first one is removed. 
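# --- Editor's note (not part of the patch) ---------------------------------
# The "remote op timeout set to N" patterns asserted above and below follow a
# rule that can be inferred from the expected values: every device that may be
# tried contributes either its action-specific timeout (pcmk_off_timeout) or,
# failing that, the base timeout passed with -t.  This is an illustrative
# sketch of that arithmetic, not the daemon's own code.

def expected_remote_timeout(devices, base_timeout):
    """devices: list of (name, action_specific_timeout_or_None) tuples."""
    return sum(t if t is not None else base_timeout for _, t in devices)

if __name__ == "__main__":
    # cpg_custom_timeout_1: false1 uses the base of 2, true1=1, false2=4 -> 7
    assert expected_remote_timeout(
        [("false1", None), ("true1", 1), ("false2", 4)], base_timeout=2) == 7
    # *_topology_multi_level_fails: six devices at the base of 2 -> 12
    assert expected_remote_timeout([("d%d" % i, None) for i in range(6)], 2) == 12
    # *_topology_level_removal: level 2 removed, four devices remain -> 8
    assert expected_remote_timeout([("d%d" % i, None) for i in range(4)], 2) == 8
# ---------------------------------------------------------------------------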
for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_removal" % test_type["prefix"], "Verify level removal works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit. test.add_cmd("stonith_admin", "-d node3 -i 2") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("remote op timeout set to 8") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -62") test.add_stonith_negative_log_pattern("for host 'node3' with device 'false2' returned: -62") test.add_stonith_negative_log_pattern("for host 'node3' with device 'false2' returned: -1001") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # test the stonith builds the correct list of devices that can fence a node. 
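# --- Editor's note (not part of the patch) ---------------------------------
# The %s_list_devices tests below check which registered devices are reported
# as able to fence a given host.  With pcmk_host_list (a static host check)
# that reduces to a membership test; this toy model mirrors the expected
# stdout matches (true2/true3 can fence node1, true1 cannot).  It deliberately
# ignores the status/dynamic-list checks, which query the agent instead.

def capable_devices(devices, host):
    """devices: dict of device name -> list of hosts it may fence."""
    return sorted(name for name, hosts in devices.items() if host in hosts)

if __name__ == "__main__":
    registered = {
        "true1": ["node3"],
        "true2": ["node1", "node2", "node3"],
        "true3": ["node1", "node2", "node3"],
    }
    assert capable_devices(registered, "node1") == ["true2", "true3"]
# ---------------------------------------------------------------------------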
for test_type in test_types: test = self.new_test("%s_list_devices" % test_type["prefix"], "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_true -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1") # simple test of device monitor for test_type in test_types: test = self.new_test("%s_monitor" % test_type["prefix"], "Verify device is reachable", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-Q false1") test.add_expected_fail_cmd("stonith_admin", "-Q true2", 237) # Verify monitor occurs for duration of timeout period on failure for test_type in test_types: test = self.new_test("%s_monitor_timeout" % test_type["prefix"], "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 23) test.add_stonith_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries for test_type in test_types: test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_monitor_fail -o \"pcmk_host_list=node3\"") test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15", 23) - test.add_stonith_log_pattern("Attempt 10 to execute") + test.add_stonith_log_pattern("Attempted to execute agent fence_dummy_monitor_fail (list) the maximum number of times") # simple register test for test_type in test_types: test = self.new_test("%s_register" % test_type["prefix"], "Verify devices can be registered and un-registered", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) # simple reboot test for test_type in test_types: test = self.new_test("%s_reboot" % test_type["prefix"], "Verify devices can be rebooted", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-B node3 -t 2") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) # test fencing history. 
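# --- Editor's note (not part of the patch) ---------------------------------
# The %s_monitor_timeout tests above rely on the daemon retrying a failing
# monitor/list action while the operation timeout has not expired, and giving
# up once a maximum number of attempts is reached (the new log pattern).  A
# rough sketch of that control flow; MAX_ATTEMPTS and RETRY_DELAY are
# placeholders for illustration, not values taken from the patch.

import time

MAX_ATTEMPTS = 10    # hypothetical cap, only to illustrate the second test
RETRY_DELAY = 0.5    # seconds between attempts (illustrative)

def retry_action(run_once, timeout):
    """Retry run_once() until it succeeds, the timeout expires, or the
    attempt cap is hit.  Returns (success, attempts_made)."""
    deadline = time.time() + timeout
    attempts = 0
    while attempts < MAX_ATTEMPTS and time.time() < deadline:
        attempts += 1
        if run_once():
            return True, attempts
        time.sleep(RETRY_DELAY)
    return False, attempts

if __name__ == "__main__":
    ok, attempts = retry_action(lambda: False, timeout=2)
    assert ok is False and attempts >= 1
# ---------------------------------------------------------------------------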
for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_fence_history" % test_type["prefix"], "Verify last fencing operation is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2 -V") test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "") # simple test of dynamic list query for test_type in test_types: test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true") test.add_cmd("stonith_admin", "-R true2 -a fence_true") test.add_cmd("stonith_admin", "-R true3 -a fence_true") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # fence using dynamic list query for test_type in test_types: test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true") test.add_cmd("stonith_admin", "-R true2 -a fence_true") test.add_cmd("stonith_admin", "-R true3 -a fence_true") test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V"); # simple test of query using status action for test_type in test_types: test = self.new_test("%s_status_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true2 -a fence_true -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true3 -a fence_true -o \"pcmk_host_check=status\"") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") def build_nodeid_tests(self): our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("cpg_supply_nodeid", "Verify nodeid is given when fence agent has nodeid as parameter", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("cpg_do_not_supply_nodeid", "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ### verify nodeid use doesn't explode standalone mode test = self.new_test("standalone_do_not_supply_nodeid", "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) def build_unfence_tests(self): our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] ### Simple test unfencing works test = 
self.new_test("cpg_unfence_simple", "Verify simple unfencing.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_true -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-U node3 -t 3") ### verify unfencing using on_target device test = self.new_test("cpg_unfence_on_target_1", "Verify unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify failure of unfencing using on_target device test = self.new_test("cpg_unfence_on_target_2", "Verify failure unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 194) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology test = self.new_test("cpg_unfence_on_target_3", "Verify unfencing with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 3 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology fails test = self.new_test("cpg_unfence_on_target_4", "Verify unfencing failure with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v false2") test.add_cmd("stonith_admin", "-r node_fake -i 3 -v true1") test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", 194) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify use of on_target = true for "on" action does not interfere with "off" action test = self.new_test("cpg_unfence_on_target_ignored", "Verify on target is ignored for other actions", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_false -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-R false2 -a fence_false -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_metadata_helper -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v false2") test.add_cmd("stonith_admin", "-r node_fake -i 3 -v true1") 
test.add_cmd("stonith_admin", "-F node_fake -t 3") test.add_stonith_log_pattern("(on) to be executed on the target node") def setup_environment(self, use_corosync): if self.autogen_corosync_cfg and use_corosync: corosync_conf = (""" totem { version: 2 crypto_cipher: none crypto_hash: none nodeid: 101 secauth: off interface { ttl: 1 ringnumber: 0 mcastport: 6666 mcastaddr: 226.94.1.1 bindnetaddr: 127.0.0.1 } } logging { debug: off fileline: off to_syslog: no to_stderr: no syslog_facility: daemon timestamp: on to_logfile: yes logfile: /var/log/corosync.log logfile_priority: info } """) os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf)) if use_corosync: ### make sure we are in control ### self.stop_corosync() self.start_corosync() monitor_fail_agent = ("""#!/usr/bin/python import sys def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: sys.exit(-1); sys.exit(-1) if __name__ == "__main__": main() """) on_target_agent = ("""#!/usr/bin/python import sys def main(): for line in sys.stdin.readlines(): off_hit = 0 nodeid_found = 0 if line.count("monitor") > 0: sys.exit(0) if line.count("metadata") > 0: print '' print ' dummy description.' print ' http://www.example.com' print ' ' print ' ' print ' ' print ' ' print ' Fencing Action' print ' ' print ' ' print ' ' print ' Corosync nodeid of the fence victim' print ' ' print ' ' print ' ' print ' ' print ' Physical plug number or name of virtual machine' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print '' sys.exit(0) if line.count("on") > 0: sys.exit(0) if line.count("off") > 0: off_hit = 1 if line.count("nodeid") > 0: nodeid_found = 1 if off_hit and nodeid_found: sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) os.system("cat <<-END >>/usr/sbin/fence_dummy_metadata_helper\n%s\nEND" % (on_target_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_metadata_helper") os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor_fail\n%s\nEND" % (monitor_fail_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_monitor_fail") os.system("cp /usr/share/pacemaker/tests/cts/fence_false /usr/sbin/fence_false") os.system("cp /usr/share/pacemaker/tests/cts/fence_true /usr/sbin/fence_true") def cleanup_environment(self, use_corosync): if use_corosync: self.stop_corosync() if self.verbose and os.path.exists('/var/log/corosync.log'): print "Daemon output" f = open('/var/log/corosync.log', 'r') for line in f.readlines(): print line.strip() os.remove('/var/log/corosync.log') if self.autogen_corosync_cfg: os.system("rm -f /etc/corosync/corosync.conf") os.system("rm -f /usr/sbin/fence_dummy_metadata_helper") os.system("rm -f /usr/sbin/fence_dummy_monitor_fail") class TestOptions: def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['cpg-only'] = 0 self.options['no-cpg'] = 0 self.options['show-usage'] = 0 def build_options(self, argv): args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-n" or args[i] == "--no-cpg": self.options['no-cpg'] = 1 elif args[i] == "-c" or args[i] == "--cpg-only": self.options['cpg-only'] = 1 
elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): print "usage: " + sys.argv[0] + " [options]" print "If no options are provided, all tests will run" print "Options:" print "\t [--help | -h] Show usage" print "\t [--list-tests | -l] Print out all registered tests." print "\t [--cpg-only | -c] Only run tests that require corosync." print "\t [--no-cpg | -n] Only run tests that do not require corosync" print "\t [--run-only | -r 'testname'] Run a specific test" print "\t [--verbose | -V] Verbose output" print "\t [--run-only-pattern | -p 'string'] Run only tests containing the string value" print "\n\tExample: Run only the test 'start_top'" print "\t\t python ./regression.py --run-only start_stop" print "\n\tExample: Run only the tests with the string 'systemd' present in them" print "\t\t python ./regression.py --run-only-pattern systemd" def main(argv): o = TestOptions() o.build_options(argv) use_corosync = 1 tests = Tests(o.options['verbose']) tests.build_standalone_tests() tests.build_custom_timeout_tests() tests.build_api_sanity_tests() tests.build_fence_merge_tests() tests.build_unfence_tests() tests.build_nodeid_tests() if o.options['list-tests']: tests.print_list() sys.exit(0) elif o.options['show-usage']: o.show_usage() sys.exit(0) print "Starting ..." if o.options['no-cpg']: use_corosync = 0 tests.setup_environment(use_corosync) if o.options['run-only-pattern'] != "": tests.run_tests_matching(o.options['run-only-pattern']) tests.print_results() elif o.options['run-only'] != "": tests.run_single(o.options['run-only']) tests.print_results() elif o.options['no-cpg']: tests.run_no_cpg() tests.print_results() elif o.options['cpg-only']: tests.run_cpg_only() tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment(use_corosync) tests.exit() if __name__=="__main__": main(sys.argv) diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 2e2ca16e8b..5c2e68722a 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2290 +1,2292 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
 *
 * You should have received a copy of the GNU General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include <../lib/pengine/unpack.h>
#include <../pengine/pengine.h>
#include

/* GMainLoop *mainloop = NULL; */

void wait_for_refresh(int offset, const char *prefix, int msec);
void clean_up(int rc);
void crm_diff_update(const char *event, xmlNode * msg);
gboolean mon_refresh_display(gpointer user_data);
int cib_connect(gboolean full);
void mon_st_callback(stonith_t *st, stonith_event_t *e);

char *xml_file = NULL;
char *as_html_file = NULL;
int as_xml = 0;
char *pid_file = NULL;
char *snmp_target = NULL;
char *snmp_community = NULL;

gboolean as_console = TRUE;
gboolean simple_status = FALSE;
gboolean group_by_node = FALSE;
gboolean inactive_resources = FALSE;
gboolean web_cgi = FALSE;
int reconnect_msec = 5000;
gboolean daemonize = FALSE;
GMainLoop *mainloop = NULL;
guint timer_id = 0;
GList *attr_list = NULL;

const char *crm_mail_host = NULL;
const char *crm_mail_prefix = NULL;
const char *crm_mail_from = NULL;
const char *crm_mail_to = NULL;
const char *external_agent = NULL;
const char *external_recipient = NULL;

cib_t *cib = NULL;
stonith_t *st = NULL;
xmlNode *current_cib = NULL;

gboolean one_shot = FALSE;
gboolean has_warnings = FALSE;
gboolean print_failcount = FALSE;
gboolean print_operations = FALSE;
gboolean print_timing = FALSE;
gboolean print_nodes_attr = FALSE;
gboolean print_last_updated = TRUE;
gboolean print_last_change = TRUE;
gboolean print_tickets = FALSE;
gboolean watch_fencing = FALSE;

#define FILTER_STR {"shutdown", "terminate", "standby", "fail-count", \
                    "last-failure", "probe_complete", "#id", "#uname", \
                    "#is_dc", NULL}

gboolean log_diffs = FALSE;
gboolean log_updates = FALSE;

long last_refresh = 0;
crm_trigger_t *refresh_trigger = NULL;

/*
 * 1.3.6.1.4.1.32723 has been assigned to the project by IANA
 * http://www.iana.org/assignments/enterprise-numbers
 */
#define PACEMAKER_PREFIX "1.3.6.1.4.1.32723"
#define PACEMAKER_TRAP_PREFIX PACEMAKER_PREFIX ".1"

#define snmp_crm_trap_oid   PACEMAKER_TRAP_PREFIX
#define snmp_crm_oid_node   PACEMAKER_TRAP_PREFIX ".1"
#define snmp_crm_oid_rsc    PACEMAKER_TRAP_PREFIX ".2"
#define snmp_crm_oid_task   PACEMAKER_TRAP_PREFIX ".3"
#define snmp_crm_oid_desc   PACEMAKER_TRAP_PREFIX ".4"
#define snmp_crm_oid_status PACEMAKER_TRAP_PREFIX ".5"
#define snmp_crm_oid_rc     PACEMAKER_TRAP_PREFIX ".6"
#define snmp_crm_oid_trc    PACEMAKER_TRAP_PREFIX ".7"

#if CURSES_ENABLED
# define print_dot() if(as_console) {   \
        printw(".");                    \
        clrtoeol();                     \
        refresh();                      \
    } else {                            \
        fprintf(stdout, ".");           \
    }
#else
# define print_dot() fprintf(stdout, ".");
#endif

#if CURSES_ENABLED
# define print_as(fmt, args...) if(as_console) {   \
        printw(fmt, ##args);                        \
        clrtoeol();                                 \
        refresh();                                  \
    } else {                                        \
        fprintf(stdout, fmt, ##args);               \
    }
#else
# define print_as(fmt, args...)
fprintf(stdout, fmt, ##args); #endif static void blank_screen(void) { #if CURSES_ENABLED int lpc = 0; for (lpc = 0; lpc < LINES; lpc++) { move(lpc, 0); clrtoeol(); } move(0, 0); refresh(); #endif } static gboolean mon_timer_popped(gpointer data) { int rc = pcmk_ok; if (timer_id > 0) { g_source_remove(timer_id); } rc = cib_connect(TRUE); if (rc != pcmk_ok) { print_dot(); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return FALSE; } static void mon_cib_connection_destroy(gpointer user_data) { print_as("Connection to the CIB terminated\n"); if (cib) { print_as("Reconnecting..."); cib->cmds->signoff(cib); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return; } /* * Mainloop signal handler. */ static void mon_shutdown(int nsig) { clean_up(EX_OK); } #if ON_DARWIN # define sighandler_t sig_t #endif #if CURSES_ENABLED #ifndef HAVE_SIGHANDLER_T typedef void (*sighandler_t)(int); #endif static sighandler_t ncurses_winch_handler; static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); mainloop_set_trigger(refresh_trigger); } not_done--; } #endif int cib_connect(gboolean full) { int rc = pcmk_ok; static gboolean need_pass = TRUE; CRM_CHECK(cib != NULL, return -EINVAL); if (getenv("CIB_passwd") != NULL) { need_pass = FALSE; } if(watch_fencing && st == NULL) { st = stonith_api_new(); } if(watch_fencing && st->state == stonith_disconnected) { crm_trace("Connecting to stonith"); rc = st->cmds->connect(st, crm_system_name, NULL); if(rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback); } } if (cib->state != cib_connected_query && cib->state != cib_connected_command) { crm_trace("Connecting to the CIB"); if (as_console && need_pass && cib->variant == cib_remote) { need_pass = FALSE; print_as("Password:"); } rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != pcmk_ok) { return rc; } current_cib = get_cib_copy(cib); mon_refresh_display(NULL); if (full) { if (rc == pcmk_ok) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy); if (rc == -EPROTONOSUPPORT) { print_as("Notification setup failed, won't be able to reconnect after failure"); if (as_console) { sleep(2); } rc = pcmk_ok; } } if (rc == pcmk_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != pcmk_ok) { print_as("Notification setup failed, could not monitor CIB actions"); if (as_console) { sleep(2); } clean_up(-rc); } } } return rc; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"quiet", 0, 0, 'Q', "\tDisplay only essential output" }, {"-spacer-", 1, 0, '-', "\nModes:"}, {"as-html", 1, 0, 'h', "Write cluster status to the named html file"}, {"as-xml", 0, 0, 'X', "\tWrite cluster status as xml to stdout. 
This will enable one-shot mode."}, {"web-cgi", 0, 0, 'w', "\tWeb mode with output suitable for cgi"}, {"simple-status", 0, 0, 's', "Display the cluster status once as a simple one line output (suitable for nagios)"}, {"snmp-traps", 1, 0, 'S', "Send SNMP traps to this station", !ENABLE_SNMP}, {"snmp-community", 1, 0, 'C', "Specify community for SNMP traps(default is NULL)", !ENABLE_SNMP}, {"mail-to", 1, 0, 'T', "Send Mail alerts to this user. See also --mail-from, --mail-host, --mail-prefix", !ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "\nDisplay Options:"}, {"group-by-node", 0, 0, 'n', "\tGroup resources by node" }, {"inactive", 0, 0, 'r', "\tDisplay inactive resources" }, {"failcounts", 0, 0, 'f', "\tDisplay resource fail counts"}, {"operations", 0, 0, 'o', "\tDisplay resource operation history" }, {"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" }, {"tickets", 0, 0, 'c', "\t\tDisplay cluster tickets"}, {"watch-fencing", 0, 0, 'W', "\t\tListen for fencing events. For use with --external-agent, --mail-to and/or --snmp-traps where supported"}, {"show-node-attributes", 0, 0, 'A', "Display node attributes" }, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"interval", 1, 0, 'i', "\tUpdate frequency in seconds" }, {"one-shot", 0, 0, '1', "\tDisplay the cluster status once on the console and exit"}, {"disable-ncurses",0, 0, 'N', "\tDisable the use of ncurses", !CURSES_ENABLED}, {"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"}, {"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"}, {"mail-from", 1, 0, 'F', "\tMail alerts should come from the named user", !ENABLE_ESMTP}, {"mail-host", 1, 0, 'H', "\tMail alerts should be sent via the named host", !ENABLE_ESMTP}, {"mail-prefix", 1, 0, 'P', "Subjects for mail alerts should start with this string", !ENABLE_ESMTP}, {"external-agent", 1, 0, 'E', "A program to run when resource operations take place."}, {"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."}, {"xml-file", 1, 0, 'x', NULL, 1}, {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "Display the cluster status on the console with updates as they occur:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display the cluster status on the console just once then exit:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon -1", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display your cluster status, group resources by node, and include inactive resources in the list:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --group-by-node --inactive", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --as-html /path/to/docroot/filename.html", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon and export the current cluster status as xml to stdout, then exit.:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --as-xml", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send email alerts:", pcmk_option_paragraph|!ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com", pcmk_option_example|!ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send 
SNMP alerts:", pcmk_option_paragraph|!ENABLE_SNMP}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --snmp-traps snmptrapd.example.com", pcmk_option_example|!ENABLE_SNMP}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int argerr = 0; int exit_code = 0; int option_index = 0; pid_file = strdup("/tmp/ClusterMon.pid"); crm_log_cli_init("crm_mon"); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); #ifndef ON_DARWIN /* prevent zombies */ signal(SIGCLD, SIG_IGN); #endif if (strcmp(crm_system_name, "crm_mon.cgi") == 0) { web_cgi = TRUE; one_shot = TRUE; } while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'Q': print_last_updated = FALSE; print_last_change = FALSE; break; case 'i': reconnect_msec = crm_get_msec(optarg); break; case 'n': group_by_node = TRUE; break; case 'r': inactive_resources = TRUE; break; case 'W': watch_fencing = TRUE; break; case 'd': daemonize = TRUE; break; case 't': print_timing = TRUE; print_operations = TRUE; break; case 'o': print_operations = TRUE; break; case 'f': print_failcount = TRUE; break; case 'A': print_nodes_attr = TRUE; break; case 'c': print_tickets = TRUE; break; case 'p': free(pid_file); pid_file = strdup(optarg); break; case 'x': xml_file = strdup(optarg); one_shot = TRUE; break; case 'h': as_html_file = strdup(optarg); break; case 'X': as_xml = TRUE; one_shot = TRUE; break; case 'w': web_cgi = TRUE; one_shot = TRUE; break; case 's': simple_status = TRUE; one_shot = TRUE; break; case 'S': snmp_target = optarg; break; case 'T': crm_mail_to = optarg; break; case 'F': crm_mail_from = optarg; break; case 'H': crm_mail_host = optarg; break; case 'P': crm_mail_prefix = optarg; break; case 'E': external_agent = optarg; break; case 'e': external_recipient = optarg; break; case '1': one_shot = TRUE; break; case 'N': as_console = FALSE; break; case 'C': snmp_community = optarg; break; case '$': case '?': crm_help(flag, EX_OK); break; default: printf("Argument code 0%o (%c) is not (?yet?) 
supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', EX_USAGE); } if (one_shot) { as_console = FALSE; } else if (daemonize) { as_console = FALSE; crm_enable_stderr(FALSE); if (!as_html_file && !snmp_target && !crm_mail_to && !external_agent && !as_xml) { printf ("Looks like you forgot to specify one or more of: --as-html, --as-xml, --mail-to, --snmp-target, --external-agent\n"); crm_help('?', EX_USAGE); } crm_make_daemon(crm_system_name, TRUE, pid_file); } else if (as_console) { #if CURSES_ENABLED initscr(); cbreak(); noecho(); crm_enable_stderr(FALSE); #else one_shot = TRUE; as_console = FALSE; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } crm_info("Starting %s", crm_system_name); if (xml_file != NULL) { current_cib = filename2xml(xml_file); mon_refresh_display(NULL); return exit_code; } if (current_cib == NULL) { cib = cib_new(); if (!one_shot) { print_as("Attempting connection to the cluster..."); } do { exit_code = cib_connect(!one_shot); if (one_shot) { break; } else if (exit_code != pcmk_ok) { print_dot(); sleep(reconnect_msec / 1000); } } while (exit_code == -ENOTCONN); if (exit_code != pcmk_ok) { print_as("\nConnection to cluster failed: %s\n", pcmk_strerror(exit_code)); if (as_console) { sleep(2); } clean_up(-exit_code); } } if (one_shot) { return exit_code; } mainloop = g_main_new(FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (as_console) { ncurses_winch_handler = signal(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; } #endif refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_run(mainloop); g_main_destroy(mainloop); crm_info("Exiting %s", crm_system_name); clean_up(0); return 0; /* never reached */ } void wait_for_refresh(int offset, const char *prefix, int msec) { int lpc = msec / 1000; struct timespec sleept = { 1, 0 }; if (as_console == FALSE) { timer_id = g_timeout_add(msec, mon_timer_popped, NULL); return; } crm_notice("%sRefresh in %ds...", prefix ? prefix : "", lpc); while (lpc > 0) { #if CURSES_ENABLED move(offset, 0); /* printw("%sRefresh in \033[01;32m%ds\033[00m...", prefix?prefix:"", lpc); */ printw("%sRefresh in %ds...\n", prefix ? prefix : "", lpc); clrtoeol(); refresh(); #endif lpc--; if (lpc == 0) { timer_id = g_timeout_add(1000, mon_timer_popped, NULL); } else { if (nanosleep(&sleept, NULL) != 0) { return; } } } } #define mon_warn(fmt...) 
do { \ if (!has_warnings) { \ print_as("Warning:"); \ } else { \ print_as(","); \ } \ print_as(fmt); \ has_warnings = TRUE; \ } while(0) static int count_resources(pe_working_set_t * data_set, resource_t * rsc) { int count = 0; GListPtr gIter = NULL; if (rsc == NULL) { gIter = data_set->resources; } else if (rsc->children) { gIter = rsc->children; } else { return is_not_set(rsc->flags, pe_rsc_orphan); } for (; gIter != NULL; gIter = gIter->next) { count += count_resources(data_set, gIter->data); } return count; } static int print_simple_status(pe_working_set_t * data_set) { node_t *dc = NULL; GListPtr gIter = NULL; int nodes_online = 0; int nodes_standby = 0; dc = data_set->dc_node; if (dc == NULL) { mon_warn("No DC "); } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node->details->standby && node->details->online) { nodes_standby++; } else if (node->details->online) { nodes_online++; } else { mon_warn("offline node: %s", node->details->uname); } } if (!has_warnings) { print_as("Ok: %d nodes online", nodes_online); if (nodes_standby > 0) { print_as(", %d standby nodes", nodes_standby); } print_as(", %d resources configured", count_resources(data_set, NULL)); } print_as("\n"); return 0; } extern int get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set_t * data_set); static void print_date(time_t time) { int lpc = 0; char date_str[26]; asctime_r(localtime(&time), date_str); for (; lpc < 26; lpc++) { if (date_str[lpc] == '\n') { date_str[lpc] = 0; } } print_as("'%s'", date_str); } static void print_rsc_summary(pe_working_set_t * data_set, node_t * node, resource_t * rsc, gboolean all) { gboolean printed = FALSE; time_t last_failure = 0; char *fail_attr = crm_concat("fail-count", rsc->id, '-'); const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); int failcount = char2score(value); /* Get the true value, not the effective one from get_failcount() */ get_failcount(node, rsc, (int *)&last_failure, data_set); free(fail_attr); if (all || failcount || last_failure > 0) { printed = TRUE; print_as(" %s: migration-threshold=%d", rsc->id, rsc->migration_threshold); } if (failcount > 0) { printed = TRUE; print_as(" fail-count=%d", failcount); } if (last_failure > 0) { printed = TRUE; print_as(" last-failure="); print_date(last_failure); } if (printed) { print_as("\n"); } } static void print_rsc_history(pe_working_set_t * data_set, node_t * node, xmlNode * rsc_entry) { GListPtr gIter = NULL; GListPtr op_list = NULL; gboolean print_name = TRUE; GListPtr sorted_op_list = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); xmlNode *rsc_op = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_append(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *value = NULL; const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); int rc = crm_parse_int(op_rc, "0"); if (safe_str_eq(task, CRMD_ACTION_STATUS) && safe_str_eq(interval, "0")) { 
task = "probe"; } if (rc == 7 && safe_str_eq(task, "probe")) { continue; } else if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { continue; } if (print_name) { print_name = FALSE; if (rsc == NULL) { print_as("Orphan resource: %s", rsc_id); } else { print_rsc_summary(data_set, node, rsc, TRUE); } } print_as(" + (%s) %s:", call, task); if (safe_str_neq(interval, "0")) { print_as(" interval=%sms", interval); } if (print_timing) { int int_value; const char *attr = "last-rc-change"; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=", attr); print_date(int_value); } attr = "last-run"; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=", attr); print_date(int_value); } attr = "exec-time"; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } attr = "queue-time"; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } } print_as(" rc=%s (%s)\n", op_rc, lrmd_event_rc2str(rc)); } /* no need to free the contents */ g_list_free(sorted_op_list); } static void print_attr_msg(node_t * node, GListPtr rsc_list, const char *attrname, const char *attrvalue) { GListPtr gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); if (rsc->children != NULL) { print_attr_msg(node, rsc->children, attrname, attrvalue); } if (safe_str_eq(type, "ping") || safe_str_eq(type, "pingd")) { const char *name = "pingd"; const char *multiplier = NULL; char **host_list = NULL; int host_list_num = 0; int expected_score = 0; if (g_hash_table_lookup(rsc->meta, "name") != NULL) { name = g_hash_table_lookup(rsc->meta, "name"); } /* To identify the resource with the attribute name. */ if (safe_str_eq(name, attrname)) { int value = crm_parse_int(attrvalue, "0"); multiplier = g_hash_table_lookup(rsc->meta, "multiplier"); host_list = g_strsplit(g_hash_table_lookup(rsc->meta, "host_list"), " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); /* pingd multiplier is the same as the default value. */ expected_score = host_list_num * crm_parse_int(multiplier, "1"); /* pingd is abnormal score. 
*/ if (value <= 0) { print_as("\t: Connectivity is lost"); } else if (value < expected_score) { print_as("\t: Connectivity is degraded (Expected=%d)", expected_score); } } } } } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } static void create_attr_list(gpointer name, gpointer value, gpointer data) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return; } } attr_list = g_list_insert_sorted(attr_list, name, compare_attribute); } static void print_node_attribute(gpointer name, gpointer node_data) { const char *value = NULL; node_t *node = (node_t *) node_data; value = g_hash_table_lookup(node->details->attrs, name); print_as(" + %-32s\t: %-10s", (char *)name, value); print_attr_msg(node, node->details->running_rsc, name, value); print_as("\n"); } static void print_node_summary(pe_working_set_t * data_set, gboolean operations) { xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; xmlNode *node_state = NULL; xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); if (operations) { print_as("\nOperations:\n"); } else { print_as("\nMigration summary:\n"); } for (node_state = __xml_first_child(cib_status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); if (node == NULL || node->details->online == FALSE) { continue; } print_as("* Node %s: ", crm_element_value(node_state, XML_ATTR_UNAME)); print_as("\n"); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); for (rsc_entry = __xml_first_child(lrm_rsc); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { if (operations) { print_rsc_history(data_set, node, rsc_entry); } else { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc) { print_rsc_summary(data_set, node, rsc, FALSE); } else { print_as(" %s: orphan\n", rsc_id); } } } } } } } static void print_ticket(gpointer name, gpointer value, gpointer data) { ticket_t *ticket = (ticket_t *) value; print_as(" %s\t%s%10s", ticket->id, ticket->granted ? "granted":"revoked", ticket->standby ? " [standby]":""); if (ticket->last_granted > -1) { print_as(" last-granted="); print_date(ticket->last_granted); } print_as("\n"); return; } static void print_cluster_tickets(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); /* For recording the tickets that are referenced in rsc_ticket constraints * but have never been granted yet. 
*/ unpack_constraints(cib_constraints, data_set); print_as("\nTickets:\n"); g_hash_table_foreach(data_set->tickets, print_ticket, NULL); return; } static int print_status(pe_working_set_t * data_set) { static int updates = 0; GListPtr gIter = NULL; node_t *dc = NULL; char *since_epoch = NULL; char *online_nodes = NULL; char *offline_nodes = NULL; xmlNode *dc_version = NULL; xmlNode *quorum_node = NULL; xmlNode *stack = NULL; time_t a_time = time(NULL); int print_opts = pe_print_ncurses; const char *quorum_votes = "unknown"; if (as_console) { blank_screen(); } else { print_opts = pe_print_printf; } updates++; dc = data_set->dc_node; if (a_time == (time_t) - 1) { crm_perror(LOG_ERR, "set_node_tstamp(): Invalid time returned"); return 1; } since_epoch = ctime(&a_time); if (since_epoch != NULL && print_last_updated) { print_as("Last updated: %s", since_epoch); } if (print_last_change) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); print_as("Last change: %s", last_written ? last_written : ""); if (user) { print_as(" by %s", user); } if (client) { print_as(" via %s", client); } if (origin) { print_as(" on %s", origin); } print_as("\n"); } stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); if (stack) { print_as("Stack: %s\n", crm_element_value(stack, XML_NVPAIR_ATTR_VALUE)); } dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); if (dc == NULL) { print_as("Current DC: NONE\n"); } else { const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); if (safe_str_neq(dc->details->uname, dc->details->id)) { print_as("Current DC: %s (%s)", dc->details->uname, dc->details->id); } else { print_as("Current DC: %s", dc->details->uname); } print_as(" - partition %s quorum\n", crm_is_true(quorum) ? 
"with" : "WITHOUT"); if (dc_version) { print_as("Version: %s\n", crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)); } } quorum_node = get_xpath_object("//nvpair[@name='" XML_ATTR_EXPECTED_VOTES "']", data_set->input, LOG_DEBUG); if (quorum_node) { quorum_votes = crm_element_value(quorum_node, XML_NVPAIR_ATTR_VALUE); } print_as("%d Nodes configured, %s expected votes\n", g_list_length(data_set->nodes), quorum_votes); print_as("%d Resources configured.\n", count_resources(data_set, NULL)); print_as("\n\n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_mode = NULL; if (node->details->unclean) { if (node->details->online && node->details->unclean) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { node_mode = "standby"; } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->online) { node_mode = "online"; if (group_by_node == FALSE) { online_nodes = add_list_element(online_nodes, node->details->uname); continue; } } else { node_mode = "OFFLINE"; if (group_by_node == FALSE) { offline_nodes = add_list_element(offline_nodes, node->details->uname); continue; } } if (safe_str_eq(node->details->uname, node->details->id)) { print_as("Node %s: %s\n", node->details->uname, node_mode); } else { print_as("Node %s (%s): %s\n", node->details->uname, node->details->id, node_mode); } if (group_by_node) { GListPtr gIter2 = NULL; for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout); } } } if (online_nodes) { print_as("Online: [%s ]\n", online_nodes); free(online_nodes); } if (offline_nodes) { print_as("OFFLINE: [%s ]\n", offline_nodes); free(offline_nodes); } if (group_by_node == FALSE && inactive_resources) { print_as("\nFull list of resources:\n"); } else if (inactive_resources) { print_as("\nInactive resources:\n"); } if (group_by_node == FALSE || inactive_resources) { print_as("\n"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } } } if (print_nodes_attr) { print_as("\nNode Attributes:\n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node == NULL || node->details->online == FALSE) { continue; } attr_list = NULL; print_as("* Node %s:\n", node->details->uname); g_hash_table_foreach(node->details->attrs, create_attr_list, NULL); g_list_foreach(attr_list, print_node_attribute, node); } } if (print_operations || print_failcount) { print_node_summary(data_set, print_operations); } if (xml_has_children(data_set->failed)) { xmlNode *xml_op = NULL; print_as("\nFailed actions:\n"); for (xml_op = 
__xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { int val = 0; const char *id = ID(xml_op); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); const char *last = crm_element_value(xml_op, "last_run"); const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); val = crm_parse_int(status, "0"); print_as(" %s (node=%s, call=%s, rc=%s, status=%s", op_key ? op_key : id, node, call, rc, services_lrm_status_str(val)); if (last) { time_t run_at = crm_parse_int(last, "0"); print_as(", last-run=%s, queued=%sms, exec=%sms\n", ctime(&run_at), crm_element_value(xml_op, "exec_time"), crm_element_value(xml_op, "queue_time")); } val = crm_parse_int(rc, "0"); print_as("): %s\n", lrmd_event_rc2str(val)); } } if (print_tickets) { print_cluster_tickets(data_set); } #if CURSES_ENABLED if (as_console) { refresh(); } #endif return 0; } static int print_xml_status(pe_working_set_t * data_set) { FILE *stream = stdout; GListPtr gIter = NULL; node_t *dc = NULL; xmlNode *stack = NULL; xmlNode *quorum_node = NULL; const char *quorum_votes = "unknown"; dc = data_set->dc_node; fprintf(stream, "\n"); fprintf(stream, "\n", VERSION); /*** SUMMARY ***/ fprintf(stream, " \n"); if (print_last_updated) { time_t now = time(NULL); char *now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ fprintf(stream, " \n", now_str); } if (print_last_change) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); fprintf(stream, " \n", last_written ? last_written : "", user ? user : "", client ? client : "", origin ? origin : ""); } stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); if (stack) { fprintf(stream, " \n", crm_element_value(stack, XML_NVPAIR_ATTR_VALUE)); } if (!dc) { fprintf(stream, " \n"); } else { const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); const char *uname = dc->details->uname; const char *id = dc->details->id; xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); fprintf(stream, " \n", dc_version ? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : "", uname, id, quorum ? (crm_is_true(quorum) ? "true" : "false") : "false"); } quorum_node = get_xpath_object("//nvpair[@name='" XML_ATTR_EXPECTED_VOTES "']", data_set->input, LOG_DEBUG); if (quorum_node) { quorum_votes = crm_element_value(quorum_node, XML_NVPAIR_ATTR_VALUE); } fprintf(stream, " \n", g_list_length(data_set->nodes), quorum_votes); fprintf(stream, " \n", count_resources(data_set, NULL)); fprintf(stream, " \n"); /*** NODES ***/ fprintf(stream, " \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_type = "unknown"; switch (node->details->type) { case node_member: node_type = "member"; break; case node_ping: node_type = "ping"; break; } fprintf(stream, " details->uname); fprintf(stream, "id=\"%s\" ", node->details->id); fprintf(stream, "online=\"%s\" ", node->details->online ? 
"true" : "false"); fprintf(stream, "standby=\"%s\" ", node->details->standby ? "true" : "false"); fprintf(stream, "standby_onfail=\"%s\" ", node->details->standby_onfail ? "true" : "false"); fprintf(stream, "pending=\"%s\" ", node->details->pending ? "true" : "false"); fprintf(stream, "unclean=\"%s\" ", node->details->unclean ? "true" : "false"); fprintf(stream, "shutdown=\"%s\" ", node->details->shutdown ? "true" : "false"); fprintf(stream, "expected_up=\"%s\" ", node->details->expected_up ? "true" : "false"); fprintf(stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false"); fprintf(stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc)); fprintf(stream, "type=\"%s\" ", node_type); if (group_by_node) { GListPtr lpc2 = NULL; fprintf(stream, ">\n"); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; rsc->fns->print(rsc, " ", pe_print_xml | pe_print_rsconly, stream); } fprintf(stream, " \n"); } else { fprintf(stream, "/>\n"); } } fprintf(stream, " \n"); /*** RESOURCES ***/ if (group_by_node == FALSE || inactive_resources) { fprintf(stream, " \n"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, " ", pe_print_xml, stream); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, " ", pe_print_xml, stream); } } fprintf(stream, " \n"); } fprintf(stream, "\n"); fflush(stream); fclose(stream); return 0; } static int print_html_status(pe_working_set_t * data_set, const char *filename, gboolean web_cgi) { FILE *stream; GListPtr gIter = NULL; node_t *dc = NULL; static int updates = 0; char *filename_tmp = NULL; if (web_cgi) { stream = stdout; fprintf(stream, "Content-type: text/html\n\n"); } else { filename_tmp = crm_concat(filename, "tmp", '.'); stream = fopen(filename_tmp, "w"); if (stream == NULL) { crm_perror(LOG_ERR, "Cannot open %s for writing", filename_tmp); free(filename_tmp); return -1; } } updates++; dc = data_set->dc_node; fprintf(stream, ""); fprintf(stream, ""); fprintf(stream, "Cluster status"); /* content="%d;url=http://webdesign.about.com" */ fprintf(stream, "", reconnect_msec / 1000); fprintf(stream, ""); /*** SUMMARY ***/ fprintf(stream, "

Cluster summary

"); { char *now_str = NULL; time_t now = time(NULL); now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ fprintf(stream, "Last updated: %s
\n", now_str); } if (dc == NULL) { fprintf(stream, "Current DC: NONE
"); } else { fprintf(stream, "Current DC: %s (%s)
", dc->details->uname, dc->details->id); } fprintf(stream, "%d Nodes configured.
", g_list_length(data_set->nodes)); fprintf(stream, "%d Resources configured.
", count_resources(data_set, NULL)); /*** CONFIG ***/ fprintf(stream, "

Config Options

\n"); fprintf(stream, "\n"); fprintf(stream, "\n", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); fprintf(stream, "\n", is_set(data_set->flags, pe_flag_symmetric_cluster) ? "" : "a-"); fprintf(stream, "\n
STONITH of failed nodes:%s
Cluster is:%ssymmetric
No Quorum Policy:"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: fprintf(stream, "Freeze resources"); break; case no_quorum_stop: fprintf(stream, "Stop ALL resources"); break; case no_quorum_ignore: fprintf(stream, "Ignore"); break; case no_quorum_suicide: fprintf(stream, "Suicide"); break; } fprintf(stream, "\n
\n"); /*** NODE LIST ***/ fprintf(stream, "

Node List

\n"); fprintf(stream, "
    \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; fprintf(stream, "
  • "); if (node->details->standby_onfail && node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "standby (on-fail)\n"); } else if (node->details->standby && node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "standby\n"); } else if (node->details->standby) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "OFFLINE (standby)\n"); } else if (node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "online\n"); } else { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "OFFLINE\n"); } if (group_by_node) { GListPtr lpc2 = NULL; fprintf(stream, "
      \n"); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; fprintf(stream, "
    • "); rsc->fns->print(rsc, NULL, pe_print_html | pe_print_rsconly, stream); fprintf(stream, "
    • \n"); } fprintf(stream, "
    \n"); } fprintf(stream, "
  • \n"); } fprintf(stream, "
\n"); if (group_by_node && inactive_resources) { fprintf(stream, "

Inactive Resources

\n"); } else if (group_by_node == FALSE) { fprintf(stream, "

Resource List

\n"); } if (group_by_node == FALSE || inactive_resources) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, NULL, pe_print_html, stream); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, NULL, pe_print_html, stream); } } } fprintf(stream, ""); fflush(stream); fclose(stream); if (!web_cgi) { if (rename(filename_tmp, filename) != 0) { crm_perror(LOG_ERR, "Unable to rename %s->%s", filename_tmp, filename); } free(filename_tmp); } return 0; } #if ENABLE_SNMP # include # include # include # include # include # include # define add_snmp_field(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ int s_rc = snmp_add_var(list, name, name_length, 's', (value)); \ if(s_rc != 0) { \ crm_err("Could not add %s=%s rc=%d", oid_string, value, s_rc); \ } else { \ crm_trace("Added %s=%s", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ # define add_snmp_field_int(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ if(NULL == snmp_pdu_add_variable( \ list, name, name_length, ASN_INTEGER, \ (u_char *) & value, sizeof(value))) { \ crm_err("Could not add %s=%d", oid_string, value); \ } else { \ crm_trace("Added %s=%d", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ static int snmp_input(int operation, netsnmp_session * session, int reqid, netsnmp_pdu * pdu, void *magic) { return 1; } static netsnmp_session * crm_snmp_init(const char *target, char *community) { static netsnmp_session *session = NULL; # ifdef NETSNMPV53 char target53[128]; snprintf(target53, sizeof(target53), "%s:162", target); # endif if (session) { return session; } if (target == NULL) { return NULL; } if (get_crm_log_level() > LOG_INFO) { char *debug_tokens = strdup("run:shell,snmptrap,tdomain"); debug_register_tokens(debug_tokens); snmp_set_do_debugging(1); } session = calloc(1, sizeof(netsnmp_session)); snmp_sess_init(session); session->version = SNMP_VERSION_2c; session->callback = snmp_input; session->callback_magic = NULL; if (community) { session->community_len = strlen(community); session->community = (unsigned char *)community; } session = snmp_add(session, # ifdef NETSNMPV53 netsnmp_tdomain_transport(target53, 0, "udp"), # else netsnmp_transport_open_client("snmptrap", target), # endif NULL, NULL); if (session == NULL) { snmp_sess_perror("Could not create snmp transport", session); } return session; } #endif static int send_snmp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { int ret = 1; #if ENABLE_SNMP static oid snmptrap_oid[] = { 1, 3, 6, 1, 6, 3, 1, 1, 4, 1, 0 }; static oid sysuptime_oid[] = { 1, 3, 6, 1, 2, 1, 1, 3, 0 }; netsnmp_pdu *trap_pdu; netsnmp_session *session = crm_snmp_init(snmp_target, snmp_community); trap_pdu = snmp_pdu_create(SNMP_MSG_TRAP2); if (!trap_pdu) { crm_err("Failed to create SNMP notification"); return SNMPERR_GENERR; } if (1) { /* send uptime */ char 
csysuptime[20]; time_t now = time(NULL); sprintf(csysuptime, "%ld", now); snmp_add_var(trap_pdu, sysuptime_oid, sizeof(sysuptime_oid) / sizeof(oid), 't', csysuptime); } /* Indicate what the trap is by setting snmpTrapOid.0 */ ret = snmp_add_var(trap_pdu, snmptrap_oid, sizeof(snmptrap_oid) / sizeof(oid), 'o', snmp_crm_trap_oid); if (ret != 0) { crm_err("Failed set snmpTrapOid.0=%s", snmp_crm_trap_oid); return ret; } /* Add extries to the trap */ - add_snmp_field(trap_pdu, snmp_crm_oid_rsc, rsc); + if (rsc) { + add_snmp_field(trap_pdu, snmp_crm_oid_rsc, rsc); + } add_snmp_field(trap_pdu, snmp_crm_oid_node, node); add_snmp_field(trap_pdu, snmp_crm_oid_task, task); add_snmp_field(trap_pdu, snmp_crm_oid_desc, desc); add_snmp_field_int(trap_pdu, snmp_crm_oid_rc, rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_trc, target_rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_status, status); /* Send and cleanup */ ret = snmp_send(session, trap_pdu); if (ret == 0) { /* error */ snmp_sess_perror("Could not send SNMP trap", session); snmp_free_pdu(trap_pdu); ret = SNMPERR_GENERR; } else { ret = SNMPERR_SUCCESS; } #else crm_err("Sending SNMP traps is not supported by this installation"); #endif return ret; } #if ENABLE_ESMTP # include # include static void print_recipient_status(smtp_recipient_t recipient, const char *mailbox, void *arg) { const smtp_status_t *status; status = smtp_recipient_status(recipient); printf("%s: %d %s", mailbox, status->code, status->text); } static void event_cb(smtp_session_t session, int event_no, void *arg, ...) { int *ok; va_list alist; va_start(alist, arg); switch (event_no) { case SMTP_EV_CONNECT: case SMTP_EV_MAILSTATUS: case SMTP_EV_RCPTSTATUS: case SMTP_EV_MESSAGEDATA: case SMTP_EV_MESSAGESENT: case SMTP_EV_DISCONNECT: break; case SMTP_EV_WEAK_CIPHER:{ int bits = va_arg(alist, long); ok = va_arg(alist, int *); crm_debug("SMTP_EV_WEAK_CIPHER, bits=%d - accepted.", bits); *ok = 1; break; } case SMTP_EV_STARTTLS_OK: crm_debug("SMTP_EV_STARTTLS_OK - TLS started here."); break; case SMTP_EV_INVALID_PEER_CERTIFICATE:{ long vfy_result = va_arg(alist, long); ok = va_arg(alist, int *); /* There is a table in handle_invalid_peer_certificate() of mail-file.c */ crm_err("SMTP_EV_INVALID_PEER_CERTIFICATE: %ld", vfy_result); *ok = 1; break; } case SMTP_EV_NO_PEER_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_NO_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_WRONG_PEER_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_WRONG_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_NO_CLIENT_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_NO_CLIENT_CERTIFICATE - accepted."); *ok = 1; break; default: crm_debug("Got event: %d - ignored.\n", event_no); } va_end(alist); } #endif #define BODY_MAX 2048 #if ENABLE_ESMTP static void crm_smtp_debug(const char *buf, int buflen, int writing, void *arg) { char type = 0; int lpc = 0, last = 0, level = *(int *)arg; if (writing == SMTP_CB_HEADERS) { type = 'H'; } else if (writing) { type = 'C'; } else { type = 'S'; } for (; lpc < buflen; lpc++) { switch (buf[lpc]) { case 0: case '\n': if (last > 0) { do_crm_log(level, " %.*s", lpc - last, buf + last); } else { do_crm_log(level, "%c: %.*s", type, lpc - last, buf + last); } last = lpc + 1; break; } } } #endif static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = crm_itoa(rc); char *status_s = crm_itoa(status); 
char *target_rc_s = crm_itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", external_recipient, external_agent); setenv("CRM_notify_recipient", external_recipient, 1); setenv("CRM_notify_node", node, 1); setenv("CRM_notify_rsc", rsc, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { crm_perror(LOG_ERR, "notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(external_agent, external_agent, NULL); } crm_trace("Finished running custom notification program '%s'.", external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static int send_smtp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { #if ENABLE_ESMTP smtp_session_t session; smtp_message_t message; auth_context_t authctx; struct sigaction sa; int len = 20; int noauth = 1; int smtp_debug = LOG_DEBUG; char crm_mail_body[BODY_MAX]; char *crm_mail_subject = NULL; memset(&sa, 0, sizeof(struct sigaction)); if (node == NULL) { node = "-"; } if (rsc == NULL) { rsc = "-"; } if (desc == NULL) { desc = "-"; } if (crm_mail_to == NULL) { return 1; } if (crm_mail_host == NULL) { crm_mail_host = "localhost:25"; } if (crm_mail_prefix == NULL) { crm_mail_prefix = "Cluster notification"; } crm_debug("Sending '%s' mail to %s via %s", crm_mail_prefix, crm_mail_to, crm_mail_host); len += strlen(crm_mail_prefix); len += strlen(task); len += strlen(rsc); len += strlen(node); len += strlen(desc); len++; crm_mail_subject = calloc(1, len); snprintf(crm_mail_subject, len, "%s - %s event for %s on %s: %s\r\n", crm_mail_prefix, task, rsc, node, desc); len = 0; len += snprintf(crm_mail_body + len, BODY_MAX - len, "\r\n%s\r\n", crm_mail_prefix); len += snprintf(crm_mail_body + len, BODY_MAX - len, "====\r\n\r\n"); if (rc == target_rc) { len += snprintf(crm_mail_body + len, BODY_MAX - len, "Completed operation %s for resource %s on %s\r\n", task, rsc, node); } else { len += snprintf(crm_mail_body + len, BODY_MAX - len, "Operation %s for resource %s on %s failed: %s\r\n", task, rsc, node, desc); } len += snprintf(crm_mail_body + len, BODY_MAX - len, "\r\nDetails:\r\n"); len += snprintf(crm_mail_body + len, BODY_MAX - len, "\toperation status: (%d) %s\r\n", status, services_lrm_status_str(status)); if (status == PCMK_LRM_OP_DONE) { len += snprintf(crm_mail_body + len, BODY_MAX - len, "\tscript returned: (%d) %s\r\n", rc, lrmd_event_rc2str(rc)); len += snprintf(crm_mail_body + len, BODY_MAX - len, "\texpected return value: (%d) %s\r\n", target_rc, lrmd_event_rc2str(target_rc)); } auth_client_init(); session = smtp_create_session(); message = smtp_add_message(session); smtp_starttls_enable(session, Starttls_ENABLED); sa.sa_handler = SIG_IGN; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sigaction(SIGPIPE, &sa, NULL); smtp_set_server(session, crm_mail_host); authctx = auth_create_context(); auth_set_mechanism_flags(authctx, AUTH_PLUGIN_PLAIN, 0); smtp_set_eventcb(session, event_cb, NULL); /* Now tell libESMTP it can use the SMTP AUTH extension. 
*/ if (!noauth) { crm_debug("Adding authentication context"); smtp_auth_set_context(session, authctx); } if (crm_mail_from == NULL) { struct utsname us; char auto_from[BODY_MAX]; CRM_ASSERT(uname(&us) == 0); snprintf(auto_from, BODY_MAX, "crm_mon@%s", us.nodename); smtp_set_reverse_path(message, auto_from); } else { /* NULL is ok */ smtp_set_reverse_path(message, crm_mail_from); } smtp_set_header(message, "To", NULL /*phrase */ , NULL /*addr */ ); /* "Phrase" */ smtp_add_recipient(message, crm_mail_to); /* Set the Subject: header and override any subject line in the message headers. */ smtp_set_header(message, "Subject", crm_mail_subject); smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1); smtp_set_message_str(message, crm_mail_body); smtp_set_monitorcb(session, crm_smtp_debug, &smtp_debug, 1); if (smtp_start_session(session)) { char buf[128]; int rc = smtp_errno(); crm_err("SMTP server problem: %s (%d)", smtp_strerror(rc, buf, sizeof buf), rc); } else { char buf[128]; int rc = smtp_errno(); const smtp_status_t *smtp_status = smtp_message_transfer_status(message); if (rc != 0) { crm_err("SMTP server problem: %s (%d)", smtp_strerror(rc, buf, sizeof buf), rc); } crm_info("Send status: %d %s", smtp_status->code, crm_str(smtp_status->text)); smtp_enumerate_recipients(message, print_recipient_status, NULL); } smtp_destroy_session(session); auth_destroy_context(authctx); auth_client_exit(); #endif return 0; } static void handle_rsc_op(xmlNode * rsc_op) { int rc = -1; int status = -1; int action = -1; int interval = 0; int target_rc = -1; int transition_num = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *node = NULL; const char *magic = NULL; const char *id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); char *update_te_uuid = NULL; xmlNode *n = rsc_op; if (id == NULL) { /* Compatability with <= 1.1.5 */ id = ID(rsc_op); } magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return; } if (FALSE == decode_transition_magic(magic, &update_te_uuid, &transition_num, &action, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if (parse_op_key(id, &rsc, &task, &interval) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) { n = n->parent; } node = crm_element_value(n, XML_ATTR_UNAME); if (node == NULL) { node = ID(n); } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? 
*/ desc = pcmk_strerror(pcmk_ok); if (status == PCMK_LRM_OP_DONE && target_rc == rc) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_EXECRA_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_LRM_OP_DONE) { desc = lrmd_event_rc2str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = services_lrm_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && snmp_target) { send_snmp_trap(node, rsc, task, target_rc, rc, status, desc); } if (notify && crm_mail_to) { send_smtp_trap(node, rsc, task, target_rc, rc, status, desc); } if (notify && external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(update_te_uuid); free(rsc); free(task); } void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; long now = time(NULL); const char *op = NULL; print_dot(); if (current_cib != NULL) { xmlNode *cib_last = current_cib; current_cib = NULL; rc = cib_apply_patch_event(msg, cib_last, ¤t_cib, LOG_DEBUG); free_xml(cib_last); switch(rc) { case pcmk_err_diff_resync: case pcmk_err_diff_failed: crm_warn("[%s] %s Patch aborted: %s (%d)", event, op, pcmk_strerror(rc), rc); case pcmk_ok: break; default: crm_warn("[%s] %s ABORTED: %s (%d)", event, op, pcmk_strerror(rc), rc); return; } } if (current_cib == NULL) { current_cib = get_cib_copy(cib); } if (crm_mail_to || snmp_target || external_agent) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op); } } if (xpathObj) { xmlXPathFreeObject(xpathObj); } } if ((now - last_refresh) > (reconnect_msec / 1000)) { /* Force a refresh */ mon_refresh_display(NULL); } else { mainloop_set_trigger(refresh_trigger); } } gboolean mon_refresh_display(gpointer user_data) { xmlNode *cib_copy = copy_xml(current_cib); pe_working_set_t data_set; last_refresh = time(NULL); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { if (cib) { cib->cmds->signoff(cib); } print_as("Upgrade failed: %s", pcmk_strerror(-pcmk_err_dtd_validation)); if (as_console) { sleep(2); } clean_up(EX_USAGE); return FALSE; } set_working_set_defaults(&data_set); data_set.input = cib_copy; cluster_status(&data_set); if (as_html_file || web_cgi) { if (print_html_status(&data_set, as_html_file, web_cgi) != 0) { fprintf(stderr, "Critical: Unable to output html file\n"); clean_up(EX_USAGE); } } else if (as_xml) { if (print_xml_status(&data_set) != 0) { fprintf(stderr, "Critical: Unable to output xml file\n"); clean_up(EX_USAGE); } } else if (daemonize) { /* do nothing */ } else if (simple_status) { print_simple_status(&data_set); if (has_warnings) { clean_up(EX_USAGE); } } else { print_status(&data_set); } cleanup_calculations(&data_set); return TRUE; } void mon_st_callback(stonith_t *st, stonith_event_t *e) { char *desc = g_strdup_printf( "Operation %s requested by %s for peer %s: %s (ref=%s)", e->operation, e->origin, e->target, pcmk_strerror(e->result), e->id); if (snmp_target) { send_snmp_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } if (crm_mail_to) { send_smtp_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } if (external_agent) { send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); } 
g_free(desc); } /* * De-init ncurses, signoff from the CIB and deallocate memory. */ void clean_up(int rc) { #if ENABLE_SNMP netsnmp_session *session = crm_snmp_init(NULL, NULL); if (session) { snmp_close(session); snmp_shutdown("snmpapp"); } #endif #if CURSES_ENABLED if (as_console) { as_console = FALSE; echo(); nocbreak(); endwin(); } #endif if (cib != NULL) { cib->cmds->signoff(cib); cib_delete(cib); cib = NULL; } free(as_html_file); free(xml_file); free(pid_file); if (rc >= 0) { crm_exit(rc); } return; }
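
For reference, a minimal sketch of a notification handler that crm_mon can invoke via --external-agent, based on the CRM_notify_* environment variables exported by send_custom_trap() above. The script itself is not part of this patch; its file name and log location are illustrative only.

#!/usr/bin/python
# Hypothetical example handler (not part of this patch). crm_mon sets the
# CRM_notify_* variables below in send_custom_trap() before fork()/execl()'ing
# the configured agent, once per resource operation or fencing event.
import os

def main():
    fields = ["recipient", "node", "rsc", "task", "desc", "rc", "target_rc", "status"]
    event = {}
    for f in fields:
        # Fencing events pass no resource, so any of these may be unset.
        event[f] = os.environ.get("CRM_notify_%s" % f, "")
    line = " ".join(["%s=%s" % (f, event[f]) for f in fields])
    # Log location is illustrative only.
    log = open("/var/log/crm_mon_events.log", "a")
    log.write(line + "\n")
    log.close()

if __name__ == "__main__":
    main()

Such a handler would be wired up with something like "crm_mon --daemonize --external-agent /usr/local/bin/crm_mon_notify" (path hypothetical), optionally with --external-recipient to populate CRM_notify_recipient.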