diff --git a/cts/CTStests.py b/cts/CTStests.py index a57a5da805..f8f2cb626f 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1,3130 +1,3129 @@ """ Test-specific classes for Pacemaker's Cluster Test Suite (CTS) """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2000-2019 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import os import re import time import subprocess import tempfile from stat import * from cts import CTS from cts.CTSaudits import * from cts.CTSvars import * from cts.patterns import PatternSelector from cts.logging import LogFactory from cts.remote import RemoteFactory, input_wrapper from cts.watcher import LogWatcher from cts.environment import EnvFactory AllTestClasses = [ ] class CTSTest(object): ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. ''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.Env = EnvFactory().getInstance() self.rsh = RemoteFactory().getInstance() self.logger = LogFactory() self.templates = PatternSelector(cm["Name"]) self.Audits = [] self.timeout = 120 self.passed = 1 self.is_loop = 0 self.is_unsafe = 0 self.is_docker_unsafe = 0 self.is_experimental = 0 self.is_container = 0 self.is_valgrind = 0 self.benchmark = 0 # which tests to benchmark self.timer = {} # timers def log(self, args): self.logger.log(args) def debug(self, args): self.logger.debug(args) def has_key(self, key): return key in self.Stats def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): if str(key) == "0": raise ValueError("Bad call to 'foo in X', should reference 'foo in X.Stats' instead") if key in self.Stats: return self.Stats[key] return None def log_mark(self, msg): self.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) return def get_timer(self,key = "test"): try: return self.timer[key] except: return 0 def set_timer(self,key = "test"): self.timer[key] = time.time() return self.timer[key] def log_timer(self,key = "test"): elapsed = 0 if key in self.timer: elapsed = time.time() - self.timer[key] s = key == "test" and self.name or "%s:%s" % (self.name,key) self.debug("%s runtime: %.2f" % (s, elapsed)) del self.timer[key] return elapsed def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not name in self.Stats: self.Stats[name] = 0 self.Stats[name] = self.Stats[name]+1 # Reset the test passed boolean if name == "calls": self.passed = 1 def failure(self, reason="none"): '''Increment the failure count''' self.passed = 0 self.incr("failure") self.logger.log(("Test %s" % self.name).ljust(35) + " FAILED: %s" % reason) return None def success(self): '''Increment the success count''' 
self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def audit(self): passed = 1 if len(self.Audits) > 0: for audit in self.Audits: if not audit(): self.logger.log("Internal %s Audit %s FAILED." % (self.name, audit.name())) self.incr("auditfail") passed = 0 return passed def setup(self, node): '''Setup the given test''' return self.success() def teardown(self, node): '''Tear down the given test''' return self.success() def create_watch(self, patterns, timeout, name=None): if not name: name = self.name return LogWatcher(self.Env["LogFileName"], patterns, name, timeout, kind=self.Env["LogWatcher"], hosts=self.Env["nodes"]) def local_badnews(self, prefix, watch, local_ignore=[]): errcount = 0 if not prefix: prefix = "LocalBadNews:" ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append(prefix) ignorelist.extend(local_ignore) while errcount < 100: match = watch.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.logger.log(prefix + " " + match) errcount = errcount + 1 else: break else: self.logger.log("Too many errors!") watch.end() return errcount def is_applicable(self): return self.is_applicable_common() def is_applicable_common(self): '''Return TRUE if we are applicable in the current test configuration''' #raise ValueError("Abstract Class member (is_applicable)") if self.is_loop and not self.Env["loop-tests"]: return 0 elif self.is_unsafe and not self.Env["unsafe-tests"]: return 0 elif self.is_valgrind and not self.Env["valgrind-tests"]: return 0 elif self.is_experimental and not self.Env["experimental-tests"]: return 0 elif self.is_docker_unsafe and self.Env["docker"]: return 0 elif self.is_container and not self.Env["container-tests"]: return 0 elif self.Env["benchmark"] and self.benchmark == 0: return 0 return 1 def find_ocfs2_resources(self, node): self.r_o2cb = None self.r_ocfs2 = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "o2cb" and r.parent != "NA": self.debug("Found o2cb: %s" % self.r_o2cb) self.r_o2cb = r.parent if re.search("^Constraint", line): c = AuditConstraint(self.CM, line) if c.type == "rsc_colocation" and c.target == self.r_o2cb: self.r_ocfs2.append(c.rsc) self.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2)) return len(self.r_ocfs2) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] class StopTest(CTSTest): '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Stop" def __call__(self, node): '''Perform the 'stop' test. 
''' self.incr("calls") if self.CM.ShouldBeStatus[node] != "up": return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.templates["Pat:We_stopped"] % node) # Any active node needs to notice this one left # (note that this won't work if we have multiple partitions) for other in self.Env["nodes"]: if self.CM.ShouldBeStatus[other] == "up" and other != node: patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) #self.debug("Checking %s will notice %s left"%(other, node)) watch = self.create_watch(patterns, self.Env["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason = None UnmatchedList = "||" if watch.unmatched: (rc, output) = self.rsh(node, "/bin/ps axf", None) for line in output: self.debug(line) (rc, output) = self.rsh(node, "/usr/sbin/dlm_tool dump", None) for line in output: self.debug(line) for regex in watch.unmatched: self.logger.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason = "Missing shutdown pattern" self.CM.cluster_stable(self.Env["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() if len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... # class StartTest(CTSTest): '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name = "start" self.debug = debug def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != "down": return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" % (self.Env["Name"], node)) # # We don't register StartTest because it's better when called by # another test... # class FlipTest(CTSTest): '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == "up": self.incr("stopped") ret = self.stop(node) type = "up->down" # Give the cluster time to recognize it's gone... time.sleep(self.Env["StableTime"]) elif self.CM.ShouldBeStatus[node] == "down": self.incr("started") ret = self.start(node) type = "down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) class RestartTest(CTSTest): '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Restart" self.start = StartTest(cm) self.stop = StopTest(cm) self.benchmark = 1 def __call__(self, node): '''Perform the 'restart' test. 
''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.set_timer() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) class StonithdTest(CTSTest): def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Stonithd" self.startall = SimulStartLite(cm) self.benchmark = 1 def __call__(self, node): self.incr("calls") if len(self.Env["nodes"]) < 2: return self.skipped() ret = self.startall(None) if not ret: return self.failure("Setup failed") is_dc = self.CM.is_node_dc(node) watchpats = [] watchpats.append(self.templates["Pat:FenceOpOK"] % node) watchpats.append(self.templates["Pat:NodeFenced"] % node) if self.Env["at-boot"] == 0: self.debug("Expecting %s to stay down" % node) self.CM.ShouldBeStatus[node] = "down" else: self.debug("Expecting %s to come up again %d" % (node, self.Env["at-boot"])) watchpats.append("%s.* S_STARTING -> S_PENDING" % node) watchpats.append("%s.* S_PENDING -> S_NOT_DC" % node) watch = self.create_watch(watchpats, 30 + self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"]) watch.setwatch() origin = self.Env.RandomGen.choice(self.Env["nodes"]) rc = self.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node) if rc == 194: # 194 - 256 = -62 = Timer expired # # Look for the patterns, usually this means the required # device was running on the node to be fenced - or that # the required devices were in the process of being loaded # and/or moved # # Effectively the node committed suicide so there will be # no confirmation, but pacemaker should be watching and # fence the node again self.logger.log("Fencing command on %s to fence %s timed out" % (origin, node)) elif origin != node and rc != 0: self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting for fenced node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.logger.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc)) elif origin == node and rc != 255: # 255 == broken pipe, ie. 
the node was fenced as expected self.logger.log("Locally originated fencing returned %d" % rc) self.set_timer("fence") matched = watch.lookforall() self.log_timer("fence") self.set_timer("reform") if watch.unmatched: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting for fenced node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.Env["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") self.log_timer("reform") return self.success() def errorstoignore(self): return [ self.templates["Pat:Fencing_start"] % ".*", self.templates["Pat:Fencing_ok"] % ".*", r"error.*: Resource .*stonith::.* is active on 2 nodes attempting recovery", r"error.*: Operation 'reboot' targeting .* on .* for stonith_admin.*: Timer expired", ] def is_applicable(self): if not self.is_applicable_common(): return 0 if "DoFencing" in list(self.Env.keys()): return self.Env["DoFencing"] return 1 AllTestClasses.append(StonithdTest) class StartOnebyOne(CTSTest): '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns = CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed = [] self.set_timer() for node in self.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) class SimulStart(CTSTest): '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") if not self.startall(None): return self.failure("Startall failed") return self.success() # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) class SimulStop(CTSTest): '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) class StopOnebyOne(CTSTest): '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... 
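        # (all nodes are started first so that each one can then be stopped in
        #  turn; any node that fails to stop is collected and reported
        #  together at the end)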
# Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed = [] self.set_timer() for node in self.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) class RestartOnebyOne(CTSTest): '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail = [] self.set_timer() self.restart = RestartTest(self.CM) for node in self.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" % (len(did_fail), repr(did_fail))) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) class PartialStart(CTSTest): '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.stop = StopTest(cm) #self.is_unsafe = 1 def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") # FIXME! This should use the CM class to get the pattern # then it would be applicable in general watchpats = [] watchpats.append("pacemaker-controld.*Connecting to cluster infrastructure") watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.logger.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stop(node) if not ret: return self.failure("%s did not stop in time" % node) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # We might do some fencing in the 2-node case if we make it up far enough return [ r"Executing reboot fencing operation", r"Requesting fencing \([^)]+\) of node ", ] # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) class StandbyTest(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Standby" self.benchmark = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) 
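    # For reference, SetStandbyMode()/StandbyStatus() are expected to act on
    # the node's "standby" attribute; a rough hand-run equivalent
    # (illustrative only, option spellings may vary by version) would be:
    #
    #   crm_attribute -N <node> -n standby -v on    # put the node in standby
    #   crm_attribute -N <node> -n standby --query  # check the current value
    #   crm_attribute -N <node> -n standby -v off   # make the node active again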
def __call__(self, node): self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Start all nodes failed") self.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.debug("Getting resources running on node %s" % node) rsc_on_node = self.CM.active_resources(node) watchpats = [] watchpats.append(r"State transition .* -> S_POLICY_ENGINE") watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() self.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) self.set_timer("on") ret = watch.lookforall() if not ret: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.CM.SetStandbyMode(node, "off") return self.failure("cluster didn't react to standby change on %s" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.log_timer("on") self.debug("Checking resources") bad_run = self.CM.active_resources(node) if len(bad_run) > 0: rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run))) self.debug("Setting node %s to active mode" % node) self.CM.SetStandbyMode(node, "off") return rc self.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.set_timer("off") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.log_timer("off") return self.success() AllTestClasses.append(StandbyTest) class ValgrindTest(CTSTest): '''Check for memory leaks''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Valgrind" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_valgrind = 1 self.is_loop = 1 def setup(self, node): self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Stop all nodes failed") # @TODO Edit /etc/sysconfig/pacemaker on all nodes to enable valgrind, # and clear any valgrind logs from previous runs. For now, we rely on # the user to do this manually. 
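        # A rough sketch of what that manual step could look like (assumed
        # file location and variable names), kept as a comment for now:
        #
        #   for n in self.Env["nodes"]:
        #       self.rsh(n, "echo PCMK_valgrind_enabled=yes >> /etc/sysconfig/pacemaker")
        #       self.rsh(n, "rm -f /var/lib/pacemaker/valgrind-*")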
ret = self.startall(None) if not ret: return self.failure("Start all nodes failed") return self.success() def teardown(self, node): # Return all nodes to normal # @TODO Edit /etc/sysconfig/pacemaker on all nodes to disable valgrind ret = self.stopall(None) if not ret: return self.failure("Stop all nodes failed") return self.success() def find_leaks(self): # Check for leaks # (no longer used but kept in case feature is restored) leaked = [] self.stop = StopTest(self.CM) for node in self.Env["nodes"]: rc = self.stop(node) if not rc: self.failure("Couldn't shut down %s" % node) rc = self.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logger.logPat, 0) if rc != 1: leaked.append(node) self.failure("Valgrind errors detected on %s" % node) (rc, output) = self.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logger.logPat, None) for line in output: self.logger.log(line) (rc, output) = self.rsh(node, "cat %s" % self.logger.logPat, None) for line in output: self.debug(line) self.rsh(node, "rm -f %s" % self.logger.logPat, None) return leaked def __call__(self, node): #leaked = self.find_leaks() #if len(leaked) > 0: # return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"pacemaker-based.*: \*\*\*\*\*\*\*\*\*\*\*\*\*", r"pacemaker-based.*: .* avoid confusing Valgrind", r"HA_VALGRIND_ENABLED", ] class StandbyLoopTest(ValgrindTest): '''Check for memory leaks by putting a node in and out of standby for an hour''' # @TODO This is not a useful test for memory leaks def __init__(self, cm): ValgrindTest.__init__(self,cm) self.name = "StandbyLoop" def __call__(self, node): lpc = 0 delay = 2 failed = 0 done = time.time() + self.Env["loop-minutes"] * 60 while time.time() <= done and not failed: lpc = lpc + 1 time.sleep(delay) if not self.CM.SetStandbyMode(node, "on"): self.failure("can't set node %s to standby mode" % node) failed = lpc time.sleep(delay) if not self.CM.SetStandbyMode(node, "off"): self.failure("can't set node %s to active mode" % node) failed = lpc leaked = self.find_leaks() if failed: return self.failure("Iteration %d failed" % failed) elif len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() #AllTestClasses.append(StandbyLoopTest) class BandwidthTest(CTSTest): # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. '''Test the bandwidth which the cluster uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) (handle, self.tempfile) = tempfile.mkstemp(".cts") os.close(handle) self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount() < 1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. 
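        # The estimate below is deliberately crude: tcpdump captures ~100 UDP
        # packets on the cluster port, and countbandwidth() sums their
        # "length:" fields and divides by the time between the first and last
        # packet to report bits/sec.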
fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.logger.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.logger.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" % rc) def countbandwidth(self, file): fp = open(file, "r") fp.seek(0) count = 0 sum = 0 while 1: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count + 1 linesplit = line.split(" ") for j in range(len(linesplit)-1): if linesplit[j] == "udp": break if linesplit[j] == "length:": break try: sum = sum + int(linesplit[j+1]) except ValueError: self.logger.log("Invalid tcpdump line: %s" % line) return None T1 = linesplit[0] timesplit = T1.split(":") time2split = timesplit[2].split(".") time1 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001 break while count < 100: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count+1 linessplit = line.split(" ") for j in range(len(linessplit)-1): if linessplit[j] == "udp": break if linessplit[j] == "length:": break try: sum = int(linessplit[j+1]) + sum except ValueError: self.logger.log("Invalid tcpdump line: %s" % line) return None T2 = linessplit[0] timesplit = T2.split(":") time2split = timesplit[2].split(".") time2 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001 time = time2-time1 if (time <= 0): return 0 return int((sum*8)/time) def is_applicable(self): '''BandwidthTest never applicable''' return 0 AllTestClasses.append(BandwidthTest) ################################################################### class MaintenanceMode(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "MaintenanceMode" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max = 30 #self.is_unsafe = 1 self.benchmark = 1 self.action = "asyncmon" self.interval = 0 self.rid = "maintenanceDummy" def toggleMaintenanceMode(self, node, action): pats = [] pats.append(self.templates["Pat:DC_IDLE"]) # fail the resource right after turning Maintenance mode on # verify it is not recovered until maintenance mode is turned off if action == "On": - pats.append(r"schedulerd.*:\s+warning:.*Processing failed %s of %s on" % (self.action, self.rid)) + pats.append(self.templates["Pat:RscOpFail"] % (self.action, self.rid)) else: pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid)) pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid)) watch = self.create_watch(pats, 60) watch.setwatch() self.debug("Turning maintenance mode %s" % action) self.rsh(node, self.templates["MaintenanceMode%s" % (action)]) if (action == "On"): self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover%s" % 
(action)) watch.lookforall() self.log_timer("recover%s" % (action)) if watch.unmatched: self.debug("Failed to find patterns when turning maintenance mode %s" % action) return repr(watch.unmatched) return "" def insertMaintenanceDummy(self, node): pats = [] pats.append(("%s.*" % node) + (self.templates["Pat:RscOpOK"] % ("start", self.rid))) watch = self.create_watch(pats, 60) watch.setwatch() self.CM.AddDummyRsc(node, self.rid) self.set_timer("addDummy") watch.lookforall() self.log_timer("addDummy") if watch.unmatched: self.debug("Failed to find patterns when adding maintenance dummy resource") return repr(watch.unmatched) return "" def removeMaintenanceDummy(self, node): pats = [] pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid)) watch = self.create_watch(pats, 60) watch.setwatch() self.CM.RemoveDummyRsc(node, self.rid) self.set_timer("removeDummy") watch.lookforall() self.log_timer("removeDummy") if watch.unmatched: self.debug("Failed to find patterns when removing maintenance dummy resource") return repr(watch.unmatched) return "" def managedRscList(self, node): rscList = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.managed(): rscList.append(tmp.id) return rscList def verifyResources(self, node, rscList, managed): managedList = list(rscList) managed_str = "managed" if not managed: managed_str = "unmanaged" (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if managed and not tmp.managed(): continue elif not managed and tmp.managed(): continue elif managedList.count(tmp.id): managedList.remove(tmp.id) if len(managedList) == 0: self.debug("Found all %s resources on %s" % (managed_str, node)) return True self.logger.log("Could not find all %s resources on %s. %s" % (managed_str, node, managedList)) return False def __call__(self, node): '''Perform the 'MaintenanceMode' test. ''' self.incr("calls") verify_managed = False verify_unmanaged = False failPat = "" ret = self.startall(None) if not ret: return self.failure("Setup failed") # get a list of all the managed resources. We use this list # after enabling maintenance mode to verify all managed resources # become un-managed. After maintenance mode is turned off, we use # this list to verify all the resources become managed again. managedResources = self.managedRscList(node) if len(managedResources) == 0: self.logger.log("No managed resources on %s" % node) return self.skipped() # insert a fake resource we can fail during maintenance mode # so we can verify recovery does not take place until after maintenance # mode is disabled. failPat = failPat + self.insertMaintenanceDummy(node) # toggle maintenance mode ON, then fail dummy resource. failPat = failPat + self.toggleMaintenanceMode(node, "On") # verify all the resources are now unmanaged if self.verifyResources(node, managedResources, False): verify_unmanaged = True # Toggle maintenance mode OFF, verify dummy is recovered. failPat = failPat + self.toggleMaintenanceMode(node, "Off") # verify all the resources are now managed again if self.verifyResources(node, managedResources, True): verify_managed = True # Remove our maintenance dummy resource. 
failPat = failPat + self.removeMaintenanceDummy(node) self.CM.cluster_stable() if failPat != "": return self.failure("Unmatched patterns: %s" % (failPat)) elif verify_unmanaged is False: return self.failure("Failed to verify resources became unmanaged during maintenance mode") elif verify_managed is False: return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"Updating failcount for %s" % self.rid, r"schedulerd.*: Recover %s\s*\(.*\)" % self.rid, r"Unknown operation: fail", self.templates["Pat:RscOpOK"] % (self.action, self.rid), r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval), ] AllTestClasses.append(MaintenanceMode) class ResourceRecover(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "ResourceRecover" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max = 30 self.rid = None self.rid_alt = None #self.is_unsafe = 1 self.benchmark = 1 # these are the values used for the new LRM API call self.action = "asyncmon" self.interval = 0 def __call__(self, node): '''Perform the 'ResourceRecover' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") resourcelist = self.CM.active_resources(node) # if there are no resourcelist, return directly if len(resourcelist) == 0: self.logger.log("No active resources on %s" % node) return self.skipped() self.rid = self.Env.RandomGen.choice(resourcelist) self.rid_alt = self.rid rsc = None (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.id == self.rid: rsc = tmp # Handle anonymous clones that get renamed self.rid = rsc.clone_id break if not rsc: return self.failure("Could not find %s in the resource list" % self.rid) self.debug("Shooting %s aka. 
%s" % (rsc.clone_id, rsc.id)) pats = [] - pats.append(r"schedulerd.*:\s+warning:.*Processing failed %s of (%s|%s) on" % (self.action, - rsc.id, rsc.clone_id)) + pats.append(self.templates["Pat:CloneOpFail"] % (self.action, rsc.id, rsc.clone_id)) if rsc.managed(): pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid)) if rsc.unique(): pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid)) else: # Anonymous clones may get restarted with a different clone number pats.append(self.templates["Pat:RscOpOK"] % ("start", ".*")) watch = self.create_watch(pats, 60) watch.setwatch() self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover") watch.lookforall() self.log_timer("recover") self.CM.cluster_stable() recovered = self.CM.ResourceLocation(self.rid) if watch.unmatched: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif rsc.unique() and len(recovered) > 1: return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered))) elif len(recovered) > 0: self.debug("%s is running on: %s" % (self.rid, repr(recovered))) elif rsc.managed(): return self.failure("%s was not recovered and is inactive" % self.rid) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"Updating failcount for %s" % self.rid, r"schedulerd.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt), r"Unknown operation: fail", self.templates["Pat:RscOpOK"] % (self.action, self.rid), r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) class ComponentFail(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "ComponentFail" # TODO make this work correctly in docker. self.is_docker_unsafe = 1 self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] self.is_unsafe = 1 def __call__(self, node): '''Perform the 'ComponentFail' test. 
''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.Env["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.Env.RandomGen.choice(self.complist) self.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) if chosen.name != "corosync": self.patterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name)) self.patterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name)) self.patterns.extend(chosen.pats) if node_is_dc: self.patterns.extend(chosen.dc_pats) # @TODO this should be a flag in the Component if chosen.name in [ "corosync", "pacemaker-based", "pacemaker-fenced" ]: # Ignore actions for fence devices if fencer will respawn # (their registration will be lost, and probes will fail) (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.okerrpatterns.append(self.templates["Pat:Fencing_recover"] % r.id) self.okerrpatterns.append(self.templates["Pat:Fencing_active"] % r.id) self.okerrpatterns.append(self.templates["Pat:Fencing_probe"] % r.id) # supply a copy so self.patterns doesn't end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonithPats = [] stonithPats.append(self.templates["Pat:Fencing_ok"] % node) stonith = self.create_watch(stonithPats, 0) stonith.setwatch() # set the watch for stable watch = self.create_watch( tmpPats, self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting for any fenced node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.debug("Waiting for the cluster to re-stabilize with all nodes") self.CM.cluster_stable(self.Env["StartTime"]) self.debug("Checking if %s was shot" % node) shot = stonith.look(60) if shot: self.debug("Found: " + repr(shot)) self.okerrpatterns.append(self.templates["Pat:Fencing_start"] % node) if self.Env["at-boot"] == 0: self.CM.ShouldBeStatus[node] = "down" # If fencing occurred, chances are many (if not all) the expected logs # will not be sent - or will be lost when the node reboots return self.success() # check for logs indicating a graceful recovery matched = watch.lookforall(allow_multiple_matches=1) if watch.unmatched: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.Env["StartTime"]) if not matched: return self.failure("Didn't find all expected %s patterns" % chosen.name) elif not is_stable: return self.failure("Cluster did not become stable after killing %s" % chosen.name) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... 
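        # (the log scan that consults this list happens right after the test
        #  runs, so the patterns accumulated in the preceding __call__ are
        #  still the relevant ones)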
self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) class SplitBrainTest(CTSTest): '''It is used to test split-brain. when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "SplitBrain" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.is_experimental = 1 def isolate_partition(self, partition): other_nodes = [] other_nodes.extend(self.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]) + " from " +repr(partition)) if len(other_nodes) == 0: return 1 self.debug("Creating partition: " + repr(partition)) self.debug("Everyone else: " + repr(other_nodes)) for node in partition: if not self.CM.isolate_node(node, other_nodes): self.logger.log("Could not isolate %s" % node) return 0 return 1 def heal_partition(self, partition): other_nodes = [] other_nodes.extend(self.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"])) if len(other_nodes) == 0: return 1 self.debug("Healing partition: " + repr(partition)) self.debug("Everyone else: " + repr(other_nodes)) for node in partition: self.CM.unisolate_node(node, other_nodes) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") self.passed = 1 partitions = {} ret = self.startall(None) if not ret: return self.failure("Setup failed") while 1: # Retry until we get multiple partitions partitions = {} p_max = len(self.Env["nodes"]) for node in self.Env["nodes"]: p = self.Env.RandomGen.randint(1, p_max) if not p in partitions: partitions[p] = [] partitions[p].append(node) p_max = len(list(partitions.keys())) if p_max > 1: break # else, try again self.debug("Created %d partitions" % p_max) for key in list(partitions.keys()): self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key])) # Disabling STONITH to reduce test complexity for now self.rsh(node, "crm_attribute -V -n stonith-enabled -v false") for key in list(partitions.keys()): self.isolate_partition(partitions[key]) count = 30 while count > 0: if len(self.CM.find_partitions()) != p_max: time.sleep(10) else: break else: self.failure("Expected partitions were not created") # Target number of partitions formed - wait for stability if not self.CM.cluster_stable(): self.failure("Partitioned cluster not stable") # Now audit the cluster state self.CM.partitions_expected = p_max if not self.audit(): self.failure("Audits failed") self.CM.partitions_expected = 1 # And heal them again for key in list(partitions.keys()): self.heal_partition(partitions[key]) # Wait for a single partition to form count = 30 while count > 0: if len(self.CM.find_partitions()) != 1: time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not reform") # Wait for it to have the right number of members count = 30 while count > 0: members = [] partitions = self.CM.find_partitions() if len(partitions) > 0: members = partitions[0].split() if len(members) != len(self.Env["nodes"]): time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not completely reform") # Wait up to 20 minutes - the delay is more preferable than # trying to continue with in a messed up state if not self.CM.cluster_stable(1200): self.failure("Reformed cluster not stable") if self.Env["continue"] == 1: answer = "Y" else: try: answer = input_wrapper('Continue? 
[nY]') except EOFError as e: answer = "n" if answer and answer == "n": raise ValueError("Reformed cluster not stable") # Turn fencing back on if self.Env["DoFencing"]: self.rsh(node, "crm_attribute -V -D -n stonith-enabled") self.CM.cluster_stable() if self.passed: return self.success() return self.failure("See previous errors") def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ r"Another DC detected:", r"(ERROR|error).*: .*Application of an update diff failed", r"pacemaker-controld.*:.*not in our membership list", r"CRIT:.*node.*returning after partition", ] def is_applicable(self): if not self.is_applicable_common(): return 0 return len(self.Env["nodes"]) > 2 AllTestClasses.append(SplitBrainTest) class Reattach(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Reattach" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() def _is_managed(self, node): is_managed = self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -q -G -d true", 1) is_managed = is_managed[:-1] # Strip off the newline return is_managed == "true" def _set_unmanaged(self, node): self.debug("Disable resource management") self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -v false") def _set_managed(self, node): self.debug("Re-enable resource management") self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -D") def setup(self, node): attempt = 0 if not self.startall(None): return None # Make sure we are really _really_ stable and that all # resources, including those that depend on transient node # attributes, are started while not self.CM.cluster_stable(double_check=True): if attempt < 5: attempt += 1 self.debug("Not stable yet, re-testing") else: self.logger.log("Cluster is not stable") return None return 1 def teardown(self, node): # Make sure 'node' is up start = StartTest(self.CM) start(node) if not self._is_managed(node): self.logger.log("Attempting to re-enable resource management on %s" % node) self._set_managed(node) self.CM.cluster_stable() if not self._is_managed(node): self.logger.log("Could not re-enable resource management") return 0 return 1 def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' if self.find_ocfs2_resources(node): self.logger.log("Detach/Reattach scenarios are not possible with OCFS2 services present") return 0 return 1 def __call__(self, node): self.incr("calls") pats = [] # Conveniently, the scheduler will display this message when disabling # management, even if fencing is not enabled, so we can rely on it. 
managed = self.create_watch(["Delaying fencing operations"], 60) managed.setwatch() self._set_unmanaged(node) if not managed.lookforall(): self.logger.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not disabled") pats = [] pats.append(self.templates["Pat:RscOpOK"] % ("start", ".*")) pats.append(self.templates["Pat:RscOpOK"] % ("stop", ".*")) pats.append(self.templates["Pat:RscOpOK"] % ("promote", ".*")) pats.append(self.templates["Pat:RscOpOK"] % ("demote", ".*")) pats.append(self.templates["Pat:RscOpOK"] % ("migrate", ".*")) watch = self.create_watch(pats, 60, "ShutdownActivity") watch.setwatch() self.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: self._set_managed(node) return self.failure("Couldn't shut down the cluster") self.debug("Bringing the cluster back up") ret = self.startall(None) time.sleep(5) # allow ping to update the CIB if not ret: self._set_managed(node) return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): self._set_managed(node) return self.failure("Resources stopped or started during cluster restart") watch = self.create_watch(pats, 60, "StartupActivity") watch.setwatch() # Re-enable resource management (and verify it happened). self._set_managed(node) self.CM.cluster_stable() if not self._is_managed(node): return self.failure("Could not re-enable resource management") # Ignore actions for STONITH resources ignore = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.debug("Ignoring start actions for %s" % r.id) ignore.append(self.templates["Pat:RscOpOK"] % ("start", r.id)) if self.local_badnews("ResourceActivity:", watch, ignore): return self.failure("Resources stopped or started after resource management was re-enabled") return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"resource( was|s were) active at shutdown", ] def is_applicable(self): return 1 AllTestClasses.append(Reattach) class SpecialTest1(CTSTest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... 
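        # (the CIB is then wiped on the target node, which is started alone
        #  first; config recovery is exercised when the remaining nodes
        #  come up)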
ret = self.stopall(None) if not ret: return self.failure("Could not stop all nodes") # Test config recovery when the other nodes come up self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Start the selected node ret = self.restart1(node) if not ret: return self.failure("Could not start "+node) # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Could not start the remaining nodes") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Errors that occur as a result of the CIB being wiped return [ r"error.*: v1 patchset error, patch failed to apply: Application of an update diff failed", r"error.*: Resource start-up disabled since no STONITH resources have been defined", r"error.*: Either configure some or disable STONITH with the stonith-enabled option", r"error.*: NOTE: Clusters with shared data need STONITH to ensure data integrity", ] AllTestClasses.append(SpecialTest1) class HAETest(CTSTest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "HAETest" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_loop = 1 def setup(self, node): # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") return self.success() def wait_on_state(self, node, resource, expected_clones, attempts=240): while attempts > 0: active = 0 (rc, lines) = self.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None) # Hack until crm_resource does the right thing if rc == 0 and lines: active = len(lines) if len(lines) == expected_clones: return 1 elif rc == 1: self.debug("Resource %s is still inactive" % resource) elif rc == 234: self.logger.log("Unknown resource %s" % resource) return 0 elif rc == 246: self.logger.log("Cluster is inactive") return 0 elif rc != 0: self.logger.log("Call to crm_resource failed, rc=%d" % rc) return 0 else: self.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones)) attempts -= 1 time.sleep(1) return 0 def find_dlm(self, node): self.r_dlm = None (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "controld" and r.parent != "NA": self.debug("Found dlm: %s" % self.r_dlm) self.r_dlm = r.parent return 1 return 0 def find_hae_resources(self, node): self.r_dlm = None self.r_o2cb = None self.r_ocfs2 = [] if self.find_dlm(node): self.find_ocfs2_resources(node) def is_applicable(self): if not self.is_applicable_common(): return 0 if self.Env["Schema"] == "hae": return 1 return None class HAERoleTest(HAETest): def __init__(self, cm): '''Lars' mount/unmount test for the HA extension. 
''' HAETest.__init__(self,cm) self.name = "HAERoleTest" def change_state(self, node, resource, target): rc = self.rsh(node, "crm_resource -V -r %s -p target-role -v %s --meta" % (resource, target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 delay = 2 done = time.time() + self.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "Stopped") if not self.wait_on_state(node, self.r_dlm, 0): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "Started") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAERoleTest) class HAEStandbyTest(HAETest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): HAETest.__init__(self,cm) self.name = "HAEStandbyTest" def change_state(self, node, resource, target): rc = self.rsh(node, "crm_standby -V -l reboot -v %s" % (target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 done = time.time() + self.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "true") if not self.wait_on_state(node, self.r_dlm, clone_max-1): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "false") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAEStandbyTest) class NearQuorumPointTest(CTSTest): ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. 
''' self.incr("calls") startset = [] stopset = [] stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint") #decide what to do with each node for node in self.Env["nodes"]: action = self.Env.RandomGen.choice(["start","stop"]) #action = self.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.debug("start nodes:" + repr(startset)) self.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == "up": watchpats.append(self.templates["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": #watchpats.append(self.templates["Pat:NonDC_started"] % node) watchpats.append(self.templates["Pat:Local_started"] % node) else: for stopping in stopset: if self.CM.ShouldBeStatus[stopping] == "up": watchpats.append(self.templates["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping))) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.templates["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() self.CM.fencing_cleanup("NearQuorumPoint", stonith) return self.success() self.logger.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) self.CM.fencing_cleanup("NearQuorumPoint", stonith) if upnodes == [] and downnodes == []: self.CM.cluster_stable() # Make sure they're completely down with no residule for node in stopset: self.rsh(node, self.templates["StopCmd"]) return self.success() if len(upnodes) > 0: self.logger.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.logger.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() def is_applicable(self): return 1 AllTestClasses.append(NearQuorumPointTest) class RollingUpgradeTest(CTSTest): '''Perform a rolling upgrade of the cluster''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RollingUpgrade" self.start = StartTest(cm) self.stop = StopTest(cm) self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def setup(self, node): # Start all remaining nodes ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.Env["nodes"]: if not self.downgrade(node, None): return self.failure("Couldn't downgrade %s" % node) ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.Env["nodes"]: if not self.upgrade(node, None): return self.failure("Couldn't upgrade %s" % node) return self.success() def install(self, node, version, start=1, flags="--force"): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.Env["rpm-dir"], version) self.logger.log("Installing %s on %s with %s" % (version, node, flags)) if not self.stop(node): return self.failure("stop failure: "+node) rc = self.rsh(node, "mkdir -p %s" % target_dir) rc = 
self.rsh(node, "rm -f %s/*.rpm" % target_dir) (rc, lines) = self.rsh(node, "ls -1 %s/*.rpm" % src_dir, None) for line in lines: line = line[:-1] rc = self.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir)) rc = self.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir)) if start and not self.start(node): return self.failure("start failure: "+node) return self.success() def upgrade(self, node, start=1): return self.install(node, self.Env["current-version"], start) def downgrade(self, node, start=1): return self.install(node, self.Env["previous-version"], start, "--force --nodeps") def __call__(self, node): '''Perform the 'Rolling Upgrade' test. ''' self.incr("calls") for node in self.Env["nodes"]: if self.upgrade(node): return self.failure("Couldn't upgrade %s" % node) self.CM.cluster_stable() return self.success() def is_applicable(self): if not self.is_applicable_common(): return None if not "rpm-dir" in list(self.Env.keys()): return None if not "current-version" in list(self.Env.keys()): return None if not "previous-version" in list(self.Env.keys()): return None return 1 # Register RestartTest as a good test to run AllTestClasses.append(RollingUpgradeTest) class BSC_AddResource(CTSTest): '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "AddResource" self.resource_offset = 0 self.cib_cmd = """cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.incr("calls") self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "pacemaker-controld.*%s_start_0.*confirmed.*ok" patterns = [] patterns.append(start_pat % r_id) watch = self.create_watch(patterns, self.Env["DeadTime"]) watch.setwatch() ip = self.NextIP() if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.logger.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.Env["DeadTime"]): return self.failure("Unstable cluster") return self.success() def NextIP(self): ip = self.Env["IPBase"] if ":" in ip: fields = ip.rpartition(":") fields[2] = str(hex(int(fields[2], 16)+1)) print(str(hex(int(f[2], 16)+1))) else: fields = ip.rpartition('.') fields[2] = str(int(fields[2])+1) ip = fields[0] + fields[1] + fields[3]; self.Env["IPBase"] = ip return ip.strip() def make_ip_resource(self, node, id, rclass, type, ip): self.logger.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint = """ """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.rsh(node, self.cib_cmd % ("constraints", node_constraint), None) if rc != 0: self.logger.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.rsh(node, self.cib_cmd % ("resources", rsc_xml), None) if rc != 0: self.logger.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.Env["DoBSC"]: return 1 return None AllTestClasses.append(BSC_AddResource) class SimulStopLite(CTSTest): '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.debug("Setup: " + self.name) # We ignore the "node" parameter... 
watchpats = [ ] for node in self.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.incr("WasStarted") watchpats.append(self.templates["Pat:We_stopped"] % node) if len(watchpats) == 0: return self.success() # Stop all the nodes - at about the same time... watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() self.set_timer() for node in self.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) if watch.lookforall(): # Make sure they're completely down with no residue for node in self.Env["nodes"]: self.rsh(node, self.templates["StopCmd"]) return self.success() did_fail = 0 up_nodes = [] for node in self.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail = 1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.logger.log("Warn: All nodes stopped but CTS didn't detect: " + repr(watch.unmatched)) return self.failure("Missing log message: "+repr(watch.unmatched)) def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 class SimulStartLite(CTSTest): '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartLite' setup work. ''' self.incr("calls") self.debug("Setup: " + self.name) # We ignore the "node" parameter... node_list = [] for node in self.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.incr("WasStopped") node_list.append(node) self.set_timer() while len(node_list) > 0: # Repeat until all nodes come up watchpats = [ ] uppat = self.templates["Pat:NonDC_started"] if self.CM.upcount() == 0: uppat = self.templates["Pat:Local_started"] watchpats.append(self.templates["Pat:DC_IDLE"]) for node in node_list: watchpats.append(uppat % node) watchpats.append(self.templates["Pat:InfraUp"] % node) watchpats.append(self.templates["Pat:PacemakerUp"] % node) # Start all the nodes - at about the same time...
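# prepare_fencing_watcher() below pairs with fencing_cleanup(): any nodes
# that get fenced while coming up are returned as the new node_list and are
# simply retried on the next pass of the surrounding while loop.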
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() stonith = self.CM.prepare_fencing_watcher(self.name) for node in node_list: self.CM.StartaCMnoBlock(node) watch.lookforall() node_list = self.CM.fencing_cleanup(self.name, stonith) if node_list == None: return self.failure("Cluster did not stabilize") # Remove node_list messages from watch.unmatched for node in node_list: self.logger.debug("Dealing with stonith operations for %s" % repr(node_list)) if watch.unmatched: try: watch.unmatched.remove(uppat % node) except: self.debug("Already matched: %s" % (uppat % node)) try: watch.unmatched.remove(self.templates["Pat:InfraUp"] % node) except: self.debug("Already matched: %s" % (self.templates["Pat:InfraUp"] % node)) try: watch.unmatched.remove(self.templates["Pat:PacemakerUp"] % node) except: self.debug("Already matched: %s" % (self.templates["Pat:PacemakerUp"] % node)) if watch.unmatched: for regex in watch.unmatched: self.logger.log ("Warn: Startup pattern not found: %s" %(regex)) if not self.CM.cluster_stable(): return self.failure("Cluster did not stabilize") did_fail = 0 unstable = [] for node in self.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail = 1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.Env["nodes"]: if not self.CM.node_stable(node): did_fail = 1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) return self.success() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 def TestList(cm, audits): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) return result class RemoteLXC(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RemoteLXC" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.num_containers = 2 self.is_container = 1 self.is_docker_unsafe = 1 self.failed = 0 self.fail_string = "" def start_lxc_simple(self, node): # restore any artifacts laying around from a previous test. 
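# The "-s -R" invocation below restores the original CIB and libvirt
# configuration, in case an earlier run left containers or resources behind.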
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null") # generate the containers, put them in the config, add some resources to them pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc1")) pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc2")) pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc-ms")) pats.append(self.templates["Pat:RscOpOK"] % ("promote", "lxc-ms")) self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers) self.set_timer("remoteSimpleInit") watch.lookforall() self.log_timer("remoteSimpleInit") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 def cleanup_lxc_simple(self, node): pats = [ ] # if the test failed, attempt to clean up the cib and libvirt environment # as best as possible if self.failed == 1: # restore libvirt and cib self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null") return watch = self.create_watch(pats, 120) watch.setwatch() pats.append(self.templates["Pat:RscOpOK"] % ("stop", "container1")) pats.append(self.templates["Pat:RscOpOK"] % ("stop", "container2")) self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null") self.set_timer("remoteSimpleCleanup") watch.lookforall() self.log_timer("remoteSimpleCleanup") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 # cleanup libvirt self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null") def __call__(self, node): '''Perform the 'RemoteLXC' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed, start all nodes failed.") rc = self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null") if rc == 1: self.log("Environment test for lxc support failed.") return self.skipped() self.start_lxc_simple(node) self.cleanup_lxc_simple(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed == 1: return self.failure(self.fail_string) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"Updating failcount for ping", r"schedulerd.*: Recover (ping|lxc-ms|container)\s*\(.*\)", # The orphaned lxc-ms resource causes an expected transition error # that is a result of the scheduler not having knowledge that the # promotable resource used to be a clone. As a result, it looks like that # resource is running in multiple locations when it shouldn't... But in # this instance we know why this error is occurring and that it is expected. r"Calculated [Tt]ransition .*pe-error", r"Resource lxc-ms .* is active on 2 nodes attempting recovery", r"Unknown operation: fail", r"VirtualDomain.*ERROR: Unable to determine emulator", ] AllTestClasses.append(RemoteLXC) class RemoteDriver(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = self.__class__.__name__ self.is_docker_unsafe = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.stop = StopTest(cm) self.remote_rsc = "remote-rsc" self.cib_cmd = """cibadmin -C -o %s -X '%s' """ self.reset() def reset(self): self.pcmk_started = 0 self.failed = False self.fail_string = "" self.remote_node_added = 0 self.remote_rsc_added = 0 self.remote_use_reconnect_interval = self.Env.RandomGen.choice([True,False]) def fail(self, msg): """ Mark test as failed. 
""" self.failed = True # Always log the failure. self.logger.log(msg) # Use first failure as test status, as it's likely to be most useful. if not self.fail_string: self.fail_string = msg def get_othernode(self, node): for othernode in self.Env["nodes"]: if othernode == node: # we don't want to try and use the cib that we just shutdown. # find a cluster node that is not our soon to be remote-node. continue else: return othernode def del_rsc(self, node, rsc): othernode = self.get_othernode(node) rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc)) if rc != 0: self.fail("Removal of resource '%s' failed" % rsc) def add_rsc(self, node, rsc_xml): othernode = self.get_othernode(node) rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml)) if rc != 0: self.fail("resource creation failed") def add_primitive_rsc(self, node): rsc_xml = """ """ % { "node": self.remote_rsc } self.add_rsc(node, rsc_xml) if not self.failed: self.remote_rsc_added = 1 def add_connection_rsc(self, node): rsc_xml = """ """ % { "node": self.remote_node, "server": node } if self.remote_use_reconnect_interval: # Set reconnect interval on resource rsc_xml = rsc_xml + """ """ % (self.remote_node) rsc_xml = rsc_xml + """ """ % { "node": self.remote_node } self.add_rsc(node, rsc_xml) if not self.failed: self.remote_node_added = 1 def disable_services(self, node): self.corosync_enabled = self.Env.service_is_enabled(node, "corosync") if self.corosync_enabled: self.Env.disable_service(node, "corosync") self.pacemaker_enabled = self.Env.service_is_enabled(node, "pacemaker") if self.pacemaker_enabled: self.Env.disable_service(node, "pacemaker") def restore_services(self, node): if self.corosync_enabled: self.Env.enable_service(node, "corosync") if self.pacemaker_enabled: self.Env.enable_service(node, "pacemaker") def stop_pcmk_remote(self, node): # disable pcmk remote for i in range(10): rc = self.rsh(node, "service pacemaker_remote stop") if rc != 0: time.sleep(6) else: break def start_pcmk_remote(self, node): for i in range(10): rc = self.rsh(node, "service pacemaker_remote start") if rc != 0: time.sleep(6) else: self.pcmk_started = 1 break def freeze_pcmk_remote(self, node): """ Simulate a Pacemaker Remote daemon failure. """ # We freeze the process. self.rsh(node, "killall -STOP pacemaker-remoted") def resume_pcmk_remote(self, node): # We resume the process. self.rsh(node, "killall -CONT pacemaker-remoted") def start_metal(self, node): # Cluster nodes are reused as remote nodes in remote tests. If cluster # services were enabled at boot, in case the remote node got fenced, the # cluster node would join instead of the expected remote one. Meanwhile # pacemaker_remote would not be able to start. Depending on the chances, # the situations might not be able to be orchestrated gracefully any more. # # Temporarily disable any enabled cluster serivces. 
self.disable_services(node) pcmk_started = 0 # make sure the resource doesn't already exist for some reason self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_rsc)) self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_node)) if not self.stop(node): self.fail("Failed to shutdown cluster node %s" % node) return self.start_pcmk_remote(node) if self.pcmk_started == 0: self.fail("Failed to start pacemaker_remote on node %s" % node) return # Convert node to baremetal now that it has shutdown the cluster stack pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() pats.append(self.templates["Pat:RscOpOK"] % ("start", self.remote_node)) pats.append(self.templates["Pat:DC_IDLE"]) self.add_connection_rsc(node) self.set_timer("remoteMetalInit") watch.lookforall() self.log_timer("remoteMetalInit") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) def migrate_connection(self, node): if self.failed: return pats = [ ] pats.append(self.templates["Pat:RscOpOK"] % ("migrate_to", self.remote_node)) pats.append(self.templates["Pat:RscOpOK"] % ("migrate_from", self.remote_node)) pats.append(self.templates["Pat:DC_IDLE"]) watch = self.create_watch(pats, 120) watch.setwatch() (rc, lines) = self.rsh(node, "crm_resource -M -r %s" % (self.remote_node), None) if rc != 0: self.fail("failed to move remote node connection resource") return self.set_timer("remoteMetalMigrate") watch.lookforall() self.log_timer("remoteMetalMigrate") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) return def fail_rsc(self, node): if self.failed: return watchpats = [ ] watchpats.append(self.templates["Pat:RscRemoteOpOK"] % ("stop", self.remote_rsc, self.remote_node)) watchpats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node)) watchpats.append(self.templates["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, 120) watch.setwatch() self.debug("causing dummy rsc to fail.") rc = self.rsh(node, "rm -f /var/run/resource-agents/Dummy*") self.set_timer("remoteRscFail") watch.lookforall() self.log_timer("remoteRscFail") if watch.unmatched: self.fail("Unmatched patterns during rsc fail: %s" % watch.unmatched) def fail_connection(self, node): if self.failed: return watchpats = [ ] watchpats.append(self.templates["Pat:FenceOpOK"] % self.remote_node) watchpats.append(self.templates["Pat:NodeFenced"] % self.remote_node) watch = self.create_watch(watchpats, 120) watch.setwatch() # freeze the pcmk remote daemon. this will result in fencing self.debug("Force stopped active remote node") self.freeze_pcmk_remote(node) self.debug("Waiting for remote node to be fenced.") self.set_timer("remoteMetalFence") watch.lookforall() self.log_timer("remoteMetalFence") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) return self.debug("Waiting for the remote node to come back up") self.CM.ns.WaitForNodeToComeUp(node, 120); pats = [ ] watch = self.create_watch(pats, 240) watch.setwatch() pats.append(self.templates["Pat:RscOpOK"] % ("start", self.remote_node)) if self.remote_rsc_added == 1: pats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node)) # start the remote node again watch it integrate back into cluster. 
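# start_pcmk_remote() retries "service pacemaker_remote start" several times
# with a short sleep in between, so a slow reboot does not immediately fail
# the test.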
self.start_pcmk_remote(node) if self.pcmk_started == 0: self.fail("Failed to start pacemaker_remote on node %s" % node) return self.debug("Waiting for remote node to rejoin cluster after being fenced.") self.set_timer("remoteMetalRestart") watch.lookforall() self.log_timer("remoteMetalRestart") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) return def add_dummy_rsc(self, node): if self.failed: return # verify we can put a resource on the remote node pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() pats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node)) pats.append(self.templates["Pat:DC_IDLE"]) # Add a resource that must live on remote-node self.add_primitive_rsc(node) # force that rsc to prefer the remote node. (rc, line) = self.CM.rsh(node, "crm_resource -M -r %s -N %s -f" % (self.remote_rsc, self.remote_node), None) if rc != 0: self.fail("Failed to place remote resource on remote node.") return self.set_timer("remoteMetalRsc") watch.lookforall() self.log_timer("remoteMetalRsc") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) def test_attributes(self, node): if self.failed: return # This verifies permanent attributes can be set on a remote-node. It also # verifies the remote-node can edit its own cib node section remotely. (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -v testval -N %s" % (self.remote_node), None) if rc != 0: self.fail("Failed to set remote-node attribute. rc:%s output:%s" % (rc, line)) return (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -q -N %s" % (self.remote_node), None) if rc != 0: self.fail("Failed to get remote-node attribute") return (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -D -N %s" % (self.remote_node), None) if rc != 0: self.fail("Failed to delete remote-node attribute") return def cleanup_metal(self, node): self.restore_services(node) if self.pcmk_started == 0: return pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() if self.remote_rsc_added == 1: pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.remote_rsc)) if self.remote_node_added == 1: pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.remote_node)) self.set_timer("remoteMetalCleanup") self.resume_pcmk_remote(node) if self.remote_rsc_added == 1: # Remove dummy resource added for remote node tests self.debug("Cleaning up dummy rsc put on remote node") self.rsh(self.get_othernode(node), "crm_resource -U -r %s" % self.remote_rsc) self.del_rsc(node, self.remote_rsc) if self.remote_node_added == 1: # Remove remote node's connection resource self.debug("Cleaning up remote node connection resource") self.rsh(self.get_othernode(node), "crm_resource -U -r %s" % (self.remote_node)) self.del_rsc(node, self.remote_node) watch.lookforall() self.log_timer("remoteMetalCleanup") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) self.stop_pcmk_remote(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.remote_node_added == 1: # Remove remote node itself self.debug("Cleaning up node entry for remote node") self.rsh(self.get_othernode(node), "crm_node --force --remove %s" % self.remote_node) def setup_env(self, node): self.remote_node = "remote-%s" % (node) # we are assuming if all nodes have a key, that it is # the right key... If any node doesn't have a remote # key, we regenerate it everywhere. 
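# The code below generates the key once in a local temporary file and then
# pushes it to /etc/pacemaker/authkey on every cluster node, so the cluster
# stack and pacemaker-remoted share the same secret. Roughly equivalent to
# this sketch (illustrative only; the real code uses dd and self.rsh.cp()):
#
#     with open(keyfile, "wb") as f:
#         f.write(os.urandom(4096))      # same size as the dd invocation below
#     for n in self.Env["nodes"]:
#         self.rsh.cp(keyfile, "root@%s:/etc/pacemaker/authkey" % n)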
if self.rsh.exists_on_all("/etc/pacemaker/authkey", self.Env["nodes"]): return # create key locally (handle, keyfile) = tempfile.mkstemp(".cts") os.close(handle) devnull = open(os.devnull, 'wb') subprocess.check_call(["dd", "if=/dev/urandom", "of=%s" % keyfile, "bs=4096", "count=1"], stdout=devnull, stderr=devnull) devnull.close() # sync key throughout the cluster for node in self.Env["nodes"]: self.rsh(node, "mkdir -p --mode=0750 /etc/pacemaker") self.rsh.cp(keyfile, "root@%s:/etc/pacemaker/authkey" % node) self.rsh(node, "chgrp haclient /etc/pacemaker /etc/pacemaker/authkey") self.rsh(node, "chmod 0640 /etc/pacemaker/authkey") os.unlink(keyfile) def is_applicable(self): if not self.is_applicable_common(): return False for node in self.Env["nodes"]: rc = self.rsh(node, "which pacemaker-remoted >/dev/null 2>&1") if rc != 0: return False return True def start_new_test(self, node): self.incr("calls") self.reset() ret = self.startall(None) if not ret: return self.failure("setup failed: could not start all nodes") self.setup_env(node) self.start_metal(node) self.add_dummy_rsc(node) return True def __call__(self, node): return self.failure("This base class is not meant to be called directly.") def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"""is running on remote.*which isn't allowed""", r"""Connection terminated""", r"""Could not send remote""", ] # RemoteDriver is just a base class for other tests, so it is not added to AllTestClasses class RemoteBasic(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteBaremetal' test. ''' if not self.start_new_test(node): return self.failure(self.fail_string) self.test_attributes(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed: return self.failure(self.fail_string) return self.success() AllTestClasses.append(RemoteBasic) class RemoteStonithd(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteStonithd' test. ''' if not self.start_new_test(node): return self.failure(self.fail_string) self.fail_connection(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed: return self.failure(self.fail_string) return self.success() def is_applicable(self): if not RemoteDriver.is_applicable(self): return False if "DoFencing" in list(self.Env.keys()): return self.Env["DoFencing"] return True def errorstoignore(self): ignore_pats = [ r"Lost connection to Pacemaker Remote node", r"Software caused connection abort", r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor", r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*", r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)", r"error: Result of monitor operation for .* on remote-.*: No executor connection", ] ignore_pats.extend(RemoteDriver.errorstoignore(self)) return ignore_pats AllTestClasses.append(RemoteStonithd) class RemoteMigrate(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteMigrate' test. ''' if not self.start_new_test(node): return self.failure(self.fail_string) self.migrate_connection(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed: return self.failure(self.fail_string) return self.success() AllTestClasses.append(RemoteMigrate) class RemoteRscFailure(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteRscFailure' test. 
''' if not self.start_new_test(node): return self.failure(self.fail_string) # This is an important step. We are migrating the connection # before failing the resource. This verifies that the migration # has properly maintained control over the remote-node. self.migrate_connection(node) self.fail_rsc(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed: return self.failure(self.fail_string) return self.success() def errorstoignore(self): ignore_pats = [ r"schedulerd.*: Recover remote-rsc\s*\(.*\)", r"Dummy.*: No process state file found", ] ignore_pats.extend(RemoteDriver.errorstoignore(self)) return ignore_pats AllTestClasses.append(RemoteRscFailure) # vim:ts=4:sw=4:et: diff --git a/cts/cts-exec.in b/cts/cts-exec.in index 8cc203fcb9..592d850b4e 100644 --- a/cts/cts-exec.in +++ b/cts/cts-exec.in @@ -1,1219 +1,1219 @@ #!@PYTHON@ """ Regression tests for Pacemaker's pacemaker-execd """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2012-2019 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import stat import sys import subprocess import shlex import shutil import time # Where to find test binaries # Prefer the source tree if available BUILD_DIR = "@abs_top_builddir@" TEST_DIR = sys.path[0] SBIN_DIR = "@sbindir@" # File permissions for executable scripts we create EXECMODE = stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH # These values must be kept in sync with include/crm/crm.h class CrmExit(object): OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-exec.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) # For pacemaker-execd, cts-exec-helper, and pacemaker-remoted new_path = "%s/daemons/execd:%s" % (BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For crm_resource # For pacemaker-fenced new_path = "%s/daemons/fenced:%s" % (BUILD_DIR, new_path) # For cts-support new_path = "%s/cts:%s" % (BUILD_DIR, new_path) else: print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) # For cts-exec-helper, cts-support, pacemaker-execd, pacemaker-fenced, # and pacemaker-remoted new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + 
pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Run a command, and return its standard output. """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single pacemaker-execd regression test """ def __init__(self, name, description, verbose=0, tls=0): self.name = name self.description = description self.cmds = [] if tls: self.daemon_location = "pacemaker-remoted" else: self.daemon_location = "pacemaker-execd" self.test_tool_location = "cts-exec-helper" self.verbose = verbose self.tls = tls self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK self.execd_process = None self.stonith_process = None self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): """ Prepare the host for running a test """ ### make sure we are in full control here ### cmd = shlex.split("killall -q -9 pacemaker-fenced lt-pacemaker-fenced pacemaker-execd lt-pacemaker-execd cts-exec-helper lt-cts-exec-helper pacemaker-remoted") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" if self.tls == 0: self.stonith_process = subprocess.Popen(shlex.split("pacemaker-fenced -s")) if self.verbose: additional_args = additional_args + " -V" self.execd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/pacemaker-execd-regression.log" % (self.daemon_location, additional_args))) time.sleep(1) def clean_environment(self): """ Clean up the host after running a test """ if self.execd_process: self.execd_process.terminate() self.execd_process.wait() if self.verbose: print("Daemon output") logfile = io.open('/tmp/pacemaker-execd-regression.log', 'rt', errors='replace') for line in logfile: print(line.strip().encode('utf-8', 'replace')) os.remove('/tmp/pacemaker-execd-regression.log') if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.execd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "") def add_cmd_check_stdout(self, args, match, no_match=""): """ Add a command with expected output to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, 
CrmExit.OK, match, 0, no_match) def add_cmd(self, args): """ Add a cts-exec-helper command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "") def add_cmd_and_kill(self, kill_proc, args): """ Add a cts-exec-helper command and system command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "", kill=kill_proc) def add_expected_fail_cmd(self, args, exitcode=CrmExit.ERROR): """ Add a cts-exec-helper command to be executed as part of this test and expected to fail """ self.__new_cmd(self.test_tool_location, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) ### Typically, the kill argument is used to detect some sort of ### failure. Without yielding for a few seconds here, the process ### launched earlier that is listening for the failure may not have ### time to connect to pacemaker-execd. time.sleep(2) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = pipe_output(test) args['cmd_output'] = output if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: raise OutputNotFoundError(output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: raise OutputFoundError(output) def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. 
""" res = 0 i = 1 if self.tls and self.name.count("stonith") != 0: self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print(self.result_txt) return res self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print(cmd['cmd_output']) print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print(cmd['cmd_output'].strip()) print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all pacemaker-execd regression tests """ def __init__(self, verbose=0, tls=0): self.tests = [] self.verbose = verbose self.tls = tls self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.installed_files = [] self.action_timeout = " -t 9000 " if self.tls: self.rsc_classes.remove("stonith") if "systemd" in self.rsc_classes: try: # This code doesn't need this import, but pacemaker-cts-dummyd # does, so ensure the dependency is available rather than cause # all systemd tests to fail. import systemd.daemon except ImportError: print("Python systemd bindings not found.") print("The tests for systemd class are not going to be run.") self.rsc_classes.remove("systemd") print("Testing resource classes", repr(self.rsc_classes)) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : '-c exec -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : '-c cancel -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc " + self.action_timeout + " -C systemd -T pacemaker-cts-dummyd@3", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", 
"systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : '-c exec -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" -t 15000 ", "systemd_cancel_line" : '-c cancel -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T pacemaker-cts-dummyd", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : '-c exec -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_monitor_event" : '-l "NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete" -t 15000', "upstart_cancel_line" : '-c cancel -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : '-c exec -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT 
event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : '-c cancel -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : '-c exec -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : '-c cancel -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc " + self.action_timeout + " -C stonith -P pacemaker -T fence_dummy", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : '-c exec -r stonith_test_rsc -a start ' + self.action_timeout, "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : '-c exec -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : '-c cancel -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): """ Create a named test """ test = Test(name, description, self.verbose, self.tls) self.tests.append(test) return test def setup_test_environment(self): """ Prepare the host before executing any tests """ os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): print("Installing /etc/pacemaker/authkey ...") os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") 
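# Everything created from here on is appended to self.installed_files so that
# cleanup_test_environment() can remove it again after the run.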
self.installed_files.append("/etc/pacemaker/authkey") # If we're in build directory, install agents if not already installed if os.path.exists("%s/cts/cts-exec.in" % BUILD_DIR): if not os.path.exists("@OCF_RA_DIR@/pacemaker"): # @TODO remember which components were created and remove them os.makedirs("@OCF_RA_DIR@/pacemaker", 0o755) for agent in ["Dummy", "Stateful", "ping"]: agent_source = "%s/extra/resources/%s" % (BUILD_DIR, agent) agent_dest = "@OCF_RA_DIR@/pacemaker/%s" % (agent) if not os.path.exists(agent_dest): print("Installing %s ..." % (agent_dest)) shutil.copyfile(agent_source, agent_dest) os.chmod(agent_dest, EXECMODE) self.installed_files.append(agent_dest) subprocess.call(["cts-support", "install"]) def cleanup_test_environment(self): """ Clean up the host after executing desired tests """ for installed_file in self.installed_files: print("Removing %s ..." % (installed_file)) os.remove(installed_file) subprocess.call(["cts-support", "uninstall"]) def build_generic_tests(self): """ Register tests that apply to all resource classes """ common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. 
### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # Add the duplicate monitors test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. 
### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_multi_rsc_tests(self): """ Register complex tests that involve managing multiple resouces of different types """ common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: ### If this fails, that means the monitor is not being rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_negative_tests(self): """ Register tests related to how pacemaker-execd handles failures """ ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") # -t must be less than self.action_timeout test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") - test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd('-c register_rsc -r test_rsc ' + '-C stonith -P pacemaker -T fence_dummy ' + self.action_timeout + '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete"') test.add_cmd('-c exec -r test_rsc -a start -k monitor_delay -v 30 ' + '-t 1000 -w') # -t must be less than self.action_timeout test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:OCF_TIMEOUT op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l 
\"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after pacemaker-execd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd('-c exec -r stonith_test_rsc -a monitor -i 600s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" ' + self.action_timeout) test.add_cmd_and_kill("killall -9 -q pacemaker-fenced lt-pacemaker-fenced", - '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error" -t 15000') + '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:error op_status:error" -t 15000') test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" ' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. 
### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ') test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd_and_kill('rm -f @localstatedir@/run/Dummy-test_rsc.state', '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout + '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"') ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T pacemaker-cts-dummyd@3 " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q pacemaker-cts-dummyd", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete 
rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T pacemaker-cts-dummyd "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill('killall -9 -q dd', '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) ### interval is wrong, should fail test.add_expected_fail_cmd('-c cancel -r test_rsc -a monitor -i 2s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor 
rc:not running op_status:Cancelled\" ") ### action name is wrong, should fail test.add_expected_fail_cmd('-c cancel -r test_rsc -a stop -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd('-c exec -r test_rsc -a monitor -i 6s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" 
"+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") def build_stress_tests(self): """ Register stress tests """ timeout = "-t 20000" iterations = 25 test = self.new_test("ocf_stress", "Verify OCF agent handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T pacemaker-cts-dummyd@3 -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT 
event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) iterations = 9 timeout = "-t 30000" ### Verify recurring op in-flight collision is handled in series properly test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) for i in range(iterations): test.add_cmd('-c exec -r test_rsc -a monitor %s -i 100%dms ' '-k op_sleep -v 2 ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' % (timeout, i)) test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) def build_custom_tests(self): """ Register tests that target specific cases """ ### verify resource temporary folder is created and used by OCF agents. ### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000", CrmExit.TIMEOUT) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. 
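# [Editor's sketch] Timing reasoning behind the start_delay test above, using
# the same numbers as its add_cmd() calls (illustration only, not harness
# code): the start is submitted with a 6000 ms start_delay, so a 2000 ms watch
# for the completion event is expected to time out (hence CrmExit.TIMEOUT in
# add_expected_fail_cmd), while the following 6000 ms watch is expected to see
# the start complete.
start_delay_ms = 6000
first_watch_ms = 2000
second_watch_ms = 6000
assert first_watch_ms < start_delay_ms                      # too short: completion cannot arrive yet
assert first_watch_ms + second_watch_ms >= start_delay_ms   # combined windows cover the delay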
### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000", CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T pacemaker-cts-dummyd@3 "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T pacemaker-cts-dummyd "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. 
### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r \"test_rsc\" -a \"monitor\" -i 1s ' + self.action_timeout + ' -n ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ') test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"", "resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"", "resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy\"", "resource-agent name=\"fence_dummy\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"pacemaker-cts-dummyd@\"", "resource-agent name=\"pacemaker-cts-dummyd@\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"pacemaker-cts-dummyd\"", "resource-agent name=\"pacemaker-cts-dummyd\"") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") 
test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd@") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "pacemaker-cts-dummyd@") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "pacemaker-cts-dummyd") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy") def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['pacemaker-remote'] = 0 def 
build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-R" or args[i] == "--pacemaker-remote": self.options['pacemaker-remote'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--pacemaker-remote | -R Test pacemaker-remoted binary instead of pacemaker-execd") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run pacemaker-execd regression tests as specified by arguments """ update_path() opts = TestOptions() opts.build_options(argv) tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote']) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.build_stress_tests() tests.setup_test_environment() print("Starting ...") if opts.options['list-tests']: tests.print_list() elif opts.options['show-usage']: opts.show_usage() elif opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/cts/patterns.py b/cts/patterns.py index b0b8784a02..96d6471c38 100644 --- a/cts/patterns.py +++ b/cts/patterns.py @@ -1,411 +1,413 @@ """ Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS) """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2008-2019 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import sys, os from cts.CTSvars import * patternvariants = {} class BasePatterns(object): def __init__(self, name): self.name = name patternvariants[name] = self self.ignore = [ "avoid confusing Valgrind", # Logging bug in some versions of libvirtd r"libvirtd.*: internal error: Failed to parse PCI config address", ] self.BadNews = [] self.components = {} self.commands = { "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "CibQuery" : "cibadmin -Ql", "CibAddXml" : "cibadmin --modify -c --xml-text %s", "CibDelXpath" : "cibadmin --delete --xpath %s", # 300,000 == 5 minutes "RscRunning" : CTSvars.CRM_DAEMON_DIR + "/cts-exec-helper -R -r 
%s", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "MaintenanceModeOn" : "cibadmin --modify -c --xml-text ''", "MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", "StandbyCmd" : "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null", } self.search = { "Pat:DC_IDLE" : "pacemaker-controld.*State transition.*-> S_IDLE", # This won't work if we have multiple partitions "Pat:Local_started" : "%s\W.*controller successfully started", "Pat:NonDC_started" : r"%s\W.*State transition.*-> S_NOT_DC", "Pat:DC_started" : r"%s\W.*State transition.*-> S_IDLE", "Pat:We_stopped" : "%s\W.*OVERRIDE THIS PATTERN", "Pat:They_stopped" : "%s\W.*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:Fencing_start" : r"Requesting peer fencing .* targeting %s", "Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* targeting %s on .* for .*@.*: OK", "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover %s", "Pat:Fencing_active" : r"pacemaker-schedulerd.*: Resource %s is active on .* nodes", "Pat:Fencing_probe" : r"pacemaker-controld.* Result of probe operation for %s on .*: Error", "Pat:RscOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok", + "Pat:RscOpFail" : r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of %s ", + "Pat:CloneOpFail" : r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of (%s|%s) ", "Pat:RscRemoteOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok", "Pat:NodeFenced" : r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK", "Pat:FenceOpOK" : "Operation .* for host '%s' with device .* returned: 0", } def get_component(self, key): if key in self.components: return self.components[key] print("Unknown component '%s' for %s" % (key, self.name)) return [] def get_patterns(self, key): if key == "BadNews": return self.BadNews elif key == "BadNewsIgnore": return self.ignore elif key == "Commands": return self.commands elif key == "Search": return self.search elif key == "Components": return self.components def __getitem__(self, key): if key == "Name": return self.name elif key in self.commands: return self.commands[key] elif key in self.search: return self.search[key] else: print("Unknown template '%s' for %s" % (key, self.name)) return None class crm_corosync(BasePatterns): ''' Patterns for Corosync version 2 cluster manager class ''' def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "StartCmd" : "service corosync start && service pacemaker start", "StopCmd" : "service pacemaker stop; [ ! 
-e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop", "EpochCmd" : "crm_node -e", "QuorumCmd" : "crm_node -q", "PartitionCmd" : "crm_node -p", }) self.search.update({ # Close enough ... "Corosync Cluster Engine exiting normally" isn't # printed reliably. "Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines", "Pat:They_stopped" : "%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost", "Pat:They_dead" : "pacemaker-controld.*Node %s(\[|\s).*state is now lost", "Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(", # "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes() "Pat:ChildKilled" : r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated( with signal 9|$)", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", "Pat:InfraUp" : "%s\W.*corosync.*Initializing transport", "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", }) self.ignore = self.ignore + [ r"crm_mon:", r"crmadmin:", r"update_trace_data", r"async_notify:.*strange, client not found", r"Parse error: Ignoring unknown option .*nodename", r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:", r"getinfo response error: 1$", r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE", r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)", ] self.BadNews = [ - r"error:", + r"[^(]error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting", r"schedulerd.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)", r"pacemaker-schedulerd.*Recover .*\(.* -\> .*\)", r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", r"Peer is not part of our cluster", r"We appear to be in an election loop", r"Unknown node -> we will not deliver message", r"(Blackbox dump requested|Problem detected)", r"pacemakerd.*Could not connect to Cluster Configuration Database API", r"Receiving messages from a node we think is dead", r"share the same cluster nodeid", r"share the same name", #r"crm_ipc_send:.*Request .* failed", #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received", # Not inherently bad, but worth tracking #r"No need to invoke the TE", #r"ping.*: DEBUG: Updated connected = 0", #r"Digest mis-match:", r"pacemaker-controld:.*Transition failed: terminated", r"Local CIB .* differs from .*:", r"warn.*:\s*Continuing but .* will NOT be used", r"warn.*:\s*Cluster configuration file .* is corrupt", #r"Executing .* fencing operation", r"Election storm", r"stalled the FSA with pending inputs", ] self.components["common-ignore"] = [ r"Pending action:", r"resource( was|s were) active at shutdown", r"pending LRM operations at shutdown", r"Lost connection to the CIB manager", r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported", r"pacemaker-controld.*:\s*Performing A_EXIT_1 - forcefully exiting ", r".*:\s*Executing .* fencing operation 
\(.*\) on ", r".*:\s*Requesting fencing \([^)]+\) of node ", r"(Blackbox dump requested|Problem detected)", # "Resource .*stonith::.* is active on 2 nodes attempting recovery", # "Transition .* ERRORs found during PE processing", ] self.components["corosync-ignore"] = [ r"error:.*Connection to the CPG API failed: Library error", r"\[[0-9]+\] exited with status [0-9]+ \(", r"pacemaker-based.*error:.*Corosync connection lost", r"pacemaker-fenced.*error:.*Corosync connection terminated", r"pacemaker-controld.*State transition .* S_RECOVERY", r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state", r"pacemaker-controld.*error:.*Could not recover from internal error", r"error:.*Connection to cib_(shm|rw).* (failed|closed)", r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"crit: Fencing daemon connection failed", # This is overbroad, but we don't have a way to say that only # certain transition errors are acceptable (if the fencer respawns, # fence devices may appear multiply active). We have to rely on # other causes of a transition error logging their own error # message, which is the usual practice. r"pacemaker-schedulerd.* Calculated transition .*/pe-error", ] self.components["corosync"] = [ # We expect each daemon to lose its cluster connection. # However, if the CIB manager loses its connection first, # it's possible for another daemon to lose that connection and # exit before losing the cluster connection. r"pacemakerd.*:\s*(crit|error):.*Lost connection to cluster layer", r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer", r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"schedulerd.*Scheduling Node .* for STONITH", r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK", ] self.components["pacemaker-based"] = [ r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102", r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1", r"pacemakerd.* Respawning failed child process: pacemaker-attrd", r"pacemakerd.* Respawning failed child process: pacemaker-based", r"pacemakerd.* Respawning failed child process: pacemaker-controld", r"pacemakerd.* Respawning failed child process: pacemaker-fenced", r"pacemaker-.* Connection to cib_.* (failed|closed)", r"pacemaker-attrd.*:.*Lost connection to the CIB manager", r"pacemaker-controld.*:.*Lost connection to the CIB manager", r"pacemaker-controld.*I_ERROR.*crmd_cib_connection_destroy", r"pacemaker-controld.* State transition .* S_RECOVERY", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", ] self.components["pacemaker-based-ignore"] = [ r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", # This is overbroad, but we don't have a way to say that only # certain transition errors are acceptable (if the fencer respawns, # fence devices may appear multiply active). We have to rely on # other causes of a transition error logging their own error # message, which is the usual practice. 
r"pacemaker-schedulerd.* Calculated transition .*/pe-error", ] self.components["pacemaker-execd"] = [ r"pacemaker-controld.*Connection to (pacemaker-execd|lrmd|executor) (failed|closed)", r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy", r"pacemaker-controld.*State transition .* S_RECOVERY", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1", r"pacemakerd.*Respawning failed child process: pacemaker-execd", r"pacemakerd.*Respawning failed child process: pacemaker-controld", ] self.components["pacemaker-execd-ignore"] = [ r"pacemaker-attrd.*Connection to lrmd (failed|closed)", r"pacemaker-(attrd|controld).*Could not execute alert", ] self.components["pacemaker-controld"] = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ] self.components["pacemaker-controld-ignore"] = [] self.components["pacemaker-attrd"] = [] self.components["pacemaker-attrd-ignore"] = [] self.components["pacemaker-schedulerd"] = [ "State transition .* S_RECOVERY", r"Respawning failed child process: pacemaker-controld", r"pacemaker-controld\[[0-9]+\] exited with status 1 \(", "Connection to pengine failed", "Connection to pengine.* closed", r"Connection to the scheduler failed", "pacemaker-controld.*I_ERROR.*save_cib_contents", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", "pacemaker-controld.*Could not recover from internal error", ] self.components["pacemaker-schedulerd-ignore"] = [] self.components["pacemaker-fenced"] = [ r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"Fencing daemon connection failed", r"pacemaker-controld.*Fencer successfully connected", ] self.components["pacemaker-fenced-ignore"] = [ r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"crit:.*Fencing daemon connection failed", r"error:.*Fencer connection failed \(will retry\)", r"Connection to (fencer|stonith-ng) failed, finalizing .* pending operations", r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error", # This is overbroad, but we don't have a way to say that only # certain transition errors are acceptable (if the fencer respawns, # fence devices may appear multiply active). We have to rely on # other causes of a transition error logging their own error # message, which is the usual practice. 
r"pacemaker-schedulerd.* Calculated transition .*/pe-error", ] self.components["pacemaker-fenced-ignore"].extend(self.components["common-ignore"]) class crm_corosync_docker(crm_corosync): ''' Patterns for Corosync version 2 cluster manager class ''' def __init__(self, name): crm_corosync.__init__(self, name) self.commands.update({ "StartCmd" : "pcmk_start", "StopCmd" : "pcmk_stop", }) class PatternSelector(object): def __init__(self, name=None): self.name = name self.base = BasePatterns("crm-base") if not name: crm_corosync("crm-corosync") elif name == "crm-corosync": crm_corosync(name) elif name == "crm-corosync-docker": crm_corosync_docker(name) def get_variant(self, variant): if variant in patternvariants: return patternvariants[variant] print("defaulting to crm-base for %s" % variant) return self.base def get_patterns(self, variant, kind): return self.get_variant(variant).get_patterns(kind) def get_template(self, variant, key): v = self.get_variant(variant) return v[key] def get_component(self, variant, kind): return self.get_variant(variant).get_component(kind) def __getitem__(self, key): return self.get_template(self.name, key) # python cts/CTSpatt.py -k crm-corosync -t StartCmd if __name__ == '__main__': pdir=os.path.dirname(sys.path[0]) sys.path.insert(0, pdir) # So that things work from the source directory kind=None template=None skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-k" or args[i] == "--kind": skipthis=1 kind = args[i+1] elif args[i] == "-t" or args[i] == "--template": skipthis=1 template = args[i+1] else: print("Illegal argument " + args[i]) print(PatternSelector(kind)[template]) diff --git a/doc/Clusters_from_Scratch/en-US/Author_Group.xml b/doc/Clusters_from_Scratch/en-US/Author_Group.xml index 898da71261..ff907feee5 100644 --- a/doc/Clusters_from_Scratch/en-US/Author_Group.xml +++ b/doc/Clusters_from_Scratch/en-US/Author_Group.xml @@ -1,11 +1,10 @@ - AndrewBeekhof - Red Hat - Primary author - andrew@beekhof.net + + Written by the Pacemaker project contributors + diff --git a/doc/Clusters_from_Scratch/en-US/Book_Info.xml b/doc/Clusters_from_Scratch/en-US/Book_Info.xml index 68be4424c1..9fcbdf0a10 100644 --- a/doc/Clusters_from_Scratch/en-US/Book_Info.xml +++ b/doc/Clusters_from_Scratch/en-US/Book_Info.xml @@ -1,69 +1,71 @@ %BOOK_ENTITIES; ]> - Clusters from Scratch - Step-by-Step Instructions for Building Your First High-Availability Cluster - Pacemaker - 2.0 - - 10 - 2 - - - This document provides a step-by-step guide to building a simple high-availability cluster using Pacemaker. - - - The example cluster will use: - - - - &DISTRO; &DISTRO_VERSION; as the host operating system - - - - - Corosync to provide messaging and membership services, - - - - - Pacemaker 1.1.18 - While this guide is part of the document set for - Pacemaker 2.0, it demonstrates the version available in - the standard &DISTRO; repositories. - - - - - DRBD as a cost-effective alternative to shared storage, - - - - - GFS2 as the cluster filesystem (in active/active mode) - - - - - - Given the graphical nature of the install process, a number of screenshots are included. However the guide is primarily composed of commands, the reasons for executing them and their expected outputs. 
- - - - - - - - - - - + Clusters from Scratch + Step-by-Step Instructions for Building Your First High-Availability Cluster + Pacemaker + 2.0 + + 11 + 0 + + + This document provides a step-by-step guide to building a simple high-availability cluster using Pacemaker. + + + The example cluster will use: + + + + &DISTRO; &DISTRO_VERSION; as the host operating system + + + + + Corosync to provide messaging and membership services, + + + + + Pacemaker 1.1.18 + While this guide is part of the document set for + Pacemaker 2.0, it demonstrates the version available in + the standard &DISTRO; repositories. + + + + + DRBD as a cost-effective alternative to shared storage, + + + + + GFS2 as the cluster filesystem (in active/active mode) + + + + + + Given the graphical nature of the install process, a number of + screenshots are included. However the guide is primarily composed of + commands, the reasons for executing them and their expected outputs. + + + + + + + + + + + diff --git a/doc/Clusters_from_Scratch/en-US/Revision_History.xml b/doc/Clusters_from_Scratch/en-US/Revision_History.xml index 05ef1be65a..77a3b2cdde 100644 --- a/doc/Clusters_from_Scratch/en-US/Revision_History.xml +++ b/doc/Clusters_from_Scratch/en-US/Revision_History.xml @@ -1,93 +1,205 @@ %BOOK_ENTITIES; ]> - - Revision History - - - - 1-0 - Mon May 17 2010 - AndrewBeekhofandrew@beekhof.net - Import from Pages.app - - - 2-0 - Wed Sep 22 2010 - RaoulScarazzinirasca@miamammausalinux.org - Italian translation - - - 3-0 - Wed Feb 9 2011 - AndrewBeekhofandrew@beekhof.net - Updated for Fedora 13 - - - 4-0 - Wed Oct 5 2011 - AndrewBeekhofandrew@beekhof.net - Update the GFS2 section to use CMAN - - - 5-0 - Fri Feb 10 2012 - AndrewBeekhofandrew@beekhof.net - Generate docbook content from asciidoc sources - - - 6-0 - Tues July 3 2012 - AndrewBeekhofandrew@beekhof.net - Updated for Fedora 17 - - - 7-0 - Fri Sept 14 2012 - DavidVosseldavidvossel@gmail.com - Updated for pcs - - - 8-0 - Mon Jan 05 2015 - KenGaillotkgaillot@redhat.com - Updated for Fedora 21 - - - 8-1 - Thu Jan 08 2015 - KenGaillotkgaillot@redhat.com - Minor corrections, plus use include file for intro - - - 9-0 - Fri Aug 14 2015 - KenGaillotkgaillot@redhat.com - Update for CentOS 7.1 and leaving firewalld/SELinux enabled - - - 10-0 - Fri Jan 12 2018 - KenGaillotkgaillot@redhat.com - Update banner for Pacemaker 2.0 and content for CentOS 7.4 with Pacemaker 1.1.16 - - - 10-1 - Wed Sep 5 2018 - KenGaillotkgaillot@redhat.com - Update for CentOS 7.5 with Pacemaker 1.1.18 - - - 10-2 - Fri Dec 7 2018 - KenGaillotkgaillot@redhat.com - JanPokornýjpokorny@redhat.com - ChrisLumensclumens@redhat.com - Minor clarifications and formatting changes - - - + + Revision History + + + + 1-0 + Mon May 17 2010 + + AndrewBeekhof + andrew@beekhof.net + + + + Import from Pages.app + + + + + 2-0 + Wed Sep 22 2010 + + RaoulScarazzini + rasca@miamammausalinux.org + + + + Italian translation + + + + + 3-0 + Wed Feb 9 2011 + + Andrew + Beekhof + andrew@beekhof.net + + + + Updated for Fedora 13 + + + + + 4-0 + Wed Oct 5 2011 + + Andrew + Beekhof + andrew@beekhof.net + + + + Update the GFS2 section to use CMAN + + + + + 5-0 + Fri Feb 10 2012 + + Andrew + Beekhof + andrew@beekhof.net + + + + Generate docbook content from asciidoc sources + + + + + 6-0 + Tues July 3 2012 + + AndrewBeekhof + andrew@beekhof.net + + + + Updated for Fedora 17 + + + + + 7-0 + Fri Sept 14 2012 + + DavidVossel + davidvossel@gmail.com + + + + Updated for pcs + + + + + 8-0 + Mon Jan 05 2015 + + KenGaillot + kgaillot@redhat.com + + + + 
Updated for Fedora 21 + + + + 8-1 + Thu Jan 08 2015 + + KenGaillot + kgaillot@redhat.com + + + + Minor corrections, plus use include file for intro + + + + + 9-0 + Fri Aug 14 2015 + + KenGaillot + kgaillot@redhat.com + + + + Update for CentOS 7.1 and leaving firewalld/SELinux enabled + + + + + 10-0 + Fri Jan 12 2018 + + KenGaillot + kgaillot@redhat.com + + + + Update banner for Pacemaker 2.0 and content for CentOS 7.4 with Pacemaker 1.1.16 + + + + + 10-1 + Wed Sep 5 2018 + + KenGaillot + kgaillot@redhat.com + + + + Update for CentOS 7.5 with Pacemaker 1.1.18 + + + + + 10-2 + Fri Dec 7 2018 + + KenGaillot + kgaillot@redhat.com + + + JanPokorný + jpokorny@redhat.com + + + ChrisLumens + clumens@redhat.com + + + + Minor clarifications and formatting changes + + + + + 11-0 + Thu Jul 18 2019 + + TomasJelinek + tojeline@redhat.com + + + + Note differences in pcs 0.10 + + + + + diff --git a/doc/Pacemaker_Administration/en-US/Author_Group.xml b/doc/Pacemaker_Administration/en-US/Author_Group.xml index a40b3adcc6..ff907feee5 100644 --- a/doc/Pacemaker_Administration/en-US/Author_Group.xml +++ b/doc/Pacemaker_Administration/en-US/Author_Group.xml @@ -1,17 +1,10 @@ - AndrewBeekhof - Red Hat - Co-author - andrew@beekhof.net - - - KenGaillot - Red Hat - Co-author - kgaillot@redhat.com + + Written by the Pacemaker project contributors + diff --git a/doc/Pacemaker_Administration/en-US/Book_Info.xml b/doc/Pacemaker_Administration/en-US/Book_Info.xml index 5dcd86bb2b..5acc7868e7 100644 --- a/doc/Pacemaker_Administration/en-US/Book_Info.xml +++ b/doc/Pacemaker_Administration/en-US/Book_Info.xml @@ -1,36 +1,34 @@ %BOOK_ENTITIES; ]> - Pacemaker Administration - Managing Pacemaker Clusters - - 1 - 3 - - - This document has instructions and tips for system - administrators who need to manage high-availability - clusters using Pacemaker. - - - - - - - - - - - + Pacemaker Administration + Managing Pacemaker Clusters + + 2 + 0 + + + This document has instructions and tips for system administrators who + need to manage high-availability clusters using Pacemaker. 
+ + + + + + + + + + + - diff --git a/doc/Pacemaker_Administration/en-US/Revision_History.xml b/doc/Pacemaker_Administration/en-US/Revision_History.xml index 6276ccd69b..3d8a310735 100644 --- a/doc/Pacemaker_Administration/en-US/Revision_History.xml +++ b/doc/Pacemaker_Administration/en-US/Revision_History.xml @@ -1,69 +1,87 @@ %BOOK_ENTITIES; ]> Revision History 1-0 Tue Jan 23 2018 + + AndrewBeekhof + andrew@beekhof.net + KenGaillot kgaillot@redhat.com Move administration-oriented information from Pacemaker Explained into its own book 1-1 Mon Jan 28 2019 KenGaillot kgaillot@redhat.com JanPokorný jpokorny@redhat.com Add "Troubleshooting" chapter, minor clarifications and reformatting 1-2 Mon May 13 2019 KenGaillot kgaillot@redhat.com Overhaul configuration how-to 1-3 Tue Oct 15 2019 KenGaillot kgaillot@redhat.com Add Tools chapter + + 2-0 + Tue Nov 5 2019 + + ChrisLumens + clumens@redhat.com + + + + Update for crm_mon changes in 2.0.3 + + + + diff --git a/doc/Pacemaker_Development/en-US/Author_Group.xml b/doc/Pacemaker_Development/en-US/Author_Group.xml index 4f315d6ad0..ff907feee5 100644 --- a/doc/Pacemaker_Development/en-US/Author_Group.xml +++ b/doc/Pacemaker_Development/en-US/Author_Group.xml @@ -1,23 +1,10 @@ - AndrewBeekhof - Red Hat - Co-author - andrew@beekhof.net - - - KenGaillot - Red Hat - Co-author - kgaillot@redhat.com - - - JanPokorný - Red Hat - Co-author - poki@redhat.com + + Written by the Pacemaker project contributors + diff --git a/doc/Pacemaker_Development/en-US/Revision_History.xml b/doc/Pacemaker_Development/en-US/Revision_History.xml index 32b37341ce..163867f785 100644 --- a/doc/Pacemaker_Development/en-US/Revision_History.xml +++ b/doc/Pacemaker_Development/en-US/Revision_History.xml @@ -1,108 +1,112 @@ %BOOK_ENTITIES; ]> Revision History 1-0 Tue Jul 26 2016 + + AndrewBeekhof + andrew@beekhof.net + KenGaillot kgaillot@redhat.com Convert coding guidelines and developer FAQ to Publican document 1-1 Mon Aug 29 2016 KenGaillot kgaillot@redhat.com Add Python coding guidelines, and more about licensing 2-0 Fri Jan 12 2018 KenGaillot kgaillot@redhat.com Drop support for Python 2.6 2-1 Tue Sep 18 2018 JanPokorný poki@redhat.com Start documenting notable evolutionary points 2-2 Fri Dec 7 2018 KenGaillot kgaillot@redhat.com Update FAQ and C guidelines 2-3 Mon May 13 2019 KenGaillot kgaillot@redhat.com JanPokorný poki@redhat.com Update copyright notice policy, and some external references 2-4 Fri 17 May 2019 JanPokorný poki@redhat.com Start capturing hacking howto for advanced contributors diff --git a/doc/Pacemaker_Explained/en-US/Author_Group.xml b/doc/Pacemaker_Explained/en-US/Author_Group.xml index 08fc1f72ed..ff907feee5 100644 --- a/doc/Pacemaker_Explained/en-US/Author_Group.xml +++ b/doc/Pacemaker_Explained/en-US/Author_Group.xml @@ -1,59 +1,10 @@ - AndrewBeekhof - Red Hat - Primary author - andrew@beekhof.net + + Written by the Pacemaker project contributors + - - KenGaillot - Red Hat - Co-author - kgaillot@redhat.com - - - PhilippMarek - LINBit - Style and formatting updates. Indexing. 
- philipp.marek@linbit.com - - - TanjaRoth - SUSE - Utilization chapter - Resource Templates chapter - Multi-Site Clusters chapter - taroth@suse.com - - - LarsMarowsky-Bree - SUSE - Multi-Site Clusters chapter - lmb@suse.com - - - YanGao - SUSE - Utilization chapter - Resource Templates chapter - Multi-Site Clusters chapter - ygao@suse.com - - - ThomasSchraitle - SUSE - Utilization chapter - Resource Templates chapter - Multi-Site Clusters chapter - toms@suse.com - - - DejanMuhamedagic - SUSE - Resource Templates chapter - dmuhamedagic@suse.com - diff --git a/doc/Pacemaker_Explained/en-US/Ch-Fencing.txt b/doc/Pacemaker_Explained/en-US/Ch-Fencing.txt index 53d8793024..31b5044274 100644 --- a/doc/Pacemaker_Explained/en-US/Ch-Fencing.txt +++ b/doc/Pacemaker_Explained/en-US/Ch-Fencing.txt @@ -1,1021 +1,1028 @@ :compat-mode: legacy = Fencing = //// We prefer [[ch-fencing]], but older versions of asciidoc don't deal well with that construct for chapter headings //// anchor:ch-fencing[Chapter 6, Fencing] indexterm:[Fencing, Configuration] indexterm:[STONITH, Configuration] == What Is Fencing? == 'Fencing' is the ability to make a node unable to run resources, even when that node is unresponsive to cluster commands. Fencing is also known as 'STONITH', an acronym for "Shoot The Other Node In The Head", since the most common fencing method is cutting power to the node. Another method is "fabric fencing", cutting the node's access to some capability required to run resources (such as network access or a shared disk). == Why Is Fencing Necessary? == Fencing protects your data from being corrupted by malfunctioning nodes or unintentional concurrent access to shared resources. Fencing protects against the "split brain" failure scenario, where cluster nodes have lost the ability to reliably communicate with each other but are still able to run resources. If the cluster just assumed that uncommunicative nodes were down, then multiple instances of a resource could be started on different nodes. The effect of split brain depends on the resource type. For example, an IP address brought up on two hosts on a network will cause packets to randomly be sent to one or the other host, rendering the IP useless. For a database or clustered file system, the effect could be much more severe, causing data corruption or divergence. Fencing also is used when a resource cannot otherwise be stopped. If a failed resource fails to stop, it cannot be recovered elsewhere. Fencing the resource's node is the only way to ensure the resource is recoverable. Users may also configure the +on-fail+ property of any resource operation to +fencing+, in which case the cluster will fence the resource's node if the operation fails. == Fence Devices == A 'fence device' (or 'fencing device') is a special type of resource that provides the means to fence a node. Examples of fencing devices include intelligent power switches and IPMI devices that accept SNMP commands to cut power to a node, and iSCSI controllers that allow SCSI reservations to be used to cut a node's access to a shared disk. Since fencing devices will be used to recover from loss of networking connectivity to other nodes, it is essential that they do not rely on the same network as the cluster itself, otherwise that network becomes a single point of failure. Since loss of a node due to power outage is indistinguishable from loss of network connectivity to that node, it is also essential that at least one fence device for a node does not share power with that node. 
For example, an on-board IPMI controller that shares power with its host should not be used as the sole fencing device for that host. Since fencing is used to isolate malfunctioning nodes, no fence device should rely on its target functioning properly. This includes, for example, devices that ssh into a node and issue a shutdown command (such devices might be suitable for testing, but never for production). == Fence Agents == A 'fence agent' (or 'fencing agent') is a +stonith+-class resource agent. The fence agent standard provides commands (such as +off+ and +reboot+) that the cluster can use to fence nodes. As with other resource agent classes, this allows a layer of abstraction so that Pacemaker doesn't need any knowledge about specific fencing technologies -- that knowledge is isolated in the agent. == When a Fence Device Can Be Used == Fencing devices do not actually "run" like most services. Typically, they just provide an interface for sending commands to an external device. Additionally, fencing may be initiated by Pacemaker, by other cluster-aware software such as DRBD or DLM, or manually by an administrator, at any point in the cluster life cycle, including before any resources have been started. To accommodate this, Pacemaker does not require the fence device resource to be "started" in order to be used. Whether a fence device is started or not determines whether a node runs any recurring monitor for the device, and gives the node a slight preference for being chosen to execute fencing using that device. By default, any node can execute any fencing device. If a fence device is disabled by setting its +target-role+ to Stopped, then no node can use that device. If mandatory location constraints prevent a specific node from "running" a fence device, then that node will never be chosen to execute fencing using the device. A node may fence itself, but the cluster will choose that only if no other nodes can do the fencing. A common configuration scenario is to have one fence device per target node. In such a case, users often configure anti-location constraints so that the target node does not monitor its own device. The best practice is to make the constraint optional (i.e. a finite negative score rather than +-INFINITY+), so that the node can fence itself if no other nodes can. == Limitations of Fencing Resources == Fencing resources have certain limitations that other resource classes don't: * They may have only one set of meta-attributes and one set of instance attributes. * If <> are used to determine fencing resource options, these may only be evaluated when first read, meaning that later changes to the rules will have no effect. Therefore, it is better to avoid confusion and not use rules at all with fencing resources. These limitations could be revisited if there is significant user demand. == Special Options for Fencing Resources == The table below lists special instance attributes that may be set for any fencing resource ('not' meta-attributes, even though they are interpreted by pacemaker rather than the fence agent). These are also listed in the man page for +pacemaker-fenced+. .Additional Properties of Fencing Resources [width="95%",cols="8m,3,6,<12",options="header",align="center"] |========================================================= |Field |Type |Default |Description |stonith-timeout |NA |NA a|Older versions used this to override the default period to wait for a STONITH (reboot, on, off) action to complete for this device. 
It has been replaced by the +pcmk_reboot_timeout+ and +pcmk_off_timeout+ properties. indexterm:[stonith-timeout,Fencing] indexterm:[Fencing,Property,stonith-timeout] //// (not yet implemented) priority integer 0 The priority of the STONITH resource. Devices are tried in order of highest priority to lowest. indexterm priority,Fencing indexterm Fencing,Property,priority ////
|provides |string | |Any special capability provided by the fence device. Currently, only one such capability is meaningful: +unfencing+ (see <>). indexterm:[provides,Fencing] indexterm:[Fencing,Property,provides]
|pcmk_host_map |string | |A mapping of host names to port numbers for devices that do not support host names. Example: +node1:1;node2:2,3+ tells the cluster to use port 1 for *node1* and ports 2 and 3 for *node2*. If +pcmk_host_check+ is explicitly set to +static-list+, either this or +pcmk_host_list+ must be set. indexterm:[pcmk_host_map,Fencing] indexterm:[Fencing,Property,pcmk_host_map]
|pcmk_host_list |string | |A list of machines controlled by this device. If +pcmk_host_check+ is explicitly set to +static-list+, either this or +pcmk_host_map+ must be set. indexterm:[pcmk_host_list,Fencing] indexterm:[Fencing,Property,pcmk_host_list]
|pcmk_host_check |string -|static-list if either +pcmk_host_list+ or +pcmk_host_map+ is set, - otherwise dynamic-list if the fence device supports the list action, - otherwise status if the fence device supports the status action, - otherwise none +|A value appropriate to other configuration options and + device capabilities (see note below) a|How to determine which machines are controlled by the device. Allowed values: * +dynamic-list:+ query the device via the "list" command * +static-list:+ check the +pcmk_host_list+ or +pcmk_host_map+ attribute * +status:+ query the device via the "status" command * +none:+ assume every device can fence every machine indexterm:[pcmk_host_check,Fencing] indexterm:[Fencing,Property,pcmk_host_check]
|pcmk_delay_max |time |0s |Enable a random delay of up to the time specified before executing fencing actions. This is sometimes used in two-node clusters to ensure that the nodes don't fence each other at the same time. The overall delay introduced by pacemaker is derived from this random delay value plus a static delay so that the sum is kept below the maximum delay. indexterm:[pcmk_delay_max,Fencing] indexterm:[Fencing,Property,pcmk_delay_max]
|pcmk_delay_base |time |0s |Enable a static delay before executing fencing actions. This can be used e.g. in two-node clusters to ensure that the nodes don't fence each other, by having separate fencing resources with different values. The node that is fenced with the shorter delay will lose a fencing race. The overall delay introduced by pacemaker is derived from this value plus a random delay such that the sum is kept below the maximum delay. indexterm:[pcmk_delay_base,Fencing] indexterm:[Fencing,Property,pcmk_delay_base]
|pcmk_action_limit |integer |1 |The maximum number of actions that can be performed in parallel on this device, if the cluster option +concurrent-fencing+ is +true+. -1 is unlimited. indexterm:[pcmk_action_limit,Fencing] indexterm:[Fencing,Property,pcmk_action_limit]
|pcmk_host_argument |string |port |'Advanced use only.' Which parameter should be supplied to the resource agent to identify the node to be fenced. Some devices do not support the standard +port+ parameter or may provide additional ones. Use this to specify an alternate, device-specific parameter.
A value of +none+ tells the cluster not to supply any additional parameters. indexterm:[pcmk_host_argument,Fencing] indexterm:[Fencing,Property,pcmk_host_argument] |pcmk_reboot_action |string |reboot |'Advanced use only.' The command to send to the resource agent in order to reboot a node. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_reboot_action,Fencing] indexterm:[Fencing,Property,pcmk_reboot_action] |pcmk_reboot_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `reboot` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_reboot_timeout,Fencing] indexterm:[Fencing,Property,pcmk_reboot_timeout] indexterm:[stonith-timeout,Fencing] indexterm:[Fencing,Property,stonith-timeout] |pcmk_reboot_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `reboot` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_reboot_retries,Fencing] indexterm:[Fencing,Property,pcmk_reboot_retries] |pcmk_off_action |string |off |'Advanced use only.' The command to send to the resource agent in order to shut down a node. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_off_action,Fencing] indexterm:[Fencing,Property,pcmk_off_action] |pcmk_off_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `off` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_off_timeout,Fencing] indexterm:[Fencing,Property,pcmk_off_timeout] indexterm:[stonith-timeout,Fencing] indexterm:[Fencing,Property,stonith-timeout] |pcmk_off_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `off` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_off_retries,Fencing] indexterm:[Fencing,Property,pcmk_off_retries] |pcmk_list_action |string |list |'Advanced use only.' The command to send to the resource agent in order to list nodes. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_list_action,Fencing] indexterm:[Fencing,Property,pcmk_list_action] |pcmk_list_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `list` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_list_timeout,Fencing] indexterm:[Fencing,Property,pcmk_list_timeout] |pcmk_list_retries |integer |2 |'Advanced use only.' 
The maximum number of times to retry the `list` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_list_retries,Fencing] indexterm:[Fencing,Property,pcmk_list_retries] |pcmk_monitor_action |string |monitor |'Advanced use only.' The command to send to the resource agent in order to report extended status. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_monitor_action,Fencing] indexterm:[Fencing,Property,pcmk_monitor_action] |pcmk_monitor_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `monitor` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_monitor_timeout,Fencing] indexterm:[Fencing,Property,pcmk_monitor_timeout] |pcmk_monitor_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `monitor` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_monitor_retries,Fencing] indexterm:[Fencing,Property,pcmk_monitor_retries] |pcmk_status_action |string |status |'Advanced use only.' The command to send to the resource agent in order to report status. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_status_action,Fencing] indexterm:[Fencing,Property,pcmk_status_action] |pcmk_status_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `status` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_status_timeout,Fencing] indexterm:[Fencing,Property,pcmk_status_timeout] |pcmk_status_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `status` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_status_retries,Fencing] indexterm:[Fencing,Property,pcmk_status_retries] |========================================================= +[NOTE] +==== +The default value for +pcmk_host_check+ is +static-list+ if either ++pcmk_host_list+ or +pcmk_host_map+ is configured. If neither of those are +configured, the default is +dynamic-list+ if the fence device supports the list +action, or +status+ if the fence device supports the status action but not the +list action. If none of those conditions apply, the default is +none+. 
+==== + [[s-unfencing]] == Unfencing == With fabric fencing (such as cutting network or shared disk access rather than power), it is expected that the cluster will fence the node, and then a system administrator must manually investigate what went wrong, correct any issues found, then reboot (or restart the cluster services on) the node. Once the node reboots and rejoins the cluster, some fabric fencing devices require an explicit command to restore the node's access. This capability is called 'unfencing' and is typically implemented as the fence agent's +on+ command. If any cluster resource has +requires+ set to +unfencing+, then that resource will not be probed or started on a node until that node has been unfenced. == Fence Devices Dependent on Other Resources == In some cases, a fence device may require some other cluster resource (such as an IP address) to be active in order to function properly. This is obviously undesirable in general: fencing may be required when the depended-on resource is not active, or fencing may be required because the node running the depended-on resource is no longer responding. However, this may be acceptable under certain conditions: * The dependent fence device should not be able to target any node that is allowed to run the depended-on resource. * The depended-on resource should not be disabled during production operation. * The +concurrent-fencing+ cluster property should be set to +true+. Otherwise, if both the node running the depended-on resource and some node targeted by the dependent fence device need to be fenced, the fencing of the node running the depended-on resource might be ordered first, making the second fencing impossible and blocking further recovery. With concurrent fencing, the dependent fence device might fail at first due to the depended-on resource being unavailable, but it will be retried and eventually succeed once the resource is brought back up. Even under those conditions, there is one unlikely problem scenario. The DC always schedules fencing of itself after any other fencing needed, to avoid unnecessary repeated DC elections. If the dependent fence device targets the DC, and both the DC and a different node running the depended-on resource need to be fenced, the DC fencing will always fail and block further recovery. Note, however, that losing a DC node entirely causes some other node to become DC and schedule the fencing, so this is only a risk when a stop or other operation with +on-fail+ set to +fencing+ fails on the DC. == Configuring Fencing == . Find the correct driver: + ---- # stonith_admin --list-installed ---- . Find the required parameters associated with the device (replacing $AGENT_NAME with the name obtained from the previous step): + ---- # stonith_admin --metadata --agent $AGENT_NAME ---- . Create a file called +stonith.xml+ containing a primitive resource with a class of +stonith+, a type equal to the agent name obtained earlier, and a parameter for each of the values returned in the previous step. . If the device does not know how to fence nodes based on their uname, you may also need to set the special +pcmk_host_map+ parameter. See `man pacemaker-fenced` for details. . If the device does not support the `list` command, you may also need to set the special +pcmk_host_list+ and/or +pcmk_host_check+ parameters. See `man pacemaker-fenced` for details. . If the device does not expect the victim to be specified with the `port` parameter, you may also need to set the special +pcmk_host_argument+ parameter. 
See `man pacemaker-fenced` for details. . Upload it into the CIB using cibadmin: + ---- # cibadmin -C -o resources --xml-file stonith.xml ---- . Set +stonith-enabled+ to true: + ---- # crm_attribute -t crm_config -n stonith-enabled -v true ---- . Once the stonith resource is running, you can test it by executing the following (although you might want to stop the cluster on that machine first): + ---- # stonith_admin --reboot nodename ---- === Example Fencing Configuration === Assume we have a chassis containing four nodes and an IPMI device active on 192.0.2.1. We would choose the `fence_ipmilan` driver, and obtain the following list of parameters: .Obtaining a list of Fence Agent Parameters ==== ---- # stonith_admin --metadata -a fence_ipmilan ---- [source,XML] ---- ---- ==== Based on that, we would create a fencing resource fragment that might look like this: .An IPMI-based Fencing Resource ==== [source,XML] ---- ---- ==== Finally, we need to enable fencing: ---- # crm_attribute -t crm_config -n stonith-enabled -v true ---- == Fencing Topologies == Pacemaker supports fencing nodes with multiple devices through a feature called 'fencing topologies'. Fencing topologies may be used to provide alternative devices in case one fails, or to require multiple devices to all be executed successfully in order to consider the node successfully fenced, or even a combination of the two. Create the individual devices as you normally would, then define one or more +fencing-level+ entries in the +fencing-topology+ section of the configuration. * Each fencing level is attempted in order of ascending +index+. Allowed values are 1 through 9. * If a device fails, processing terminates for the current level. No further devices in that level are exercised, and the next level is attempted instead. * If the operation succeeds for all the listed devices in a level, the level is deemed to have passed. * The operation is finished when a level has passed (success), or all levels have been attempted (failed). * If the operation failed, the next step is determined by the scheduler and/or the controller. 
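To make the processing rules above concrete, here is a minimal sketch of a +fencing-topology+ section with two levels for a single node (the IDs, node name, and device names are purely illustrative; the +fencing-level+ attributes are described in the table below):

[source,XML]
----
<fencing-topology>
  <!-- Level 1 is attempted first; if its device fails, level 2 is attempted -->
  <fencing-level id="fl-node1-ipmi" target="node1" index="1"
                 devices="ipmi-node1"/>
  <!-- Level 2 succeeds only if every listed device succeeds -->
  <fencing-level id="fl-node1-pdus" target="node1" index="2"
                 devices="pdu1-node1,pdu2-node1"/>
</fencing-topology>
----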
Some possible uses of topologies include: * Try on-board IPMI, then an intelligent power switch if that fails * Try fabric fencing of both disk and network, then fall back to power fencing if either fails * Wait up to a certain time for a kernel dump to complete, then cut power to the node
.Properties of Fencing Levels [width="95%",cols="1m,<3",options="header",align="center"] |========================================================= |Field |Description
|id |A unique name for the level indexterm:[id,fencing-level] indexterm:[Fencing,fencing-level,id]
|target |The name of a single node to which this level applies indexterm:[target,fencing-level] indexterm:[Fencing,fencing-level,target]
|target-pattern |An extended regular expression (as defined in http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04[POSIX]) matching the names of nodes to which this level applies indexterm:[target-pattern,fencing-level] indexterm:[Fencing,fencing-level,target-pattern]
|target-attribute |The name of a node attribute that is set (to +target-value+) for nodes to which this level applies indexterm:[target-attribute,fencing-level] indexterm:[Fencing,fencing-level,target-attribute]
|target-value |The node attribute value (of +target-attribute+) that is set for nodes to which this level applies indexterm:[target-value,fencing-level] indexterm:[Fencing,fencing-level,target-value]
|index |The order in which to attempt the levels. Levels are attempted in ascending order 'until one succeeds'. Valid values are 1 through 9. indexterm:[index,fencing-level] indexterm:[Fencing,fencing-level,index]
|devices |A comma-separated list of devices that must all be tried for this level indexterm:[devices,fencing-level] indexterm:[Fencing,fencing-level,devices] |=========================================================
.Fencing topology with different devices for different nodes ==== [source,XML] ---- ... ... ---- ====
=== Example Dual-Layer, Dual-Device Fencing Topologies === The following example illustrates an advanced use of +fencing-topology+ in a cluster with the following properties: * 3 nodes (2 active prod-mysql nodes, 1 prod_mysql-rep in standby for quorum purposes) * the active nodes have an IPMI-controlled power board reached at 192.0.2.1 and 192.0.2.2 * the active nodes also have two independent PSUs (Power Supply Units) connected to two independent PDUs (Power Distribution Units) reached at 198.51.100.1 (port 10 and port 11) and 203.0.113.1 (port 10 and port 11) * the first fencing method uses the `fence_ipmi` agent * the second fencing method uses the `fence_apc_snmp` agent targeting two fencing devices (one per PSU, either port 10 or 11) * fencing is only implemented for the active nodes and has location constraints * fencing topology is set to try IPMI fencing first, then fall back to a "sure-kill" dual-PDU fencing
In a normal failure scenario, STONITH will first select +fence_ipmi+ to try to kill the faulty node. Using a fencing topology, if that first method fails, STONITH will then move on to selecting +fence_apc_snmp+ twice: * once for the first PDU * again for the second PDU
The fence action is considered successful only if both PDUs report the required status. If either of them fails, STONITH loops back to the first fencing method, +fence_ipmi+, and so on, until the node is fenced or the fencing action is cancelled.
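Before stepping through each piece of the configuration, here is a rough sketch of what one of the per-node IPMI fencing primitives might look like (the resource ID and node name are illustrative; a real deployment would add the device-specific parameters reported by `stonith_admin --metadata --agent fence_ipmi`):

[source,XML]
----
<primitive id="fence_prod-mysql1_ipmi" class="stonith" type="fence_ipmi">
  <instance_attributes id="fence_prod-mysql1_ipmi-attrs">
    <!-- agent-specific parameters (address, credentials, etc.) go here -->
    <nvpair id="fence_prod-mysql1_ipmi-pcmk_host_list"
            name="pcmk_host_list" value="prod-mysql1"/>
  </instance_attributes>
</primitive>
----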
.First fencing method: single IPMI device
Each cluster node has its own dedicated IPMI channel that can be called for fencing using the following primitives: [source,XML] ---- ----
.Second fencing method: dual PDU devices
Each cluster node also has two distinct power channels controlled by two distinct PDUs. That means a total of 4 fencing devices configured as follows: - Node 1, PDU 1, PSU 1 @ port 10 - Node 1, PDU 2, PSU 2 @ port 10 - Node 2, PDU 1, PSU 1 @ port 11 - Node 2, PDU 2, PSU 2 @ port 11
The matching fencing agents are configured as follows: [source,XML] ---- ----
.Location Constraints
To prevent STONITH from trying to run a fencing agent on the same node it is supposed to fence, constraints are placed on all the fencing primitives: [source,XML] ---- ----
.Fencing topology
Now that all the fencing resources are defined, it's time to create the right topology. We want to first fence using IPMI and, if that does not work, fence both PDUs to effectively and surely kill the node. [source,XML] ---- ----
Please note that in +fencing-topology+, the level with the lowest +index+ value is attempted first.
.Final configuration
Put together, the configuration looks like this: [source,XML] ---- ... ... ----
== Remapping Reboots ==
When the cluster needs to reboot a node, whether because +stonith-action+ is +reboot+ or because a reboot was manually requested (such as by `stonith_admin --reboot`), it will remap that to other commands in two cases: . If the chosen fencing device does not support the +reboot+ command, the cluster will ask it to perform +off+ instead. . If a fencing topology level with multiple devices must be executed, the cluster will ask all the devices to perform +off+, then ask all the devices to perform +on+.
To understand the second case, consider the example of a node with redundant power supplies connected to intelligent power switches. Rebooting one switch and then the other would have no effect on the node. Turning both switches off, and then on, actually reboots the node.
In such a case, the fencing operation will be treated as successful as long as the +off+ commands succeed, because then it is safe for the cluster to recover any resources that were on the node. Timeouts and errors in the +on+ phase will be logged but ignored.
When a reboot operation is remapped, any action-specific timeout for the remapped action will be used (for example, +pcmk_off_timeout+ will be used when executing the +off+ command, not +pcmk_reboot_timeout+).
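If a device needs more (or less) time than +stonith-timeout+ allows to power a node off, +pcmk_off_timeout+ (described earlier in this chapter) can be set on that device; when a reboot is remapped to +off+, it is this timeout that applies. A minimal sketch, with an illustrative resource name and timeout value:

[source,XML]
----
<primitive id="fence_pdu1" class="stonith" type="fence_apc_snmp">
  <instance_attributes id="fence_pdu1-attrs">
    <!-- applies to the remapped "off" action instead of pcmk_reboot_timeout -->
    <nvpair id="fence_pdu1-pcmk_off_timeout"
            name="pcmk_off_timeout" value="120s"/>
  </instance_attributes>
</primitive>
----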
diff --git a/doc/Pacemaker_Explained/en-US/Revision_History.xml b/doc/Pacemaker_Explained/en-US/Revision_History.xml index 93fa71334f..4a80cb4fb6 100644 --- a/doc/Pacemaker_Explained/en-US/Revision_History.xml +++ b/doc/Pacemaker_Explained/en-US/Revision_History.xml @@ -1,199 +1,249 @@ Revision History 1-0 19 Oct 2009 AndrewBeekhofandrew@beekhof.net Import from Pages.app 2-0 26 Oct 2009 AndrewBeekhofandrew@beekhof.net Cleanup and reformatting of docbook xml complete 3-0 Tue Nov 12 2009 AndrewBeekhofandrew@beekhof.net Split book into chapters and pass validation Re-organize book for use with Publican + + 3-1 + Tue Nov 12 2009 + + TanjaRoth + SUSE + taroth@suse.com + + + LarsMarowsky-Bree + SUSE + lmb@suse.com + + + YanGao + SUSE + ygao@suse.com + + + ThomasSchraitle + SUSE + toms@suse.com + + + DejanMuhamedagic + SUSE + dmuhamedagic@suse.com + + + + Utilization chapter + Resource Templates chapter + Multi-Site Clusters chapter + + + + + 3-2 + Fri Nov 4 2011 + + PhilippMarek + LINBit + philipp.marek@linbit.com + + + + Extensive style, formatting, and indexing updates + + + 4-0 Mon Oct 8 2012 AndrewBeekhofandrew@beekhof.net Converted to asciidoc (which is converted to docbook for use with Publican) 5-0 Mon Feb 23 2015 KenGaillotkgaillot@redhat.com Update for clarity, stylistic consistency and current command-line syntax 6-0 Tue Dec 8 2015 KenGaillotkgaillot@redhat.com Update for Pacemaker 1.1.14 7-0 Tue May 3 2016 KenGaillotkgaillot@redhat.com Update for Pacemaker 1.1.15 7-1 Fri Oct 28 2016 KenGaillotkgaillot@redhat.com Overhaul upgrade documentation, and document node health strategies 8-0 Tue Oct 25 2016 KenGaillotkgaillot@redhat.com Update for Pacemaker 1.1.16 9-0 Tue Jul 11 2017 KenGaillotkgaillot@redhat.com Update for Pacemaker 1.1.17 10-0 Fri Oct 6 2017 KenGaillotkgaillot@redhat.com Update for Pacemaker 1.1.18 11-0 Fri Jan 12 2018 KenGaillotkgaillot@redhat.com Update for Pacemaker 2.0.0 12-0 Mon Jan 28 2019 KenGaillotkgaillot@redhat.com ReidWahlnwahl@redhat.com JanPokornýjpokorny@redhat.com Update for Pacemaker 2.0.1, remove "Further Reading" and "FAQ" sections, and add minor clarifications and reformatting 12-1 Mon May 13 2019 KenGaillotkgaillot@redhat.com MaciejSobkowiakmsobkowiak@egnyte.com Document podman support, cluster-name cluster option, and new HealthIOWait agent, with other minor clarifications and corrections 12-2 Wed Jun 19 2019 KenGaillotkgaillot@redhat.com Add chapter for ACLs 13-0 Tue Oct 15 2019 KenGaillot kgaillot@redhat.com Overhaul fencing, rules, and constraints chapters, elaborate on various options, and update for Pacemaker 2.0.3 diff --git a/doc/Pacemaker_Remote/en-US/Author_Group.xml b/doc/Pacemaker_Remote/en-US/Author_Group.xml index 1de3082be1..ff907feee5 100644 --- a/doc/Pacemaker_Remote/en-US/Author_Group.xml +++ b/doc/Pacemaker_Remote/en-US/Author_Group.xml @@ -1,11 +1,10 @@ - DavidVossel - Red Hat - Primary author - davidvossel@gmail.com + + Written by the Pacemaker project contributors + diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 0000000000..594d9e5717 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,80 @@ +# Pacemaker Documentation + +Pacemaker has multiple forms of documentation: + +* The primary end-user documentation is a series of "books": + + * Clusters From Scratch: Simplified walk-through of setting up a + cluster for the first time + * Pacemaker Administration: Tips for managing a cluster + * Pacemaker Development: How to work on the Pacemaker code base + * Pacemaker Explained: Configuration reference guide + * Pacemaker 
Remote: Configuration and walk-throughs for extended + clusters + + The source for these is kept beneath this directory. Generated versions + are available online in epub, PDF, and HTML format at: + + https://clusterlabs.org/pacemaker/doc/ + +* Annotated source code in HTML format can be generated by running + "make global" in this directory, which requires the gtags and htags tools. + This is generated for releases and can be viewed online at: + + https://clusterlabs.org/pacemaker/global/ + +* Pacemaker manual pages are generated and installed automatically when + building the software. HTML versions can be generated by running + "make manhtml" in this directory, which requires the groff tool. + This is generated for releases and can be viewed online at: + + https://clusterlabs.org/pacemaker/man/ + +* For developers, documentation for Pacemaker's public C API is generated + by running "make doxygen" in this directory, which requires the doxygen tool. + This is generated for releases and can be viewed online at: + + https://clusterlabs.org/pacemaker/doxygen/ + +* Also for developers, a report of Pacemaker ABI compatibility between any two + commits can be generated by running in this directory: + + make LAST_RELEASE=$EARLIER_COMMIT TAG=$NEWER_COMMIT abi-www + + which requires the abi-compliance-checker tool. This is generated for each + release compared to the previous release and can be viewed online at: + + https://clusterlabs.org/pacemaker/abi/ + +* In addition, there are a few old text files in this directory focusing on + particular characteristics of Pacemaker clusters. These are mostly outdated + but do still have some useful information. The plan is to incorporate an + updated version of them into the books. + +## Editing the Books + +Each book's sources are kept in a subdirectory by title. Each book subdirectory +has an en-US subdirectory with the master sources, and potentially other +subdirectories for translations. However, the translations are not currently +actively maintained and so are disabled. + +Each book's en-US subdirectory has text files with the chapter sources in +asciidoc format. The file asciidoc.reference in this directory has a quick +guide to asciidoc; search online for more detailed help. + +Once you have edited the asciidoc as desired, run "make" in this directory +to generate all the books locally. You view the results by pointing your +web browser to (replacing BOOK\_TITLE appropriately): + + file:///path/to/checkout/doc/BOOK\_TITLE/publish/desktop/en-US/index.html + +Each en-US subdirectory also contains some raw XML files of lesser interest: + +* Author\_Group.xml: This contains the author listing at the top of the + generated book. To avoid clutter, we just put "Pacemaker project + contributors" here and list individual authors in the revision history, so + this typically does not need to be edited. +* Book\_Info.xml: Revision numbers, etc. This is generally updated only for + official releases. +* Revision\_History.xml: This can be updated for any change, listing the + individual authors. diff --git a/include/crm/services.h b/include/crm/services.h index 8fe9bc918c..c35aa2454d 100644 --- a/include/crm/services.h +++ b/include/crm/services.h @@ -1,431 +1,431 @@ /* * Copyright 2010-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #ifndef __PCMK_SERVICES__ # define __PCMK_SERVICES__ #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Services API * \ingroup core */ # include # include # include # include # include # ifndef OCF_ROOT_DIR # define OCF_ROOT_DIR "/usr/lib/ocf" # endif # ifndef LSB_ROOT_DIR # define LSB_ROOT_DIR "/etc/init.d" # endif /* TODO: Autodetect these two ?*/ # ifndef SYSTEMCTL # define SYSTEMCTL "/bin/systemctl" # endif /* Known resource classes */ #define PCMK_RESOURCE_CLASS_OCF "ocf" #define PCMK_RESOURCE_CLASS_SERVICE "service" #define PCMK_RESOURCE_CLASS_LSB "lsb" #define PCMK_RESOURCE_CLASS_SYSTEMD "systemd" #define PCMK_RESOURCE_CLASS_UPSTART "upstart" #define PCMK_RESOURCE_CLASS_NAGIOS "nagios" #define PCMK_RESOURCE_CLASS_STONITH "stonith" /* This is the string passed in the OCF_EXIT_REASON_PREFIX environment variable. * The stderr output that occurs after this prefix is encountered is considered * the exit reason for a completed operation. */ #define PCMK_OCF_REASON_PREFIX "ocf-exit-reason:" // Agent version to use if agent doesn't specify one #define PCMK_DEFAULT_AGENT_VERSION "0.1" enum lsb_exitcode { PCMK_LSB_OK = 0, PCMK_LSB_UNKNOWN_ERROR = 1, PCMK_LSB_INVALID_PARAM = 2, PCMK_LSB_UNIMPLEMENT_FEATURE = 3, PCMK_LSB_INSUFFICIENT_PRIV = 4, PCMK_LSB_NOT_INSTALLED = 5, PCMK_LSB_NOT_CONFIGURED = 6, PCMK_LSB_NOT_RUNNING = 7, }; /* The return codes for the status operation are not the same for other * operatios - go figure */ enum lsb_status_exitcode { PCMK_LSB_STATUS_OK = 0, PCMK_LSB_STATUS_VAR_PID = 1, PCMK_LSB_STATUS_VAR_LOCK = 2, PCMK_LSB_STATUS_NOT_RUNNING = 3, PCMK_LSB_STATUS_UNKNOWN = 4, /* custom codes should be in the 150-199 range reserved for application use */ PCMK_LSB_STATUS_NOT_INSTALLED = 150, PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151, }; /* Uniform exit codes * Everything is mapped to its OCF equivalent so that Pacemaker only deals with one set of codes */ enum ocf_exitcode { PCMK_OCF_OK = 0, PCMK_OCF_UNKNOWN_ERROR = 1, PCMK_OCF_INVALID_PARAM = 2, PCMK_OCF_UNIMPLEMENT_FEATURE = 3, PCMK_OCF_INSUFFICIENT_PRIV = 4, PCMK_OCF_NOT_INSTALLED = 5, PCMK_OCF_NOT_CONFIGURED = 6, PCMK_OCF_NOT_RUNNING = 7, /* End of overlap with LSB */ PCMK_OCF_RUNNING_MASTER = 8, PCMK_OCF_FAILED_MASTER = 9, /* 150-199 reserved for application use */ PCMK_OCF_CONNECTION_DIED = 189, // Deprecated (see PCMK_LRM_OP_NOT_CONNECTED) PCMK_OCF_DEGRADED = 190, /* Active resource that is no longer 100% functional */ PCMK_OCF_DEGRADED_MASTER = 191, /* Promoted resource that is no longer 100% functional */ PCMK_OCF_EXEC_ERROR = 192, /* Generic problem invoking the agent */ PCMK_OCF_UNKNOWN = 193, /* State of the service is unknown - used for recording in-flight operations */ PCMK_OCF_SIGNAL = 194, PCMK_OCF_NOT_SUPPORTED = 195, PCMK_OCF_PENDING = 196, PCMK_OCF_CANCELLED = 197, PCMK_OCF_TIMEOUT = 198, PCMK_OCF_OTHER_ERROR = 199, /* Keep the same codes as PCMK_LSB */ }; enum op_status { PCMK_LRM_OP_UNKNOWN = -2, PCMK_LRM_OP_PENDING = -1, PCMK_LRM_OP_DONE, PCMK_LRM_OP_CANCELLED, PCMK_LRM_OP_TIMEOUT, PCMK_LRM_OP_NOTSUPPORTED, PCMK_LRM_OP_ERROR, PCMK_LRM_OP_ERROR_HARD, PCMK_LRM_OP_ERROR_FATAL, PCMK_LRM_OP_NOT_INSTALLED, PCMK_LRM_OP_NOT_CONNECTED, PCMK_LRM_OP_INVALID, }; enum nagios_exitcode { NAGIOS_STATE_OK = 0, NAGIOS_STATE_WARNING = 1, NAGIOS_STATE_CRITICAL = 2, NAGIOS_STATE_UNKNOWN = 3, NAGIOS_STATE_DEPENDENT = 4, NAGIOS_INSUFFICIENT_PRIV = 100, NAGIOS_NOT_INSTALLED = 101, }; enum svc_action_flags { /* On timeout, only kill pid, do not kill entire pid group */ SVC_ACTION_LEAVE_GROUP = 0x01, 
SVC_ACTION_NON_BLOCKED = 0x02, }; typedef struct svc_action_private_s svc_action_private_t; typedef struct svc_action_s { char *id; char *rsc; char *action; guint interval_ms; char *standard; char *provider; char *agent; int timeout; GHashTable *params; /* used for setting up environment for ocf-ra & alert agents and to be sent via stdin for fence-agents */ int rc; int pid; int cancel; int status; int sequence; int expected_rc; int synchronous; enum svc_action_flags flags; char *stderr_data; char *stdout_data; /*! * Data stored by the creator of the action. * * This may be used to hold data that is needed later on by a callback, * for example. */ void *cb_data; svc_action_private_t *opaque; } svc_action_t; /** * \brief Get a list of files or directories in a given path * * \param[in] root full path to a directory to read * \param[in] files return list of files if TRUE or directories if FALSE * \param[in] executable if TRUE and files is TRUE, only return executable files * * \return a list of what was found. The list items are char *. * \note It is the caller's responsibility to free the result with g_list_free_full(list, free). */ GList *get_directory_list(const char *root, gboolean files, gboolean executable); /** * Get a list of services * * \return a list of services. The list items are gchar *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *services_list(void); /** * \brief Get a list of providers * * \param[in] standard list providers of this standard (e.g. ocf, lsb, etc.) * * \return a list of providers as char * list items (or NULL if standard does not support providers) * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_providers(const char *standard); /** * \brief Get a list of resource agents * * \param[in] standard list agents using this standard (e.g. ocf, lsb, etc.) (or NULL for all) * \param[in] provider list agents from this provider (or NULL for all) * * \return a list of resource agents. The list items are char *. * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_agents(const char *standard, const char *provider); /** * Get list of available standards * * \return a list of resource standards. The list items are char *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *resources_list_standards(void); /** * Does the given standard, provider, and agent describe a resource that can exist? * * \param[in] standard Which class of agent does the resource belong to? * \param[in] provider What provides the agent (NULL for most standards)? * \param[in] agent What is the name of the agent? * * \return A boolean */ gboolean resources_agent_exists(const char *standard, const char *provider, const char *agent); svc_action_t *services_action_create(const char *name, const char *action, guint interval_ms, int timeout /* ms */); /** * \brief Create a new resource action * * \param[in] name Name of resource * \param[in] standard Resource agent standard (ocf, lsb, etc.) * \param[in] provider Resource agent provider * \param[in] agent Resource agent name * \param[in] action action (start, stop, monitor, etc.) 
* \param[in] interval_ms How often to repeat this action (if 0, execute once) * \param[in] timeout Consider action failed if it does not complete in this many milliseconds * \param[in] params Action parameters * * \return newly allocated action instance * * \post After the call, 'params' is owned, and later free'd by the svc_action_t result * \note The caller is responsible for freeing the return value using * services_action_free(). */ svc_action_t *resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, guint interval_ms, int timeout /* ms */, GHashTable *params, enum svc_action_flags flags); /** * Kick a recurring action so it is scheduled immediately for re-execution */ gboolean services_action_kick(const char *name, const char *action, guint interval_ms); const char *resources_find_service_class(const char *agent); /** * Utilize services API to execute an arbitrary command. * * This API has useful infrastructure in place to be able to run a command * in the background and get notified via a callback when the command finishes. * * \param[in] exec command to execute * \param[in] args arguments to the command, NULL terminated * * \return a svc_action_t object, used to pass to the execute function * (services_action_sync() or services_action_async()) and is * provided to the callback. */ svc_action_t *services_action_create_generic(const char *exec, const char *args[]); void services_action_cleanup(svc_action_t * op); void services_action_free(svc_action_t * op); int services_action_user(svc_action_t *op, const char *user); gboolean services_action_sync(svc_action_t * op); /** * Run an action asynchronously. * * \param[in] op services action data * \param[in] action_callback callback for when the action completes * \param[in] action_fork_callback callback for when action forked successfully * * \retval TRUE succesfully started execution * \retval FALSE failed to start execution, no callback will be received */ gboolean services_action_async_fork_notify(svc_action_t * op, void (*action_callback) (svc_action_t *), void (*action_fork_callback) (svc_action_t *)); gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)); gboolean services_action_cancel(const char *name, const char *action, guint interval_ms); /* functions for alert agents */ svc_action_t *services_alert_create(const char *id, const char *exec, int timeout, GHashTable *params, int sequence, void *cb_data); gboolean services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op)); static inline const char *services_lrm_status_str(enum op_status status) { switch (status) { case PCMK_LRM_OP_PENDING: return "pending"; case PCMK_LRM_OP_DONE:return "complete"; case PCMK_LRM_OP_CANCELLED:return "Cancelled"; case PCMK_LRM_OP_TIMEOUT:return "Timed Out"; case PCMK_LRM_OP_NOTSUPPORTED:return "NOT SUPPORTED"; case PCMK_LRM_OP_ERROR:return "Error"; case PCMK_LRM_OP_NOT_INSTALLED:return "Not installed"; case PCMK_LRM_OP_NOT_CONNECTED:return "No executor connection"; case PCMK_LRM_OP_INVALID:return "Cannot execute now"; default:return "UNKNOWN!"; } } static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) { switch (code) { case PCMK_OCF_OK: return "ok"; case PCMK_OCF_UNKNOWN_ERROR: - return "unknown error"; + return "error"; case PCMK_OCF_INVALID_PARAM: return "invalid parameter"; case PCMK_OCF_UNIMPLEMENT_FEATURE: return "unimplemented feature"; case PCMK_OCF_INSUFFICIENT_PRIV: return "insufficient privileges"; 
case PCMK_OCF_NOT_INSTALLED: return "not installed"; case PCMK_OCF_NOT_CONFIGURED: return "not configured"; case PCMK_OCF_NOT_RUNNING: return "not running"; case PCMK_OCF_RUNNING_MASTER: return "master"; case PCMK_OCF_FAILED_MASTER: return "master (failed)"; case PCMK_OCF_SIGNAL: return "OCF_SIGNAL"; case PCMK_OCF_NOT_SUPPORTED: return "OCF_NOT_SUPPORTED"; case PCMK_OCF_PENDING: return "OCF_PENDING"; case PCMK_OCF_CANCELLED: return "OCF_CANCELLED"; case PCMK_OCF_TIMEOUT: return "OCF_TIMEOUT"; case PCMK_OCF_OTHER_ERROR: return "OCF_OTHER_ERROR"; case PCMK_OCF_DEGRADED: return "OCF_DEGRADED"; case PCMK_OCF_DEGRADED_MASTER: return "OCF_DEGRADED_MASTER"; default: return "unknown"; } } /** * \brief Get OCF equivalent of LSB exit code * * \param[in] action LSB action that produced exit code * \param[in] lsb_exitcode Exit code of LSB action * * \return PCMK_OCF_* constant that corresponds to LSB exit code */ static inline enum ocf_exitcode services_get_ocf_exitcode(const char *action, int lsb_exitcode) { /* For non-status actions, LSB and OCF share error code meaning <= 7 */ if (action && strcmp(action, "status") && strcmp(action, "monitor")) { if ((lsb_exitcode < 0) || (lsb_exitcode > PCMK_LSB_NOT_RUNNING)) { return PCMK_OCF_UNKNOWN_ERROR; } return (enum ocf_exitcode)lsb_exitcode; } /* status has different return codes */ switch (lsb_exitcode) { case PCMK_LSB_STATUS_OK: return PCMK_OCF_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case PCMK_LSB_STATUS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; } return PCMK_OCF_UNKNOWN_ERROR; } # ifdef __cplusplus } # endif #endif /* __PCMK_SERVICES__ */ diff --git a/lib/common/iso8601.c b/lib/common/iso8601.c index 7f64edd020..9ed0027520 100644 --- a/lib/common/iso8601.c +++ b/lib/common/iso8601.c @@ -1,1718 +1,1720 @@ /* * Copyright 2005-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ /* * References: * https://en.wikipedia.org/wiki/ISO_8601 * http://www.staff.science.uu.nl/~gent0113/calendar/isocalendar.htm */ #include #include #include #include #include #include #include #include /* * Andrew's code was originally written for OSes whose "struct tm" contains: * long tm_gmtoff; :: Seconds east of UTC * const char *tm_zone; :: Timezone abbreviation * Some OSes lack these, instead having: * time_t (or long) timezone; :: "difference between UTC and local standard time" * char *tzname[2] = { "...", "..." }; * I (David Lee) confess to not understanding the details. So my attempted * generalisations for where their use is necessary may be flawed. * * 1. Does "difference between ..." subtract the same or opposite way? * 2. Should it use "altzone" instead of "timezone"? * 3. Should it use tzname[0] or tzname[1]? Interaction with timezone/altzone? */ #if defined(HAVE_STRUCT_TM_TM_GMTOFF) # define GMTOFF(tm) ((tm)->tm_gmtoff) #else /* Note: extern variable; macro argument not actually used. 
*/ # define GMTOFF(tm) (-timezone+daylight) #endif #define HOUR_SECONDS (60 * 60) #define DAY_SECONDS (HOUR_SECONDS * 24) // A date/time or duration struct crm_time_s { int years; // Calendar year (date/time) or number of years (duration) int months; // Number of months (duration only) int days; // Ordinal day of year (date/time) or number of days (duration) int seconds; // Seconds of day (date/time) or number of seconds (duration) int offset; // Seconds offset from UTC (date/time only) bool duration; // True if duration }; char *crm_time_as_string(crm_time_t * date_time, int flags); static crm_time_t *parse_date(const char *date_str); gboolean check_for_ordinal(const char *str); static crm_time_t * crm_get_utc_time(crm_time_t *dt) { crm_time_t *utc = NULL; if (dt == NULL) { errno = EINVAL; return NULL; } utc = crm_time_new_undefined(); utc->years = dt->years; utc->days = dt->days; utc->seconds = dt->seconds; utc->offset = 0; if (dt->offset) { crm_time_add_seconds(utc, -dt->offset); } else { /* Durations (which are the only things that can include months, never have a timezone */ utc->months = dt->months; } crm_time_log(LOG_TRACE, "utc-source", dt, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "utc-target", utc, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); return utc; } crm_time_t * crm_time_new(const char *date_time) { time_t tm_now; crm_time_t *dt = NULL; tzset(); if (date_time == NULL) { tm_now = time(NULL); dt = crm_time_new_undefined(); crm_time_set_timet(dt, &tm_now); } else { dt = parse_date(date_time); } return dt; } /*! * \brief Allocate memory for an uninitialized time object * * \return Newly allocated time object * \note The caller is responsible for freeing the return value using * crm_time_free(). */ crm_time_t * crm_time_new_undefined() { crm_time_t *result = calloc(1, sizeof(crm_time_t)); CRM_ASSERT(result != NULL); return result; } /*! * \brief Check whether a time object has been initialized yet * * \param[in] t Time object to check * * \return TRUE if time object has been initialized, FALSE otherwise */ bool crm_time_is_defined(const crm_time_t *t) { // Any nonzero member indicates something has been done to t return (t != NULL) && (t->years || t->months || t->days || t->seconds || t->offset || t->duration); } void crm_time_free(crm_time_t * dt) { if (dt == NULL) { return; } free(dt); } static int year_days(int year) { int d = 365; if (crm_time_leapyear(year)) { d++; } return d; } /* From http://myweb.ecu.edu/mccartyr/ISOwdALG.txt : * * 5. Find the Jan1Weekday for Y (Monday=1, Sunday=7) * YY = (Y-1) % 100 * C = (Y-1) - YY * G = YY + YY/4 * Jan1Weekday = 1 + (((((C / 100) % 4) x 5) + G) % 7) */ int crm_time_january1_weekday(int year) { int YY = (year - 1) % 100; int C = (year - 1) - YY; int G = YY + YY / 4; int jan1 = 1 + (((((C / 100) % 4) * 5) + G) % 7); crm_trace("YY=%d, C=%d, G=%d", YY, C, G); crm_trace("January 1 %.4d: %d", year, jan1); return jan1; } int crm_time_weeks_in_year(int year) { int weeks = 52; int jan1 = crm_time_january1_weekday(year); /* if jan1 == thursday */ if (jan1 == 4) { weeks++; } else { jan1 = crm_time_january1_weekday(year + 1); /* if dec31 == thursday aka. jan1 of next year is a friday */ if (jan1 == 5) { weeks++; } } return weeks; } // Jan-Dec plus Feb of leap years static int month_days[13] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 29 }; /*! 
* \brief Return number of days in given month of given year * * \param[in] Ordinal month (1-12) * \param[in] Gregorian year * * \return Number of days in given month (0 if given month is invalid) */ int crm_time_days_in_month(int month, int year) { if ((month < 1) || (month > 12)) { return 0; } if ((month == 2) && crm_time_leapyear(year)) { month = 13; } return month_days[month - 1]; } bool crm_time_leapyear(int year) { gboolean is_leap = FALSE; if (year % 4 == 0) { is_leap = TRUE; } if (year % 100 == 0 && year % 400 != 0) { is_leap = FALSE; } return is_leap; } static uint32_t get_ordinal_days(uint32_t y, uint32_t m, uint32_t d) { int lpc; for (lpc = 1; lpc < m; lpc++) { d += crm_time_days_in_month(lpc, y); } return d; } void crm_time_log_alias(int log_level, const char *file, const char *function, int line, const char *prefix, crm_time_t * date_time, int flags) { char *date_s = crm_time_as_string(date_time, flags); if (log_level < LOG_CRIT) { printf("%s%s%s\n", (prefix? prefix : ""), (prefix? ": " : ""), date_s); } else { do_crm_log_alias(log_level, file, function, line, "%s%s%s", (prefix? prefix : ""), (prefix? ": " : ""), date_s); } free(date_s); } static int crm_time_get_sec(int sec, uint * h, uint * m, uint * s) { uint hours, minutes, seconds; if (sec < 0) { seconds = 0 - sec; } else { seconds = sec; } hours = seconds / HOUR_SECONDS; seconds -= HOUR_SECONDS * hours; minutes = seconds / 60; seconds -= 60 * minutes; crm_trace("%d == %.2d:%.2d:%.2d", sec, hours, minutes, seconds); *h = hours; *m = minutes; *s = seconds; return TRUE; } int crm_time_get_timeofday(crm_time_t * dt, uint * h, uint * m, uint * s) { return crm_time_get_sec(dt->seconds, h, m, s); } int crm_time_get_timezone(crm_time_t * dt, uint * h, uint * m) { uint s; return crm_time_get_sec(dt->seconds, h, m, &s); } long long crm_time_get_seconds(crm_time_t * dt) { int lpc; crm_time_t *utc = NULL; long long in_seconds = 0; if (dt == NULL) { return 0; } utc = crm_get_utc_time(dt); if (utc == NULL) { return 0; } for (lpc = 1; lpc < utc->years; lpc++) { int dmax = year_days(lpc); in_seconds += DAY_SECONDS * dmax; } /* utc->months is an offset that can only be set for a duration. * By definition, the value is variable depending on the date to * which it is applied. * * Force 30-day months so that something vaguely sane happens * for anyone that tries to use a month in this way. */ if (utc->months > 0) { in_seconds += DAY_SECONDS * 30 * utc->months; } if (utc->days > 0) { in_seconds += DAY_SECONDS * (utc->days - 1); } in_seconds += utc->seconds; crm_time_free(utc); return in_seconds; } #define EPOCH_SECONDS 62135596800ULL /* Calculated using crm_time_get_seconds() */ long long crm_time_get_seconds_since_epoch(crm_time_t * dt) { return (dt == NULL)? 
0 : (crm_time_get_seconds(dt) - EPOCH_SECONDS); } int crm_time_get_gregorian(crm_time_t * dt, uint * y, uint * m, uint * d) { int months = 0; int days = dt->days; if(dt->years != 0) { for (months = 1; months <= 12 && days > 0; months++) { int mdays = crm_time_days_in_month(months, dt->years); if (mdays >= days) { break; } else { days -= mdays; } } } else if (dt->months) { /* This is a duration including months, don't convert the days field */ months = dt->months; } else { /* This is a duration not including months, still don't convert the days field */ } *y = dt->years; *m = months; *d = days; crm_trace("%.4d-%.3d -> %.4d-%.2d-%.2d", dt->years, dt->days, dt->years, months, days); return TRUE; } int crm_time_get_ordinal(crm_time_t * dt, uint * y, uint * d) { *y = dt->years; *d = dt->days; return TRUE; } int crm_time_get_isoweek(crm_time_t * dt, uint * y, uint * w, uint * d) { /* * Monday 29 December 2008 is written "2009-W01-1" * Sunday 3 January 2010 is written "2009-W53-7" */ int year_num = 0; int jan1 = crm_time_january1_weekday(dt->years); int h = -1; CRM_CHECK(dt->days > 0, return FALSE); /* 6. Find the Weekday for Y M D */ h = dt->days + jan1 - 1; *d = 1 + ((h - 1) % 7); /* 7. Find if Y M D falls in YearNumber Y-1, WeekNumber 52 or 53 */ if (dt->days <= (8 - jan1) && jan1 > 4) { crm_trace("year--, jan1=%d", jan1); year_num = dt->years - 1; *w = crm_time_weeks_in_year(year_num); } else { year_num = dt->years; } /* 8. Find if Y M D falls in YearNumber Y+1, WeekNumber 1 */ if (year_num == dt->years) { int dmax = year_days(year_num); int correction = 4 - *d; if ((dmax - dt->days) < correction) { crm_trace("year++, jan1=%d, i=%d vs. %d", jan1, dmax - dt->days, correction); year_num = dt->years + 1; *w = 1; } } /* 9. Find if Y M D falls in YearNumber Y, WeekNumber 1 through 53 */ if (year_num == dt->years) { int j = dt->days + (7 - *d) + (jan1 - 1); *w = j / 7; if (jan1 > 4) { *w -= 1; } } *y = year_num; crm_trace("Converted %.4d-%.3d to %.4d-W%.2d-%d", dt->years, dt->days, *y, *w, *d); return TRUE; } #define DATE_MAX 128 #define s_if_plural(i) (((i) == 1)? "" : "s") static void crm_duration_as_string(crm_time_t *dt, char *result) { size_t offset = 0; if (dt->years) { offset += snprintf(result + offset, DATE_MAX - offset, "%4d year%s ", dt->years, s_if_plural(dt->years)); } if (dt->months) { offset += snprintf(result + offset, DATE_MAX - offset, "%2d month%s ", dt->months, s_if_plural(dt->months)); } if (dt->days) { offset += snprintf(result + offset, DATE_MAX - offset, "%2d day%s ", dt->days, s_if_plural(dt->days)); } if (((offset == 0) || (dt->seconds != 0)) && (dt->seconds > -60) && (dt->seconds < 60)) { offset += snprintf(result + offset, DATE_MAX - offset, "%d second%s", dt->seconds, s_if_plural(dt->seconds)); } else if (dt->seconds) { uint h = 0, m = 0, s = 0; offset += snprintf(result + offset, DATE_MAX - offset, "%d seconds (", dt->seconds); crm_time_get_sec(dt->seconds, &h, &m, &s); if (h) { offset += snprintf(result + offset, DATE_MAX - offset, "%u hour%s%s", h, s_if_plural(h), ((m || s)? " " : "")); } if (m) { offset += snprintf(result + offset, DATE_MAX - offset, "%u minute%s%s", m, s_if_plural(m), (s? 
" " : "")); } if (s) { offset += snprintf(result + offset, DATE_MAX - offset, "%u second%s", s, s_if_plural(s)); } offset += snprintf(result + offset, DATE_MAX - offset, ")"); } } char * crm_time_as_string(crm_time_t * date_time, int flags) { crm_time_t *dt = NULL; crm_time_t *utc = NULL; char result[DATE_MAX] = { '\0', }; char *result_copy = NULL; size_t offset = 0; // Convert to UTC if local timezone was not requested if (date_time && date_time->offset && is_not_set(flags, crm_time_log_with_timezone)) { crm_trace("UTC conversion"); utc = crm_get_utc_time(date_time); dt = utc; } else { dt = date_time; } if (!crm_time_is_defined(dt)) { strcpy(result, ""); goto done; } // Simple cases: as duration, seconds, or seconds since epoch if (flags & crm_time_log_duration) { crm_duration_as_string(date_time, result); goto done; } if (flags & crm_time_seconds) { snprintf(result, DATE_MAX, "%lld", crm_time_get_seconds(date_time)); goto done; } if (flags & crm_time_epoch) { snprintf(result, DATE_MAX, "%lld", crm_time_get_seconds_since_epoch(date_time)); goto done; } // As readable string if (flags & crm_time_log_date) { if (flags & crm_time_weeks) { // YYYY-WW-D uint y, w, d; if (crm_time_get_isoweek(dt, &y, &w, &d)) { offset += snprintf(result + offset, DATE_MAX - offset, "%u-W%.2u-%u", y, w, d); } } else if (flags & crm_time_ordinal) { // YYYY-DDD uint y, d; if (crm_time_get_ordinal(dt, &y, &d)) { offset += snprintf(result + offset, DATE_MAX - offset, "%u-%.3u", y, d); } } else { // YYYY-MM-DD uint y, m, d; if (crm_time_get_gregorian(dt, &y, &m, &d)) { offset += snprintf(result + offset, DATE_MAX - offset, "%.4u-%.2u-%.2u", y, m, d); } } } if (flags & crm_time_log_timeofday) { uint h = 0, m = 0, s = 0; if (offset > 0) { offset += snprintf(result + offset, DATE_MAX - offset, " "); } if (crm_time_get_timeofday(dt, &h, &m, &s)) { offset += snprintf(result + offset, DATE_MAX - offset, "%.2u:%.2u:%.2u", h, m, s); } if ((flags & crm_time_log_with_timezone) && (dt->offset != 0)) { crm_time_get_sec(dt->offset, &h, &m, &s); offset += snprintf(result + offset, DATE_MAX - offset, " %c%.2u:%.2u", ((dt->offset < 0)? '-' : '+'), h, m); } else { offset += snprintf(result + offset, DATE_MAX - offset, "Z"); } } done: crm_time_free(utc); result_copy = strdup(result); CRM_ASSERT(result_copy != NULL); return result_copy; } /*! * \internal * \brief Determine number of seconds from an hour:minute:second string * * \param[in] time_str Time specification string * \param[out] result Number of seconds equivalent to time_str * * \return TRUE if specification was valid, FALSE (and set errno) otherwise * \note This may return the number of seconds in a day (which is out of bounds * for a time object) if given 24:00:00. 
*/ static bool crm_time_parse_sec(const char *time_str, int *result) { int rc; uint hour = 0; uint minute = 0; uint second = 0; *result = 0; // Must have at least hour, but minutes and seconds are optional rc = sscanf(time_str, "%d:%d:%d", &hour, &minute, &second); if (rc == 1) { rc = sscanf(time_str, "%2d%2d%2d", &hour, &minute, &second); } if (rc == 0) { crm_err("%s is not a valid ISO 8601 time specification", time_str); errno = EINVAL; return FALSE; } crm_trace("Got valid time: %.2d:%.2d:%.2d", hour, minute, second); if ((hour == 24) && (minute == 0) && (second == 0)) { // Equivalent to 00:00:00 of next day, return number of seconds in day } else if (hour >= 24) { crm_err("%s is not a valid ISO 8601 time specification " "because %d is not a valid hour", time_str, hour); errno = EINVAL; return FALSE; } if (minute >= 60) { crm_err("%s is not a valid ISO 8601 time specification " "because %d is not a valid minute", time_str, minute); errno = EINVAL; return FALSE; } if (second >= 60) { crm_err("%s is not a valid ISO 8601 time specification " "because %d is not a valid second", time_str, second); errno = EINVAL; return FALSE; } *result = (hour * HOUR_SECONDS) + (minute * 60) + second; return TRUE; } static bool crm_time_parse_offset(const char *offset_str, int *offset) { tzset(); if (offset_str == NULL) { // Use local offset #if defined(HAVE_STRUCT_TM_TM_GMTOFF) time_t now = time(NULL); struct tm *now_tm = localtime(&now); #endif int h_offset = GMTOFF(now_tm) / HOUR_SECONDS; int m_offset = (GMTOFF(now_tm) - (HOUR_SECONDS * h_offset)) / 60; if (h_offset < 0 && m_offset < 0) { m_offset = 0 - m_offset; } *offset = (HOUR_SECONDS * h_offset) + (60 * m_offset); return TRUE; } if (offset_str[0] == 'Z') { // @TODO invalid if anything after? *offset = 0; return TRUE; } *offset = 0; if ((offset_str[0] == '+') || (offset_str[0] == '-') || isdigit((int)offset_str[0])) { gboolean negate = FALSE; if (offset_str[0] == '-') { negate = TRUE; offset_str++; } if (crm_time_parse_sec(offset_str, offset) == FALSE) { return FALSE; } if (negate) { *offset = 0 - *offset; } } // @TODO else invalid? return TRUE; } /*! * \internal * \brief Parse the time portion of an ISO 8601 date/time string * * \param[in] time_str Time portion of specification (after any 'T') * \param[in,out] a_time Time object to parse into * * \return TRUE if valid time was parsed, FALSE (and set errno) otherwise * \note This may add a day to a_time (if the time is 24:00:00). */ static bool crm_time_parse(const char *time_str, crm_time_t *a_time) { uint h, m, s; char *offset_s = NULL; tzset(); if (time_str) { if (crm_time_parse_sec(time_str, &(a_time->seconds)) == FALSE) { return FALSE; } offset_s = strstr(time_str, "Z"); if (offset_s == NULL) { offset_s = strstr(time_str, " "); if (offset_s) { while (isspace(offset_s[0])) { offset_s++; } } } } if (crm_time_parse_offset(offset_s, &(a_time->offset)) == FALSE) { return FALSE; } crm_time_get_sec(a_time->offset, &h, &m, &s); crm_trace("Got tz: %c%2.d:%.2d", ((a_time->offset < 0)? 
'-' : '+'), h, m); if (a_time->seconds == DAY_SECONDS) { // 24:00:00 == 00:00:00 of next day a_time->seconds = 0; crm_time_add_days(a_time, 1); } return TRUE; } /* * \internal * \brief Parse a time object from an ISO 8601 date/time specification * * \param[in] date_str ISO 8601 date/time specification (or "epoch") * * \return New time object on success, NULL (and set errno) otherwise */ static crm_time_t * parse_date(const char *date_str) { const char *time_s = NULL; crm_time_t *dt = NULL; int year = 0; int month = 0; int week = 0; int day = 0; int rc = 0; if ((date_str == NULL) || (date_str[0] == '\0')) { crm_err("No ISO 8601 date/time specification given"); goto invalid; } if ((date_str[0] == 'T') || (date_str[2] == ':')) { /* Just a time supplied - Infer current date */ dt = crm_time_new(NULL); if (date_str[0] == 'T') { time_s = date_str + 1; } else { time_s = date_str; } goto parse_time; } dt = crm_time_new_undefined(); if (!strncasecmp("epoch", date_str, 5) && ((date_str[5] == '\0') || (date_str[5] == '/') || isspace(date_str[5]))) { dt->days = 1; dt->years = 1970; crm_time_log(LOG_TRACE, "Unpacked", dt, crm_time_log_date | crm_time_log_timeofday); return dt; } /* YYYY-MM-DD */ rc = sscanf(date_str, "%d-%d-%d", &year, &month, &day); if (rc == 1) { /* YYYYMMDD */ rc = sscanf(date_str, "%4d%2d%2d", &year, &month, &day); } if (rc == 3) { if (month > 12) { crm_err("'%s' is not a valid ISO 8601 date/time specification " "because '%d' is not a valid month", date_str, month); goto invalid; } else if (day > crm_time_days_in_month(month, year)) { crm_err("'%s' is not a valid ISO 8601 date/time specification " "because '%d' is not a valid day of the month", date_str, day); goto invalid; } else { dt->years = year; dt->days = get_ordinal_days(year, month, day); crm_trace("Parsed Gregorian date '%.4d-%.3d' from date string '%s'", year, dt->days, date_str); } goto parse_time; } /* YYYY-DDD */ rc = sscanf(date_str, "%d-%d", &year, &day); if (rc == 2) { if (day > year_days(year)) { crm_err("'%s' is not a valid ISO 8601 date/time specification " "because '%d' is not a valid day of the year (max %d)", date_str, day, year_days(year)); goto invalid; } crm_trace("Parsed ordinal year %d and days %d from date string '%s'", year, day, date_str); dt->days = day; dt->years = year; goto parse_time; } /* YYYY-Www-D */ rc = sscanf(date_str, "%d-W%d-%d", &year, &week, &day); if (rc == 3) { if (week > crm_time_weeks_in_year(year)) { crm_err("'%s' is not a valid ISO 8601 date/time specification " "because '%d' is not a valid week of the year (max %d)", date_str, week, crm_time_weeks_in_year(year)); goto invalid; } else if (day < 1 || day > 7) { crm_err("'%s' is not a valid ISO 8601 date/time specification " "because '%d' is not a valid day of the week", date_str, day); goto invalid; } else { /* * See https://en.wikipedia.org/wiki/ISO_week_date * * Monday 29 December 2008 is written "2009-W01-1" * Sunday 3 January 2010 is written "2009-W53-7" * Saturday 27 September 2008 is written "2008-W37-6" * * If 1 January is on a Monday, Tuesday, Wednesday or Thursday, it is in week 01. * If 1 January is on a Friday, Saturday or Sunday, it is in week 52 or 53 of the previous year. 
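 *
 * Worked example (illustrative, assuming crm_time_january1_weekday() uses
 * ISO numbering with Monday = 1): for "2009-W01-1", 1 January 2009 is a
 * Thursday, so jan1 = 4. Starting from ordinal day 0 of 2009, the code below
 * adds (1 - 1) * 7, then (1 - jan1) = -3, then day = 1, a net offset of -2,
 * which crm_time_add_days() normalizes to 2008-364, i.e. Monday
 * 29 December 2008. The same arithmetic turns "2009-W53-7" into 2010-003,
 * i.e. Sunday 3 January 2010.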
*/ int jan1 = crm_time_january1_weekday(year); crm_trace("Got year %d (Jan 1 = %d), week %d, and day %d from date string '%s'", year, jan1, week, day, date_str); dt->years = year; crm_time_add_days(dt, (week - 1) * 7); if (jan1 <= 4) { crm_time_add_days(dt, 1 - jan1); } else { crm_time_add_days(dt, 8 - jan1); } crm_time_add_days(dt, day); } goto parse_time; } crm_err("'%s' is not a valid ISO 8601 date/time specification", date_str); goto invalid; parse_time: if (time_s == NULL) { time_s = date_str + strspn(date_str, "0123456789-W"); if ((time_s[0] == ' ') || (time_s[0] == 'T')) { ++time_s; } else { time_s = NULL; } } if ((time_s != NULL) && (crm_time_parse(time_s, dt) == FALSE)) { goto invalid; } crm_time_log(LOG_TRACE, "Unpacked", dt, crm_time_log_date | crm_time_log_timeofday); if (crm_time_check(dt) == FALSE) { crm_err("'%s' is not a valid ISO 8601 date/time specification", date_str); goto invalid; } return dt; invalid: crm_time_free(dt); errno = EINVAL; return NULL; } // Parse an ISO 8601 numeric value and return number of characters consumed // @TODO This cannot handle >INT_MAX int values // @TODO Fractions appear to be not working // @TODO Error out on invalid specifications static int parse_int(const char *str, int field_width, int upper_bound, int *result) { int lpc = 0; int offset = 0; int intermediate = 0; gboolean fraction = FALSE; gboolean negate = FALSE; *result = 0; if (*str == '\0') { return 0; } if (str[offset] == 'T') { offset++; } if (str[offset] == '.' || str[offset] == ',') { fraction = TRUE; field_width = -1; offset++; } else if (str[offset] == '-') { negate = TRUE; offset++; } else if (str[offset] == '+' || str[offset] == ':') { offset++; } for (; (fraction || lpc < field_width) && isdigit((int)str[offset]); lpc++) { if (fraction) { intermediate = (str[offset] - '0') / (10 ^ lpc); } else { *result *= 10; intermediate = str[offset] - '0'; } *result += intermediate; offset++; } if (fraction) { *result = (int)(*result * upper_bound); } else if (upper_bound > 0 && *result > upper_bound) { *result = upper_bound; } if (negate) { *result = 0 - *result; } if (lpc > 0) { crm_trace("Found int: %d. Stopped at str[%d]='%c'", *result, lpc, str[lpc]); return offset; } return 0; } /*! * \brief Parse a time duration from an ISO 8601 duration specification * * \param[in] period_s ISO 8601 duration specification (optionally followed by * whitespace, after which the rest of the string will be * ignored) * * \return New time object on success, NULL (and set errno) otherwise * \note It is the caller's responsibility to return the result using * crm_time_free(). */ crm_time_t * crm_time_parse_duration(const char *period_s) { gboolean is_time = FALSE; crm_time_t *diff = NULL; if ((period_s == NULL) || (period_s[0] == '\0')) { crm_err("No ISO 8601 time duration given"); goto invalid; } if (period_s[0] != 'P') { crm_err("'%s' is not a valid ISO 8601 time duration " "because it does not start with a 'P'", period_s); goto invalid; } if ((period_s[1] == '\0') || isspace(period_s[1])) { crm_err("'%s' is not a valid ISO 8601 time duration " "because nothing follows 'P'", period_s); goto invalid; } diff = crm_time_new_undefined(); diff->duration = TRUE; for (const char *current = period_s + 1; current[0] && (current[0] != '/') && !isspace(current[0]); ++current) { int an_int = 0, rc; if (current[0] == 'T') { /* A 'T' separates year/month/day from hour/minute/seconds. We don't * require it strictly, but just use it to differentiate month from * minutes. 
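 *
 * Illustrative decomposition (not from this patch), following the unit
 * switch further below: "P1M" is one month while "PT1M" is one minute, and
 *
 *     crm_time_t *d = crm_time_parse_duration("P1Y2M3DT4H5M6S");
 *     // d->years == 1, d->months == 2, d->days == 3,
 *     // d->seconds == 4*3600 + 5*60 + 6 == 14706
 *     crm_time_free(d);
 *
 * "P2W" likewise ends up with days == 14.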
*/ is_time = TRUE; continue; } // An integer must be next rc = parse_int(current, 10, 0, &an_int); if (rc == 0) { crm_err("'%s' is not a valid ISO 8601 time duration " "because no integer at '%s'", period_s, current); goto invalid; } current += rc; // A time unit must be next (we're not strict about the order) switch (current[0]) { case 'Y': diff->years = an_int; break; case 'M': if (is_time) { /* Minutes */ diff->seconds += an_int * 60; } else { diff->months = an_int; } break; case 'W': diff->days += an_int * 7; break; case 'D': diff->days += an_int; break; case 'H': diff->seconds += an_int * HOUR_SECONDS; break; case 'S': diff->seconds += an_int; break; case '\0': crm_err("'%s' is not a valid ISO 8601 time duration " "because no units after %d", period_s, an_int); goto invalid; default: crm_err("'%s' is not a valid ISO 8601 time duration " "because '%c' is not a valid time unit", period_s, current[0]); goto invalid; } } if (!crm_time_is_defined(diff)) { crm_err("'%s' is not a valid ISO 8601 time duration " "because no amounts and units given", period_s); goto invalid; } return diff; invalid: crm_time_free(diff); errno = EINVAL; return NULL; } /*! * \brief Parse a time period from an ISO 8601 interval specification * * \param[in] period_str ISO 8601 interval specification (start/end, * start/duration, or duration/end) * * \return New time period object on success, NULL (and set errno) otherwise * \note The caller is responsible for freeing the result using * crm_time_free_period(). */ crm_time_period_t * crm_time_parse_period(const char *period_str) { const char *original = period_str; crm_time_period_t *period = NULL; if ((period_str == NULL) || (period_str[0] == '\0')) { crm_err("No ISO 8601 time period given"); goto invalid; } tzset(); period = calloc(1, sizeof(crm_time_period_t)); CRM_ASSERT(period != NULL); if (period_str[0] == 'P') { period->diff = crm_time_parse_duration(period_str); if (period->diff == NULL) { goto error; } } else { period->start = parse_date(period_str); if (period->start == NULL) { goto error; } } period_str = strstr(original, "/"); if (period_str) { ++period_str; if (period_str[0] == 'P') { if (period->diff != NULL) { crm_err("'%s' is not a valid ISO 8601 time period " "because it has two durations", original); goto invalid; } period->diff = crm_time_parse_duration(period_str); if (period->diff == NULL) { goto error; } } else { period->end = parse_date(period_str); if (period->end == NULL) { goto error; } } } else if (period->diff != NULL) { // Only duration given, assume start is now period->start = crm_time_new(NULL); } else { // Only start given crm_err("'%s' is not a valid ISO 8601 time period " "because it has no duration or ending time", original); goto invalid; } if (period->start == NULL) { period->start = crm_time_subtract(period->end, period->diff); } else if (period->end == NULL) { period->end = crm_time_add(period->start, period->diff); } if (crm_time_check(period->start) == FALSE) { crm_err("'%s' is not a valid ISO 8601 time period " "because the start is invalid", period_str); goto invalid; } if (crm_time_check(period->end) == FALSE) { crm_err("'%s' is not a valid ISO 8601 time period " "because the end is invalid", period_str); goto invalid; } return period; invalid: errno = EINVAL; error: crm_time_free_period(period); return NULL; } /*! 
* \brief Free a dynamically allocated time period object * * \param[in] period Time period to free */ void crm_time_free_period(crm_time_period_t *period) { if (period) { crm_time_free(period->start); crm_time_free(period->end); crm_time_free(period->diff); free(period); } } void crm_time_set(crm_time_t * target, crm_time_t * source) { crm_trace("target=%p, source=%p", target, source); CRM_CHECK(target != NULL && source != NULL, return); target->years = source->years; target->days = source->days; target->months = source->months; /* Only for durations */ target->seconds = source->seconds; target->offset = source->offset; crm_time_log(LOG_TRACE, "source", source, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "target", target, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); } static void ha_set_tm_time(crm_time_t * target, struct tm *source) { int h_offset = 0; int m_offset = 0; /* Ensure target is fully initialized */ target->years = 0; target->months = 0; target->days = 0; target->seconds = 0; target->offset = 0; target->duration = FALSE; if (source->tm_year > 0) { /* years since 1900 */ target->years = 1900 + source->tm_year; } if (source->tm_yday >= 0) { /* days since January 1 [0-365] */ target->days = 1 + source->tm_yday; } if (source->tm_hour >= 0) { target->seconds += HOUR_SECONDS * source->tm_hour; } if (source->tm_min >= 0) { target->seconds += 60 * source->tm_min; } if (source->tm_sec >= 0) { target->seconds += source->tm_sec; } /* tm_gmtoff == offset from UTC in seconds */ h_offset = GMTOFF(source) / HOUR_SECONDS; m_offset = (GMTOFF(source) - (HOUR_SECONDS * h_offset)) / 60; crm_trace("Offset (s): %ld, offset (hh:mm): %.2d:%.2d", GMTOFF(source), h_offset, m_offset); target->offset += HOUR_SECONDS * h_offset; target->offset += 60 * m_offset; } void crm_time_set_timet(crm_time_t * target, time_t * source) { ha_set_tm_time(target, localtime(source)); } crm_time_t * crm_time_add(crm_time_t * dt, crm_time_t * value) { crm_time_t *utc = NULL; crm_time_t *answer = NULL; if ((dt == NULL) || (value == NULL)) { errno = EINVAL; return NULL; } answer = crm_time_new_undefined(); crm_time_set(answer, dt); utc = crm_get_utc_time(value); if (utc == NULL) { crm_time_free(answer); return NULL; } answer->years += utc->years; crm_time_add_months(answer, utc->months); crm_time_add_days(answer, utc->days); crm_time_add_seconds(answer, utc->seconds); crm_time_free(utc); return answer; } crm_time_t * crm_time_calculate_duration(crm_time_t * dt, crm_time_t * value) { crm_time_t *utc = NULL; crm_time_t *answer = NULL; if ((dt == NULL) || (value == NULL)) { errno = EINVAL; return NULL; } utc = crm_get_utc_time(value); if (utc == NULL) { return NULL; } answer = crm_get_utc_time(dt); if (answer == NULL) { crm_time_free(utc); return NULL; } answer->duration = TRUE; answer->years -= utc->years; if(utc->months != 0) { crm_time_add_months(answer, -utc->months); } crm_time_add_days(answer, -utc->days); crm_time_add_seconds(answer, -utc->seconds); crm_time_free(utc); return answer; } crm_time_t * crm_time_subtract(crm_time_t * dt, crm_time_t * value) { crm_time_t *utc = NULL; crm_time_t *answer = NULL; if ((dt == NULL) || (value == NULL)) { errno = EINVAL; return NULL; } utc = crm_get_utc_time(value); if (utc == NULL) { return NULL; } answer = crm_time_new_undefined(); crm_time_set(answer, dt); answer->years -= utc->years; if(utc->months != 0) { crm_time_add_months(answer, -utc->months); } crm_time_add_days(answer, -utc->days); 
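    /* Illustrative note: both crm_time_add_days() above and
     * crm_time_add_seconds() below normalize their result, so borrows
     * cascade. For example, subtracting a PT1S duration from
     * 2019-001 00:00:00 leaves seconds at -1, which crm_time_add_seconds()
     * rewrites as +86399 seconds minus one day, and crm_time_add_days()
     * then rolls the date back to 2018-365 23:59:59. */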
crm_time_add_seconds(answer, -utc->seconds); return answer; } /*! * \brief Check whether a time object represents a sensible date/time * * \param[in] dt Date/time object to check * * \return TRUE if years, days, and seconds are sensible, FALSE otherwise */ bool crm_time_check(crm_time_t * dt) { return (dt != NULL) && (dt->days > 0) && (dt->days <= year_days(dt->years)) && (dt->seconds >= 0) && (dt->seconds < DAY_SECONDS); } #define do_cmp_field(l, r, field) \ if(rc == 0) { \ if(l->field > r->field) { \ crm_trace("%s: %d > %d", \ #field, l->field, r->field); \ rc = 1; \ } else if(l->field < r->field) { \ crm_trace("%s: %d < %d", \ #field, l->field, r->field); \ rc = -1; \ } \ } int crm_time_compare(crm_time_t *a, crm_time_t *b) { int rc = 0; crm_time_t *t1 = crm_get_utc_time(a); crm_time_t *t2 = crm_get_utc_time(b); if ((t1 == NULL) && (t2 == NULL)) { return 0; } else if (t1 == NULL) { return -1; } else if (t2 == NULL) { return 1; } do_cmp_field(t1, t2, years); do_cmp_field(t1, t2, days); do_cmp_field(t1, t2, seconds); crm_time_free(t1); crm_time_free(t2); return rc; } /*! * \brief Add a given number of seconds to a date/time or duration * * \param[in] a_time Date/time or duration to add seconds to * \param[in] extra Number of seconds to add */ void crm_time_add_seconds(crm_time_t *a_time, int extra) { int days = 0; crm_trace("Adding %d seconds to %d (max=%d)", extra, a_time->seconds, DAY_SECONDS); a_time->seconds += extra; days = a_time->seconds / DAY_SECONDS; a_time->seconds %= DAY_SECONDS; // Don't have negative seconds if (a_time->seconds < 0) { a_time->seconds += DAY_SECONDS; --days; } crm_time_add_days(a_time, days); } void crm_time_add_days(crm_time_t * a_time, int extra) { int lower_bound = 1; int ydays = crm_time_leapyear(a_time->years) ? 366 : 365; crm_trace("Adding %d days to %.4d-%.3d", extra, a_time->years, a_time->days); a_time->days += extra; while (a_time->days > ydays) { a_time->years++; a_time->days -= ydays; ydays = crm_time_leapyear(a_time->years) ? 366 : 365; } if(a_time->duration) { lower_bound = 0; } while (a_time->days < lower_bound) { a_time->years--; a_time->days += crm_time_leapyear(a_time->years) ? 
366 : 365; } } void crm_time_add_months(crm_time_t * a_time, int extra) { int lpc; uint32_t y, m, d, dmax; crm_time_get_gregorian(a_time, &y, &m, &d); crm_trace("Adding %d months to %.4d-%.2d-%.2d", extra, y, m, d); if (extra > 0) { for (lpc = extra; lpc > 0; lpc--) { m++; if (m == 13) { m = 1; y++; } } } else { for (lpc = -extra; lpc > 0; lpc--) { m--; if (m == 0) { m = 12; y--; } } } dmax = crm_time_days_in_month(m, y); if (dmax < d) { /* Preserve day-of-month unless the month doesn't have enough days */ d = dmax; } crm_trace("Calculated %.4d-%.2d-%.2d", y, m, d); a_time->years = y; a_time->days = get_ordinal_days(y, m, d); crm_time_get_gregorian(a_time, &y, &m, &d); crm_trace("Got %.4d-%.2d-%.2d", y, m, d); } void crm_time_add_minutes(crm_time_t * a_time, int extra) { crm_time_add_seconds(a_time, extra * 60); } void crm_time_add_hours(crm_time_t * a_time, int extra) { crm_time_add_seconds(a_time, extra * HOUR_SECONDS); } void crm_time_add_weeks(crm_time_t * a_time, int extra) { crm_time_add_days(a_time, extra * 7); } void crm_time_add_years(crm_time_t * a_time, int extra) { a_time->years += extra; } static void ha_get_tm_time( struct tm *target, crm_time_t *source) { *target = (struct tm) { .tm_year = source->years - 1900, .tm_mday = source->days, .tm_sec = source->seconds % 60, .tm_min = ( source->seconds / 60 ) % 60, .tm_hour = source->seconds / HOUR_SECONDS, .tm_isdst = -1, /* don't adjust */ #if defined(HAVE_STRUCT_TM_TM_GMTOFF) .tm_gmtoff = source->offset #endif }; mktime(target); } crm_time_hr_t * crm_time_hr_convert(crm_time_hr_t *target, crm_time_t *dt) { crm_time_hr_t *hr_dt = NULL; if (dt) { hr_dt = target?target:calloc(1, sizeof(crm_time_hr_t)); CRM_ASSERT(hr_dt != NULL); *hr_dt = (crm_time_hr_t) { .years = dt->years, .months = dt->months, .days = dt->days, .seconds = dt->seconds, .offset = dt->offset, .duration = dt->duration }; } return hr_dt; } void crm_time_set_hr_dt(crm_time_t *target, crm_time_hr_t *hr_dt) { CRM_ASSERT((hr_dt) && (target)); *target = (crm_time_t) { .years = hr_dt->years, .months = hr_dt->months, .days = hr_dt->days, .seconds = hr_dt->seconds, .offset = hr_dt->offset, .duration = hr_dt->duration }; } crm_time_hr_t * crm_time_timeval_hr_convert(crm_time_hr_t *target, struct timeval *tv) { crm_time_t dt; crm_time_hr_t *ret; crm_time_set_timet(&dt, &tv->tv_sec); ret = crm_time_hr_convert(target, &dt); if (ret) { ret->useconds = tv->tv_usec; } return ret; } crm_time_hr_t * crm_time_hr_new(const char *date_time) { crm_time_hr_t *hr_dt = NULL; struct timeval tv_now; if (!date_time) { if (gettimeofday(&tv_now, NULL) == 0) { hr_dt = crm_time_timeval_hr_convert(NULL, &tv_now); } } else { crm_time_t *dt; dt = parse_date(date_time); hr_dt = crm_time_hr_convert(NULL, dt); crm_time_free(dt); } return hr_dt; } void crm_time_hr_free(crm_time_hr_t * hr_dt) { free(hr_dt); } char * crm_time_format_hr(const char *format, crm_time_hr_t * hr_dt) { const char *mark_s; int max = 128, scanned_pos = 0, printed_pos = 0, fmt_pos = 0, date_len = 0, nano_digits = 0; char nano_s[10], date_s[max+1], nanofmt_s[5] = "%", *tmp_fmt_s; struct tm tm; crm_time_t dt; if (!format) { return NULL; } crm_time_set_hr_dt(&dt, hr_dt); ha_get_tm_time(&tm, &dt); sprintf(nano_s, "%06d000", hr_dt->useconds); while ((format[scanned_pos]) != '\0') { mark_s = strchr(&format[scanned_pos], '%'); if (mark_s) { int fmt_len = 1; fmt_pos = mark_s - format; while ((format[fmt_pos+fmt_len] != '\0') && (format[fmt_pos+fmt_len] >= '0') && (format[fmt_pos+fmt_len] <= '9')) { fmt_len++; } scanned_pos = fmt_pos + 
fmt_len + 1; if (format[fmt_pos+fmt_len] == 'N') { nano_digits = atoi(&format[fmt_pos+1]); nano_digits = (nano_digits > 6)?6:nano_digits; nano_digits = (nano_digits < 0)?0:nano_digits; sprintf(&nanofmt_s[1], ".%ds", nano_digits); } else { if (format[scanned_pos] != '\0') { continue; } fmt_pos = scanned_pos; /* print till end */ } } else { scanned_pos = strlen(format); fmt_pos = scanned_pos; /* print till end */ } tmp_fmt_s = strndup(&format[printed_pos], fmt_pos - printed_pos); #ifdef GCC_FORMAT_NONLITERAL_CHECKING_ENABLED #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" #endif date_len += strftime(&date_s[date_len], max-date_len, tmp_fmt_s, &tm); #ifdef GCC_FORMAT_NONLITERAL_CHECKING_ENABLED #pragma GCC diagnostic pop #endif printed_pos = scanned_pos; free(tmp_fmt_s); if (nano_digits) { #ifdef GCC_FORMAT_NONLITERAL_CHECKING_ENABLED #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" #endif date_len += snprintf(&date_s[date_len], max-date_len, nanofmt_s, nano_s); #ifdef GCC_FORMAT_NONLITERAL_CHECKING_ENABLED #pragma GCC diagnostic pop #endif nano_digits = 0; } } return (date_len == 0)?NULL:strdup(date_s); } /*! * \internal - * \brief Return human-friendly string representing current time + * \brief Return human-friendly string corresponding to a time * - * \return Current time as string (as by ctime() but without newline) on success - * or "Could not determine current time" on error + * \param[in] when Pointer to epoch time value (or NULL for current time) + * + * \return Current time as string (as by ctime() but without newline) on + * success, NULL otherwise * \note The return value points to a statically allocated string which might be * overwritten by subsequent calls to any of the C library date and time functions. */ const char * crm_now_string(time_t *when) { char *since_epoch = NULL; if (when == NULL) { time_t a_time = time(NULL); if (a_time == (time_t) -1) { return NULL; } else { since_epoch = ctime(&a_time); } } else { since_epoch = ctime(when); } if (since_epoch == NULL) { return NULL; } else { return crm_strip_trailing_newline(since_epoch); } } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 64c1979bf8..d33775838c 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,3774 +1,3842 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #include +#include +#include #include #include #include #include #include #include #include #include +#include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed, pe_working_set_t *data_set); static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node); static void add_node_attrs(xmlNode *attrs, pe_node_t *node, bool overwrite, pe_working_set_t *data_set); // Bitmask for warnings we only want to print once uint32_t pe_wo = 0; static gboolean is_dangling_guest_node(node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (pe__is_guest_or_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } /*! * \brief Schedule a fence action for a node * * \param[in,out] data_set Current working set of cluster * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed */ void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (pe__is_guest_node(node)) { resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { if (!is_set(rsc->flags, pe_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", node->details->uname, reason, rsc->id); } else { crm_warn("Guest node %s will be fenced " "(by recovering its guest resource %s): %s", node->details->uname, rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } else if (pe__is_remote_node(node)) { resource_t *rsc = node->details->remote_rsc; if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", node->details->uname, reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; crm_warn("Remote node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, data_set); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", node->details->uname, pe_can_fence(data_set, node)? 
"would also be fenced" : "also is unclean", reason); } else { crm_warn("Cluster node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, data_set); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \ "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \ "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \ "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \ "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(unsigned long long flag, const char *xpath, pe_working_set_t *data_set) { xmlXPathObjectPtr result = NULL; if (is_not_set(data_set->flags, flag)) { result = xpath_search(data_set->input, xpath); if (result && (numXpathResults(result) > 0)) { set_bit(data_set->flags, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = crm_str_table_new(); data_set->config_hash = config_hash; pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Watchdog will be used via SBD if fencing is required " "and stonith-watchdog-timeout is nonzero"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = (int) crm_parse_interval_spec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); if (!strcmp(data_set->stonith_action, "poweroff")) { pe_warn_once(pe_wo_poweroff, "Support for stonith-action of 'poweroff' is deprecated " "and will be removed in a future release (use 'off' instead)"); data_set->stonith_action = "off"; } crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? 
"true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { int do_panic = 0; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) { data_set->no_quorum_policy = no_quorum_suicide; } else { crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? 
"always fatal" : "handled by failcount"); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing); } if (is_set(data_set->flags, pe_flag_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes"); } node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_secure); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data->digest_secure_calc); free(data); } node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct pe_node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (safe_str_eq(type, "remote")) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if ((type == NULL) || safe_str_eq(type, "member")) { new_node->details->type = node_member; } new_node->details->attrs = crm_str_table_new(); if (pe__is_guest_or_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("cluster")); } new_node->details->utilization = crm_str_table_new(); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } bool remote_id_conflict(const char *remote_name, pe_working_set_t *data) { bool match = FALSE; #if 1 pe_find_resource(data->resources, remote_name); #else if (data->name_check == NULL) { data->name_check = g_hash_table_new(crm_str_hash, g_str_equal); for (xml_rsc = __xml_first_child_element(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) { const char *id = ID(xml_rsc); /* avoiding heap allocation here because we know the duration of this hashtable allows us to */ g_hash_table_insert(data->name_check, (char *) id, (char *) id); } } if (g_hash_table_lookup(data->name_check, remote_name)) { match 
= TRUE; } #endif if (match) { crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name); return NULL; } return match; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) { continue; } for (attr = __xml_first_child_element(attr_set); attr != NULL; attr = __xml_next_element(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) { remote_name = value; } else if (safe_str_eq(name, "remote-addr")) { remote_server = value; } else if (safe_str_eq(name, "remote-port")) { remote_port = value; } else if (safe_str_eq(name, "remote-connect-timeout")) { connect_timeout = value; } else if (safe_str_eq(name, "remote-allow-migrate")) { remote_allow_migrate=value; } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (remote_id_conflict(remote_name, data)) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node) { if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (is_set(data_set->flags, pe_flag_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. 
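 *
 * Illustrative summary (not from this patch): every node starts out
 * "unseen", and that flag is only cleared by finding the node's node_state
 * entry while unpacking the status section. Whether the node also starts
 * out "unclean" depends solely on startup-fencing:
 *
 *     startup-fencing=true  -> unseen=TRUE, unclean=TRUE  (fence nodes never seen)
 *     startup-fencing=false -> unseen=TRUE, unclean=FALSE ("blind faith")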
*/ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } new_node = pe_create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); pe_create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && safe_str_neq(container_id, rsc->id)) { resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; set_bit(container->flags, pe_rsc_is_container); container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found remote node %s defined by resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). 
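 *
 * For example (illustrative only; the attribute names are those read by
 * expand_remote_rsc_meta() above, with XML_RSC_ATTR_REMOTE_NODE assumed to
 * be "remote-node", and the values made up), a primitive such as
 *
 *     <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *       <meta_attributes id="vm1-meta">
 *         <nvpair id="vm1-remote-node" name="remote-node" value="guest1"/>
 *         <nvpair id="vm1-remote-addr" name="remote-addr" value="192.168.122.10"/>
 *       </meta_attributes>
 *     </primitive>
 *
 * causes a guest node named "guest1", plus an implicit connection resource
 * for it, to be created here before the resources themselves are unpacked.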
*/ if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. */ if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the PE calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc) { node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); pe_rsc_trace(new_rsc, "Linking remote connection resource %s to node %s", new_rsc->id, remote_node->details->uname); remote_node->details->remote_rsc = new_rsc; if (new_rsc->container == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(data_set, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); } } static void destroy_tag(gpointer data) { tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } /*! 
* \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] data_set Where to put resource information * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when common_unpack() calls resource_location() */ gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) { continue; } if (tag_id == NULL) { crm_config_err("Failed unpacking %s: %s should be specified", crm_element_name(xml_tag), XML_ATTR_ID); continue; } for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) { continue; } if (obj_ref == NULL) { crm_config_err("Failed unpacking %s for tag %s: %s should be specified", crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean 
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } static void unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set) { const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; resource_t *rsc = NULL; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { return; } if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) { return; } crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname); this_node->details->remote_maintenance = crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0"); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (pe__shutdown_requested(this_node)) { crm_info("Node %s is shutting down", this_node->details->uname); this_node->details->shutdown = TRUE; if (rsc) { rsc->next_role = RSC_ROLE_STOPPED; } } if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || (rsc && !is_set(rsc->flags, pe_rsc_managed))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if 
(resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (pe__is_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("Node %s has resource discovery disabled", this_node->details->uname); this_node->details->rsc_discovery_enabled = FALSE; } } } static bool unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) { bool changed = false; xmlNode *lrm_rsc = NULL; for (xmlNode *state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { const char *id = NULL; const char *uname = NULL; node_t *this_node = NULL; bool process = FALSE; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (this_node->details->unpacked) { crm_info("Node %s is already processed", id); continue; } else if (!pe__is_guest_or_remote_node(this_node) && is_set(data_set->flags, pe_flag_stonith_enabled)) { // A redundant test, but preserves the order for regression tests process = TRUE; } else if (pe__is_guest_or_remote_node(this_node)) { bool check = FALSE; resource_t *rsc = this_node->details->remote_rsc; if(fence) { check = TRUE; } else if(rsc == NULL) { /* Not ready yet */ } else if (pe__is_guest_node(this_node) && rsc->role == RSC_ROLE_STARTED && rsc->container->role == RSC_ROLE_STARTED) { /* Both the connection and its containing resource need to be * known to be up before we process resources running in it. */ check = TRUE; crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED); } else if (!pe__is_guest_node(this_node) && rsc->role == RSC_ROLE_STARTED) { check = TRUE; crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED); } if (check) { determine_remote_online_status(data_set, this_node); unpack_handle_remote_attrs(this_node, state, data_set); process = TRUE; } } else if (this_node->details->online) { process = TRUE; } else if (fence) { process = TRUE; } if(process) { crm_trace("Processing lrm resource entries on %shealthy%s node: %s", fence?"un":"", (pe__is_guest_or_remote_node(this_node)? " remote" : ""), this_node->details->uname); changed = TRUE; this_node->details->unpacked = TRUE; lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } return changed; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? 
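 *
 * Rough outline of unpack_status() below (illustrative, not from this patch):
 *
 *   1. Unpack ticket state and the node_state entries of cluster nodes.
 *   2. Repeat unpack_node_loop(status, FALSE, ...) until it stops making
 *      progress; each pass can bring another remote or guest connection
 *      resource to the "started" state required before that node's resource
 *      history can be trusted.
 *   3. Run one final pass with fence set from stonith-enabled to pick up any
 *      remote or guest nodes that never became processable.
 *   4. Schedule stops for containers with failed bundle connections and
 *      compute online status for any remaining remote nodes.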
*/ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_ticket); } for (state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { unpack_tickets_state((xmlNode *) state, data_set); } else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { xmlNode *attrs = NULL; const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } else if (pe__is_guest_or_remote_node(this_node)) { /* online state for remote nodes is determined by the * rsc state after all the unpacking is done. we do however * need to mark whether or not the node has been fenced as this plays * a role during unpacking cluster node resource state */ this_node->details->remote_was_fenced = crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0"); continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (is_not_set(data_set->flags, pe_flag_have_quorum) && this_node->details->online && (data_set->no_quorum_policy == no_quorum_suicide)) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "cluster does not have quorum"); } } } while(unpack_node_loop(status, FALSE, data_set)) { crm_trace("Start another loop"); } // Now catch any nodes we didn't see unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (data_set->stop_needed != NULL) { for (GList *item = data_set->stop_needed; item; item = item->next) { pe_resource_t *container = item->data; pe_node_t *node = pe__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(data_set->stop_needed); data_set->stop_needed = NULL; } for (GListPtr gIter = 
data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *this_node = gIter->data; if (this_node == NULL) { continue; } else if (!pe__is_guest_or_remote_node(this_node)) { continue; } else if(this_node->details->unpacked) { continue; } determine_remote_online_status(data_set, this_node); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (safe_str_eq(is_peer, ONLINESTATUS)) { if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "peer is unexpectedly down"); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; bool crmd_online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = pe_node_attribute_raw(this_node, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); crmd_online = safe_str_eq(is_peer, ONLINESTATUS); if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); /* Slightly different criteria since we can't shut down a dead peer */ online = crmd_online; } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "peer has not been seen by the cluster"); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria"); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { if (crm_is_true(in_cluster) || crmd_online) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still 
coming up", this_node->details->uname); } } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN) && crm_is_true(in_cluster) == FALSE && !crmd_online) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { pe_fence_node(data_set, this_node, "peer is no longer part of the cluster"); } else if (!crmd_online) { pe_fence_node(data_set, this_node, "peer process is no longer available"); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "termination was requested"); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING) || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "peer was in an unknown state"); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && (g_list_length(rsc->running_on) == 1)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? 
"Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); return this_node->details->online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!crm_strlen_zero(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. 
*/ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = calloc(base_name_len + 3, sizeof(char)); CRM_ASSERT(zero); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history */ static pe_resource_t * create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *top = pe__create_clone_child(parent, data_set); // find_rsc() because we might be a cloned group pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of clone-max instances); * (3) a newly created orphan (i.e. clone-max instances are already active). 
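 *
 * For example (hypothetical names): when unpacking history for "myclone"
 * on node2, an instance already known to be active on node2 is returned if
 * there is one; otherwise an inactive instance such as myclone:1 is
 * reused; and only when all clone-max instances are active elsewhere is a
 * brand-new orphan instance created.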
* * \param[in] data_set Cluster information * \param[in] node Node on which to check for instance * \param[in] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (without instance) */ static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; pe_resource_t *rsc = NULL; pe_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and * (3) when we re-run calculations on the same data set as part of a * simulation. */ child->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (((pe_node_t *)locations->data)->details == node->details) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if globally-unique is switched from true to * false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->running_on) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } else { pe_rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && is_not_set(child->flags, pe_rsc_block)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); /* ... 
but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance && inactive_instance->pending_node && (inactive_instance->pending_node->details != node->details)) { inactive_instance = NULL; } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has "requires" set to "quorum" or "nothing", and we don't * have a clone instance for every node, we don't want to consume a valid * instance number for unclean nodes. Such instances may appear to be active * according to the history, but should be considered inactive, so we can * start an instance elsewhere. Treat such instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pe__is_guest_node(node) && !pe__is_universal_clone(parent, data_set)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, data_set); pe_rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when clone-max=0, we create * a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->variant > pe_native) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pe_rsc_is_anon_clone(parent)) { if (pe_rsc_is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && safe_str_neq(rsc_id, rsc->id) && safe_str_neq(rsc_id, rsc->clone_name)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, (is_set(rsc->flags, pe_rsc_orphan)? 
" (ORPHAN)" : "")); } return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pe_rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set); } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { node_t *tmpnode = NULL; char *reason = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && node->details->maintenance == FALSE && is_set(rsc->flags, pe_rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pe__is_guest_node(node)) { set_bit(rsc->flags, pe_rsc_failed); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (pe__is_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. 
*/ node->details->unseen = TRUE; reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason); } free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(data_set, node, reason); free(reason); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); if (rsc->container && pe_rsc_is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. */ data_set->stop_needed = g_list_prepend(data_set->stop_needed, rsc->container); } else if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); } if (tmpnode && pe__is_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(data_set, tmpnode, "remote connection is unrecoverable"); } } /* require the stop action regardless if fencing is occurring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { rsc->next_role = RSC_ROLE_STOPPED; } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. 
*/ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP, FALSE); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } g_list_free(possible_matches); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_ms_s = NULL; counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); interval_ms = crm_parse_ms(interval_ms_s); if (interval_ms == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, 
XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static resource_t * unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = 
__xml_first_child_element(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { resource_t *rsc; resource_t *container; const char *rsc_id; const char *container_id; if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && is_set(top->flags, pe_rsc_promotable)) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, bool success_only, pe_working_set_t *data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; xmlNode *xml = NULL; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? 
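 *
 * For illustration (hypothetical resource/node names, and assuming the
 * usual expansions of XML_LRM_TAG_RESOURCE and XML_LRM_TAG_RSC_OP), the
 * xpath assembled here for a plain stop operation would resemble:
 *
 *     //node_state[@uname='node1']//lrm_resource[@id='myrsc']
 *         /lrm_rsc_op[@operation='stop']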
*/ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); xml = get_xpath_object(xpath, data_set->input, LOG_DEBUG); if (xml && success_only) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_LRM_OP_ERROR; crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status); if ((rc != PCMK_OCF_OK) || (status != PCMK_LRM_OP_DONE)) { return NULL; } } return xml; } static int pe__call_id(xmlNode *op_xml) { int id = 0; if (op_xml) { crm_element_value_int(op_xml, XML_LRM_ATTR_CALLID, &id); } return id; } /*! * \brief Check whether a stop happened on the same node after some event * * \param[in] rsc Resource being checked * \param[in] node Node being checked * \param[in] xml_op Event that stop is being compared to * \param[in] data_set Cluster working set * * \return TRUE if stop happened after event, FALSE otherwise * * \note This is really unnecessary, but kept as a safety mechanism. We * currently don't save more than one successful event in history, so this * only matters when processing really old CIB files that we don't * technically support anymore, or as preparation for logging an extended * history in the future. */ static bool stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->uname, NULL, TRUE, data_set); return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op))); } static void unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { /* A successful migration sequence is: * migrate_to on source node * migrate_from on target node * stop on source node * * If a migrate_to is followed by a stop, the entire migration (successful * or failed) is complete, and we don't care what happened on the target. * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from failed, make sure the resource gets * stopped on both source and target (if up). * * If the migrate_to and migrate_from both succeeded (which also implies the * resource is no longer running on the source), but there is no stop, the * migration is considered to be "dangling". Schedule a stop on the source * in this case. 
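 *
 * Schematic recap (S = source node, T = target node):
 *
 *     migrate_to(S) ... stop(S)                  -> migration complete
 *     migrate_to(S), no migrate_from(T) yet      -> partial migration
 *     migrate_to(S), failed migrate_from(T)      -> stop on S and T (if up)
 *     migrate_to(S), migrate_from(T), no stop(S) -> dangling; stop on S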
*/ int from_rc = 0; int from_status = 0; pe_node_t *target_node = NULL; pe_node_t *source_node = NULL; xmlNode *migrate_from = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); if (stop_happened_after(rsc, node, xml_op, data_set)) { return; } // Clones are not allowed to migrate, so role can't be master rsc->role = RSC_ROLE_STARTED; target_node = pe_find_node(data_set->nodes, target); source_node = pe_find_node(data_set->nodes, source); // Check whether there was a migrate_from action on the target migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, source, FALSE, data_set); if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { /* The migrate_to and migrate_from both succeeded, so mark the migration * as "dangling". This will be used to schedule a stop action on the * source without affecting the target. */ pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), source); rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed if (target_node && target_node->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, target_node->details->online); native_add_running(rsc, target_node, data_set); } } else { // Pending, or complete but erased if (target_node && target_node->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, target_node->details->online); native_add_running(rsc, target_node, data_set); if (source_node && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ rsc->partial_migration_target = target_node; rsc->partial_migration_source = source_node; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } static void unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { int target_stop_id = 0; int target_migrate_from_id = 0; xmlNode *target_stop = NULL; xmlNode *target_migrate_from = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be master. 
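 *
 * The checks below look for a stop and a migrate_from on the target node
 * to decide whether the resource may still be active there, or whether
 * this failed migration should instead be treated as dangling on the
 * source.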
*/ rsc->role = RSC_ROLE_STARTED; // Check for stop on the target target_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, target, NULL, TRUE, data_set); target_stop_id = pe__call_id(target_stop); // Check for migrate_from on the target target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, source, TRUE, data_set); target_migrate_from_id = pe__call_id(target_migrate_from); if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) { /* There was no stop on the source, or a stop that happened before a * migrate_from, so assume the resource is still active on the target * (if it is up). */ node_t *target_node = pe_find_node(data_set->nodes, target); pe_rsc_trace(rsc, "stop (%d) + migrate_from (%d)", target_stop_id, target_migrate_from_id); if (target_node && target_node->details->online) { native_add_running(rsc, target_node, data_set); } } else if (target_migrate_from == NULL) { /* We know there was a stop on the target, but there may not have been a * migrate_from (the stop could have happened before migrate_from was * scheduled or attempted). * * That means this could be a "dangling" migration. But first, check * whether there is a newer migrate_from or start on the source node -- * it's possible the failed migration was followed by a successful * full restart or migration in the reverse direction, in which case we * don't want to force it to stop. */ xmlNode *source_migrate_from = NULL; xmlNode *source_start = NULL; int source_migrate_to_id = pe__call_id(xml_op); source_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, source, NULL, TRUE, data_set); if (pe__call_id(source_migrate_from) > source_migrate_to_id) { return; } source_start = find_lrm_op(rsc->id, CRMD_ACTION_START, source, NULL, TRUE, data_set); if (pe__call_id(source_start) > source_migrate_to_id) { return; } // Mark node as having dangling migration so we can force a stop later rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } static void unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *source_stop = NULL; xmlNode *source_migrate_to = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be master. */ rsc->role = RSC_ROLE_STARTED; // Check for a stop on the source source_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, source, NULL, TRUE, data_set); // Check for a migrate_to on the source source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, source, target, TRUE, data_set); if ((source_stop == NULL) || (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) { /* There was no stop on the source, or a stop that happened before * migrate_to, so assume the resource is still active on the source (if * it is up). 
*/ pe_node_t *source_node = pe_find_node(data_set->nodes, source); if (source_node && source_node->details->online) { native_add_running(rsc, source_node, data_set); } } } static void record_failed_op(xmlNode *op, const pe_node_t *node, const pe_resource_t *rsc, pe_working_set_t *data_set) { xmlNode *xIter = NULL; const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); if (node->details->online == FALSE) { return; } for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) { crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname); return; } } crm_trace("Adding entry %s on %s", op_key, node->details->uname); crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id); add_node_copy(data_set->failed, op); } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } +static const char * +last_change_str(xmlNode *xml_op) +{ + time_t when; + const char *when_s = NULL; + + if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, + &when) == pcmk_ok) { + when_s = crm_now_string(&when); + if (when_s) { + // Skip day of week to make message shorter + when_s = strchr(when_s, ' '); + if (when_s) { + ++when_s; + } + } + } + return ((when_s && *when_s)? when_s : "unknown time"); +} + static void unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; - bool is_probe = FALSE; + bool is_probe = false; action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + const char *exit_reason = crm_element_value(xml_op, + XML_LRM_ATTR_EXIT_REASON); CRM_ASSERT(rsc); + CRM_CHECK(task != NULL, return); *last_failure = xml_op; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); - if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { - is_probe = TRUE; - pe_rsc_trace(rsc, "is a probe: %s", key); + if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) { + is_probe = true; } - if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { - crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d", + if (exit_reason == NULL) { + exit_reason = ""; + } + + if (is_not_set(data_set->flags, pe_flag_symmetric_cluster) + && (rc == PCMK_OCF_NOT_INSTALLED)) { + crm_trace("Unexpected result (%s%s%s) was recorded for " + "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", + services_ocf_exitcode_str(rc), + (*exit_reason? ": " : ""), exit_reason, + (is_probe? "probe" : task), rsc->id, node->details->uname, + last_change_str(xml_op), rc, ID(xml_op)); + } else { + crm_warn("Unexpected result (%s%s%s) was recorded for " + "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", + services_ocf_exitcode_str(rc), + (*exit_reason? ": " : ""), exit_reason, (is_probe? 
"probe" : task), rsc->id, node->details->uname, - services_ocf_exitcode_str(rc), rc); + last_change_str(xml_op), rc, ID(xml_op)); if (is_probe && (rc != PCMK_OCF_OK) && (rc != PCMK_OCF_NOT_RUNNING) && (rc != PCMK_OCF_RUNNING_MASTER)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the resource-discovery option for location constraints", rsc->id, node->details->uname); } record_failed_op(xml_op, node, rsc, data_set); - - } else { - crm_trace("Processing failed op %s for %s on %s: %s (%d)", - task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), - rc); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) || (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } - if (safe_str_eq(task, CRMD_ACTION_STOP)) { + if (!strcmp(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); - } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { + } else if (!strcmp(task, CRMD_ACTION_MIGRATE)) { unpack_migrate_to_failure(rsc, node, xml_op, data_set); - } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { + } else if (!strcmp(task, CRMD_ACTION_MIGRATED)) { unpack_migrate_from_failure(rsc, node, xml_op, data_set); - } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { + } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; - } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { + } else if (!strcmp(task, CRMD_ACTION_DEMOTE)) { if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; rsc->next_role = RSC_ROLE_STOPPED; } else if(rc == PCMK_OCF_NOT_RUNNING) { rsc->role = RSC_ROLE_STOPPED; } else { /* * Staying in master role would put the PE/TE into a loop. Setting * slave role is not dangerous because the resource will be stopped * as part of recovery, and any master promotion will be ordered * after that stop. */ rsc->role = RSC_ROLE_SLAVE; } } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; resource_t *fail_rsc = rsc; if (fail_rsc->parent) { resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_clone(parent) && is_not_set(parent->flags, pe_rsc_unique)) { /* For clone resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. 
*/ fail_rsc = parent; } } - crm_warn("Making sure %s doesn't come up again", fail_rsc->id); + crm_notice("%s will not be started under current conditions", + fail_rsc->id); /* make sure it doesn't come up again */ if (fail_rsc->allowed_nodes != NULL) { g_hash_table_destroy(fail_rsc->allowed_nodes); } fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } /*! * \internal * \brief Remap operation status based on action result * * Given an action result, determine an appropriate operation status for the * purposes of responding to the action (the status provided by the executor is * not directly usable since the executor does not know what was expected). * * \param[in,out] rsc Resource that operation history entry is for * \param[in] rc Actual return code of operation * \param[in] target_rc Expected return code of operation * \param[in] node Node where operation was executed * \param[in] xml_op Operation history entry XML from CIB status * \param[in,out] on_fail What should be done about the result * \param[in] data_set Current cluster working set * * \return Operation status based on return code and action info * \note This may update the resource's current and next role. */ static int determine_op_status( resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; + bool is_probe = false; int result = PCMK_LRM_OP_DONE; - const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); - - bool is_probe = FALSE; + const char *exit_reason = crm_element_value(xml_op, + XML_LRM_ATTR_EXIT_REASON); CRM_ASSERT(rsc); + CRM_CHECK(task != NULL, return PCMK_LRM_OP_ERROR); + + if (exit_reason == NULL) { + exit_reason = ""; + } crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); - if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { - is_probe = TRUE; + if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) { + is_probe = true; + task = "probe"; } if (target_rc < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the * target_rc in the transition key, which (along with the similar case * of a corrupted transition key in the CIB) will be reported to this * function as -1. Pacemaker 2.0+ does not support rolling upgrades from * those versions or processing of saved CIB files from those versions, * so we do not need to care much about this case. */ result = PCMK_LRM_OP_ERROR; crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", key, node->details->uname); } else if (target_rc != rc) { result = PCMK_LRM_OP_ERROR; - pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", + pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", key, node->details->uname, - services_ocf_exitcode_str(rc), rc, - services_ocf_exitcode_str(target_rc), target_rc); + target_rc, services_ocf_exitcode_str(target_rc), + rc, services_ocf_exitcode_str(rc), + (*exit_reason? ": " : ""), exit_reason); } switch (rc) { case PCMK_OCF_OK: - // @TODO Should this be (rc != target_rc)? 
if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { result = PCMK_LRM_OP_DONE; - pe_rsc_info(rsc, "Operation %s found resource %s active on %s", - task, rsc->id, node->details->uname); + pe_rsc_info(rsc, "Probe found %s active on %s at %s", + rsc->id, node->details->uname, + last_change_str(xml_op)); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe && (rc != target_rc)) { result = PCMK_LRM_OP_DONE; - pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s", - task, rsc->id, node->details->uname); + pe_rsc_info(rsc, + "Probe found %s active and promoted on %s at %s", + rsc->id, node->details->uname, + last_change_str(xml_op)); } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_DEGRADED_MASTER: case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_UNIMPLEMENT_FEATURE: if (interval_ms > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } // fall through case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: if (!pe_can_fence(data_set, node) - && safe_str_eq(task, CRMD_ACTION_STOP)) { + && !strcmp(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ - pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)", - rsc->id, task, services_ocf_exitcode_str(rc), rc); + pe_proc_err("No further recovery can be attempted for %s " + "because %s on %s failed (%s%s%s) at %s " + CRM_XS " rc=%d id=%s", rsc->id, task, + node->details->uname, services_ocf_exitcode_str(rc), + (*exit_reason? ": " : ""), exit_reason, + last_change_str(xml_op), rc, ID(xml_op)); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { - crm_info("Treating unknown return code %d for %s on %s as failure", - rc, key, node->details->uname); + crm_info("Treating unknown exit status %d from %s of %s " + "on %s at %s as failure", + rc, task, rsc->id, node->details->uname, + last_change_str(xml_op)); result = PCMK_LRM_OP_ERROR; } break; } return result; } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(xmlNode *xml_op, const char *task, pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { if (!strcmp(task, "start") || !strcmp(task, "monitor")) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. 
*/ pe__add_param_check(xml_op, rsc, node, pe_check_last_failure, data_set); } else { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, get_op_key(xml_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn, pe_working_set_t *data_set) { pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id); if (remote_node) { pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, data_set); order_actions(fence, action, pe_order_implies_then); } } static bool should_ignore_failure_timeout(pe_resource_t *rsc, xmlNode *xml_op, const char *task, guint interval_ms, bool is_last_failure, pe_working_set_t *data_set) { /* Clearing failures of recurring monitors has special concerns. The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if (rsc->remote_reconnect_ms && is_set(data_set->flags, pe_flag_stonith_enabled) && (interval_ms != 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && !remote_node->details->remote_was_fenced) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). 
* * \param[in] rsc Resource that operation happened to * \param[in] node Node that operation happened on * \param[in] rc Actual result of operation * \param[in] xml_op Operation history entry XML * \param[in] data_set Current working set * * \return TRUE if operation history entry is expired, FALSE otherwise */ static bool check_operation_expiry(pe_resource_t *rsc, pe_node_t *node, int rc, xmlNode *xml_op, pe_working_set_t *data_set) { bool expired = FALSE; bool is_last_failure = crm_ends_with(ID(xml_op), "_last_failure_0"); time_t last_run = 0; guint interval_ms = 0; int unexpired_fail_count = 0; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *clear_reason = NULL; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((rsc->failure_timeout > 0) && (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0)) { // Resource has a failure-timeout, and history entry has a timestamp time_t now = get_effective_time(data_set); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + rsc->failure_timeout)) && !should_ignore_failure_timeout(rsc, xml_op, task, interval_ms, is_last_failure, data_set)) { expired = TRUE; } // Does the resource as a whole have an unexpired fail count? unexpired_fail_count = pe_get_failcount(node, rsc, &last_failure, pe_fc_effective, xml_op, data_set); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" " last-failure@%lld", ID(xml_op), (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, rsc->failure_timeout, (long long) last_failure); last_failure += rsc->failure_timeout + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, data_set); } } if (expired) { if (pe_get_failcount(node, rsc, NULL, pe_fc_default, xml_op, data_set)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ expired = FALSE; } } else if (is_last_failure && rsc->remote_reconnect_ms) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(xml_op, task, rsc, node, data_set)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { // Schedule clearing of the fail count pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason, data_set); if (is_set(data_set->flags, pe_flag_stonith_enabled) && rsc->remote_reconnect_ms) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_loop() is done). 
*/ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", task, rsc->id); order_after_remote_fencing(clear_op, rsc, data_set); } } if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: // Don't expire probes that return these values expired = FALSE; break; } } return expired; } int pe__target_rc_from_xml(xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } static enum action_fail_response get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { int result = action_fail_recover; action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc, xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); CRM_ASSERT(xml_op); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { if (last_failure) { const char *op_key = get_op_key(xml_op); const char *last_failure_key = get_op_key(last_failure); if (safe_str_eq(op_key, last_failure_key)) { clear_past_failure = TRUE; } } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { unpack_migrate_to_success(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_recover: case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_reset_remote: if (rsc->remote_reconnect_ms == 0) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; } } } /*! 
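/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * The failure-timeout test in check_operation_expiry() above is plain
 * arithmetic on epoch times: a history entry is old enough to expire once
 * now >= last_run + failure_timeout, and while failures remain unexpired the
 * scheduler wants to be re-run just after last_failure + failure_timeout.
 * This sketch shows only that arithmetic; the function and parameter names
 * are invented for illustration.
 */
#include <stdbool.h>
#include <time.h>

static bool
sketch_entry_expired(time_t now, time_t last_run, time_t last_failure,
                     int failure_timeout, time_t *recheck)
{
    if (failure_timeout <= 0) {
        return false;                   /* no failure-timeout configured */
    }
    if ((last_failure > 0) && (now < (last_failure + failure_timeout + 1))) {
        *recheck = last_failure + failure_timeout + 1;  /* re-check then */
    }
    return now >= (last_run + failure_timeout);
}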
* \internal * \brief Remap informational monitor results to usual values * * Certain OCF result codes are for providing extended information to the * user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by pacemaker. * * \param[in] rc Actual result of a monitor action * \param[in] xml_op Operation history XML * \param[in] node Node that operation happened on * \param[in] rsc Resource that operation happened to * \param[in] data_set Cluster working set * * \return Result code that pacemaker should use * * \note If the result is remapped, and the node is not shutting down or failed, * the operation will be recorded in the data set's list of failed * operations, to highlight it for the user. */ static int remap_monitor_rc(int rc, xmlNode *xml_op, const pe_node_t *node, const pe_resource_t *rsc, pe_working_set_t *data_set) { int remapped_rc = rc; switch (rc) { case PCMK_OCF_DEGRADED: remapped_rc = PCMK_OCF_OK; break; case PCMK_OCF_DEGRADED_MASTER: remapped_rc = PCMK_OCF_RUNNING_MASTER; break; default: break; } if (rc != remapped_rc) { crm_trace("Remapping monitor result %d to %d", rc, remapped_rc); if (!node->details->shutdown || node->details->online) { record_failed_op(xml_op, node, rsc, data_set); } } return remapped_rc; } static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *on_fail, pe_working_set_t *data_set) { - int task_id = 0; - - const char *task = NULL; - const char *task_key = NULL; - int rc = 0; + int task_id = 0; + int target_rc = 0; int status = PCMK_LRM_OP_UNKNOWN; - int target_rc = pe__target_rc_from_xml(xml_op); guint interval_ms = 0; - + const char *task = NULL; + const char *task_key = NULL; + const char *exit_reason = NULL; bool expired = FALSE; resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; - CRM_CHECK(rsc != NULL, return); - CRM_CHECK(node != NULL, return); - CRM_CHECK(xml_op != NULL, return); + CRM_CHECK(rsc && node && xml_op, return); + target_rc = pe__target_rc_from_xml(xml_op); task_key = get_op_key(xml_op); - task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); + if (exit_reason == NULL) { + exit_reason = ""; + } crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); CRM_CHECK(task != NULL, return); CRM_CHECK(status <= PCMK_LRM_OP_INVALID, return); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return); - if (safe_str_eq(task, CRMD_ACTION_NOTIFY) || - safe_str_eq(task, CRMD_ACTION_METADATA)) { + if (!strcmp(task, CRMD_ACTION_NOTIFY) || + !strcmp(task, CRMD_ACTION_METADATA)) { /* safe to ignore these */ return; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." 
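/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * remap_monitor_rc() above folds the "degraded but still working" monitor
 * results back into their healthy equivalents before the rest of the
 * scheduler sees them.  This sketch shows the same idea; the enum values are
 * illustrative, not the real Pacemaker exit codes.
 */
enum sketch_ocf {
    SKETCH_RC_OK,
    SKETCH_RC_RUNNING_MASTER,
    SKETCH_RC_DEGRADED,
    SKETCH_RC_DEGRADED_MASTER,
};

static enum sketch_ocf
sketch_remap_monitor_rc(enum sketch_ocf rc)
{
    switch (rc) {
        case SKETCH_RC_DEGRADED:
            return SKETCH_RC_OK;                /* usable, but worth flagging */
        case SKETCH_RC_DEGRADED_MASTER:
            return SKETCH_RC_RUNNING_MASTER;
        default:
            return rc;                          /* everything else unchanged */
    }
}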
" Further action depends on the value of the stop's on-fail attribute", node->details->uname, rsc->id); } /* It should be possible to call remap_monitor_rc() first then call * check_operation_expiry() only if rc != target_rc, because there should * never be a fail count without at least one unexpected result in the * resource history. That would be more efficient by avoiding having to call * check_operation_expiry() for expected results. * * However, we do have such configurations in the scheduler regression * tests, even if it shouldn't be possible with the current code. It's * probably a good idea anyway, but that would require updating the test * inputs to something currently possible. */ if ((status != PCMK_LRM_OP_NOT_INSTALLED) && check_operation_expiry(rsc, node, rc, xml_op, data_set)) { expired = TRUE; } if (!strcmp(task, CRMD_ACTION_STATUS)) { rc = remap_monitor_rc(rc, xml_op, node, rsc, data_set); } if (expired && (rc != target_rc)) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); if (interval_ms == 0) { crm_notice("Ignoring expired %s failure on %s " CRM_XS " actual=%d expected=%d magic=%s", task_key, node->details->uname, rc, target_rc, magic); goto done; } else if(node->details->online && node->details->unclean == FALSE) { /* Reschedule the recurring monitor. CancelXmlOp() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so check_action_definition() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has an op-digest (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). */ crm_notice("Rescheduling %s after failure expired on %s " CRM_XS " actual=%d expected=%d magic=%s", task_key, node->details->uname, rc, target_rc, magic); crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } /* If the executor reported an operation status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. */ if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); + pe_rsc_trace(rsc, "Remapped %s status to %d", task_key, status); } - pe_rsc_trace(rsc, "Handling status: %d", status); switch (status) { case PCMK_LRM_OP_CANCELLED: - /* do nothing?? */ - pe_err("Don't know what to do for cancelled ops yet"); + // Should never happen + pe_err("Resource history contains cancellation '%s' " + "(%s of %s on %s at %s)", + ID(xml_op), task, rsc->id, node->details->uname, + last_change_str(xml_op)); break; case PCMK_LRM_OP_PENDING: - if (safe_str_eq(task, CRMD_ACTION_START)) { + if (!strcmp(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); - } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { + } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; - } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { + } else if (!strcmp(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. 
*/ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { - if (safe_str_eq(task, CRMD_ACTION_STATUS) && (interval_ms == 0)) { + if ((interval_ms != 0) || strcmp(task, CRMD_ACTION_STATUS)) { + rsc->pending_task = strdup(task); + rsc->pending_node = node; + } else { /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that - * behavior, uncomment this and the corresponding part of + * behavior, enable the below and the corresponding part of * native.c:native_pending_task(). */ - /*rsc->pending_task = strdup("probe");*/ - /*rsc->pending_node = node;*/ - } else { - rsc->pending_task = strdup(task); +#if 0 + rsc->pending_task = strdup("probe"); rsc->pending_node = node; +#endif } } break; case PCMK_LRM_OP_DONE: - pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname); + pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", + task, rsc->id, node->details->uname, + last_change_str(xml_op), ID(xml_op)); update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { - crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: " - "Resource agent doesn't exist", - task_key, status, rc, node->details->uname); + crm_warn("Cannot ignore failed %s of %s on %s: " + "Resource agent doesn't exist " + CRM_XS " status=%d rc=%d id=%s", + task, rsc->id, node->details->uname, status, rc, + ID(xml_op)); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_CONNECTED: if (pe__is_guest_or_remote_node(node) && is_set(node->details->remote_rsc->flags, pe_rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } // fall through case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: case PCMK_LRM_OP_INVALID: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container - && safe_str_eq(task, CRMD_ACTION_STOP))) { + && !strcmp(task, CRMD_ACTION_STOP))) { - crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded", - task_key, rc, node->details->uname); + crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s " + "succeeded " CRM_XS " rc=%d id=%s", + task, services_ocf_exitcode_str(rc), + (*exit_reason? 
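/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * Several log messages in this function append the executor's exit reason
 * only when one was given, via the "(%s%s%s)" format paired with
 * (*exit_reason? ": " : "").  The sketch below shows the same trick with
 * snprintf(); the function name and buffer handling are arbitrary.
 */
#include <stdio.h>

static void
sketch_describe_result(char *buf, size_t len,
                       const char *exitcode_str, const char *reason)
{
    if (reason == NULL) {
        reason = "";                    /* avoid printing "(null)" */
    }
    /* yields e.g. "not running" or "not running: port 80 not listening" */
    snprintf(buf, len, "%s%s%s", exitcode_str, (*reason? ": " : ""), reason);
}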
": " : ""), exit_reason, rsc->id, + node->details->uname, last_change_str(xml_op), rc, + ID(xml_op)); update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); record_failed_op(xml_op, node, rsc, data_set); if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, - "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)", + "Preventing %s from restarting on %s because " + "of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s", parent->id, node->details->uname, - task, services_ocf_exitcode_str(rc), rc); - + services_ocf_exitcode_str(rc), + (*exit_reason? ": " : ""), exit_reason, + rc, ID(xml_op)); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { - crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)", - parent->id, task, services_ocf_exitcode_str(rc), rc); - + crm_err("Preventing %s from restarting anywhere because " + "of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s", + parent->id, services_ocf_exitcode_str(rc), + (*exit_reason? ": " : ""), exit_reason, + rc, ID(xml_op)); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role)); } static void add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite, pe_working_set_t *data_set) { const char *cluster_name = NULL; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = pe_node_attribute_raw(node, "site-name"); if (site_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(cluster_name)); } } } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, 
XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child_element(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next_element(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/tools/crm_mon_output.c b/tools/crm_mon_output.c index 9a9f3ce74b..d012ef8d5d 100644 --- a/tools/crm_mon_output.c +++ b/tools/crm_mon_output.c @@ -1,1203 +1,1205 @@ /* * Copyright 2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" static char * time_t_string(time_t when) { crm_time_t *crm_when = crm_time_new(NULL); char *buf = NULL; crm_time_set_timet(crm_when, &when); buf = crm_time_as_string(crm_when, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_free(crm_when); return buf; } static char * failed_action_string(xmlNodePtr xml_op) { const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int rc = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), "0"); int status = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), "0"); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); time_t last_change = 0; if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change) == pcmk_ok) { char *time = time_t_string(last_change); char *buf = crm_strdup_printf("%s on %s '%s' (%d): call=%s, status='%s', exitreason='%s', last-rc-change='%s', queued=%sms, exec=%sms", op_key ? op_key : ID(xml_op), crm_element_value(xml_op, XML_ATTR_UNAME), services_ocf_exitcode_str(rc), rc, crm_element_value(xml_op, XML_LRM_ATTR_CALLID), services_lrm_status_str(status), exit_reason ? exit_reason : "none", time, crm_element_value(xml_op, XML_RSC_OP_T_QUEUE), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); free(time); return buf; } else { return crm_strdup_printf("%s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'", op_key ? op_key : ID(xml_op), crm_element_value(xml_op, XML_ATTR_UNAME), services_ocf_exitcode_str(rc), rc, crm_element_value(xml_op, XML_LRM_ATTR_CALLID), services_lrm_status_str(status), exit_reason ? exit_reason : "none"); } } static char * last_changed_string(const char *last_written, const char *user, const char *client, const char *origin) { if (last_written != NULL || user != NULL || client != NULL || origin != NULL) { return crm_strdup_printf("%s%s%s%s%s%s%s", last_written ? last_written : "", user ? " by " : "", user ? user : "", client ? " via " : "", client ? client : "", origin ? " on " : "", origin ? 
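/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * time_t_string() above goes through the crm_time API; outside Pacemaker the
 * same "epoch to readable local timestamp" conversion is usually done with
 * localtime_r() and strftime(), as in this sketch.
 */
#include <time.h>

static void
sketch_format_epoch(time_t when, char *buf, size_t len)
{
    struct tm tm;

    localtime_r(&when, &tm);
    /* e.g. "Wed Dec 11 12:00:00 2019 +0000" */
    strftime(buf, len, "%a %b %d %H:%M:%S %Y %z", &tm);
}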
origin : ""); } else { return strdup(""); } } static char * op_history_string(xmlNode *xml_op, const char *task, const char *interval_ms_s, int rc, unsigned int mon_ops) { const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); char *interval_str = NULL; char *buf = NULL; if (interval_ms_s && safe_str_neq(interval_ms_s, "0")) { char *pair = pcmk_format_nvpair("interval", interval_ms_s, "ms"); interval_str = crm_strdup_printf(" %s", pair); free(pair); } if (is_set(mon_ops, mon_op_print_timing)) { char *last_change_str = NULL; char *last_run_str = NULL; char *exec_str = NULL; char *queue_str = NULL; const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *time = pcmk_format_named_time(XML_RSC_OP_LAST_CHANGE, epoch); last_change_str = crm_strdup_printf(" %s", time); free(time); } if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_RUN, &epoch) == pcmk_ok) && (epoch > 0)) { char *time = pcmk_format_named_time(XML_RSC_OP_LAST_RUN, epoch); last_run_str = crm_strdup_printf(" %s", time); free(time); } value = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); if (value) { char *pair = pcmk_format_nvpair(XML_RSC_OP_T_EXEC, value, "ms"); exec_str = crm_strdup_printf(" %s", pair); free(pair); } value = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); if (value) { char *pair = pcmk_format_nvpair(XML_RSC_OP_T_QUEUE, value, "ms"); queue_str = crm_strdup_printf(" %s", pair); free(pair); } buf = crm_strdup_printf("(%s) %s:%s%s%s%s%s rc=%d (%s)", call, task, interval_str ? interval_str : "", last_change_str ? last_change_str : "", last_run_str ? last_run_str : "", exec_str ? exec_str : "", queue_str ? queue_str : "", rc, services_ocf_exitcode_str(rc)); if (last_change_str) { free(last_change_str); } if (last_run_str) { free(last_run_str); } if (exec_str) { free(exec_str); } if (queue_str) { free(queue_str); } } else { buf = crm_strdup_printf("(%s) %s:%s", call, task, interval_str ? interval_str : ""); } if (interval_str) { free(interval_str); } return buf; } static char * resource_history_string(resource_t *rsc, const char *rsc_id, gboolean all, int failcount, time_t last_failure) { char *buf = NULL; if (rsc == NULL) { buf = crm_strdup_printf("%s: orphan", rsc_id); } else if (all || failcount || last_failure > 0) { char *failcount_s = failcount > 0 ? crm_strdup_printf(" %s=%d", CRM_FAIL_COUNT_PREFIX, failcount) : strdup(""); char *lastfail_s = last_failure > 0 ? crm_strdup_printf(" %s=%s", CRM_LAST_FAILURE_PREFIX, crm_now_string(&last_failure)) : strdup(""); buf = crm_strdup_printf("%s: migration-threshold=%d%s%s", rsc_id, rsc->migration_threshold, failcount_s, lastfail_s); free(failcount_s); free(lastfail_s); } else { buf = crm_strdup_printf("%s:", rsc_id); } return buf; } static int ban_html(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); unsigned int mon_ops = va_arg(args, unsigned int); char *node_name = get_node_display_name(pe_node, mon_ops); char *buf = crm_strdup_printf("%s\tprevents %s from running %son %s", location->id, location->rsc_lh->id, location->role_filter == RSC_ROLE_MASTER ? 
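/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * last_changed_string() above assembles "TIME by USER via CLIENT on ORIGIN"
 * while silently dropping any part that is NULL, by pairing each value with
 * its own prefix in a single format string.  A standalone version of the
 * same idiom:
 */
#include <stdio.h>

static void
sketch_last_changed(char *buf, size_t len, const char *when,
                    const char *user, const char *client, const char *origin)
{
    snprintf(buf, len, "%s%s%s%s%s%s%s",
             (when? when : ""),
             (user? " by " : ""), (user? user : ""),
             (client? " via " : ""), (client? client : ""),
             (origin? " on " : ""), (origin? origin : ""));
}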
"as Master " : "", node_name); pcmk__output_create_html_node(out, "li", NULL, NULL, buf); free(node_name); free(buf); return 0; } static int ban_text(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); unsigned int mon_ops = va_arg(args, unsigned int); char *node_name = get_node_display_name(pe_node, mon_ops); out->list_item(out, NULL, "%s\tprevents %s from running %son %s", location->id, location->rsc_lh->id, location->role_filter == RSC_ROLE_MASTER ? "as Master " : "", node_name); free(node_name); return 0; } static int ban_xml(pcmk__output_t *out, va_list args) { xmlNodePtr node = pcmk__output_create_xml_node(out, "ban"); pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); char *weight_s = crm_itoa(pe_node->weight); xmlSetProp(node, (pcmkXmlStr) "id", (pcmkXmlStr) location->id); xmlSetProp(node, (pcmkXmlStr) "resource", (pcmkXmlStr) location->rsc_lh->id); xmlSetProp(node, (pcmkXmlStr) "node", (pcmkXmlStr) pe_node->details->uname); xmlSetProp(node, (pcmkXmlStr) "weight", (pcmkXmlStr) weight_s); xmlSetProp(node, (pcmkXmlStr) "master_only", (pcmkXmlStr) (location->role_filter == RSC_ROLE_MASTER ? "true" : "false")); free(weight_s); return 0; } static int cluster_counts_html(pcmk__output_t *out, va_list args) { xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "li"); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "li"); unsigned int nnodes = va_arg(args, unsigned int); unsigned int nresources = va_arg(args, unsigned int); unsigned int ndisabled = va_arg(args, unsigned int); unsigned int nblocked = va_arg(args, unsigned int); char *nnodes_str = crm_strdup_printf("%d node%s configured", nnodes, s_if_plural(nnodes)); pcmk_create_html_node(nodes_node, "span", NULL, NULL, nnodes_str); free(nnodes_str); if (ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, s_if_plural(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); s = crm_strdup_printf(", %d ", nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); - pcmk_create_html_node(resources_node, "span", NULL, NULL, " from starting due to failure)"); + pcmk_create_html_node(resources_node, "span", NULL, NULL, + " from further action due to failure)"); } else if (ndisabled && !nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, s_if_plural(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, ")"); } else if (!ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, s_if_plural(nresources), nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); - pcmk_create_html_node(resources_node, "span", NULL, NULL, " from starting due to failure)"); + pcmk_create_html_node(resources_node, "span", NULL, NULL, + " from further action due to failure)"); } else { char *s = crm_strdup_printf("%d resource instance%s configured", nresources, s_if_plural(nresources)); 
pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); } return 0; } static int cluster_counts_text(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); unsigned int nresources = va_arg(args, unsigned int); unsigned int ndisabled = va_arg(args, unsigned int); unsigned int nblocked = va_arg(args, unsigned int); out->list_item(out, NULL, "%d node%s configured", nnodes, s_if_plural(nnodes)); if (ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED, %d BLOCKED from " "further action due to failure)", nresources, s_if_plural(nresources), ndisabled, nblocked); } else if (ndisabled && !nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED)", nresources, s_if_plural(nresources), ndisabled); } else if (!ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d BLOCKED from further action " "due to failure)", nresources, s_if_plural(nresources), nblocked); } else { out->list_item(out, NULL, "%d resource instance%s configured", nresources, s_if_plural(nresources)); } return 0; } static int cluster_counts_xml(pcmk__output_t *out, va_list args) { xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "nodes_configured"); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "resources_configured"); unsigned int nnodes = va_arg(args, unsigned int); unsigned int nresources = va_arg(args, unsigned int); unsigned int ndisabled = va_arg(args, unsigned int); unsigned int nblocked = va_arg(args, unsigned int); char *s = crm_itoa(nnodes); xmlSetProp(nodes_node, (pcmkXmlStr) "number", (pcmkXmlStr) s); free(s); s = crm_itoa(nresources); xmlSetProp(resources_node, (pcmkXmlStr) "number", (pcmkXmlStr) s); free(s); s = crm_itoa(ndisabled); xmlSetProp(resources_node, (pcmkXmlStr) "disabled", (pcmkXmlStr) s); free(s); s = crm_itoa(nblocked); xmlSetProp(resources_node, (pcmkXmlStr) "blocked", (pcmkXmlStr) s); free(s); return 0; } static int cluster_dc_html(pcmk__output_t *out, va_list args) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li"); node_t *dc = va_arg(args, node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); const char *dc_name = va_arg(args, const char *); pcmk_create_html_node(node, "span", NULL, "bold", "Current DC: "); if (dc) { if (crm_is_true(quorum)) { char *buf = crm_strdup_printf("%s (version %s) - partition with quorum", dc_name, dc_version_s ? dc_version_s : "unknown"); pcmk_create_html_node(node, "span", NULL, NULL, buf); free(buf); } else { char *buf = crm_strdup_printf("%s (version %s) - partition", dc_name, dc_version_s ? dc_version_s : "unknown"); pcmk_create_html_node(node, "span", NULL, NULL, buf); free(buf); pcmk_create_html_node(node, "span", NULL, "warning", "WITHOUT"); pcmk_create_html_node(node, "span", NULL, NULL, "quorum"); } } else { pcmk_create_html_node(node ,"span", NULL, "warning", "NONE"); } return 0; } static int cluster_dc_text(pcmk__output_t *out, va_list args) { node_t *dc = va_arg(args, node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); const char *dc_name = va_arg(args, const char *); if (dc) { out->list_item(out, "Current DC", "%s (version %s) - partition %s quorum", dc_name, dc_version_s ? dc_version_s : "unknown", crm_is_true(quorum) ? 
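/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * The *_xml formatters in this file (cluster_counts_xml() above, ban_xml(),
 * and so on) emit everything as element attributes via xmlSetProp(),
 * converting numbers to strings first.  This standalone libxml2 example
 * shows the same pattern; the element and attribute values are just examples.
 */
#include <stdio.h>
#include <libxml/tree.h>

int
main(void)
{
    char nnodes[16];
    xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
    xmlNodePtr elem = xmlNewNode(NULL, BAD_CAST "nodes_configured");

    snprintf(nnodes, sizeof(nnodes), "%d", 3);          /* numeric -> string */
    xmlSetProp(elem, BAD_CAST "number", BAD_CAST nnodes);

    xmlDocSetRootElement(doc, elem);
    xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);         /* dump to stdout */
    xmlFreeDoc(doc);
    return 0;
}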
"with" : "WITHOUT"); } else { out->list_item(out, "Current DC", "NONE"); } return 0; } static int cluster_dc_xml(pcmk__output_t *out, va_list args) { xmlNodePtr node = pcmk__output_create_xml_node(out, "current_dc"); node_t *dc = va_arg(args, node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); if (dc) { xmlSetProp(node, (pcmkXmlStr) "present", (pcmkXmlStr) "true"); xmlSetProp(node, (pcmkXmlStr) "version", (pcmkXmlStr) (dc_version_s ? dc_version_s : "")); xmlSetProp(node, (pcmkXmlStr) "name", (pcmkXmlStr) dc->details->uname); xmlSetProp(node, (pcmkXmlStr) "id", (pcmkXmlStr) dc->details->id); xmlSetProp(node, (pcmkXmlStr) "with_quorum", (pcmkXmlStr) (crm_is_true(quorum) ? "true" : "false")); } else { xmlSetProp(node, (pcmkXmlStr) "present", (pcmkXmlStr) "false"); } return 0; } static int cluster_options_html(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); /* Kind of a hack - close the list started by print_cluster_summary so we * can put all the options in their own list, but just for HTML output. */ out->end_list(out); /* And then this list will be closed by print_cluster_summary since it * wants to close the list it created unconditionally. */ out->begin_list(out, NULL, NULL, "Config Options"); out->list_item(out, NULL, "STONITH of failed nodes %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); out->list_item(out, NULL, "Cluster is %s", is_set(data_set->flags, pe_flag_symmetric_cluster) ? "symmetric" : "asymmetric"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: out->list_item(out, NULL, "No Quorum policy: Freeze resources"); break; case no_quorum_stop: out->list_item(out, NULL, "No Quorum policy: Stop ALL resources"); break; case no_quorum_ignore: out->list_item(out, NULL, "No Quorum policy: Ignore"); break; case no_quorum_suicide: out->list_item(out, NULL, "No Quorum policy: Suicide"); break; } if (is_set(data_set->flags, pe_flag_maintenance_mode)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li"); pcmk_create_html_node(node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will not attempt to start, stop, or recover services)"); } else { out->list_item(out, NULL, "Resource management enabled"); } return 0; } static int cluster_options_text(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); if (is_set(data_set->flags, pe_flag_maintenance_mode)) { fprintf(out->dest, "\n *** Resource management is DISABLED ***"); fprintf(out->dest, "\n The cluster will not attempt to start, stop or recover services"); fprintf(out->dest, "\n"); } return 0; } static int cluster_options_xml(pcmk__output_t *out, va_list args) { xmlNodePtr node = pcmk__output_create_xml_node(out, "cluster_options"); pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); xmlSetProp(node, (pcmkXmlStr) "stonith-enabled", (pcmkXmlStr) (is_set(data_set->flags, pe_flag_stonith_enabled) ? "true" : "false")); xmlSetProp(node, (pcmkXmlStr) "symmetric-cluster", (pcmkXmlStr) (is_set(data_set->flags, pe_flag_symmetric_cluster) ? 
"true" : "false")); switch (data_set->no_quorum_policy) { case no_quorum_freeze: xmlSetProp(node, (pcmkXmlStr) "no-quorum-policy", (pcmkXmlStr) "freeze"); break; case no_quorum_stop: xmlSetProp(node, (pcmkXmlStr) "no-quorum-policy", (pcmkXmlStr) "stop"); break; case no_quorum_ignore: xmlSetProp(node, (pcmkXmlStr) "no-quorum-policy", (pcmkXmlStr) "ignore"); break; case no_quorum_suicide: xmlSetProp(node, (pcmkXmlStr) "no-quorum-policy", (pcmkXmlStr) "suicide"); break; } xmlSetProp(node, (pcmkXmlStr) "maintenance-mode", (pcmkXmlStr) (is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false")); return 0; } static int cluster_stack_html(pcmk__output_t *out, va_list args) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li"); const char *stack_s = va_arg(args, const char *); pcmk_create_html_node(node, "span", NULL, "bold", "Stack: "); pcmk_create_html_node(node, "span", NULL, NULL, stack_s); return 0; } static int cluster_stack_text(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); out->list_item(out, "Stack", "%s", stack_s); return 0; } static int cluster_stack_xml(pcmk__output_t *out, va_list args) { xmlNodePtr node = pcmk__output_create_xml_node(out, "stack"); const char *stack_s = va_arg(args, const char *); xmlSetProp(node, (pcmkXmlStr) "type", (pcmkXmlStr) stack_s); return 0; } static int cluster_times_html(pcmk__output_t *out, va_list args) { xmlNodePtr updated_node = pcmk__output_create_xml_node(out, "li"); xmlNodePtr changed_node = pcmk__output_create_xml_node(out, "li"); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *buf = last_changed_string(last_written, user, client, origin); pcmk_create_html_node(updated_node, "span", NULL, "bold", "Last updated: "); pcmk_create_html_node(updated_node, "span", NULL, NULL, crm_now_string(NULL)); pcmk_create_html_node(changed_node, "span", NULL, "bold", "Last change: "); pcmk_create_html_node(changed_node, "span", NULL, NULL, buf); free(buf); return 0; } static int cluster_times_xml(pcmk__output_t *out, va_list args) { xmlNodePtr updated_node = pcmk__output_create_xml_node(out, "last_update"); xmlNodePtr changed_node = pcmk__output_create_xml_node(out, "last_change"); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); xmlSetProp(updated_node, (pcmkXmlStr) "time", (pcmkXmlStr) crm_now_string(NULL)); xmlSetProp(changed_node, (pcmkXmlStr) "time", (pcmkXmlStr) (last_written ? last_written : "")); xmlSetProp(changed_node, (pcmkXmlStr) "user", (pcmkXmlStr) (user ? user : "")); xmlSetProp(changed_node, (pcmkXmlStr) "client", (pcmkXmlStr) (client ? client : "")); xmlSetProp(changed_node, (pcmkXmlStr) "origin", (pcmkXmlStr) (origin ? 
origin : "")); return 0; } static int cluster_times_text(pcmk__output_t *out, va_list args) { const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *buf = last_changed_string(last_written, user, client, origin); out->list_item(out, "Last updated", "%s", crm_now_string(NULL)); out->list_item(out, "Last change", " %s", buf); free(buf); return 0; } static int failed_action_console(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); char *s = failed_action_string(xml_op); curses_indented_printf(out, "%s\n", s); free(s); return 0; } static int failed_action_html(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); char *s = failed_action_string(xml_op); pcmk__output_create_html_node(out, "li", NULL, NULL, s); free(s); return 0; } static int failed_action_text(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); char *s = failed_action_string(xml_op); pcmk__indented_printf(out, "%s\n", s); free(s); return 0; } static int failed_action_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); int rc = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), "0"); int status = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), "0"); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); char *rc_s = crm_itoa(rc); char *reason_s = crm_xml_escape(exit_reason ? exit_reason : "none"); xmlNodePtr node = pcmk__output_create_xml_node(out, "failure"); xmlSetProp(node, (pcmkXmlStr) (op_key ? "op_key" : "id"), (pcmkXmlStr) (op_key ? 
op_key : "id")); xmlSetProp(node, (pcmkXmlStr) "node", (pcmkXmlStr) crm_element_value(xml_op, XML_ATTR_UNAME)); xmlSetProp(node, (pcmkXmlStr) "exitstatus", (pcmkXmlStr) services_ocf_exitcode_str(rc)); xmlSetProp(node, (pcmkXmlStr) "exitreason", (pcmkXmlStr) reason_s); xmlSetProp(node, (pcmkXmlStr) "exitcode", (pcmkXmlStr) rc_s); xmlSetProp(node, (pcmkXmlStr) "call", (pcmkXmlStr) crm_element_value(xml_op, XML_LRM_ATTR_CALLID)); xmlSetProp(node, (pcmkXmlStr) "status", (pcmkXmlStr) services_lrm_status_str(status)); if (last) { char *s = crm_itoa(crm_parse_ms(crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS))); char *rc_change = time_t_string(crm_parse_int(last, "0")); xmlSetProp(node, (pcmkXmlStr) "last-rc-change", (pcmkXmlStr) rc_change); xmlSetProp(node, (pcmkXmlStr) "queued", (pcmkXmlStr) crm_element_value(xml_op, XML_RSC_OP_T_QUEUE)); xmlSetProp(node, (pcmkXmlStr) "exec", (pcmkXmlStr) crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); xmlSetProp(node, (pcmkXmlStr) "interval", (pcmkXmlStr) s); xmlSetProp(node, (pcmkXmlStr) "task", (pcmkXmlStr) crm_element_value(xml_op, XML_LRM_ATTR_TASK)); free(s); free(rc_change); } free(reason_s); free(rc_s); return 0; } static int node_html(pcmk__output_t *out, va_list args) { node_t *node = va_arg(args, node_t *); unsigned int mon_ops = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); char *node_name = get_node_display_name(node, mon_ops); char *buf = crm_strdup_printf("Node: %s", node_name); int print_opts = get_resource_display_options(mon_ops, mon_output_html); if (full) { xmlNodePtr item_node = pcmk__output_create_xml_node(out, "li"); pcmk_create_html_node(item_node, "span", NULL, NULL, buf); if (node->details->standby_onfail && node->details->online) { pcmk_create_html_node(item_node, "span", NULL, "standby", " standby (on-fail)"); } else if (node->details->standby && node->details->online) { char *s = crm_strdup_printf(" standby%s", node->details->running_rsc ? 
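/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * node_html() picks exactly one status label per node, and the order of its
 * if/else ladder (continued just below) is what gives standby and
 * maintenance precedence over plain online/offline.  This sketch condenses
 * that precedence, ignoring the standby-on-fail and active-resources
 * refinements shown in the surrounding code.
 */
#include <stdbool.h>

static const char *
sketch_node_status(bool online, bool standby, bool maintenance)
{
    if (standby) {
        return online? "standby" : "OFFLINE (standby)";
    }
    if (maintenance) {
        return online? "maintenance" : "OFFLINE (maintenance)";
    }
    return online? "online" : "OFFLINE";
}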
" (with active resources)" : ""); pcmk_create_html_node(item_node, "span", NULL, " standby", s); free(s); } else if (node->details->standby) { pcmk_create_html_node(item_node, "span", NULL, "offline", " OFFLINE (standby)"); } else if (node->details->maintenance && node->details->online) { pcmk_create_html_node(item_node, "span", NULL, "maint", " maintenance"); } else if (node->details->maintenance) { pcmk_create_html_node(item_node, "span", NULL, "offline", " OFFLINE (maintenance)"); } else if (node->details->online) { pcmk_create_html_node(item_node, "span", NULL, "online", " online"); } else { pcmk_create_html_node(item_node, "span", NULL, "offline", " OFFLINE"); } if (is_set(mon_ops, mon_op_print_brief) && is_set(mon_ops, mon_op_group_by_node)) { out->begin_list(out, NULL, NULL, NULL); pe__rscs_brief_output(out, node->details->running_rsc, print_opts | pe_print_rsconly, FALSE); out->end_list(out); } else if (is_set(mon_ops, mon_op_group_by_node)) { GListPtr lpc2 = NULL; out->begin_list(out, NULL, NULL, NULL); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; out->message(out, crm_map_element_name(rsc->xml), print_opts | pe_print_rsconly, rsc); } out->end_list(out); } } else { out->begin_list(out, NULL, NULL, "%s", buf); } free(buf); free(node_name); return 0; } static int node_text(pcmk__output_t *out, va_list args) { node_t *node = va_arg(args, node_t *); unsigned int mon_ops = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); if (full) { const char *node_mode = va_arg(args, const char *); char *node_name = get_node_display_name(node, mon_ops); int print_opts = get_resource_display_options(mon_ops, mon_output_xml); char *buf = NULL; /* Print the node name and status */ if (pe__is_guest_node(node)) { buf = crm_strdup_printf("GuestNode %s: %s", node_name, node_mode); } else if (pe__is_remote_node(node)) { buf = crm_strdup_printf("RemoteNode %s: %s", node_name, node_mode); } else { buf = crm_strdup_printf("Node %s: %s", node_name, node_mode); } /* If we're grouping by node, print its resources */ if (is_set(mon_ops, mon_op_group_by_node)) { out->begin_list(out, NULL, NULL, "%s", buf); out->begin_list(out, NULL, NULL, "Resources"); if (is_set(mon_ops, mon_op_print_brief)) { pe__rscs_brief_output(out, node->details->running_rsc, print_opts | pe_print_rsconly, FALSE); } else { GListPtr gIter2 = NULL; for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; out->message(out, crm_map_element_name(rsc->xml), print_opts | pe_print_rsconly, rsc); } } out->end_list(out); out->end_list(out); } else { out->list_item(out, NULL, "%s", buf); } free(buf); free(node_name); } else { out->begin_list(out, NULL, NULL, "Node: %s", get_node_display_name(node, mon_ops)); } return 0; } static int node_xml(pcmk__output_t *out, va_list args) { node_t *node = va_arg(args, node_t *); unsigned int mon_ops G_GNUC_UNUSED = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); if (full) { const char *node_type = "unknown"; int print_opts = get_resource_display_options(mon_ops, mon_output_xml); char *length_s = crm_itoa(g_list_length(node->details->running_rsc)); switch (node->details->type) { case node_member: node_type = "member"; break; case node_remote: node_type = "remote"; break; case node_ping: node_type = "ping"; break; } pe__name_and_nvpairs_xml(out, true, "node", 13, "name", node->details->uname, "id", node->details->id, "online", 
node->details->online ? "true" : "false", "standby", node->details->standby ? "true" : "false", "standby_onfail", node->details->standby_onfail ? "true" : "false", "maintenance", node->details->maintenance ? "true" : "false", "pending", node->details->pending ? "true" : "false", "unclean", node->details->unclean ? "true" : "false", "shutdown", node->details->shutdown ? "true" : "false", "expected_up", node->details->expected_up ? "true" : "false", "is_dc", node->details->is_dc ? "true" : "false", "resources_running", length_s, "type", node_type); if (pe__is_guest_node(node)) { xmlNodePtr xml_node = pcmk__output_xml_peek_parent(out); xmlSetProp(xml_node, (pcmkXmlStr) "id_as_resource", (pcmkXmlStr) node->details->remote_rsc->container->id); } if (is_set(mon_ops, mon_op_group_by_node)) { GListPtr lpc = NULL; for (lpc = node->details->running_rsc; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; out->message(out, crm_map_element_name(rsc->xml), print_opts | pe_print_rsconly, rsc); } } free(length_s); out->end_list(out); } else { xmlNodePtr parent = pcmk__output_xml_create_parent(out, "node"); xmlSetProp(parent, (pcmkXmlStr) "name", (pcmkXmlStr) node->details->uname); } return 0; } static int node_attribute_text(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); if (add_extra) { int v = crm_parse_int(value, "0"); if (v <= 0) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is lost", name, value); } else if (v < expected_score) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is degraded (Expected=%d)", name, value, expected_score); } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } return 0; } static int node_attribute_html(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); if (add_extra) { int v = crm_parse_int(value, "0"); char *s = crm_strdup_printf("%s: %s", name, value); xmlNodePtr item_node = pcmk__output_create_xml_node(out, "li"); pcmk_create_html_node(item_node, "span", NULL, NULL, s); free(s); if (v <= 0) { pcmk_create_html_node(item_node, "span", NULL, "bold", "(connectivity is lost)"); } else if (v < expected_score) { char *buf = crm_strdup_printf("(connectivity is degraded -- expected %d", expected_score); pcmk_create_html_node(item_node, "span", NULL, "bold", buf); free(buf); } } else { out->list_item(out, NULL, "%s: %s", name, value); } return 0; } static int node_attribute_xml(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "attribute"); xmlSetProp(node, (pcmkXmlStr) "name", (pcmkXmlStr) name); xmlSetProp(node, (pcmkXmlStr) "value", (pcmkXmlStr) value); if (add_extra) { char *buf = crm_itoa(expected_score); xmlSetProp(node, (pcmkXmlStr) "expected", (pcmkXmlStr) buf); free(buf); } return 0; } static int op_history_text(pcmk__output_t *out, va_list args) { xmlNode *xml_op = va_arg(args, xmlNode *); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int 
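/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * node_attribute_text() and node_attribute_html() above grade a connectivity
 * attribute the same way: a value of zero or less means connectivity is
 * lost, anything below the expected score means it is degraded, and
 * otherwise it is healthy.  A standalone version of that three-way check:
 */
enum sketch_conn { SKETCH_CONN_LOST, SKETCH_CONN_DEGRADED, SKETCH_CONN_OK };

static enum sketch_conn
sketch_grade_connectivity(int value, int expected_score)
{
    if (value <= 0) {
        return SKETCH_CONN_LOST;
    }
    if (value < expected_score) {
        return SKETCH_CONN_DEGRADED;
    }
    return SKETCH_CONN_OK;
}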
rc = va_arg(args, int); unsigned int mon_ops = va_arg(args, unsigned int); char *buf = op_history_string(xml_op, task, interval_ms_s, rc, mon_ops); out->list_item(out, NULL, "%s", buf); free(buf); return 0; } static int op_history_xml(pcmk__output_t *out, va_list args) { xmlNode *xml_op = va_arg(args, xmlNode *); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); unsigned int mon_ops = va_arg(args, unsigned int); char *rc_s = NULL; xmlNodePtr node = pcmk__output_create_xml_node(out, "operation_history"); xmlSetProp(node, (pcmkXmlStr) "call", (pcmkXmlStr) crm_element_value(xml_op, XML_LRM_ATTR_CALLID)); xmlSetProp(node, (pcmkXmlStr) "task", (pcmkXmlStr) task); if (interval_ms_s && safe_str_neq(interval_ms_s, "0")) { char *s = crm_strdup_printf("%sms", interval_ms_s); xmlSetProp(node, (pcmkXmlStr) "interval", (pcmkXmlStr) s); free(s); } if (is_set(mon_ops, mon_op_print_timing)) { const char *value = NULL; value = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); if (value) { time_t int_value = (time_t) crm_parse_int(value, NULL); if (int_value > 0) { xmlSetProp(node, (pcmkXmlStr) XML_RSC_OP_LAST_CHANGE, (pcmkXmlStr) crm_now_string(&int_value)); } } value = crm_element_value(xml_op, XML_RSC_OP_LAST_RUN); if (value) { time_t int_value = (time_t) crm_parse_int(value, NULL); if (int_value > 0) { xmlSetProp(node, (pcmkXmlStr) XML_RSC_OP_LAST_RUN, (pcmkXmlStr) crm_now_string(&int_value)); } } value = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); if (value) { char *s = crm_strdup_printf("%sms", value); xmlSetProp(node, (pcmkXmlStr) XML_RSC_OP_T_EXEC, (pcmkXmlStr) s); free(s); } value = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); if (value) { char *s = crm_strdup_printf("%sms", value); xmlSetProp(node, (pcmkXmlStr) XML_RSC_OP_T_QUEUE, (pcmkXmlStr) s); free(s); } } rc_s = crm_itoa(rc); xmlSetProp(node, (pcmkXmlStr) "rc", (pcmkXmlStr) rc_s); xmlSetProp(node, (pcmkXmlStr) "rc_text", (pcmkXmlStr) services_ocf_exitcode_str(rc)); free(rc_s); return 0; } static int resource_history_text(pcmk__output_t *out, va_list args) { resource_t *rsc = va_arg(args, resource_t *); const char *rsc_id = va_arg(args, const char *); gboolean all = va_arg(args, gboolean); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, int); char *buf = resource_history_string(rsc, rsc_id, all, failcount, last_failure); out->begin_list(out, NULL, NULL, "%s", buf); free(buf); return 0; } static int resource_history_xml(pcmk__output_t *out, va_list args) { resource_t *rsc = va_arg(args, resource_t *); const char *rsc_id = va_arg(args, const char *); gboolean all = va_arg(args, gboolean); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, int); xmlNodePtr node = pcmk__output_xml_create_parent(out, "resource_history"); xmlSetProp(node, (pcmkXmlStr) "id", (pcmkXmlStr) rsc_id); if (rsc == NULL) { xmlSetProp(node, (pcmkXmlStr) "orphan", (pcmkXmlStr) "true"); } else if (all || failcount || last_failure > 0) { char *migration_s = crm_itoa(rsc->migration_threshold); xmlSetProp(node, (pcmkXmlStr) "orphan", (pcmkXmlStr) "false"); xmlSetProp(node, (pcmkXmlStr) "migration-threshold", (pcmkXmlStr) migration_s); free(migration_s); if (failcount > 0) { char *s = crm_itoa(failcount); xmlSetProp(node, (pcmkXmlStr) CRM_FAIL_COUNT_PREFIX, (pcmkXmlStr) s); free(s); } if (last_failure > 0) { xmlSetProp(node, (pcmkXmlStr) CRM_LAST_FAILURE_PREFIX, (pcmkXmlStr) crm_now_string(&last_failure)); } } return 0; } static int 
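/* [Editor's note -- illustrative standalone sketch, not part of this patch.]
 * Every formatter in this file reads its arguments off a va_list in a fixed,
 * documented order, and the fmt_functions table below maps a message name
 * plus output format to the right handler.  This sketch shows the calling
 * convention with invented names.
 */
#include <stdarg.h>
#include <stdio.h>

static int
sketch_handler(va_list args)
{
    const char *name = va_arg(args, const char *);  /* fixed argument order */
    int value = va_arg(args, int);

    printf("%s=%d\n", name, value);
    return 0;
}

static int
sketch_dispatch(int (*handler)(va_list), ...)
{
    va_list args;
    int rc;

    va_start(args, handler);
    rc = handler(args);        /* handler consumes the variadic arguments */
    va_end(args);
    return rc;
}

/* usage: sketch_dispatch(sketch_handler, "nodes", 3); */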
static int
stonith_event_console(pcmk__output_t *out, va_list args) {
    stonith_history_t *event = va_arg(args, stonith_history_t *);
    int full_history = va_arg(args, int);
    gboolean later_succeeded = va_arg(args, gboolean);

    char *buf = NULL;

    buf = time_t_string(event->completed);

    switch (event->state) {
        case st_failed:
            curses_indented_printf(out,
                                   "%s of %s failed: delegate=%s, client=%s, origin=%s, %s='%s %s'\n",
                                   stonith_action_str(event->action), event->target,
                                   event->delegate ? event->delegate : "",
                                   event->client, event->origin,
                                   full_history ? "completed" : "last-failed", buf,
                                   later_succeeded ? "(a later attempt succeeded)" : "");
            break;

        case st_done:
            curses_indented_printf(out,
                                   "%s of %s successful: delegate=%s, client=%s, origin=%s, %s='%s'\n",
                                   stonith_action_str(event->action), event->target,
                                   event->delegate ? event->delegate : "",
                                   event->client, event->origin,
                                   full_history ? "completed" : "last-successful", buf);
            break;

        default:
            curses_indented_printf(out, "%s of %s pending: client=%s, origin=%s\n",
                                   stonith_action_str(event->action), event->target,
                                   event->client, event->origin);
            break;
    }

    free(buf);
    return 0;
}

static int
ticket_console(pcmk__output_t *out, va_list args) {
    ticket_t *ticket = va_arg(args, ticket_t *);

    if (ticket->last_granted > -1) {
        char *time = pcmk_format_named_time("last-granted", ticket->last_granted);

        out->list_item(out, ticket->id, "\t%s%s %s",
                       ticket->granted ? "granted" : "revoked",
                       ticket->standby ? " [standby]" : "",
                       time);
        free(time);
    } else {
        out->list_item(out, ticket->id, "\t%s%s",
                       ticket->granted ? "granted" : "revoked",
                       ticket->standby ? " [standby]" : "");
    }

    return 0;
}

static pcmk__message_entry_t fmt_functions[] = {
    { "ban", "console", ban_text },
    { "ban", "html", ban_html },
    { "ban", "text", ban_text },
    { "ban", "xml", ban_xml },
    { "bundle", "console", pe__bundle_text },
    { "clone", "console", pe__clone_text },
    { "cluster-counts", "console", cluster_counts_text },
    { "cluster-counts", "html", cluster_counts_html },
    { "cluster-counts", "text", cluster_counts_text },
    { "cluster-counts", "xml", cluster_counts_xml },
    { "cluster-dc", "console", cluster_dc_text },
    { "cluster-dc", "html", cluster_dc_html },
    { "cluster-dc", "text", cluster_dc_text },
    { "cluster-dc", "xml", cluster_dc_xml },
    { "cluster-options", "console", cluster_options_text },
    { "cluster-options", "html", cluster_options_html },
    { "cluster-options", "text", cluster_options_text },
    { "cluster-options", "xml", cluster_options_xml },
    { "cluster-stack", "console", cluster_stack_text },
    { "cluster-stack", "html", cluster_stack_html },
    { "cluster-stack", "text", cluster_stack_text },
    { "cluster-stack", "xml", cluster_stack_xml },
    { "cluster-times", "console", cluster_times_text },
    { "cluster-times", "html", cluster_times_html },
    { "cluster-times", "text", cluster_times_text },
    { "cluster-times", "xml", cluster_times_xml },
    { "failed-action", "console", failed_action_console },
    { "failed-action", "html", failed_action_html },
    { "failed-action", "text", failed_action_text },
    { "failed-action", "xml", failed_action_xml },
    { "group", "console", pe__group_text },
    { "node", "console", node_text },
    { "node", "html", node_html },
    { "node", "text", node_text },
    { "node", "xml", node_xml },
    { "node-attribute", "console", node_attribute_text },
    { "node-attribute", "html", node_attribute_html },
    { "node-attribute", "text", node_attribute_text },
    { "node-attribute", "xml", node_attribute_xml },
    { "op-history", "console", op_history_text },
    { "op-history", "html", op_history_text },
    { "op-history", "text", op_history_text },
    { "op-history", "xml", op_history_xml },
    { "primitive", "console", pe__resource_text },
    { "resource-history", "console", resource_history_text },
    { "resource-history", "html", resource_history_text },
    { "resource-history", "text", resource_history_text },
    { "resource-history", "xml", resource_history_xml },
    { "stonith-event", "console", stonith_event_console },
    { "ticket", "console", ticket_console },

    { NULL, NULL, NULL }
};

void
crm_mon_register_messages(pcmk__output_t *out) {
    pcmk__register_messages(out, fmt_functions);
}