diff --git a/cts/CTSaudits.py b/cts/CTSaudits.py
index aa18d64813..f4c3b15011 100755
--- a/cts/CTSaudits.py
+++ b/cts/CTSaudits.py
@@ -1,866 +1,865 @@
""" Auditing classes for Pacemaker's Cluster Test Suite (CTS)
"""
# Pacemaker targets compatibility with Python 2.7 and 3.2+
from __future__ import print_function, unicode_literals, absolute_import, division
__copyright__ = "Copyright 2000-2018 Alan Robertson <alanr@unix.sh>"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import time, re, uuid
from cts.watcher import LogWatcher
from cts.remote import input_wrapper
from cts.CTSvars import *
class ClusterAudit(object):
def __init__(self, cm):
self.CM = cm
def __call__(self):
raise ValueError("Abstract Class member (__call__)")
def is_applicable(self):
'''Return TRUE if we are applicable in the current test configuration'''
raise ValueError("Abstract Class member (is_applicable)")
def log(self, args):
self.CM.log("audit: %s" % args)
def debug(self, args):
self.CM.debug("audit: %s" % args)
def name(self):
raise ValueError("Abstract Class member (name)")
AllAuditClasses = [ ]
class LogAudit(ClusterAudit):
def name(self):
return "LogAudit"
def __init__(self, cm):
self.CM = cm
self.kinds = [ "combined syslog", "journal", "remote" ]
def RestartClusterLogging(self, nodes=None):
if not nodes:
nodes = self.CM.Env["nodes"]
self.CM.debug("Restarting logging on: %s" % repr(nodes))
for node in nodes:
if self.CM.Env["have_systemd"]:
if self.CM.rsh(node, "systemctl stop systemd-journald.socket") != 0:
self.CM.log ("ERROR: Cannot stop 'systemd-journald' on %s" % node)
if self.CM.rsh(node, "systemctl start systemd-journald.service") != 0:
self.CM.log ("ERROR: Cannot start 'systemd-journald' on %s" % node)
if self.CM.rsh(node, "service %s restart" % self.CM.Env["syslogd"]) != 0:
self.CM.log ("ERROR: Cannot restart '%s' on %s" % (self.CM.Env["syslogd"], node))
def TestLogging(self):
patterns = []
prefix = "Test message from"
suffix = str(uuid.uuid4())
watch = {}
for node in self.CM.Env["nodes"]:
# Look for the node name in two places to make sure
# that syslog is logging with the correct hostname
m = re.search("^([^.]+).*", node)
if m:
simple = m.group(1)
else:
simple = node
patterns.append("%s.*%s %s %s" % (simple, prefix, node, suffix))
watch_pref = self.CM.Env["LogWatcher"]
if watch_pref == "any":
for k in self.kinds:
watch[k] = LogWatcher(self.CM.Env["LogFileName"], patterns, "LogAudit", 5, silent=True, hosts=self.CM.Env["nodes"], kind=k)
watch[k].setwatch()
else:
k = watch_pref
watch[k] = LogWatcher(self.CM.Env["LogFileName"], patterns, "LogAudit", 5, silent=True, hosts=self.CM.Env["nodes"], kind=k)
watch[k].setwatch()
if watch_pref == "any": self.CM.log("Writing log with key: %s" % (suffix))
for node in self.CM.Env["nodes"]:
cmd = "logger -p %s.info %s %s %s" % (self.CM.Env["SyslogFacility"], prefix, node, suffix)
if self.CM.rsh(node, cmd, synchronous=0, silent=True) != 0:
self.CM.log ("ERROR: Cannot execute remote command [%s] on %s" % (cmd, node))
for k in self.kinds:
if k in watch:
w = watch[k]
if watch_pref == "any": self.CM.log("Testing for %s logs" % (k))
w.lookforall(silent=True)
if not w.unmatched:
if watch_pref == "any":
self.CM.log ("Continuing with %s-based log reader" % (w.kind))
self.CM.Env["LogWatcher"] = w.kind
return 1
for k in list(watch.keys()):
w = watch[k]
if w.unmatched:
for regex in w.unmatched:
self.CM.log ("Test message [%s] not found in %s logs." % (regex, w.kind))
return 0
def __call__(self):
        max_attempts = 3
attempt = 0
self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"])
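        # Retry the logging test a few times, restarting the logging daemons
        # and backing off (60s, then 120s, ...) between attempts.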
        while attempt <= max_attempts and self.TestLogging() == 0:
attempt = attempt + 1
self.RestartClusterLogging()
time.sleep(60*attempt)
        if attempt > max_attempts:
self.CM.log("ERROR: Cluster logging unrecoverable.")
return 0
return 1
def is_applicable(self):
if self.CM.Env["DoBSC"]:
return 0
if self.CM.Env["LogAuditDisabled"]:
return 0
return 1
class DiskAudit(ClusterAudit):
def name(self):
return "DiskspaceAudit"
def __init__(self, cm):
self.CM = cm
def __call__(self):
result = 1
# @TODO Use directory of PCMK_logfile if set on host
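        # df -BM reports sizes in megabytes; the awk/tr pipeline reduces the
        # last line of output to "<use%> <available MB>" (e.g. "95 42"),
        # which is parsed as (used, remain) below.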
dfcmd = "df -BM " + CTSvars.CRM_LOG_DIR + " | tail -1 | awk '{print $(NF-1)\" \"$(NF-2)}' | tr -d 'M%'"
self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"])
for node in self.CM.Env["nodes"]:
dfout = self.CM.rsh(node, dfcmd, 1)
if not dfout:
self.CM.log ("ERROR: Cannot execute remote df command [%s] on %s" % (dfcmd, node))
else:
try:
(used, remain) = dfout.split()
used_percent = int(used)
remaining_mb = int(remain)
                except (ValueError, TypeError):
                    self.CM.log("Warning: df output '%s' from %s was invalid"
                                % (dfout, node))
else:
if remaining_mb < 10 or used_percent > 95:
self.CM.log("CRIT: Out of log disk space on %s (%d%% / %dMB)"
% (node, used_percent, remaining_mb))
result = None
if self.CM.Env["continue"] == 1:
answer = "Y"
else:
try:
answer = input_wrapper('Continue? [nY]')
except EOFError as e:
answer = "n"
if answer and answer == "n":
raise ValueError("Disk full on %s" % (node))
- ret = 0
elif remaining_mb < 100 or used_percent > 90:
self.CM.log("WARN: Low on log disk space (%dMB) on %s" % (remaining_mb, node))
return result
def is_applicable(self):
if self.CM.Env["DoBSC"]:
return 0
return 1
class FileAudit(ClusterAudit):
def name(self):
return "FileAudit"
def __init__(self, cm):
self.CM = cm
self.known = []
def __call__(self):
result = 1
self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"])
for node in self.CM.Env["nodes"]:
(rc, lsout) = self.CM.rsh(node, "ls -al /var/lib/pacemaker/cores/* | grep core.[0-9]", None)
for line in lsout:
line = line.strip()
if line not in self.known:
result = 0
self.known.append(line)
self.CM.log("Warning: Pacemaker core file on %s: %s" % (node, line))
(rc, lsout) = self.CM.rsh(node, "ls -al /var/lib/corosync | grep core.[0-9]", None)
for line in lsout:
line = line.strip()
if line not in self.known:
result = 0
self.known.append(line)
self.CM.log("Warning: Corosync core file on %s: %s" % (node, line))
if node in self.CM.ShouldBeStatus and self.CM.ShouldBeStatus[node] == "down":
clean = 0
(rc, lsout) = self.CM.rsh(node, "ls -al /dev/shm | grep qb-", None)
for line in lsout:
result = 0
clean = 1
self.CM.log("Warning: Stale IPC file on %s: %s" % (node, line))
if clean:
(rc, lsout) = self.CM.rsh(node, "ps axf | grep -e pacemaker -e corosync", None)
for line in lsout:
self.CM.debug("ps[%s]: %s" % (node, line))
self.CM.rsh(node, "rm -f /dev/shm/qb-*")
else:
self.CM.debug("Skipping %s" % node)
return result
def is_applicable(self):
return 1
class AuditResource(object):
def __init__(self, cm, line):
fields = line.split()
self.CM = cm
self.line = line
self.type = fields[1]
self.id = fields[2]
self.clone_id = fields[3]
self.parent = fields[4]
self.rprovider = fields[5]
self.rclass = fields[6]
self.rtype = fields[7]
self.host = fields[8]
self.needs_quorum = fields[9]
self.flags = int(fields[10])
self.flags_s = fields[11]
if self.parent == "NA":
self.parent = None
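    # The flags field is a bitmask printed by crm_resource; the masks used
    # below (0x01 orphaned, 0x02 managed, 0x20 globally unique) are assumed
    # to mirror Pacemaker's internal resource flag values.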
def unique(self):
if self.flags & int("0x00000020", 16):
return 1
return 0
def orphan(self):
if self.flags & int("0x00000001", 16):
return 1
return 0
def managed(self):
if self.flags & int("0x00000002", 16):
return 1
return 0
class AuditConstraint(object):
def __init__(self, cm, line):
fields = line.split()
self.CM = cm
self.line = line
self.type = fields[1]
self.id = fields[2]
self.rsc = fields[3]
self.target = fields[4]
self.score = fields[5]
self.rsc_role = fields[6]
self.target_role = fields[7]
if self.rsc_role == "NA":
self.rsc_role = None
if self.target_role == "NA":
self.target_role = None
class PrimitiveAudit(ClusterAudit):
def name(self):
return "PrimitiveAudit"
def __init__(self, cm):
self.CM = cm
def doResourceAudit(self, resource, quorum):
rc = 1
active = self.CM.ResourceLocation(resource.id)
if len(active) == 1:
if quorum:
self.debug("Resource %s active on %s" % (resource.id, repr(active)))
elif resource.needs_quorum == 1:
self.CM.log("Resource %s active without quorum: %s"
% (resource.id, repr(active)))
rc = 0
elif not resource.managed():
self.CM.log("Resource %s not managed. Active on %s"
% (resource.id, repr(active)))
elif not resource.unique():
# TODO: Figure out a clever way to actually audit these resource types
if len(active) > 1:
self.debug("Non-unique resource %s is active on: %s"
% (resource.id, repr(active)))
else:
self.debug("Non-unique resource %s is not active" % resource.id)
elif len(active) > 1:
self.CM.log("Resource %s is active multiple times: %s"
% (resource.id, repr(active)))
rc = 0
elif resource.orphan():
self.debug("Resource %s is an inactive orphan" % resource.id)
elif len(self.inactive_nodes) == 0:
self.CM.log("WARN: Resource %s not served anywhere" % resource.id)
rc = 0
elif self.CM.Env["warn-inactive"] == 1:
if quorum or not resource.needs_quorum:
self.CM.log("WARN: Resource %s not served anywhere (Inactive nodes: %s)"
% (resource.id, repr(self.inactive_nodes)))
else:
self.debug("Resource %s not served anywhere (Inactive nodes: %s)"
% (resource.id, repr(self.inactive_nodes)))
elif quorum or not resource.needs_quorum:
self.debug("Resource %s not served anywhere (Inactive nodes: %s)"
% (resource.id, repr(self.inactive_nodes)))
return rc
def setup(self):
self.target = None
self.resources = []
self.constraints = []
self.active_nodes = []
self.inactive_nodes = []
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "up":
self.active_nodes.append(node)
else:
self.inactive_nodes.append(node)
for node in self.CM.Env["nodes"]:
if self.target == None and self.CM.ShouldBeStatus[node] == "up":
self.target = node
if not self.target:
# TODO: In Pacemaker 1.0 clusters we'll be able to run crm_resource
# with CIB_file=/path/to/cib.xml even when the cluster isn't running
self.debug("No nodes active - skipping %s" % self.name())
return 0
(rc, lines) = self.CM.rsh(self.target, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
self.resources.append(AuditResource(self.CM, line))
elif re.search("^Constraint", line):
self.constraints.append(AuditConstraint(self.CM, line))
else:
self.CM.log("Unknown entry: %s" % line);
return 1
def __call__(self):
rc = 1
if not self.setup():
return 1
quorum = self.CM.HasQuorum(None)
for resource in self.resources:
if resource.type == "primitive":
if self.doResourceAudit(resource, quorum) == 0:
rc = 0
return rc
def is_applicable(self):
# @TODO Due to long-ago refactoring, this name test would never match,
# so this audit (and those derived from it) would never run.
# Uncommenting the next lines fixes the name test, but that then
# exposes pre-existing bugs that need to be fixed.
#if self.CM["Name"] == "crm-corosync":
# return 1
return 0
class GroupAudit(PrimitiveAudit):
def name(self):
return "GroupAudit"
def __call__(self):
rc = 1
if not self.setup():
return 1
for group in self.resources:
if group.type == "group":
first_match = 1
group_location = None
for child in self.resources:
if child.parent == group.id:
nodes = self.CM.ResourceLocation(child.id)
if first_match and len(nodes) > 0:
group_location = nodes[0]
first_match = 0
if len(nodes) > 1:
rc = 0
self.CM.log("Child %s of %s is active more than once: %s"
% (child.id, group.id, repr(nodes)))
elif len(nodes) == 0:
# Groups are allowed to be partially active
# However we do need to make sure later children aren't running
group_location = None
self.debug("Child %s of %s is stopped" % (child.id, group.id))
elif nodes[0] != group_location:
rc = 0
self.CM.log("Child %s of %s is active on the wrong node (%s) expected %s"
% (child.id, group.id, nodes[0], group_location))
else:
self.debug("Child %s of %s is active on %s" % (child.id, group.id, nodes[0]))
return rc
class CloneAudit(PrimitiveAudit):
def name(self):
return "CloneAudit"
def __call__(self):
rc = 1
if not self.setup():
return 1
for clone in self.resources:
if clone.type == "clone":
for child in self.resources:
if child.parent == clone.id and child.type == "primitive":
self.debug("Checking child %s of %s..." % (child.id, clone.id))
# Check max and node_max
# Obtain with:
# crm_resource -g clone_max --meta -r child.id
# crm_resource -g clone_node_max --meta -r child.id
return rc
class ColocationAudit(PrimitiveAudit):
def name(self):
return "ColocationAudit"
def crm_location(self, resource):
(rc, lines) = self.CM.rsh(self.target, "crm_resource -W -r %s -Q"%resource, None)
hosts = []
if rc == 0:
for line in lines:
fields = line.split()
hosts.append(fields[0])
return hosts
def __call__(self):
rc = 1
if not self.setup():
return 1
for coloc in self.constraints:
if coloc.type == "rsc_colocation":
source = self.crm_location(coloc.rsc)
target = self.crm_location(coloc.target)
if len(source) == 0:
self.debug("Colocation audit (%s): %s not running" % (coloc.id, coloc.rsc))
else:
for node in source:
if not node in target:
rc = 0
self.CM.log("Colocation audit (%s): %s running on %s (not in %s)"
% (coloc.id, coloc.rsc, node, repr(target)))
else:
self.debug("Colocation audit (%s): %s running on %s (in %s)"
% (coloc.id, coloc.rsc, node, repr(target)))
return rc
class ControllerStateAudit(ClusterAudit):
def __init__(self, cm):
self.CM = cm
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
def has_key(self, key):
return key in self.Stats
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
return self.Stats[key]
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
if not name in self.Stats:
self.Stats[name] = 0
self.Stats[name] = self.Stats[name]+1
def __call__(self):
passed = 1
up_are_down = 0
down_are_up = 0
unstable_list = []
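        # test_node_CM is assumed to return 0 for a down node, 1 for a node
        # that is up but not yet stable, and 2 for a node that is up and
        # stable.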
for node in self.CM.Env["nodes"]:
should_be = self.CM.ShouldBeStatus[node]
rc = self.CM.test_node_CM(node)
if rc > 0:
if should_be == "down":
down_are_up = down_are_up + 1
if rc == 1:
unstable_list.append(node)
elif should_be == "up":
up_are_down = up_are_down + 1
if len(unstable_list) > 0:
passed = 0
self.CM.log("Cluster is not stable: %d (of %d): %s"
% (len(unstable_list), self.CM.upcount(), repr(unstable_list)))
if up_are_down > 0:
passed = 0
self.CM.log("%d (of %d) nodes expected to be up were down."
% (up_are_down, len(self.CM.Env["nodes"])))
if down_are_up > 0:
passed = 0
self.CM.log("%d (of %d) nodes expected to be down were up."
% (down_are_up, len(self.CM.Env["nodes"])))
return passed
def name(self):
return "ControllerStateAudit"
def is_applicable(self):
# @TODO Due to long-ago refactoring, this name test would never match,
# so this audit (and those derived from it) would never run.
# Uncommenting the next lines fixes the name test, but that then
# exposes pre-existing bugs that need to be fixed.
#if self.CM["Name"] == "crm-corosync":
# return 1
return 0
class CIBAudit(ClusterAudit):
def __init__(self, cm):
self.CM = cm
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
def has_key(self, key):
return key in self.Stats
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
return self.Stats[key]
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
if not name in self.Stats:
self.Stats[name] = 0
self.Stats[name] = self.Stats[name]+1
def __call__(self):
passed = 1
ccm_partitions = self.CM.find_partitions()
if len(ccm_partitions) == 0:
self.debug("\tNo partitions to audit")
return 1
for partition in ccm_partitions:
self.debug("\tAuditing CIB consistency for: %s" % partition)
partition_passed = 0
if self.audit_cib_contents(partition) == 0:
passed = 0
return passed
def audit_cib_contents(self, hostlist):
passed = 1
node0 = None
node0_xml = None
partition_hosts = hostlist.split()
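        # The first node with a retrievable CIB becomes the baseline; every
        # other node's CIB is copied next to it and compared with crm_diff,
        # and any non-empty diff marks the partition as inconsistent.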
for node in partition_hosts:
node_xml = self.store_remote_cib(node, node0)
if node_xml == None:
self.CM.log("Could not perform audit: No configuration from %s" % node)
passed = 0
elif node0 == None:
node0 = node
node0_xml = node_xml
elif node0_xml == None:
self.CM.log("Could not perform audit: No configuration from %s" % node0)
passed = 0
else:
(rc, result) = self.CM.rsh(
node0, "crm_diff -VV -cf --new %s --original %s" % (node_xml, node0_xml), None)
if rc != 0:
self.CM.log("Diff between %s and %s failed: %d" % (node0_xml, node_xml, rc))
passed = 0
for line in result:
if not re.search("<diff/>", line):
passed = 0
self.debug("CibDiff[%s-%s]: %s" % (node0, node, line))
else:
self.debug("CibDiff[%s-%s] Ignoring: %s" % (node0, node, line))
# self.CM.rsh(node0, "rm -f %s" % node_xml)
# self.CM.rsh(node0, "rm -f %s" % node0_xml)
return passed
def store_remote_cib(self, node, target):
combined = ""
filename = "/tmp/ctsaudit.%s.xml" % node
if not target:
target = node
(rc, lines) = self.CM.rsh(node, self.CM["CibQuery"], None)
if rc != 0:
self.CM.log("Could not retrieve configuration")
return None
self.CM.rsh("localhost", "rm -f %s" % filename)
for line in lines:
self.CM.rsh("localhost", "echo \'%s\' >> %s" % (line[:-1], filename), silent=True)
if self.CM.rsh.cp(filename, "root@%s:%s" % (target, filename), silent=True) != 0:
self.CM.log("Could not store configuration")
return None
return filename
def name(self):
return "CibAudit"
def is_applicable(self):
# @TODO Due to long-ago refactoring, this name test would never match,
# so this audit (and those derived from it) would never run.
# Uncommenting the next lines fixes the name test, but that then
# exposes pre-existing bugs that need to be fixed.
#if self.CM["Name"] == "crm-corosync":
# return 1
return 0
class PartitionAudit(ClusterAudit):
def __init__(self, cm):
self.CM = cm
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
self.NodeEpoch = {}
self.NodeState = {}
self.NodeQuorum = {}
def has_key(self, key):
return key in self.Stats
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
return self.Stats[key]
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
if not name in self.Stats:
self.Stats[name] = 0
self.Stats[name] = self.Stats[name]+1
def __call__(self):
passed = 1
ccm_partitions = self.CM.find_partitions()
if ccm_partitions == None or len(ccm_partitions) == 0:
return 1
self.CM.cluster_stable(double_check=True)
if len(ccm_partitions) != self.CM.partitions_expected:
self.CM.log("ERROR: %d cluster partitions detected:" % len(ccm_partitions))
passed = 0
for partition in ccm_partitions:
self.CM.log("\t %s" % partition)
for partition in ccm_partitions:
partition_passed = 0
if self.audit_partition(partition) == 0:
passed = 0
return passed
def trim_string(self, avalue):
if not avalue:
return None
if len(avalue) > 1:
return avalue[:-1]
def trim2int(self, avalue):
if not avalue:
return None
if len(avalue) > 1:
return int(avalue[:-1])
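    # Both trim helpers strip the trailing newline that rsh leaves on
    # single-line command output, yielding None for empty results.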
def audit_partition(self, partition):
passed = 1
dc_found = []
dc_allowed_list = []
lowest_epoch = None
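        # Each node reports an "epoch" (roughly, how long it has been a
        # cluster member); the DC is expected to be the longest-standing
        # node, i.e. the one with the lowest epoch.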
node_list = partition.split()
self.debug("Auditing partition: %s" % (partition))
for node in node_list:
if self.CM.ShouldBeStatus[node] != "up":
self.CM.log("Warn: Node %s appeared out of nowhere" % (node))
self.CM.ShouldBeStatus[node] = "up"
# not in itself a reason to fail the audit (not what we're
# checking for in this audit)
self.NodeState[node] = self.CM.rsh(node, self.CM["StatusCmd"] % node, 1)
self.NodeEpoch[node] = self.CM.rsh(node, self.CM["EpochCmd"], 1)
self.NodeQuorum[node] = self.CM.rsh(node, self.CM["QuorumCmd"], 1)
self.debug("Node %s: %s - %s - %s." % (node, self.NodeState[node], self.NodeEpoch[node], self.NodeQuorum[node]))
self.NodeState[node] = self.trim_string(self.NodeState[node])
self.NodeEpoch[node] = self.trim2int(self.NodeEpoch[node])
self.NodeQuorum[node] = self.trim_string(self.NodeQuorum[node])
if not self.NodeEpoch[node]:
self.CM.log("Warn: Node %s dissappeared: cant determin epoch" % (node))
self.CM.ShouldBeStatus[node] = "down"
# not in itself a reason to fail the audit (not what we're
# checking for in this audit)
elif lowest_epoch == None or self.NodeEpoch[node] < lowest_epoch:
lowest_epoch = self.NodeEpoch[node]
if not lowest_epoch:
self.CM.log("Lowest epoch not determined in %s" % (partition))
passed = 0
for node in node_list:
if self.CM.ShouldBeStatus[node] == "up":
if self.CM.is_node_dc(node, self.NodeState[node]):
dc_found.append(node)
if self.NodeEpoch[node] == lowest_epoch:
self.debug("%s: OK" % node)
elif not self.NodeEpoch[node]:
self.debug("Check on %s ignored: no node epoch" % node)
elif not lowest_epoch:
self.debug("Check on %s ignored: no lowest epoch" % node)
else:
self.CM.log("DC %s is not the oldest node (%d vs. %d)"
% (node, self.NodeEpoch[node], lowest_epoch))
passed = 0
if len(dc_found) == 0:
self.CM.log("DC not found on any of the %d allowed nodes: %s (of %s)"
% (len(dc_allowed_list), str(dc_allowed_list), str(node_list)))
elif len(dc_found) > 1:
self.CM.log("%d DCs (%s) found in cluster partition: %s"
% (len(dc_found), str(dc_found), str(node_list)))
passed = 0
if passed == 0:
for node in node_list:
if self.CM.ShouldBeStatus[node] == "up":
self.CM.log("epoch %s : %s"
% (self.NodeEpoch[node], self.NodeState[node]))
return passed
def name(self):
return "PartitionAudit"
def is_applicable(self):
# @TODO Due to long-ago refactoring, this name test would never match,
# so this audit (and those derived from it) would never run.
# Uncommenting the next lines fixes the name test, but that then
# exposes pre-existing bugs that need to be fixed.
#if self.CM["Name"] == "crm-corosync":
# return 1
return 0
AllAuditClasses.append(DiskAudit)
AllAuditClasses.append(FileAudit)
AllAuditClasses.append(LogAudit)
AllAuditClasses.append(ControllerStateAudit)
AllAuditClasses.append(PartitionAudit)
AllAuditClasses.append(PrimitiveAudit)
AllAuditClasses.append(GroupAudit)
AllAuditClasses.append(CloneAudit)
AllAuditClasses.append(ColocationAudit)
AllAuditClasses.append(CIBAudit)
def AuditList(cm):
result = []
for auditclass in AllAuditClasses:
a = auditclass(cm)
if a.is_applicable():
result.append(a)
return result
diff --git a/cts/CTStests.py b/cts/CTStests.py
index 58f084fa10..42f6119294 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -1,3111 +1,3111 @@
""" Test-specific classes for Pacemaker's Cluster Test Suite (CTS)
"""
# Pacemaker targets compatibility with Python 2.7 and 3.2+
from __future__ import print_function, unicode_literals, absolute_import, division
__copyright__ = """Copyright 2000, 2001 Alan Robertson <alanr@unix.sh>
Add ResourceRecover testcase Zhao Kai <zhaokai@cn.ibm.com>
"""
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
#
# SPECIAL NOTE:
#
# Tests may NOT implement any cluster-manager-specific code in them.
# EXTEND the ClusterManager object to provide the base capabilities
# the test needs if you need to do something that the current CM classes
# do not. Otherwise you screw up the whole point of the object structure
# in CTS.
#
# Thank you.
#
import os
import re
import time
import subprocess
import tempfile
from stat import *
from cts import CTS
from cts.CTSaudits import *
from cts.CTSvars import *
from cts.patterns import PatternSelector
from cts.logging import LogFactory
from cts.remote import RemoteFactory, input_wrapper
from cts.watcher import LogWatcher
from cts.environment import EnvFactory
AllTestClasses = [ ]
class CTSTest(object):
'''
A Cluster test.
We implement the basic set of properties and behaviors for a generic
cluster test.
Cluster tests track their own statistics.
We keep each of the kinds of counts we track as separate {name,value}
pairs.
'''
def __init__(self, cm):
#self.name="the unnamed test"
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
# if not issubclass(cm.__class__, ClusterManager):
# raise ValueError("Must be a ClusterManager object")
self.CM = cm
self.Env = EnvFactory().getInstance()
self.rsh = RemoteFactory().getInstance()
self.logger = LogFactory()
self.templates = PatternSelector(cm["Name"])
self.Audits = []
self.timeout = 120
self.passed = 1
self.is_loop = 0
self.is_unsafe = 0
self.is_docker_unsafe = 0
self.is_experimental = 0
self.is_container = 0
self.is_valgrind = 0
self.benchmark = 0 # which tests to benchmark
self.timer = {} # timers
def log(self, args):
self.logger.log(args)
def debug(self, args):
self.logger.debug(args)
def has_key(self, key):
return key in self.Stats
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
if str(key) == "0":
raise ValueError("Bad call to 'foo in X', should reference 'foo in X.Stats' instead")
if key in self.Stats:
return self.Stats[key]
return None
def log_mark(self, msg):
self.debug("MARK: test %s %s %d" % (self.name,msg,time.time()))
return
def get_timer(self,key = "test"):
try: return self.timer[key]
except: return 0
def set_timer(self,key = "test"):
self.timer[key] = time.time()
return self.timer[key]
def log_timer(self,key = "test"):
elapsed = 0
if key in self.timer:
elapsed = time.time() - self.timer[key]
        s = self.name if key == "test" else "%s:%s" % (self.name, key)
self.debug("%s runtime: %.2f" % (s, elapsed))
del self.timer[key]
return elapsed
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
if not name in self.Stats:
self.Stats[name] = 0
self.Stats[name] = self.Stats[name]+1
# Reset the test passed boolean
if name == "calls":
self.passed = 1
def failure(self, reason="none"):
'''Increment the failure count'''
self.passed = 0
self.incr("failure")
self.logger.log(("Test %s" % self.name).ljust(35) + " FAILED: %s" % reason)
return None
def success(self):
'''Increment the success count'''
self.incr("success")
return 1
def skipped(self):
'''Increment the skipped count'''
self.incr("skipped")
return 1
def __call__(self, node):
'''Perform the given test'''
raise ValueError("Abstract Class member (__call__)")
self.incr("calls")
return self.failure()
def audit(self):
passed = 1
if len(self.Audits) > 0:
for audit in self.Audits:
if not audit():
self.logger.log("Internal %s Audit %s FAILED." % (self.name, audit.name()))
self.incr("auditfail")
passed = 0
return passed
def setup(self, node):
'''Setup the given test'''
return self.success()
def teardown(self, node):
'''Tear down the given test'''
return self.success()
def create_watch(self, patterns, timeout, name=None):
if not name:
name = self.name
return LogWatcher(self.Env["LogFileName"], patterns, name, timeout, kind=self.Env["LogWatcher"], hosts=self.Env["nodes"])
def local_badnews(self, prefix, watch, local_ignore=[]):
errcount = 0
if not prefix:
prefix = "LocalBadNews:"
ignorelist = []
ignorelist.append(" CTS: ")
ignorelist.append(prefix)
ignorelist.extend(local_ignore)
while errcount < 100:
match = watch.look(0)
if match:
add_err = 1
for ignore in ignorelist:
if add_err == 1 and re.search(ignore, match):
add_err = 0
if add_err == 1:
self.logger.log(prefix + " " + match)
errcount = errcount + 1
else:
break
else:
self.logger.log("Too many errors!")
watch.end()
return errcount
def is_applicable(self):
return self.is_applicable_common()
def is_applicable_common(self):
'''Return TRUE if we are applicable in the current test configuration'''
#raise ValueError("Abstract Class member (is_applicable)")
if self.is_loop and not self.Env["loop-tests"]:
return 0
elif self.is_unsafe and not self.Env["unsafe-tests"]:
return 0
elif self.is_valgrind and not self.Env["valgrind-tests"]:
return 0
elif self.is_experimental and not self.Env["experimental-tests"]:
return 0
elif self.is_docker_unsafe and self.Env["docker"]:
return 0
elif self.is_container and not self.Env["container-tests"]:
return 0
elif self.Env["benchmark"] and self.benchmark == 0:
return 0
return 1
def find_ocfs2_resources(self, node):
self.r_o2cb = None
self.r_ocfs2 = []
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rtype == "o2cb" and r.parent != "NA":
self.debug("Found o2cb: %s" % self.r_o2cb)
self.r_o2cb = r.parent
if re.search("^Constraint", line):
c = AuditConstraint(self.CM, line)
if c.type == "rsc_colocation" and c.target == self.r_o2cb:
self.r_ocfs2.append(c.rsc)
self.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2))
return len(self.r_ocfs2)
def canrunnow(self, node):
'''Return TRUE if we can meaningfully run right now'''
return 1
def errorstoignore(self):
'''Return list of errors which are 'normal' and should be ignored'''
return []
class StopTest(CTSTest):
'''Stop (deactivate) the cluster manager on a node'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "Stop"
def __call__(self, node):
'''Perform the 'stop' test. '''
self.incr("calls")
if self.CM.ShouldBeStatus[node] != "up":
return self.skipped()
patterns = []
# Technically we should always be able to notice ourselves stopping
patterns.append(self.templates["Pat:We_stopped"] % node)
# Any active node needs to notice this one left
# (note that this won't work if we have multiple partitions)
for other in self.Env["nodes"]:
if self.CM.ShouldBeStatus[other] == "up" and other != node:
patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node)))
#self.debug("Checking %s will notice %s left"%(other, node))
watch = self.create_watch(patterns, self.Env["DeadTime"])
watch.setwatch()
if node == self.CM.OurNode:
self.incr("us")
else:
if self.CM.upcount() <= 1:
self.incr("all")
else:
self.incr("them")
self.CM.StopaCM(node)
watch_result = watch.lookforall()
failreason = None
UnmatchedList = "||"
if watch.unmatched:
(rc, output) = self.rsh(node, "/bin/ps axf", None)
for line in output:
self.debug(line)
(rc, output) = self.rsh(node, "/usr/sbin/dlm_tool dump", None)
for line in output:
self.debug(line)
for regex in watch.unmatched:
self.logger.log ("ERROR: Shutdown pattern not found: %s" % (regex))
                UnmatchedList += regex + "||"
failreason = "Missing shutdown pattern"
self.CM.cluster_stable(self.Env["DeadTime"])
if not watch.unmatched or self.CM.upcount() == 0:
return self.success()
if len(watch.unmatched) >= self.CM.upcount():
return self.failure("no match against (%s)" % UnmatchedList)
if failreason == None:
return self.success()
else:
return self.failure(failreason)
#
# We don't register StopTest because it's better when called by
# another test...
#
class StartTest(CTSTest):
'''Start (activate) the cluster manager on a node'''
def __init__(self, cm, debug=None):
CTSTest.__init__(self,cm)
self.name = "start"
self.debug = debug
def __call__(self, node):
'''Perform the 'start' test. '''
self.incr("calls")
if self.CM.upcount() == 0:
self.incr("us")
else:
self.incr("them")
if self.CM.ShouldBeStatus[node] != "down":
return self.skipped()
elif self.CM.StartaCM(node):
return self.success()
else:
return self.failure("Startup %s on node %s failed"
% (self.Env["Name"], node))
#
# We don't register StartTest because it's better when called by
# another test...
#
class FlipTest(CTSTest):
    '''If it's running, stop it. If it's stopped, start it.
Overthrow the status quo...
'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Flip"
self.start = StartTest(cm)
self.stop = StopTest(cm)
def __call__(self, node):
'''Perform the 'Flip' test. '''
self.incr("calls")
if self.CM.ShouldBeStatus[node] == "up":
self.incr("stopped")
ret = self.stop(node)
type = "up->down"
# Give the cluster time to recognize it's gone...
time.sleep(self.Env["StableTime"])
elif self.CM.ShouldBeStatus[node] == "down":
self.incr("started")
ret = self.start(node)
type = "down->up"
else:
return self.skipped()
self.incr(type)
if ret:
return self.success()
else:
return self.failure("%s failure" % type)
# Register FlipTest as a good test to run
AllTestClasses.append(FlipTest)
class RestartTest(CTSTest):
'''Stop and restart a node'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Restart"
self.start = StartTest(cm)
self.stop = StopTest(cm)
self.benchmark = 1
def __call__(self, node):
'''Perform the 'restart' test. '''
self.incr("calls")
self.incr("node:" + node)
if self.CM.StataCM(node):
self.incr("WasStopped")
if not self.start(node):
return self.failure("start (setup) failure: "+node)
self.set_timer()
if not self.stop(node):
return self.failure("stop failure: "+node)
if not self.start(node):
return self.failure("start failure: "+node)
return self.success()
# Register RestartTest as a good test to run
AllTestClasses.append(RestartTest)
class StonithdTest(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "Stonithd"
self.startall = SimulStartLite(cm)
self.benchmark = 1
def __call__(self, node):
self.incr("calls")
if len(self.Env["nodes"]) < 2:
return self.skipped()
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
is_dc = self.CM.is_node_dc(node)
watchpats = []
watchpats.append(self.templates["Pat:FenceOpOK"] % node)
watchpats.append(self.templates["Pat:NodeFenced"] % node)
if self.Env["at-boot"] == 0:
self.debug("Expecting %s to stay down" % node)
self.CM.ShouldBeStatus[node] = "down"
else:
self.debug("Expecting %s to come up again %d" % (node, self.Env["at-boot"]))
watchpats.append("%s.* S_STARTING -> S_PENDING" % node)
watchpats.append("%s.* S_PENDING -> S_NOT_DC" % node)
watch = self.create_watch(watchpats, 30 + self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
watch.setwatch()
origin = self.Env.RandomGen.choice(self.Env["nodes"])
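        # The fencing request may originate from any node, including the
        # victim itself; the rc checks below treat each case differently.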
rc = self.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node)
if rc == 194:
# 194 - 256 = -62 = Timer expired
#
# Look for the patterns, usually this means the required
# device was running on the node to be fenced - or that
# the required devices were in the process of being loaded
# and/or moved
#
# Effectively the node committed suicide so there will be
# no confirmation, but pacemaker should be watching and
# fence the node again
self.logger.log("Fencing command on %s to fence %s timed out" % (origin, node))
elif origin != node and rc != 0:
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
self.debug("Waiting for fenced node to come back up")
self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
self.logger.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc))
elif origin == node and rc != 255:
            # 255 == broken pipe, i.e. the node was fenced as expected
self.logger.log("Locally originated fencing returned %d" % rc)
self.set_timer("fence")
matched = watch.lookforall()
self.log_timer("fence")
self.set_timer("reform")
if watch.unmatched:
self.logger.log("Patterns not found: " + repr(watch.unmatched))
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
self.debug("Waiting for fenced node to come back up")
self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
self.debug("Waiting for the cluster to re-stabilize with all nodes")
is_stable = self.CM.cluster_stable(self.Env["StartTime"])
if not matched:
return self.failure("Didn't find all expected patterns")
elif not is_stable:
return self.failure("Cluster did not become stable")
self.log_timer("reform")
return self.success()
def errorstoignore(self):
return [
self.templates["Pat:Fencing_start"] % ".*",
self.templates["Pat:Fencing_ok"] % ".*",
r"error.*: Resource .*stonith::.* is active on 2 nodes attempting recovery",
r"error.*: Operation reboot of .*by .* for stonith_admin.*: Timer expired",
]
def is_applicable(self):
if not self.is_applicable_common():
return 0
if "DoFencing" in list(self.Env.keys()):
return self.Env["DoFencing"]
return 1
AllTestClasses.append(StonithdTest)
class StartOnebyOne(CTSTest):
'''Start all the nodes ~ one by one'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "StartOnebyOne"
self.stopall = SimulStopLite(cm)
self.start = StartTest(cm)
self.ns = CTS.NodeStatus(cm.Env)
def __call__(self, dummy):
'''Perform the 'StartOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Test setup failed")
failed = []
self.set_timer()
for node in self.Env["nodes"]:
if not self.start(node):
failed.append(node)
if len(failed) > 0:
return self.failure("Some node failed to start: " + repr(failed))
return self.success()
# Register StartOnebyOne as a good test to run
AllTestClasses.append(StartOnebyOne)
class SimulStart(CTSTest):
'''Start all the nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SimulStart"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
def __call__(self, dummy):
'''Perform the 'SimulStart' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Setup failed")
if not self.startall(None):
return self.failure("Startall failed")
return self.success()
# Register SimulStart as a good test to run
AllTestClasses.append(SimulStart)
class SimulStop(CTSTest):
'''Stop all the nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SimulStop"
self.startall = SimulStartLite(cm)
self.stopall = SimulStopLite(cm)
def __call__(self, dummy):
'''Perform the 'SimulStop' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
if not self.stopall(None):
return self.failure("Stopall failed")
return self.success()
# Register SimulStop as a good test to run
AllTestClasses.append(SimulStop)
class StopOnebyOne(CTSTest):
'''Stop all the nodes in order'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "StopOnebyOne"
self.startall = SimulStartLite(cm)
self.stop = StopTest(cm)
def __call__(self, dummy):
'''Perform the 'StopOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
failed = []
self.set_timer()
for node in self.Env["nodes"]:
if not self.stop(node):
failed.append(node)
if len(failed) > 0:
return self.failure("Some node failed to stop: " + repr(failed))
return self.success()
# Register StopOnebyOne as a good test to run
AllTestClasses.append(StopOnebyOne)
class RestartOnebyOne(CTSTest):
'''Restart all the nodes in order'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "RestartOnebyOne"
self.startall = SimulStartLite(cm)
def __call__(self, dummy):
'''Perform the 'RestartOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
did_fail = []
self.set_timer()
self.restart = RestartTest(self.CM)
for node in self.Env["nodes"]:
if not self.restart(node):
did_fail.append(node)
if did_fail:
return self.failure("Could not restart %d nodes: %s"
% (len(did_fail), repr(did_fail)))
return self.success()
# Register StopOnebyOne as a good test to run
AllTestClasses.append(RestartOnebyOne)
class PartialStart(CTSTest):
'''Start a node - but tell it to stop before it finishes starting up'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "PartialStart"
self.startall = SimulStartLite(cm)
self.stopall = SimulStopLite(cm)
self.stop = StopTest(cm)
#self.is_unsafe = 1
def __call__(self, node):
'''Perform the 'PartialStart' test. '''
self.incr("calls")
ret = self.stopall(None)
if not ret:
return self.failure("Setup failed")
# FIXME! This should use the CM class to get the pattern
# then it would be applicable in general
watchpats = []
watchpats.append("pacemaker-controld.*Connecting to cluster infrastructure")
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
self.CM.StartaCMnoBlock(node)
ret = watch.lookforall()
if not ret:
self.logger.log("Patterns not found: " + repr(watch.unmatched))
return self.failure("Setup of %s failed" % node)
ret = self.stop(node)
if not ret:
return self.failure("%s did not stop in time" % node)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
# We might do some fencing in the 2-node case if we make it up far enough
return [
r"Executing reboot fencing operation",
r"Requesting fencing \([^)]+\) of node ",
]
# Register StopOnebyOne as a good test to run
AllTestClasses.append(PartialStart)
class StandbyTest(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Standby"
self.benchmark = 1
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
# make sure the node is active
# set the node to standby mode
# check resources, none resource should be running on the node
# set the node to active mode
    # check resources, resources should have been migrated back (SHOULD THEY?)
def __call__(self, node):
self.incr("calls")
ret = self.startall(None)
if not ret:
return self.failure("Start all nodes failed")
self.debug("Make sure node %s is active" % node)
if self.CM.StandbyStatus(node) != "off":
if not self.CM.SetStandbyMode(node, "off"):
return self.failure("can't set node %s to active mode" % node)
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "off":
return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
self.debug("Getting resources running on node %s" % node)
rsc_on_node = self.CM.active_resources(node)
watchpats = []
watchpats.append(r"State transition .* -> S_POLICY_ENGINE")
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
self.debug("Setting node %s to standby mode" % node)
if not self.CM.SetStandbyMode(node, "on"):
return self.failure("can't set node %s to standby mode" % node)
self.set_timer("on")
ret = watch.lookforall()
if not ret:
self.logger.log("Patterns not found: " + repr(watch.unmatched))
self.CM.SetStandbyMode(node, "off")
return self.failure("cluster didn't react to standby change on %s" % node)
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "on":
return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status))
self.log_timer("on")
self.debug("Checking resources")
bad_run = self.CM.active_resources(node)
if len(bad_run) > 0:
rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run)))
self.debug("Setting node %s to active mode" % node)
self.CM.SetStandbyMode(node, "off")
return rc
self.debug("Setting node %s to active mode" % node)
if not self.CM.SetStandbyMode(node, "off"):
return self.failure("can't set node %s to active mode" % node)
self.set_timer("off")
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "off":
return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
self.log_timer("off")
return self.success()
AllTestClasses.append(StandbyTest)
class ValgrindTest(CTSTest):
'''Check for memory leaks'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Valgrind"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
self.is_valgrind = 1
self.is_loop = 1
def setup(self, node):
self.incr("calls")
ret = self.stopall(None)
if not ret:
return self.failure("Stop all nodes failed")
# @TODO Edit /etc/sysconfig/pacemaker on all nodes to enable valgrind,
# and clear any valgrind logs from previous runs. For now, we rely on
# the user to do this manually.
ret = self.startall(None)
if not ret:
return self.failure("Start all nodes failed")
return self.success()
def teardown(self, node):
# Return all nodes to normal
# @TODO Edit /etc/sysconfig/pacemaker on all nodes to disable valgrind
ret = self.stopall(None)
if not ret:
return self.failure("Stop all nodes failed")
return self.success()
def find_leaks(self):
# Check for leaks
# (no longer used but kept in case feature is restored)
leaked = []
self.stop = StopTest(self.CM)
for node in self.Env["nodes"]:
rc = self.stop(node)
if not rc:
self.failure("Couldn't shut down %s" % node)
rc = self.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logger.logPat, 0)
if rc != 1:
leaked.append(node)
self.failure("Valgrind errors detected on %s" % node)
(rc, output) = self.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logger.logPat, None)
for line in output:
self.logger.log(line)
(rc, output) = self.rsh(node, "cat %s" % self.logger.logPat, None)
for line in output:
self.debug(line)
self.rsh(node, "rm -f %s" % self.logger.logPat, None)
return leaked
def __call__(self, node):
#leaked = self.find_leaks()
#if len(leaked) > 0:
# return self.failure("Nodes %s leaked" % repr(leaked))
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"pacemaker-based.*: \*\*\*\*\*\*\*\*\*\*\*\*\*",
r"pacemaker-based.*: .* avoid confusing Valgrind",
r"HA_VALGRIND_ENABLED",
]
class StandbyLoopTest(ValgrindTest):
'''Check for memory leaks by putting a node in and out of standby for an hour'''
# @TODO This is not a useful test for memory leaks
def __init__(self, cm):
ValgrindTest.__init__(self,cm)
self.name = "StandbyLoop"
def __call__(self, node):
lpc = 0
delay = 2
failed = 0
done = time.time() + self.Env["loop-minutes"] * 60
while time.time() <= done and not failed:
lpc = lpc + 1
time.sleep(delay)
if not self.CM.SetStandbyMode(node, "on"):
self.failure("can't set node %s to standby mode" % node)
failed = lpc
time.sleep(delay)
if not self.CM.SetStandbyMode(node, "off"):
self.failure("can't set node %s to active mode" % node)
failed = lpc
leaked = self.find_leaks()
if failed:
return self.failure("Iteration %d failed" % failed)
elif len(leaked) > 0:
return self.failure("Nodes %s leaked" % repr(leaked))
return self.success()
#AllTestClasses.append(StandbyLoopTest)
class BandwidthTest(CTSTest):
# Tests should not be cluster-manager-specific
# If you need to find out cluster manager configuration to do this, then
# it should be added to the generic cluster manager API.
'''Test the bandwidth which the cluster uses'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "Bandwidth"
self.start = StartTest(cm)
self.__setitem__("min",0)
self.__setitem__("max",0)
self.__setitem__("totalbandwidth",0)
(handle, self.tempfile) = tempfile.mkstemp(".cts")
os.close(handle)
self.startall = SimulStartLite(cm)
def __call__(self, node):
'''Perform the Bandwidth test'''
self.incr("calls")
if self.CM.upcount() < 1:
return self.skipped()
Path = self.CM.InternalCommConfig()
if "ip" not in Path["mediatype"]:
return self.skipped()
port = Path["port"][0]
port = int(port)
ret = self.startall(None)
if not ret:
return self.failure("Test setup failed")
time.sleep(5) # We get extra messages right after startup.
fstmpfile = "/var/run/band_estimate"
dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \
% (port, fstmpfile)
rc = self.rsh(node, dumpcmd)
if rc == 0:
farfile = "root@%s:%s" % (node, fstmpfile)
self.rsh.cp(farfile, self.tempfile)
Bandwidth = self.countbandwidth(self.tempfile)
if not Bandwidth:
self.logger.log("Could not compute bandwidth.")
return self.success()
intband = int(Bandwidth + 0.5)
self.logger.log("...bandwidth: %d bits/sec" % intband)
self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth
if self.Stats["min"] == 0:
self.Stats["min"] = Bandwidth
if Bandwidth > self.Stats["max"]:
self.Stats["max"] = Bandwidth
if Bandwidth < self.Stats["min"]:
self.Stats["min"] = Bandwidth
self.rsh(node, "rm -f %s" % fstmpfile)
os.unlink(self.tempfile)
return self.success()
else:
return self.failure("no response from tcpdump command [%d]!" % rc)
def countbandwidth(self, file):
fp = open(file, "r")
fp.seek(0)
count = 0
sum = 0
while 1:
line = fp.readline()
if not line:
return None
if re.search("udp",line) or re.search("UDP,", line):
count = count + 1
linesplit = line.split(" ")
for j in range(len(linesplit)-1):
if linesplit[j] == "udp": break
if linesplit[j] == "length:": break
try:
sum = sum + int(linesplit[j+1])
except ValueError:
self.logger.log("Invalid tcpdump line: %s" % line)
return None
T1 = linesplit[0]
timesplit = T1.split(":")
time2split = timesplit[2].split(".")
time1 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001
break
while count < 100:
line = fp.readline()
if not line:
return None
if re.search("udp",line) or re.search("UDP,", line):
count = count+1
linessplit = line.split(" ")
for j in range(len(linessplit)-1):
if linessplit[j] == "udp": break
- if linesplit[j] == "length:": break
+ if linessplit[j] == "length:": break
try:
sum = int(linessplit[j+1]) + sum
except ValueError:
self.logger.log("Invalid tcpdump line: %s" % line)
return None
T2 = linessplit[0]
timesplit = T2.split(":")
time2split = timesplit[2].split(".")
time2 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001
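        # sum holds the packet bytes reported by tcpdump for the sampled UDP
        # traffic; multiply by 8 for bits and divide by the capture interval.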
        elapsed = time2 - time1
        if elapsed <= 0:
            return 0
        return int((sum * 8) / elapsed)
def is_applicable(self):
'''BandwidthTest never applicable'''
return 0
AllTestClasses.append(BandwidthTest)
###################################################################
class MaintenanceMode(CTSTest):
###################################################################
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "MaintenanceMode"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.max = 30
#self.is_unsafe = 1
self.benchmark = 1
self.action = "asyncmon"
self.interval = 0
self.rid = "maintenanceDummy"
def toggleMaintenanceMode(self, node, action):
pats = []
pats.append(self.templates["Pat:DC_IDLE"])
# fail the resource right after turning Maintenance mode on
# verify it is not recovered until maintenance mode is turned off
if action == "On":
pats.append(r"schedulerd.*:\s+warning:.*Processing failed %s of %s on" % (self.action, self.rid))
else:
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid))
pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid))
watch = self.create_watch(pats, 60)
watch.setwatch()
self.debug("Turning maintenance mode %s" % action)
self.rsh(node, self.templates["MaintenanceMode%s" % (action)])
if (action == "On"):
self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
self.set_timer("recover%s" % (action))
watch.lookforall()
self.log_timer("recover%s" % (action))
if watch.unmatched:
self.debug("Failed to find patterns when turning maintenance mode %s" % action)
return repr(watch.unmatched)
return ""
def insertMaintenanceDummy(self, node):
pats = []
pats.append(("%s.*" % node) + (self.templates["Pat:RscOpOK"] % ("start", self.rid)))
watch = self.create_watch(pats, 60)
watch.setwatch()
self.CM.AddDummyRsc(node, self.rid)
self.set_timer("addDummy")
watch.lookforall()
self.log_timer("addDummy")
if watch.unmatched:
self.debug("Failed to find patterns when adding maintenance dummy resource")
return repr(watch.unmatched)
return ""
def removeMaintenanceDummy(self, node):
pats = []
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid))
watch = self.create_watch(pats, 60)
watch.setwatch()
self.CM.RemoveDummyRsc(node, self.rid)
self.set_timer("removeDummy")
watch.lookforall()
self.log_timer("removeDummy")
if watch.unmatched:
self.debug("Failed to find patterns when removing maintenance dummy resource")
return repr(watch.unmatched)
return ""
def managedRscList(self, node):
rscList = []
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
tmp = AuditResource(self.CM, line)
if tmp.managed():
rscList.append(tmp.id)
return rscList
def verifyResources(self, node, rscList, managed):
managedList = list(rscList)
managed_str = "managed"
if not managed:
managed_str = "unmanaged"
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
tmp = AuditResource(self.CM, line)
if managed and not tmp.managed():
continue
elif not managed and tmp.managed():
continue
elif managedList.count(tmp.id):
managedList.remove(tmp.id)
if len(managedList) == 0:
self.debug("Found all %s resources on %s" % (managed_str, node))
return True
self.logger.log("Could not find all %s resources on %s. %s" % (managed_str, node, managedList))
return False
def __call__(self, node):
'''Perform the 'MaintenanceMode' test. '''
self.incr("calls")
verify_managed = False
verify_unmanaged = False
failPat = ""
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
# get a list of all the managed resources. We use this list
# after enabling maintenance mode to verify all managed resources
# become un-managed. After maintenance mode is turned off, we use
# this list to verify all the resources become managed again.
managedResources = self.managedRscList(node)
if len(managedResources) == 0:
self.logger.log("No managed resources on %s" % node)
return self.skipped()
# insert a fake resource we can fail during maintenance mode
# so we can verify recovery does not take place until after maintenance
# mode is disabled.
failPat = failPat + self.insertMaintenanceDummy(node)
# toggle maintenance mode ON, then fail dummy resource.
failPat = failPat + self.toggleMaintenanceMode(node, "On")
# verify all the resources are now unmanaged
if self.verifyResources(node, managedResources, False):
verify_unmanaged = True
# Toggle maintenance mode OFF, verify dummy is recovered.
failPat = failPat + self.toggleMaintenanceMode(node, "Off")
# verify all the resources are now managed again
if self.verifyResources(node, managedResources, True):
verify_managed = True
# Remove our maintenance dummy resource.
failPat = failPat + self.removeMaintenanceDummy(node)
self.CM.cluster_stable()
if failPat != "":
return self.failure("Unmatched patterns: %s" % (failPat))
elif verify_unmanaged is False:
return self.failure("Failed to verify resources became unmanaged during maintenance mode")
elif verify_managed is False:
return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode")
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"Updating failcount for %s" % self.rid,
r"schedulerd.*: Recover %s\s*\(.*\)" % self.rid,
r"Unknown operation: fail",
self.templates["Pat:RscOpOK"] % (self.action, self.rid),
r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
]
AllTestClasses.append(MaintenanceMode)
class ResourceRecover(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "ResourceRecover"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.max = 30
self.rid = None
self.rid_alt = None
#self.is_unsafe = 1
self.benchmark = 1
# these are the values used for the new LRM API call
self.action = "asyncmon"
self.interval = 0
def __call__(self, node):
'''Perform the 'ResourceRecover' test. '''
self.incr("calls")
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
resourcelist = self.CM.active_resources(node)
        # if the resource list is empty, return directly
if len(resourcelist) == 0:
self.logger.log("No active resources on %s" % node)
return self.skipped()
self.rid = self.Env.RandomGen.choice(resourcelist)
self.rid_alt = self.rid
rsc = None
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
tmp = AuditResource(self.CM, line)
if tmp.id == self.rid:
rsc = tmp
# Handle anonymous clones that get renamed
self.rid = rsc.clone_id
break
if not rsc:
return self.failure("Could not find %s in the resource list" % self.rid)
self.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id))
pats = []
pats.append(r"schedulerd.*:\s+warning:.*Processing failed %s of (%s|%s) on" % (self.action,
rsc.id, rsc.clone_id))
if rsc.managed():
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid))
if rsc.unique():
pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid))
else:
# Anonymous clones may get restarted with a different clone number
pats.append(self.templates["Pat:RscOpOK"] % ("start", ".*"))
watch = self.create_watch(pats, 60)
watch.setwatch()
self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
self.set_timer("recover")
watch.lookforall()
self.log_timer("recover")
self.CM.cluster_stable()
recovered = self.CM.ResourceLocation(self.rid)
if watch.unmatched:
return self.failure("Patterns not found: %s" % repr(watch.unmatched))
elif rsc.unique() and len(recovered) > 1:
return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered)))
elif len(recovered) > 0:
self.debug("%s is running on: %s" % (self.rid, repr(recovered)))
elif rsc.managed():
return self.failure("%s was not recovered and is inactive" % self.rid)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"Updating failcount for %s" % self.rid,
r"schedulerd.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt),
r"Unknown operation: fail",
self.templates["Pat:RscOpOK"] % (self.action, self.rid),
r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
]
AllTestClasses.append(ResourceRecover)
class ComponentFail(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "ComponentFail"
# TODO make this work correctly in docker.
self.is_docker_unsafe = 1
self.startall = SimulStartLite(cm)
self.complist = cm.Components()
self.patterns = []
self.okerrpatterns = []
self.is_unsafe = 1
def __call__(self, node):
'''Perform the 'ComponentFail' test. '''
self.incr("calls")
self.patterns = []
self.okerrpatterns = []
# start all nodes
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
if not self.CM.cluster_stable(self.Env["StableTime"]):
return self.failure("Setup failed - unstable")
node_is_dc = self.CM.is_node_dc(node, None)
# select a component to kill
chosen = self.Env.RandomGen.choice(self.complist)
while chosen.dc_only == 1 and node_is_dc == 0:
chosen = self.Env.RandomGen.choice(self.complist)
self.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot))
self.incr(chosen.name)
if chosen.name != "corosync":
self.patterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name))
self.patterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
self.patterns.extend(chosen.pats)
if node_is_dc:
self.patterns.extend(chosen.dc_pats)
if chosen.name == "pacemaker-fenced":
# Ignore actions for STONITH resources
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rclass == "stonith":
self.okerrpatterns.append(self.templates["Pat:Fencing_recover"] % r.id)
# supply a copy so self.patterns doesn't end up empty
tmpPats = []
tmpPats.extend(self.patterns)
self.patterns.extend(chosen.badnews_ignore)
        # Look for STONITH ops; depending on Env["at-boot"], we might need to change the node's status
stonithPats = []
stonithPats.append(self.templates["Pat:Fencing_ok"] % node)
stonith = self.create_watch(stonithPats, 0)
stonith.setwatch()
# set the watch for stable
watch = self.create_watch(
tmpPats, self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
watch.setwatch()
# kill the component
chosen.kill(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
self.debug("Waiting for any fenced node to come back up")
self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
self.debug("Waiting for the cluster to re-stabilize with all nodes")
self.CM.cluster_stable(self.Env["StartTime"])
self.debug("Checking if %s was shot" % node)
shot = stonith.look(60)
if shot:
self.debug("Found: " + repr(shot))
self.okerrpatterns.append(self.templates["Pat:Fencing_start"] % node)
if self.Env["at-boot"] == 0:
self.CM.ShouldBeStatus[node] = "down"
            # If fencing occurred, chances are many (if not all) of the
            # expected logs will not be sent - or will be lost when the
            # node reboots
return self.success()
# check for logs indicating a graceful recovery
matched = watch.lookforall(allow_multiple_matches=1)
if watch.unmatched:
self.logger.log("Patterns not found: " + repr(watch.unmatched))
self.debug("Waiting for the cluster to re-stabilize with all nodes")
is_stable = self.CM.cluster_stable(self.Env["StartTime"])
if not matched:
return self.failure("Didn't find all expected %s patterns" % chosen.name)
elif not is_stable:
return self.failure("Cluster did not become stable after killing %s" % chosen.name)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
# Note that okerrpatterns refers to the last time we ran this test
# The good news is that this works fine for us...
self.okerrpatterns.extend(self.patterns)
return self.okerrpatterns
AllTestClasses.append(ComponentFail)
class SplitBrainTest(CTSTest):
    '''Test split-brain handling: break the communication path between the
    nodes, then check whether both sides try to take over the same resources'''
def __init__(self,cm):
CTSTest.__init__(self,cm)
self.name = "SplitBrain"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.is_experimental = 1
def isolate_partition(self, partition):
other_nodes = []
other_nodes.extend(self.Env["nodes"])
for node in partition:
try:
other_nodes.remove(node)
except ValueError:
self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]) + " from " +repr(partition))
if len(other_nodes) == 0:
return 1
self.debug("Creating partition: " + repr(partition))
self.debug("Everyone else: " + repr(other_nodes))
for node in partition:
if not self.CM.isolate_node(node, other_nodes):
self.logger.log("Could not isolate %s" % node)
return 0
return 1
def heal_partition(self, partition):
other_nodes = []
other_nodes.extend(self.Env["nodes"])
for node in partition:
try:
other_nodes.remove(node)
except ValueError:
self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]))
if len(other_nodes) == 0:
return 1
self.debug("Healing partition: " + repr(partition))
self.debug("Everyone else: " + repr(other_nodes))
for node in partition:
self.CM.unisolate_node(node, other_nodes)
def __call__(self, node):
'''Perform split-brain test'''
self.incr("calls")
self.passed = 1
partitions = {}
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
while 1:
# Retry until we get multiple partitions
partitions = {}
p_max = len(self.Env["nodes"])
for node in self.Env["nodes"]:
p = self.Env.RandomGen.randint(1, p_max)
if not p in partitions:
partitions[p] = []
partitions[p].append(node)
p_max = len(list(partitions.keys()))
if p_max > 1:
break
# else, try again
self.debug("Created %d partitions" % p_max)
for key in list(partitions.keys()):
self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key]))
# Disabling STONITH to reduce test complexity for now
self.rsh(node, "crm_attribute -V -n stonith-enabled -v false")
for key in list(partitions.keys()):
self.isolate_partition(partitions[key])
count = 30
while count > 0:
            if len(self.CM.find_partitions()) != p_max:
                time.sleep(10)
                count -= 1
            else:
                break
else:
self.failure("Expected partitions were not created")
# Target number of partitions formed - wait for stability
if not self.CM.cluster_stable():
self.failure("Partitioned cluster not stable")
# Now audit the cluster state
self.CM.partitions_expected = p_max
if not self.audit():
self.failure("Audits failed")
self.CM.partitions_expected = 1
# And heal them again
for key in list(partitions.keys()):
self.heal_partition(partitions[key])
# Wait for a single partition to form
count = 30
while count > 0:
if len(self.CM.find_partitions()) != 1:
time.sleep(10)
count -= 1
else:
break
else:
self.failure("Cluster did not reform")
# Wait for it to have the right number of members
count = 30
while count > 0:
members = []
partitions = self.CM.find_partitions()
if len(partitions) > 0:
members = partitions[0].split()
if len(members) != len(self.Env["nodes"]):
time.sleep(10)
count -= 1
else:
break
else:
self.failure("Cluster did not completely reform")
        # Wait up to 20 minutes - the delay is preferable to trying to
        # continue in a messed-up state
if not self.CM.cluster_stable(1200):
self.failure("Reformed cluster not stable")
if self.Env["continue"] == 1:
answer = "Y"
else:
try:
answer = input_wrapper('Continue? [nY]')
except EOFError as e:
answer = "n"
if answer and answer == "n":
raise ValueError("Reformed cluster not stable")
# Turn fencing back on
if self.Env["DoFencing"]:
self.rsh(node, "crm_attribute -V -D -n stonith-enabled")
self.CM.cluster_stable()
if self.passed:
return self.success()
return self.failure("See previous errors")
def errorstoignore(self):
'''Return list of errors which are 'normal' and should be ignored'''
return [
r"Another DC detected:",
r"(ERROR|error).*: .*Application of an update diff failed",
r"pacemaker-controld.*:.*not in our membership list",
r"CRIT:.*node.*returning after partition",
]
def is_applicable(self):
if not self.is_applicable_common():
return 0
return len(self.Env["nodes"]) > 2
AllTestClasses.append(SplitBrainTest)
class Reattach(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Reattach"
self.startall = SimulStartLite(cm)
self.restart1 = RestartTest(cm)
self.stopall = SimulStopLite(cm)
self.is_unsafe = 0 # Handled by canrunnow()
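    # A note on the check below: crm_attribute -G queries a value, -q prints
    # only the value itself, and -d supplies the default ("true") returned
    # when the rsc_defaults is-managed attribute is unset.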
def _is_managed(self, node):
is_managed = self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -q -G -d true", 1)
is_managed = is_managed[:-1] # Strip off the newline
return is_managed == "true"
def _set_unmanaged(self, node):
self.debug("Disable resource management")
self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -v false")
def _set_managed(self, node):
self.debug("Re-enable resource management")
self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -D")
def setup(self, node):
attempt = 0
if not self.startall(None):
return None
# Make sure we are really _really_ stable and that all
# resources, including those that depend on transient node
# attributes, are started
while not self.CM.cluster_stable(double_check=True):
if attempt < 5:
attempt += 1
self.debug("Not stable yet, re-testing")
else:
self.logger.log("Cluster is not stable")
return None
return 1
def teardown(self, node):
# Make sure 'node' is up
start = StartTest(self.CM)
start(node)
if not self._is_managed(node):
self.logger.log("Attempting to re-enable resource management on %s" % node)
self._set_managed(node)
self.CM.cluster_stable()
if not self._is_managed(node):
self.logger.log("Could not re-enable resource management")
return 0
return 1
def canrunnow(self, node):
'''Return TRUE if we can meaningfully run right now'''
if self.find_ocfs2_resources(node):
self.logger.log("Detach/Reattach scenarios are not possible with OCFS2 services present")
return 0
return 1
def __call__(self, node):
self.incr("calls")
pats = []
# Conveniently, the scheduler will display this message when disabling
# management, even if fencing is not enabled, so we can rely on it.
managed = self.create_watch(["Delaying fencing operations"], 60)
managed.setwatch()
self._set_unmanaged(node)
if not managed.lookforall():
self.logger.log("Patterns not found: " + repr(managed.unmatched))
return self.failure("Resource management not disabled")
pats = []
pats.append(self.templates["Pat:RscOpOK"] % ("start", ".*"))
pats.append(self.templates["Pat:RscOpOK"] % ("stop", ".*"))
pats.append(self.templates["Pat:RscOpOK"] % ("promote", ".*"))
pats.append(self.templates["Pat:RscOpOK"] % ("demote", ".*"))
pats.append(self.templates["Pat:RscOpOK"] % ("migrate", ".*"))
watch = self.create_watch(pats, 60, "ShutdownActivity")
watch.setwatch()
self.debug("Shutting down the cluster")
ret = self.stopall(None)
if not ret:
self._set_managed(node)
return self.failure("Couldn't shut down the cluster")
self.debug("Bringing the cluster back up")
ret = self.startall(None)
time.sleep(5) # allow ping to update the CIB
if not ret:
self._set_managed(node)
return self.failure("Couldn't restart the cluster")
if self.local_badnews("ResourceActivity:", watch):
self._set_managed(node)
return self.failure("Resources stopped or started during cluster restart")
watch = self.create_watch(pats, 60, "StartupActivity")
watch.setwatch()
# Re-enable resource management (and verify it happened).
self._set_managed(node)
self.CM.cluster_stable()
if not self._is_managed(node):
return self.failure("Could not re-enable resource management")
# Ignore actions for STONITH resources
ignore = []
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rclass == "stonith":
self.debug("Ignoring start actions for %s" % r.id)
ignore.append(self.templates["Pat:RscOpOK"] % ("start", r.id))
if self.local_badnews("ResourceActivity:", watch, ignore):
return self.failure("Resources stopped or started after resource management was re-enabled")
        return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"resource( was|s were) active at shutdown",
]
def is_applicable(self):
return 1
AllTestClasses.append(Reattach)
class SpecialTest1(CTSTest):
'''Set up a custom test to cause quorum failure issues for Andrew'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SpecialTest1"
self.startall = SimulStartLite(cm)
self.restart1 = RestartTest(cm)
self.stopall = SimulStopLite(cm)
def __call__(self, node):
'''Perform the 'SpecialTest1' test for Andrew. '''
self.incr("calls")
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Could not stop all nodes")
# Test config recovery when the other nodes come up
self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*")
# Start the selected node
ret = self.restart1(node)
if not ret:
return self.failure("Could not start "+node)
# Start all remaining nodes
ret = self.startall(None)
if not ret:
return self.failure("Could not start the remaining nodes")
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
# Errors that occur as a result of the CIB being wiped
return [
r"error.*: v1 patchset error, patch failed to apply: Application of an update diff failed",
r"error.*: Resource start-up disabled since no STONITH resources have been defined",
r"error.*: Either configure some or disable STONITH with the stonith-enabled option",
r"error.*: NOTE: Clusters with shared data need STONITH to ensure data integrity",
]
AllTestClasses.append(SpecialTest1)
class HAETest(CTSTest):
    '''Base class for tests of the HA Extension (DLM/O2CB/OCFS2) stack'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "HAETest"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
self.is_loop = 1
def setup(self, node):
# Start all remaining nodes
ret = self.startall(None)
if not ret:
return self.failure("Couldn't start all nodes")
return self.success()
def teardown(self, node):
# Stop everything
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
return self.success()
def wait_on_state(self, node, resource, expected_clones, attempts=240):
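        # Poll "crm_resource -W -Q" about once a second until the resource
        # reports the expected number of active instances, or give up after
        # 'attempts' tries.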
while attempts > 0:
active = 0
(rc, lines) = self.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None)
# Hack until crm_resource does the right thing
if rc == 0 and lines:
active = len(lines)
if len(lines) == expected_clones:
return 1
elif rc == 1:
self.debug("Resource %s is still inactive" % resource)
elif rc == 234:
self.logger.log("Unknown resource %s" % resource)
return 0
elif rc == 246:
self.logger.log("Cluster is inactive")
return 0
elif rc != 0:
self.logger.log("Call to crm_resource failed, rc=%d" % rc)
return 0
else:
self.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones))
attempts -= 1
time.sleep(1)
return 0
def find_dlm(self, node):
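        # The DLM resource uses the ocf:pacemaker:controld agent, so scan the
        # resource list for a "controld" resource that is part of a clone
        # (i.e. r.parent != "NA").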
self.r_dlm = None
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rtype == "controld" and r.parent != "NA":
self.debug("Found dlm: %s" % self.r_dlm)
self.r_dlm = r.parent
return 1
return 0
def find_hae_resources(self, node):
self.r_dlm = None
self.r_o2cb = None
self.r_ocfs2 = []
if self.find_dlm(node):
self.find_ocfs2_resources(node)
def is_applicable(self):
if not self.is_applicable_common():
return 0
if self.Env["Schema"] == "hae":
return 1
return None
class HAERoleTest(HAETest):
def __init__(self, cm):
'''Lars' mount/unmount test for the HA extension. '''
HAETest.__init__(self,cm)
self.name = "HAERoleTest"
def change_state(self, node, resource, target):
rc = self.rsh(node, "crm_resource -V -r %s -p target-role -v %s --meta" % (resource, target))
return rc
def __call__(self, node):
self.incr("calls")
lpc = 0
failed = 0
delay = 2
done = time.time() + self.Env["loop-minutes"]*60
self.find_hae_resources(node)
clone_max = len(self.Env["nodes"])
while time.time() <= done and not failed:
lpc = lpc + 1
self.change_state(node, self.r_dlm, "Stopped")
if not self.wait_on_state(node, self.r_dlm, 0):
self.failure("%s did not go down correctly" % self.r_dlm)
failed = lpc
self.change_state(node, self.r_dlm, "Started")
if not self.wait_on_state(node, self.r_dlm, clone_max):
self.failure("%s did not come up correctly" % self.r_dlm)
failed = lpc
if not self.wait_on_state(node, self.r_o2cb, clone_max):
self.failure("%s did not come up correctly" % self.r_o2cb)
failed = lpc
for fs in self.r_ocfs2:
if not self.wait_on_state(node, fs, clone_max):
self.failure("%s did not come up correctly" % fs)
failed = lpc
if failed:
return self.failure("iteration %d failed" % failed)
return self.success()
AllTestClasses.append(HAERoleTest)
class HAEStandbyTest(HAETest):
    '''Toggle node standby status and verify that the HAE resources move correctly'''
def __init__(self, cm):
HAETest.__init__(self,cm)
self.name = "HAEStandbyTest"
def change_state(self, node, resource, target):
rc = self.rsh(node, "crm_standby -V -l reboot -v %s" % (target))
return rc
def __call__(self, node):
self.incr("calls")
lpc = 0
failed = 0
done = time.time() + self.Env["loop-minutes"]*60
self.find_hae_resources(node)
clone_max = len(self.Env["nodes"])
while time.time() <= done and not failed:
lpc = lpc + 1
self.change_state(node, self.r_dlm, "true")
if not self.wait_on_state(node, self.r_dlm, clone_max-1):
self.failure("%s did not go down correctly" % self.r_dlm)
failed = lpc
self.change_state(node, self.r_dlm, "false")
if not self.wait_on_state(node, self.r_dlm, clone_max):
self.failure("%s did not come up correctly" % self.r_dlm)
failed = lpc
if not self.wait_on_state(node, self.r_o2cb, clone_max):
self.failure("%s did not come up correctly" % self.r_o2cb)
failed = lpc
for fs in self.r_ocfs2:
if not self.wait_on_state(node, fs, clone_max):
self.failure("%s did not come up correctly" % fs)
failed = lpc
if failed:
return self.failure("iteration %d failed" % failed)
return self.success()
AllTestClasses.append(HAEStandbyTest)
class NearQuorumPointTest(CTSTest):
'''
This test brings larger clusters near the quorum point (50%).
In addition, it will test doing starts and stops at the same time.
Here is how I think it should work:
- loop over the nodes and decide randomly which will be up and which
      will be down. Use a 50% probability for each of up/down.
- figure out what to do to get into that state from the current state
- in parallel, bring up those going up and bring those going down.
'''
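    # Illustrative sketch of the split performed in __call__ below: each
    # node independently draws "start" or "stop" (roughly 50% each), which
    # yields the up/down sets described above.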
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "NearQuorumPoint"
def __call__(self, dummy):
'''Perform the 'NearQuorumPoint' test. '''
self.incr("calls")
startset = []
stopset = []
stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint")
#decide what to do with each node
for node in self.Env["nodes"]:
action = self.Env.RandomGen.choice(["start","stop"])
#action = self.Env.RandomGen.choice(["start","stop","no change"])
if action == "start" :
startset.append(node)
elif action == "stop" :
stopset.append(node)
self.debug("start nodes:" + repr(startset))
self.debug("stop nodes:" + repr(stopset))
#add search patterns
watchpats = [ ]
for node in stopset:
if self.CM.ShouldBeStatus[node] == "up":
watchpats.append(self.templates["Pat:We_stopped"] % node)
for node in startset:
if self.CM.ShouldBeStatus[node] == "down":
#watchpats.append(self.templates["Pat:NonDC_started"] % node)
watchpats.append(self.templates["Pat:Local_started"] % node)
else:
for stopping in stopset:
if self.CM.ShouldBeStatus[stopping] == "up":
watchpats.append(self.templates["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping)))
if len(watchpats) == 0:
return self.skipped()
if len(startset) != 0:
watchpats.append(self.templates["Pat:DC_IDLE"])
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
#begin actions
for node in stopset:
if self.CM.ShouldBeStatus[node] == "up":
self.CM.StopaCMnoBlock(node)
for node in startset:
if self.CM.ShouldBeStatus[node] == "down":
self.CM.StartaCMnoBlock(node)
#get the result
if watch.lookforall():
self.CM.cluster_stable()
self.CM.fencing_cleanup("NearQuorumPoint", stonith)
return self.success()
self.logger.log("Warn: Patterns not found: " + repr(watch.unmatched))
#get the "bad" nodes
upnodes = []
for node in stopset:
if self.CM.StataCM(node) == 1:
upnodes.append(node)
downnodes = []
for node in startset:
if self.CM.StataCM(node) == 0:
downnodes.append(node)
self.CM.fencing_cleanup("NearQuorumPoint", stonith)
if upnodes == [] and downnodes == []:
self.CM.cluster_stable()
            # Make sure they're completely down with no residue
for node in stopset:
self.rsh(node, self.templates["StopCmd"])
return self.success()
if len(upnodes) > 0:
self.logger.log("Warn: Unstoppable nodes: " + repr(upnodes))
if len(downnodes) > 0:
self.logger.log("Warn: Unstartable nodes: " + repr(downnodes))
return self.failure()
def is_applicable(self):
return 1
AllTestClasses.append(NearQuorumPointTest)
class RollingUpgradeTest(CTSTest):
'''Perform a rolling upgrade of the cluster'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "RollingUpgrade"
self.start = StartTest(cm)
self.stop = StopTest(cm)
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
def setup(self, node):
# Start all remaining nodes
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
for node in self.Env["nodes"]:
if not self.downgrade(node, None):
return self.failure("Couldn't downgrade %s" % node)
ret = self.startall(None)
if not ret:
return self.failure("Couldn't start all nodes")
return self.success()
def teardown(self, node):
# Stop everything
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
for node in self.Env["nodes"]:
if not self.upgrade(node, None):
return self.failure("Couldn't upgrade %s" % node)
return self.success()
def install(self, node, version, start=1, flags="--force"):
target_dir = "/tmp/rpm-%s" % version
src_dir = "%s/%s" % (self.Env["rpm-dir"], version)
self.logger.log("Installing %s on %s with %s" % (version, node, flags))
if not self.stop(node):
return self.failure("stop failure: "+node)
rc = self.rsh(node, "mkdir -p %s" % target_dir)
rc = self.rsh(node, "rm -f %s/*.rpm" % target_dir)
(rc, lines) = self.rsh(node, "ls -1 %s/*.rpm" % src_dir, None)
for line in lines:
line = line[:-1]
rc = self.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir))
rc = self.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir))
if start and not self.start(node):
return self.failure("start failure: "+node)
return self.success()
def upgrade(self, node, start=1):
return self.install(node, self.Env["current-version"], start)
def downgrade(self, node, start=1):
return self.install(node, self.Env["previous-version"], start, "--force --nodeps")
def __call__(self, node):
'''Perform the 'Rolling Upgrade' test. '''
self.incr("calls")
for node in self.Env["nodes"]:
            if not self.upgrade(node):
return self.failure("Couldn't upgrade %s" % node)
self.CM.cluster_stable()
return self.success()
def is_applicable(self):
if not self.is_applicable_common():
return None
if not "rpm-dir" in list(self.Env.keys()):
return None
if not "current-version" in list(self.Env.keys()):
return None
if not "previous-version" in list(self.Env.keys()):
return None
return 1
# Register RestartTest as a good test to run
AllTestClasses.append(RollingUpgradeTest)
class BSC_AddResource(CTSTest):
'''Add a resource to the cluster'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "AddResource"
self.resource_offset = 0
self.cib_cmd = """cibadmin -C -o %s -X '%s' """
def __call__(self, node):
self.incr("calls")
self.resource_offset = self.resource_offset + 1
r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset)
start_pat = "pacemaker-controld.*%s_start_0.*confirmed.*ok"
patterns = []
patterns.append(start_pat % r_id)
watch = self.create_watch(patterns, self.Env["DeadTime"])
watch.setwatch()
ip = self.NextIP()
if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip):
return self.failure("Make resource %s failed" % r_id)
failed = 0
watch_result = watch.lookforall()
if watch.unmatched:
for regex in watch.unmatched:
self.logger.log ("Warn: Pattern not found: %s" % (regex))
failed = 1
if failed:
return self.failure("Resource pattern(s) not found")
if not self.CM.cluster_stable(self.Env["DeadTime"]):
return self.failure("Unstable cluster")
return self.success()
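    # NextIP() bumps the last group of Env["IPBase"]; for example
    # (illustrative only), "10.0.0.10" becomes "10.0.0.11" and, for IPv6,
    # "fdc0::10" becomes "fdc0::11" via a hex increment.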
    def NextIP(self):
        ip = self.Env["IPBase"]
        if ":" in ip:
            # Bump the last hex group of an IPv6 address
            (prefix, sep, suffix) = ip.rpartition(":")
            suffix = format(int(suffix, 16) + 1, "x")
        else:
            # Bump the last octet of an IPv4 address
            (prefix, sep, suffix) = ip.rpartition(".")
            suffix = str(int(suffix) + 1)
        ip = prefix + sep + suffix
        self.Env["IPBase"] = ip
        return ip.strip()
def make_ip_resource(self, node, id, rclass, type, ip):
self.logger.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node))
rsc_xml="""
<primitive id="%s" class="%s" type="%s" provider="heartbeat">
<instance_attributes id="%s"><attributes>
<nvpair id="%s" name="ip" value="%s"/>
</attributes></instance_attributes>
</primitive>""" % (id, rclass, type, id, id, ip)
node_constraint = """
<rsc_location id="run_%s" rsc="%s">
<rule id="pref_run_%s" score="100">
<expression id="%s_loc_expr" attribute="#uname" operation="eq" value="%s"/>
</rule>
</rsc_location>""" % (id, id, id, id, node)
rc = 0
(rc, lines) = self.rsh(node, self.cib_cmd % ("constraints", node_constraint), None)
if rc != 0:
self.logger.log("Constraint creation failed: %d" % rc)
return None
(rc, lines) = self.rsh(node, self.cib_cmd % ("resources", rsc_xml), None)
if rc != 0:
self.logger.log("Resource creation failed: %d" % rc)
return None
return 1
def is_applicable(self):
if self.Env["DoBSC"]:
return 1
return None
AllTestClasses.append(BSC_AddResource)
class SimulStopLite(CTSTest):
'''Stop any active nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SimulStopLite"
def __call__(self, dummy):
'''Perform the 'SimulStopLite' setup work. '''
self.incr("calls")
self.debug("Setup: " + self.name)
# We ignore the "node" parameter...
watchpats = [ ]
for node in self.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "up":
self.incr("WasStarted")
watchpats.append(self.templates["Pat:We_stopped"] % node)
if len(watchpats) == 0:
return self.success()
# Stop all the nodes - at about the same time...
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
self.set_timer()
for node in self.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "up":
self.CM.StopaCMnoBlock(node)
if watch.lookforall():
            # Make sure they're completely down with no residue
for node in self.Env["nodes"]:
self.rsh(node, self.templates["StopCmd"])
return self.success()
did_fail = 0
up_nodes = []
for node in self.Env["nodes"]:
if self.CM.StataCM(node) == 1:
did_fail = 1
up_nodes.append(node)
if did_fail:
return self.failure("Active nodes exist: " + repr(up_nodes))
self.logger.log("Warn: All nodes stopped but CTS didnt detect: "
+ repr(watch.unmatched))
return self.failure("Missing log message: "+repr(watch.unmatched))
def is_applicable(self):
'''SimulStopLite is a setup test and never applicable'''
return 0
class SimulStartLite(CTSTest):
'''Start any stopped nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SimulStartLite"
def __call__(self, dummy):
        '''Perform the 'SimulStartLite' setup work. '''
self.incr("calls")
self.debug("Setup: " + self.name)
# We ignore the "node" parameter...
node_list = []
for node in self.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "down":
self.incr("WasStopped")
node_list.append(node)
self.set_timer()
while len(node_list) > 0:
# Repeat until all nodes come up
watchpats = [ ]
uppat = self.templates["Pat:NonDC_started"]
if self.CM.upcount() == 0:
uppat = self.templates["Pat:Local_started"]
watchpats.append(self.templates["Pat:DC_IDLE"])
for node in node_list:
watchpats.append(uppat % node)
watchpats.append(self.templates["Pat:InfraUp"] % node)
watchpats.append(self.templates["Pat:PacemakerUp"] % node)
# Start all the nodes - at about the same time...
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
stonith = self.CM.prepare_fencing_watcher(self.name)
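            # prepare_fencing_watcher() lets fencing_cleanup() below report
            # any nodes that were fenced while starting up, so they can be
            # retried on the next pass of this loop.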
for node in node_list:
self.CM.StartaCMnoBlock(node)
watch.lookforall()
node_list = self.CM.fencing_cleanup(self.name, stonith)
            if node_list is None:
return self.failure("Cluster did not stabilize")
# Remove node_list messages from watch.unmatched
for node in node_list:
self.logger.debug("Dealing with stonith operations for %s" % repr(node_list))
if watch.unmatched:
                    try:
                        watch.unmatched.remove(uppat % node)
                    except ValueError:
                        self.debug("Already matched: %s" % (uppat % node))
                    try:
                        watch.unmatched.remove(self.templates["Pat:InfraUp"] % node)
                    except ValueError:
                        self.debug("Already matched: %s" % (self.templates["Pat:InfraUp"] % node))
                    try:
                        watch.unmatched.remove(self.templates["Pat:PacemakerUp"] % node)
                    except ValueError:
                        self.debug("Already matched: %s" % (self.templates["Pat:PacemakerUp"] % node))
if watch.unmatched:
for regex in watch.unmatched:
self.logger.log ("Warn: Startup pattern not found: %s" %(regex))
if not self.CM.cluster_stable():
return self.failure("Cluster did not stabilize")
did_fail = 0
unstable = []
for node in self.Env["nodes"]:
if self.CM.StataCM(node) == 0:
did_fail = 1
unstable.append(node)
if did_fail:
return self.failure("Unstarted nodes exist: " + repr(unstable))
unstable = []
for node in self.Env["nodes"]:
if not self.CM.node_stable(node):
did_fail = 1
unstable.append(node)
if did_fail:
return self.failure("Unstable cluster nodes exist: " + repr(unstable))
return self.success()
def is_applicable(self):
'''SimulStartLite is a setup test and never applicable'''
return 0
def TestList(cm, audits):
result = []
for testclass in AllTestClasses:
bound_test = testclass(cm)
if bound_test.is_applicable():
bound_test.Audits = audits
result.append(bound_test)
return result
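# Hypothetical driver-side usage (names assumed from the CTS lab scripts):
#   audits = CTSaudits.AuditList(cm)
#   tests = TestList(cm, audits)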
class RemoteLXC(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "RemoteLXC"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.num_containers = 2
self.is_container = 1
self.is_docker_unsafe = 1
self.failed = 0
self.fail_string = ""
def start_lxc_simple(self, node):
        # restore any artifacts lying around from a previous test.
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null")
# generate the containers, put them in the config, add some resources to them
pats = [ ]
watch = self.create_watch(pats, 120)
watch.setwatch()
pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc1"))
pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc2"))
pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc-ms"))
pats.append(self.templates["Pat:RscOpOK"] % ("promote", "lxc-ms"))
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers)
self.set_timer("remoteSimpleInit")
watch.lookforall()
self.log_timer("remoteSimpleInit")
if watch.unmatched:
self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
self.failed = 1
def cleanup_lxc_simple(self, node):
pats = [ ]
# if the test failed, attempt to clean up the cib and libvirt environment
# as best as possible
if self.failed == 1:
# restore libvirt and cib
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null")
return
watch = self.create_watch(pats, 120)
watch.setwatch()
pats.append(self.templates["Pat:RscOpOK"] % ("stop", "container1"))
pats.append(self.templates["Pat:RscOpOK"] % ("stop", "container2"))
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null")
self.set_timer("remoteSimpleCleanup")
watch.lookforall()
self.log_timer("remoteSimpleCleanup")
if watch.unmatched:
self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
self.failed = 1
# cleanup libvirt
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null")
def __call__(self, node):
'''Perform the 'RemoteLXC' test. '''
self.incr("calls")
ret = self.startall(None)
if not ret:
return self.failure("Setup failed, start all nodes failed.")
rc = self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null")
if rc == 1:
self.log("Environment test for lxc support failed.")
return self.skipped()
self.start_lxc_simple(node)
self.cleanup_lxc_simple(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed == 1:
return self.failure(self.fail_string)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"Updating failcount for ping",
r"schedulerd.*: Recover (ping|lxc-ms|container)\s*\(.*\)",
# The orphaned lxc-ms resource causes an expected transition error
# that is a result of the scheduler not having knowledge that the
# promotable resource used to be a clone. As a result, it looks like that
# resource is running in multiple locations when it shouldn't... But in
# this instance we know why this error is occurring and that it is expected.
r"Calculated [Tt]ransition .*pe-error",
r"Resource lxc-ms .* is active on 2 nodes attempting recovery",
r"Unknown operation: fail",
r"VirtualDomain.*ERROR: Unable to determine emulator",
]
AllTestClasses.append(RemoteLXC)
class RemoteDriver(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = self.__class__.__name__
self.is_docker_unsafe = 1
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.stop = StopTest(cm)
self.remote_rsc = "remote-rsc"
self.cib_cmd = """cibadmin -C -o %s -X '%s' """
self.reset()
def reset(self):
self.pcmk_started = 0
self.failed = False
self.fail_string = ""
self.remote_node_added = 0
self.remote_rsc_added = 0
self.remote_use_reconnect_interval = self.Env.RandomGen.choice([True,False])
def fail(self, msg):
""" Mark test as failed. """
self.failed = True
# Always log the failure.
self.logger.log(msg)
# Use first failure as test status, as it's likely to be most useful.
if not self.fail_string:
self.fail_string = msg
def get_othernode(self, node):
for othernode in self.Env["nodes"]:
if othernode == node:
                # we don't want to try to use the CIB on the node we just
                # shut down; find a cluster node that is not our soon-to-be
                # remote node.
continue
else:
return othernode
def del_rsc(self, node, rsc):
othernode = self.get_othernode(node)
rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc))
if rc != 0:
self.fail("Removal of resource '%s' failed" % rsc)
def add_rsc(self, node, rsc_xml):
othernode = self.get_othernode(node)
rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml))
if rc != 0:
self.fail("resource creation failed")
def add_primitive_rsc(self, node):
rsc_xml = """
<primitive class="ocf" id="%s" provider="heartbeat" type="Dummy">
<operations>
<op id="remote-rsc-monitor-interval-10s" interval="10s" name="monitor"/>
</operations>
<meta_attributes id="remote-meta_attributes"/>
</primitive>""" % (self.remote_rsc)
self.add_rsc(node, rsc_xml)
if not self.failed:
self.remote_rsc_added = 1
def add_connection_rsc(self, node):
if self.remote_use_reconnect_interval:
# use reconnect interval and make sure to set cluster-recheck-interval as well.
rsc_xml = """
<primitive class="ocf" id="%s" provider="pacemaker" type="remote">
<instance_attributes id="remote-instance_attributes"/>
<instance_attributes id="remote-instance_attributes">
<nvpair id="remote-instance_attributes-server" name="server" value="%s"/>
<nvpair id="remote-instance_attributes-reconnect_interval" name="reconnect_interval" value="60s"/>
</instance_attributes>
<operations>
<op id="remote-monitor-interval-60s" interval="60s" name="monitor"/>
<op id="remote-name-start-interval-0-timeout-120" interval="0" name="start" timeout="60"/>
</operations>
</primitive>""" % (self.remote_node, node)
self.rsh(self.get_othernode(node), self.templates["SetCheckInterval"] % ("45s"))
else:
# not using reconnect interval
rsc_xml = """
<primitive class="ocf" id="%s" provider="pacemaker" type="remote">
<instance_attributes id="remote-instance_attributes"/>
<instance_attributes id="remote-instance_attributes">
<nvpair id="remote-instance_attributes-server" name="server" value="%s"/>
</instance_attributes>
<operations>
<op id="remote-monitor-interval-60s" interval="60s" name="monitor"/>
<op id="remote-name-start-interval-0-timeout-120" interval="0" name="start" timeout="120"/>
</operations>
</primitive>""" % (self.remote_node, node)
self.add_rsc(node, rsc_xml)
if not self.failed:
self.remote_node_added = 1
def stop_pcmk_remote(self, node):
# disable pcmk remote
for i in range(10):
rc = self.rsh(node, "service pacemaker_remote stop")
if rc != 0:
time.sleep(6)
else:
break
def start_pcmk_remote(self, node):
for i in range(10):
rc = self.rsh(node, "service pacemaker_remote start")
if rc != 0:
time.sleep(6)
else:
self.pcmk_started = 1
break
def kill_pcmk_remote(self, node):
""" Simulate a Pacemaker Remote daemon failure. """
# We kill the process to prevent a graceful stop,
# then stop it to prevent the OS from restarting it.
self.rsh(node, "killall -9 pacemaker-remoted")
self.stop_pcmk_remote(node)
def start_metal(self, node):
pcmk_started = 0
# make sure the resource doesn't already exist for some reason
self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_rsc))
self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_node))
if not self.stop(node):
self.fail("Failed to shutdown cluster node %s" % node)
return
self.start_pcmk_remote(node)
if self.pcmk_started == 0:
self.fail("Failed to start pacemaker_remote on node %s" % node)
return
        # Convert the node to bare metal now that it has shut down the cluster stack
pats = [ ]
watch = self.create_watch(pats, 120)
watch.setwatch()
pats.append(self.templates["Pat:RscOpOK"] % ("start", self.remote_node))
pats.append(self.templates["Pat:DC_IDLE"])
self.add_connection_rsc(node)
self.set_timer("remoteMetalInit")
watch.lookforall()
self.log_timer("remoteMetalInit")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
def migrate_connection(self, node):
if self.failed:
return
pats = [ ]
pats.append(self.templates["Pat:RscOpOK"] % ("migrate_to", self.remote_node))
pats.append(self.templates["Pat:RscOpOK"] % ("migrate_from", self.remote_node))
pats.append(self.templates["Pat:DC_IDLE"])
watch = self.create_watch(pats, 120)
watch.setwatch()
(rc, lines) = self.rsh(node, "crm_resource -M -r %s" % (self.remote_node), None)
if rc != 0:
self.fail("failed to move remote node connection resource")
return
self.set_timer("remoteMetalMigrate")
watch.lookforall()
self.log_timer("remoteMetalMigrate")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
return
def fail_rsc(self, node):
if self.failed:
return
watchpats = [ ]
watchpats.append(self.templates["Pat:RscRemoteOpOK"] % ("stop", self.remote_rsc, self.remote_node))
watchpats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node))
watchpats.append(self.templates["Pat:DC_IDLE"])
watch = self.create_watch(watchpats, 120)
watch.setwatch()
self.debug("causing dummy rsc to fail.")
rc = self.rsh(node, "rm -f /var/run/resource-agents/Dummy*")
self.set_timer("remoteRscFail")
watch.lookforall()
self.log_timer("remoteRscFail")
if watch.unmatched:
self.fail("Unmatched patterns during rsc fail: %s" % watch.unmatched)
def fail_connection(self, node):
if self.failed:
return
watchpats = [ ]
watchpats.append(self.templates["Pat:FenceOpOK"] % self.remote_node)
watchpats.append(self.templates["Pat:NodeFenced"] % self.remote_node)
watch = self.create_watch(watchpats, 120)
watch.setwatch()
# force stop the pcmk remote daemon. this will result in fencing
self.debug("Force stopped active remote node")
self.kill_pcmk_remote(node)
self.debug("Waiting for remote node to be fenced.")
self.set_timer("remoteMetalFence")
watch.lookforall()
self.log_timer("remoteMetalFence")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
return
self.debug("Waiting for the remote node to come back up")
        self.CM.ns.WaitForNodeToComeUp(node, 120)
pats = [ ]
watch = self.create_watch(pats, 240)
watch.setwatch()
pats.append(self.templates["Pat:RscOpOK"] % ("start", self.remote_node))
if self.remote_rsc_added == 1:
pats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node))
        # start the remote node again and watch it integrate back into the cluster.
self.start_pcmk_remote(node)
if self.pcmk_started == 0:
self.fail("Failed to start pacemaker_remote on node %s" % node)
return
self.debug("Waiting for remote node to rejoin cluster after being fenced.")
self.set_timer("remoteMetalRestart")
watch.lookforall()
self.log_timer("remoteMetalRestart")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
return
def add_dummy_rsc(self, node):
if self.failed:
return
# verify we can put a resource on the remote node
pats = [ ]
watch = self.create_watch(pats, 120)
watch.setwatch()
pats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node))
pats.append(self.templates["Pat:DC_IDLE"])
# Add a resource that must live on remote-node
self.add_primitive_rsc(node)
# force that rsc to prefer the remote node.
(rc, line) = self.CM.rsh(node, "crm_resource -M -r %s -N %s -f" % (self.remote_rsc, self.remote_node), None)
if rc != 0:
self.fail("Failed to place remote resource on remote node.")
return
self.set_timer("remoteMetalRsc")
watch.lookforall()
self.log_timer("remoteMetalRsc")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
def test_attributes(self, node):
if self.failed:
return
# This verifies permanent attributes can be set on a remote-node. It also
# verifies the remote-node can edit its own cib node section remotely.
(rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -v testval -N %s" % (self.remote_node), None)
if rc != 0:
self.fail("Failed to set remote-node attribute. rc:%s output:%s" % (rc, line))
return
(rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -q -N %s" % (self.remote_node), None)
if rc != 0:
self.fail("Failed to get remote-node attribute")
return
(rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -D -N %s" % (self.remote_node), None)
if rc != 0:
self.fail("Failed to delete remote-node attribute")
return
def cleanup_metal(self, node):
if self.pcmk_started == 0:
return
pats = [ ]
watch = self.create_watch(pats, 120)
watch.setwatch()
if self.remote_rsc_added == 1:
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.remote_rsc))
if self.remote_node_added == 1:
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.remote_node))
self.set_timer("remoteMetalCleanup")
if self.remote_use_reconnect_interval:
self.debug("Cleaning up re-check interval")
self.rsh(self.get_othernode(node), self.templates["ClearCheckInterval"])
if self.remote_rsc_added == 1:
# Remove dummy resource added for remote node tests
self.debug("Cleaning up dummy rsc put on remote node")
self.rsh(node, "crm_resource -U -r %s" % self.remote_rsc)
self.del_rsc(node, self.remote_rsc)
if self.remote_node_added == 1:
# Remove remote node's connection resource
self.debug("Cleaning up remote node connection resource")
self.rsh(node, "crm_resource -U -r %s" % (self.remote_node))
self.del_rsc(node, self.remote_node)
watch.lookforall()
self.log_timer("remoteMetalCleanup")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
self.stop_pcmk_remote(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.remote_node_added == 1:
# Remove remote node itself
self.debug("Cleaning up node entry for remote node")
self.rsh(self.get_othernode(node), "crm_node --force --remove %s" % self.remote_node)
def setup_env(self, node):
self.remote_node = "remote-%s" % (node)
# we are assuming if all nodes have a key, that it is
# the right key... If any node doesn't have a remote
# key, we regenerate it everywhere.
if self.rsh.exists_on_all("/etc/pacemaker/authkey", self.Env["nodes"]):
return
# create key locally
(handle, keyfile) = tempfile.mkstemp(".cts")
os.close(handle)
devnull = open(os.devnull, 'wb')
subprocess.check_call(["dd", "if=/dev/urandom", "of=%s" % keyfile, "bs=4096", "count=1"],
stdout=devnull, stderr=devnull)
devnull.close()
# sync key throughout the cluster
for node in self.Env["nodes"]:
self.rsh(node, "mkdir -p --mode=0750 /etc/pacemaker")
self.rsh.cp(keyfile, "root@%s:/etc/pacemaker/authkey" % node)
self.rsh(node, "chgrp haclient /etc/pacemaker /etc/pacemaker/authkey")
self.rsh(node, "chmod 0640 /etc/pacemaker/authkey")
os.unlink(keyfile)
def is_applicable(self):
if not self.is_applicable_common():
return False
for node in self.Env["nodes"]:
rc = self.rsh(node, "which pacemaker-remoted >/dev/null 2>&1")
if rc != 0:
return False
return True
def start_new_test(self, node):
self.incr("calls")
self.reset()
ret = self.startall(None)
if not ret:
return self.failure("setup failed: could not start all nodes")
self.setup_env(node)
self.start_metal(node)
self.add_dummy_rsc(node)
return True
def __call__(self, node):
return self.failure("This base class is not meant to be called directly.")
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [ """is running on remote.*which isn't allowed""",
"""Connection terminated""",
"""Failed to send remote""",
]
# RemoteDriver is just a base class for other tests, so it is not added to AllTestClasses
class RemoteBasic(RemoteDriver):
def __call__(self, node):
'''Perform the 'RemoteBaremetal' test. '''
if not self.start_new_test(node):
return self.failure(self.fail_string)
self.test_attributes(node)
self.cleanup_metal(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed:
return self.failure(self.fail_string)
return self.success()
AllTestClasses.append(RemoteBasic)
class RemoteStonithd(RemoteDriver):
def __call__(self, node):
'''Perform the 'RemoteStonithd' test. '''
if not self.start_new_test(node):
return self.failure(self.fail_string)
self.fail_connection(node)
self.cleanup_metal(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed:
return self.failure(self.fail_string)
return self.success()
def is_applicable(self):
if not RemoteDriver.is_applicable(self):
return False
if "DoFencing" in list(self.Env.keys()):
return self.Env["DoFencing"]
return True
def errorstoignore(self):
ignore_pats = [
r"Lost connection to Pacemaker Remote node",
r"Software caused connection abort",
r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
r"Calculated [Tt]ransition .*pe-error",
r"error.*: Resource .*ocf::.* is active on 2 nodes attempting recovery",
]
ignore_pats.extend(RemoteDriver.errorstoignore(self))
return ignore_pats
AllTestClasses.append(RemoteStonithd)
class RemoteMigrate(RemoteDriver):
def __call__(self, node):
'''Perform the 'RemoteMigrate' test. '''
if not self.start_new_test(node):
return self.failure(self.fail_string)
self.migrate_connection(node)
self.cleanup_metal(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed:
return self.failure(self.fail_string)
return self.success()
AllTestClasses.append(RemoteMigrate)
class RemoteRscFailure(RemoteDriver):
def __call__(self, node):
'''Perform the 'RemoteRscFailure' test. '''
if not self.start_new_test(node):
return self.failure(self.fail_string)
# This is an important step. We are migrating the connection
# before failing the resource. This verifies that the migration
# has properly maintained control over the remote-node.
self.migrate_connection(node)
self.fail_rsc(node)
self.cleanup_metal(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed:
return self.failure(self.fail_string)
return self.success()
def errorstoignore(self):
ignore_pats = [
r"schedulerd.*: Recover remote-rsc\s*\(.*\)",
r"Dummy.*: No process state file found",
]
ignore_pats.extend(RemoteDriver.errorstoignore(self))
return ignore_pats
AllTestClasses.append(RemoteRscFailure)
# vim:ts=4:sw=4:et:
diff --git a/cts/benchmark/clubench.in b/cts/benchmark/clubench.in
index 9194505cac..6adbe46cb0 100644
--- a/cts/benchmark/clubench.in
+++ b/cts/benchmark/clubench.in
@@ -1,195 +1,190 @@
#!/bin/sh
#
-PROG=`basename $0`
-DIR=`dirname $0`
SSHOPTS="-l root -o PasswordAuthentication=no -o ConnectTimeout=5"
msg() {
- echo $@ >&2
+ echo "$@" >&2
}
usage() {
echo "usage: $0 <dir>"
echo " dir: working directory (with the control file)"
exit 0
}
[ $# -eq 0 ] && usage
WORKDIR=$1
test -d "$WORKDIR" || usage
CTSCTRL=~/.cts
CTRL=$WORKDIR/control
CSV=$WORKDIR/bench.csv
STATS=$WORKDIR/bench.stats
test -f $CTRL && . $CTRL
@datadir@/@PACKAGE@/tests/cts/cluster_test 500 || {
msg "cluster_test failed"
exit 1
}
test -f $CTSCTRL || {
msg no CTS control file $CTSCTRL
exit 1
}
. $CTSCTRL
: ${CTS_logfacility:=local7}
: ${CTS_stack:=corosync}
: ${CTS_logfile:="@CRM_LOG_DIR@/ha-log-bench"}
: ${CTS_adv:="--schema pacemaker-1.2 --clobber-cib -r"}
: ${RUNS:=3}
: ${CTSTESTS:="--benchmark"}
: ${CTSDIR:="@datadir@/@PACKAGE@/tests/cts"}
[ "$CTS_node_list" ] || {
msg no node list specified
exit 1
}
case "$CTS_stack" in
corosync) CRM_REPORT_OPTS="--corosync";;
*) msg "$CTS_stack: cluster stack not recognized"; exit 1;;
esac
CTSOPTS="--stack $CTS_stack --at-boot $CTS_boot $CTS_adv"
CTSOPTS="$CTSOPTS --facility $CTS_logfacility --logfile $CTS_logfile"
if [ "x$CTS_stonith" != "x" ]; then
CTSOPTS="$CTSOPTS --stonith-type $CTS_stonith"
[ "x$CTS_stonith_args" != "x" ] &&
CTSOPTS="$CTSOPTS --stonith-params \"$CTS_stonith_args\""
else
CTSOPTS="$CTSOPTS --stonith 0"
fi
CTSOPTS="$CTSOPTS $CTSTESTS"
fibonacci() {
- local limit=$1
- local n=2 prev=1 tmp_n
- while [ $n -le $limit ]; do
- echo $n
- tmp_n=$n
- n=$((n+prev))
- prev=$tmp_n
+ F_LIMIT=$1
+ F_N=2
+ F_N_PREV=1
+ while [ $F_N -le $F_LIMIT ]; do
+ echo $F_N
+ F_N_TMP=$F_N
+ F_N=$((F_N+F_N_PREV))
+ F_N_PREV=$F_N_TMP
done
- [ $prev -ne $limit ] && echo $limit
+ [ $F_N_PREV -ne $F_LIMIT ] && echo $F_LIMIT
}
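# Example (illustrative): with 6 nodes in $CTS_node_list, "fibonacci 6"
# prints "2 3 5 6" -- the cluster sizes the benchmark iterates over.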
[ "$SERIES" ] ||
- SERIES=$(fibonacci `echo $CTS_node_list | wc -w`)
+ SERIES=$(fibonacci "$(echo $CTS_node_list | wc -w)")
get_nodes() {
- local c_nodes
- c_nodes=`echo $CTS_node_list | awk -v n=$1 '
+ GN_C_NODES=$(echo $CTS_node_list | awk -v n="$1" '
{ for( i=1; i<=NF; i++ ) node[cnt++]=$i }
END{for( i=0; i<n; i++ ) print node[i] }
- '`
- if [ `echo $c_nodes | wc -w` != $1 ]; then
+ ')
+ if [ "$(echo $GN_C_NODES | wc -w)" != "$1" ]; then
msg "not enough nodes in $CTSCTRL"
exit 1
fi
- echo $c_nodes
+ echo $GN_C_NODES
}
node_cleanup() {
msg "CIB cleanup ($nodes)"
- local n
- for n in $nodes; do
- ssh $SSHOPTS $n 'rm @CRM_CONFIG_DIR@/*'
+ for NC_N in $nodes; do
+ ssh $SSHOPTS $NC_N 'rm @CRM_CONFIG_DIR@/*'
done
}
testnum() {
printf '%03d' $1
}
mkreports() {
msg "Creating reports for the CTS run"
- local ctsdir=$1
- grep "Running test " $ctsdir/ctsrun.out | tr -d \[\] |
+ MKR_CTS_DIR=$1
+ grep "Running test " $MKR_CTS_DIR/ctsrun.out | tr -d \[\] |
awk '{print $6,$NF}' |
while read type num; do
teststg="`testnum $num`-$type"
(
- cd $ctsdir
- crm_report $CRM_REPORT_OPTS -f cts:$num -n "$nodes" `pwd`/$teststg < /dev/null
+ cd $MKR_CTS_DIR || return
+ crm_report $CRM_REPORT_OPTS -f "cts:$num" -n "$nodes" "$(pwd)/$teststg" < /dev/null
)
done
}
runcts() {
- local odir=$1
+ RC_ODIR="$1"
msg "Running CTS"
- python $CTSDIR/CTSlab.py $CTSOPTS --nodes "$nodes" > $odir/ctsrun.out 2>&1 &
+ python "$CTSDIR/CTSlab.py" $CTSOPTS --nodes "$nodes" > "$RC_ODIR/ctsrun.out" 2>&1 &
ctspid=$!
- tail -f $odir/ctsrun.out &
+ tail -f "$RC_ODIR/ctsrun.out" &
tailpid=$!
wait $ctspid
kill $tailpid >/dev/null 2>&1
}
bench_re='CTS:.*runtime:'
diginfo() {
- local d v
- local ctsdir=$1
- local s="$2"
- filter=$3
+ DI_CTS_DIR="$1"
+ DI_S="$2"
+ filter="$3"
(
- cd $ctsdir
+ cd "$DI_CTS_DIR" || return
for r in [0-9]*.tar.bz2; do
tar xjf $r
- d=`basename $r .tar.bz2`
- for v in `grep $bench_re $d/ha-log.txt | eval $filter`; do
- s="$s,$v"
+ DI_D=$(basename "$r" .tar.bz2)
+ for DI_V in $(grep "$bench_re" "$DI_D/ha-log.txt" | eval "$filter"); do
+ DI_S="$DI_S,$DI_V"
done
- rm -r $d
+ rm -r "$DI_D"
done
- echo $s
+ echo $DI_S
)
}
printheader() {
diginfo $1 "" "awk '{print \$(NF-2)}'"
}
printstats() {
diginfo $1 "$clusize" "awk '{print \$(NF)}'"
}
printmedians() {
- local f=$1
- local s="$clusize"
- local middle=$((RUNS/2 + 1))
- set `head -1 $f | sed 's/,/ /g'`
- local cols=$#
- local i v
- for i in `seq 2 $cols`; do
- v=`awk -v i=$i -F, '{print $i}' < $f | sort -n | head -$middle | tail -1`
- s="$s,$v"
+ PM_F="$1"
+ PM_S="$clusize"
+ PM_MIDDLE=$((RUNS/2 + 1))
+ set $(head -1 "$PM_F" | sed 's/,/ /g')
+ PM_COLS=$#
+ for PM_I in $(seq 2 $PM_COLS); do
+ PM_V=$(awk -v i=$PM_I -F, '{print $i}' < $PM_F | sort -n | head -$PM_MIDDLE | tail -1)
+ PM_S="$PM_S,$PM_V"
done
- echo $s
+ echo $PM_S
}
rm -f $CSV
tmpf=`mktemp`
test -f "$tmpf" || {
msg "can't create temporary file"
exit 1
}
trap "rm -f $tmpf" 0
for clusize in $SERIES; do
nodes=`get_nodes $clusize`
outdir=$WORKDIR/$clusize
rm -rf $outdir
mkdir -p $outdir
rm -f $tmpf
node_cleanup
for i in `seq $RUNS`; do
- > $CTS_logfile
+ true > $CTS_logfile
mkdir -p $outdir/$i
runcts $outdir/$i
mkreports $outdir/$i
printstats $outdir/$i >> $tmpf
done
[ -f "$CSV" ] || printheader $outdir/1 > $CSV
printmedians $tmpf >> $CSV
cat $tmpf >> $STATS
msg "Statistics for $clusize-node cluster saved"
done
msg "Tests done for series $SERIES, output in $CSV and $STATS"
diff --git a/cts/cts-cli.in b/cts/cts-cli.in
index 03cb67e7d7..a23b1e4ba6 100755
--- a/cts/cts-cli.in
+++ b/cts/cts-cli.in
@@ -1,968 +1,979 @@
#!@BASH_PATH@
#
# Copyright 2008-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
#
# Note on portable usage of sed: GNU/POSIX/*BSD sed have a limited subset of
# compatible functionality. Do not use the -i option, alternation (\|),
# \0, or character sequences such as \n or \s.
#
USAGE_TEXT="Usage: cts-cli [<options>]
Options:
--help Display this text, then exit
-V, --verbose Display any differences from expected output
-t 'TEST [...]' Run only specified tests (default: 'dates tools acls validity upgrade')
-p DIR Look for executables in DIR (may be specified multiple times)
-v, --valgrind Run all commands under valgrind
-s Save actual output as expected output"
# If readlink supports -e (i.e. GNU), use it
readlink -e / >/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
- test_home="$(dirname $(readlink -e $0))"
+ test_home="$(dirname "$(readlink -e "$0")")"
else
- test_home="$(dirname $0)"
+ test_home="$(dirname "$0")"
fi
: ${shadow=cts-cli}
shadow_dir=$(mktemp -d ${TMPDIR:-/tmp}/cts-cli.shadow.XXXXXXXXXX)
num_errors=0
num_passed=0
-GREP_OPTIONS=
verbose=0
tests="dates tools acls validity upgrade"
do_save=0
VALGRIND_CMD=
VALGRIND_OPTS="
-q
--gen-suppressions=all
--show-reachable=no
--leak-check=full
--trace-children=no
--time-stamp=yes
--num-callers=20
--suppressions=$test_home/valgrind-pcmk.suppressions
"
# These constants must track crm_exit_t values
CRM_EX_OK=0
CRM_EX_ERROR=1
CRM_EX_INSUFFICIENT_PRIV=4
CRM_EX_USAGE=64
CRM_EX_CONFIG=78
CRM_EX_OLD=103
CRM_EX_NOSUCH=105
CRM_EX_UNSAFE=107
CRM_EX_EXISTS=108
CRM_EX_MULTIPLE=109
function test_assert() {
target=$1; shift
cib=$1; shift
app=`echo "$cmd" | sed 's/\ .*//'`
printf "* Running: $app - $desc\n" 1>&2
printf "=#=#=#= Begin test: $desc =#=#=#=\n"
eval $VALGRIND_CMD $cmd 2>&1
rc=$?
if [ x$cib != x0 ]; then
printf "=#=#=#= Current cib after: $desc =#=#=#=\n"
CIB_user=root cibadmin -Q
fi
printf "=#=#=#= End test: $desc - $(crm_error --exit $rc) ($rc) =#=#=#=\n"
if [ $rc -ne $target ]; then
num_errors=$(( $num_errors + 1 ))
printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc"
printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc (`which $app`)" 1>&2
        return
else
printf "* Passed: %-14s - %s\n" $app "$desc"
num_passed=$(( $num_passed + 1 ))
fi
}
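# Callers set $desc and $cmd, then invoke test_assert with the expected
# crm_exit_t value, e.g. (taken from test_tools below):
#   desc="Validate CIB"; cmd="cibadmin -Q"; test_assert $CRM_EX_OK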
function test_tools() {
- local TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX)
- local TMPORIG=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.existing.xml.XXXXXXXXXX)
+ local TMPXML
+ local TMPORIG
+
+ TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX)
+ TMPORIG=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.existing.xml.XXXXXXXXXX)
export CIB_shadow_dir="${shadow_dir}"
$VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow 2>&1
export CIB_shadow=$shadow
desc="Validate CIB"
cmd="cibadmin -Q"
test_assert $CRM_EX_OK
desc="Configure something before erasing"
cmd="crm_attribute -n cluster-delay -v 60s"
test_assert $CRM_EX_OK
desc="Require --force for CIB erasure"
cmd="cibadmin -E"
test_assert $CRM_EX_UNSAFE
desc="Allow CIB erasure with --force"
cmd="cibadmin -E --force"
test_assert $CRM_EX_OK
desc="Query CIB"
cmd="cibadmin -Q > $TMPORIG"
test_assert $CRM_EX_OK
desc="Set cluster option"
cmd="crm_attribute -n cluster-delay -v 60s"
test_assert $CRM_EX_OK
desc="Query new cluster option"
cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay"
test_assert $CRM_EX_OK
desc="Query cluster options"
cmd="cibadmin -Q -o crm_config > $TMPXML"
test_assert $CRM_EX_OK
desc="Set no-quorum policy"
cmd="crm_attribute -n no-quorum-policy -v ignore"
test_assert $CRM_EX_OK
desc="Delete nvpair"
cmd="cibadmin -D -o crm_config --xml-text '<nvpair id=\"cib-bootstrap-options-cluster-delay\"/>'"
test_assert $CRM_EX_OK
desc="Create operation should fail"
cmd="cibadmin -C -o crm_config --xml-file $TMPXML"
test_assert $CRM_EX_EXISTS
desc="Modify cluster options section"
cmd="cibadmin -M -o crm_config --xml-file $TMPXML"
test_assert $CRM_EX_OK
desc="Query updated cluster option"
cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay"
test_assert $CRM_EX_OK
desc="Set duplicate cluster option"
cmd="crm_attribute -n cluster-delay -v 40s -s duplicate"
test_assert $CRM_EX_OK
desc="Setting multiply defined cluster option should fail"
cmd="crm_attribute -n cluster-delay -v 30s"
test_assert $CRM_EX_MULTIPLE
desc="Set cluster option with -s"
cmd="crm_attribute -n cluster-delay -v 30s -s duplicate"
test_assert $CRM_EX_OK
desc="Delete cluster option with -i"
cmd="crm_attribute -n cluster-delay -D -i cib-bootstrap-options-cluster-delay"
test_assert $CRM_EX_OK
desc="Create node1 and bring it online"
cmd="crm_simulate --live-check --in-place --node-up=node1"
test_assert $CRM_EX_OK
desc="Create node attribute"
cmd="crm_attribute -n ram -v 1024M -N node1 -t nodes"
test_assert $CRM_EX_OK
desc="Query new node attribute"
cmd="cibadmin -Q -o nodes | grep node1-ram"
test_assert $CRM_EX_OK
desc="Set a transient (fail-count) node attribute"
cmd="crm_attribute -n fail-count-foo -v 3 -N node1 -t status"
test_assert $CRM_EX_OK
desc="Query a fail count"
cmd="crm_failcount --query -r foo -N node1"
test_assert $CRM_EX_OK
desc="Delete a transient (fail-count) node attribute"
cmd="crm_attribute -n fail-count-foo -D -N node1 -t status"
test_assert $CRM_EX_OK
desc="Digest calculation"
cmd="cibadmin -Q | cibadmin -5 -p 2>&1 > /dev/null"
test_assert $CRM_EX_OK
# This update will fail because the CIB it restores carries older version
# numbers than the current CIB
desc="Replace operation should fail"
cmd="cibadmin -R --xml-file $TMPORIG"
test_assert $CRM_EX_OLD
desc="Default standby value"
cmd="crm_standby -N node1 -G"
test_assert $CRM_EX_OK
desc="Set standby status"
cmd="crm_standby -N node1 -v true"
test_assert $CRM_EX_OK
desc="Query standby value"
cmd="crm_standby -N node1 -G"
test_assert $CRM_EX_OK
desc="Delete standby value"
cmd="crm_standby -N node1 -D"
test_assert $CRM_EX_OK
desc="Create a resource"
cmd="cibadmin -C -o resources --xml-text '<primitive id=\"dummy\" class=\"ocf\" provider=\"pacemaker\" type=\"Dummy\"/>'"
test_assert $CRM_EX_OK
desc="Create a resource meta attribute"
cmd="crm_resource -r dummy --meta -p is-managed -v false"
test_assert $CRM_EX_OK
desc="Query a resource meta attribute"
cmd="crm_resource -r dummy --meta -g is-managed"
test_assert $CRM_EX_OK
desc="Remove a resource meta attribute"
cmd="crm_resource -r dummy --meta -d is-managed"
test_assert $CRM_EX_OK
desc="Create a resource attribute"
cmd="crm_resource -r dummy -p delay -v 10s"
test_assert $CRM_EX_OK
desc="List the configured resources"
cmd="crm_resource -L"
test_assert $CRM_EX_OK
desc="Require a destination when migrating a resource that is stopped"
cmd="crm_resource -r dummy -M"
test_assert $CRM_EX_USAGE
desc="Don't support migration to non-existent locations"
cmd="crm_resource -r dummy -M -N i.dont.exist"
test_assert $CRM_EX_NOSUCH
desc="Create a fencing resource"
cmd="cibadmin -C -o resources --xml-text '<primitive id=\"Fence\" class=\"stonith\" type=\"fence_true\"/>'"
test_assert $CRM_EX_OK
desc="Bring resources online"
cmd="crm_simulate --live-check --in-place -S"
test_assert $CRM_EX_OK
desc="Try to move a resource to its existing location"
cmd="crm_resource -r dummy --move --host node1"
test_assert $CRM_EX_EXISTS
desc="Move a resource from its existing location"
cmd="crm_resource -r dummy --move"
test_assert $CRM_EX_OK
desc="Clear out constraints generated by --move"
cmd="crm_resource -r dummy --clear"
test_assert $CRM_EX_OK
desc="Default ticket granted state"
cmd="crm_ticket -t ticketA -G granted -d false"
test_assert $CRM_EX_OK
desc="Set ticket granted state"
cmd="crm_ticket -t ticketA -r --force"
test_assert $CRM_EX_OK
desc="Query ticket granted state"
cmd="crm_ticket -t ticketA -G granted"
test_assert $CRM_EX_OK
desc="Delete ticket granted state"
cmd="crm_ticket -t ticketA -D granted --force"
test_assert $CRM_EX_OK
desc="Make a ticket standby"
cmd="crm_ticket -t ticketA -s"
test_assert $CRM_EX_OK
desc="Query ticket standby state"
cmd="crm_ticket -t ticketA -G standby"
test_assert $CRM_EX_OK
desc="Activate a ticket"
cmd="crm_ticket -t ticketA -a"
test_assert $CRM_EX_OK
desc="Delete ticket standby state"
cmd="crm_ticket -t ticketA -D standby"
test_assert $CRM_EX_OK
desc="Ban a resource on unknown node"
cmd="crm_resource -r dummy -B -N host1"
test_assert $CRM_EX_NOSUCH
desc="Create two more nodes and bring them online"
cmd="crm_simulate --live-check --in-place --node-up=node2 --node-up=node3"
test_assert $CRM_EX_OK
desc="Ban dummy from node1"
cmd="crm_resource -r dummy -B -N node1"
test_assert $CRM_EX_OK
desc="Ban dummy from node2"
cmd="crm_resource -r dummy -B -N node2"
test_assert $CRM_EX_OK
desc="Relocate resources due to ban"
cmd="crm_simulate --live-check --in-place -S"
test_assert $CRM_EX_OK
desc="Move dummy to node1"
cmd="crm_resource -r dummy -M -N node1"
test_assert $CRM_EX_OK
desc="Clear implicit constraints for dummy on node2"
cmd="crm_resource -r dummy -U -N node2"
test_assert $CRM_EX_OK
desc="Drop the status section"
cmd="cibadmin -R -o status --xml-text '<status/>'"
test_assert $CRM_EX_OK 0
desc="Create a clone"
cmd="cibadmin -C -o resources --xml-text '<clone id=\"test-clone\"><primitive id=\"test-primitive\" class=\"ocf\" provider=\"pacemaker\" type=\"Dummy\"/></clone>'"
test_assert $CRM_EX_OK 0
desc="Create a resource meta attribute"
cmd="crm_resource -r test-primitive --meta -p is-managed -v false"
test_assert $CRM_EX_OK
desc="Create a resource meta attribute in the primitive"
cmd="crm_resource -r test-primitive --meta -p is-managed -v false --force"
test_assert $CRM_EX_OK
desc="Update resource meta attribute with duplicates"
cmd="crm_resource -r test-clone --meta -p is-managed -v true"
test_assert $CRM_EX_OK
desc="Update resource meta attribute with duplicates (force clone)"
cmd="crm_resource -r test-clone --meta -p is-managed -v true --force"
test_assert $CRM_EX_OK
desc="Update child resource meta attribute with duplicates"
cmd="crm_resource -r test-primitive --meta -p is-managed -v false"
test_assert $CRM_EX_OK
desc="Delete resource meta attribute with duplicates"
cmd="crm_resource -r test-clone --meta -d is-managed"
test_assert $CRM_EX_OK
desc="Delete resource meta attribute in parent"
cmd="crm_resource -r test-primitive --meta -d is-managed"
test_assert $CRM_EX_OK
desc="Create a resource meta attribute in the primitive"
cmd="crm_resource -r test-primitive --meta -p is-managed -v false --force"
test_assert $CRM_EX_OK
desc="Update existing resource meta attribute"
cmd="crm_resource -r test-clone --meta -p is-managed -v true"
test_assert $CRM_EX_OK
desc="Create a resource meta attribute in the parent"
cmd="crm_resource -r test-clone --meta -p is-managed -v true --force"
test_assert $CRM_EX_OK
desc="Copy resources"
cmd="cibadmin -Q -o resources > $TMPXML"
test_assert $CRM_EX_OK 0
desc="Delete resource paremt meta attribute (force)"
cmd="crm_resource -r test-clone --meta -d is-managed --force"
test_assert $CRM_EX_OK
desc="Restore duplicates"
cmd="cibadmin -R -o resources --xml-file $TMPXML"
test_assert $CRM_EX_OK
desc="Delete resource child meta attribute"
cmd="crm_resource -r test-primitive --meta -d is-managed"
test_assert $CRM_EX_OK
unset CIB_shadow_dir
rm -f "$TMPXML" "$TMPORIG"
}
function test_dates() {
desc="2014-01-01 00:30:00 - 1 Hour"
cmd="iso8601 -d '2014-01-01 00:30:00Z' -D P-1H -E '2013-12-31 23:30:00Z'"
test_assert $CRM_EX_OK 0
for y in 06 07 08 09 10 11 12 13 14 15 16 17 18; do
desc="20$y-W01-7"
cmd="iso8601 -d '20$y-W01-7 00Z'"
test_assert $CRM_EX_OK 0
desc="20$y-W01-7 - round-trip"
cmd="iso8601 -d '20$y-W01-7 00Z' -W -E '20$y-W01-7 00:00:00Z'"
test_assert $CRM_EX_OK 0
desc="20$y-W01-1"
cmd="iso8601 -d '20$y-W01-1 00Z'"
test_assert $CRM_EX_OK 0
desc="20$y-W01-1 - round-trip"
cmd="iso8601 -d '20$y-W01-1 00Z' -W -E '20$y-W01-1 00:00:00Z'"
test_assert $CRM_EX_OK 0
done
desc="2009-W53-07"
cmd="iso8601 -d '2009-W53-7 00:00:00Z' -W -E '2009-W53-7 00:00:00Z'"
test_assert $CRM_EX_OK 0
desc="2009-01-31 + 1 Month"
cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P1M -E '2009-02-28 00:00:00Z'"
test_assert $CRM_EX_OK 0
desc="2009-01-31 + 2 Months"
cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P2M -E '2009-03-31 00:00:00Z'"
test_assert $CRM_EX_OK 0
desc="2009-01-31 + 3 Months"
cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P3M -E '2009-04-30 00:00:00Z'"
test_assert $CRM_EX_OK 0
desc="2009-03-31 - 1 Month"
cmd="iso8601 -d '2009-03-31 00:00:00Z' -D P-1M -E '2009-02-28 00:00:00Z'"
test_assert $CRM_EX_OK 0
}
function test_acl_loop() {
- local TMPXML="$1"
+ local TMPXML
+
+ TMPXML="$1"
# Make sure we're rejecting things for the right reasons
export PCMK_trace_functions=__xml_acl_check,__xml_acl_post_process
export PCMK_stderr=1
CIB_user=root cibadmin --replace --xml-text '<resources/>'
export CIB_user=unknownguy
desc="$CIB_user: Query configuration"
cmd="cibadmin -Q"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Set enable-acl"
cmd="crm_attribute -n enable-acl -v false"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Set stonith-enabled"
cmd="crm_attribute -n stonith-enabled -v false"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Create a resource"
cmd="cibadmin -C -o resources --xml-text '<primitive id=\"dummy\" class=\"ocf\" provider=\"pacemaker\" type=\"Dummy\"/>'"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
export CIB_user=l33t-haxor
desc="$CIB_user: Query configuration"
cmd="cibadmin -Q"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Set enable-acl"
cmd="crm_attribute -n enable-acl -v false"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Set stonith-enabled"
cmd="crm_attribute -n stonith-enabled -v false"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Create a resource"
cmd="cibadmin -C -o resources --xml-text '<primitive id=\"dummy\" class=\"ocf\" provider=\"pacemaker\" type=\"Dummy\"/>'"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
export CIB_user=niceguy
desc="$CIB_user: Query configuration"
cmd="cibadmin -Q"
test_assert $CRM_EX_OK 0
desc="$CIB_user: Set enable-acl"
cmd="crm_attribute -n enable-acl -v false"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Set stonith-enabled"
cmd="crm_attribute -n stonith-enabled -v false"
test_assert $CRM_EX_OK
desc="$CIB_user: Create a resource"
cmd="cibadmin -C -o resources --xml-text '<primitive id=\"dummy\" class=\"ocf\" provider=\"pacemaker\" type=\"Dummy\"/>'"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
export CIB_user=root
desc="$CIB_user: Query configuration"
cmd="cibadmin -Q"
test_assert $CRM_EX_OK 0
desc="$CIB_user: Set stonith-enabled"
cmd="crm_attribute -n stonith-enabled -v true"
test_assert $CRM_EX_OK
desc="$CIB_user: Create a resource"
cmd="cibadmin -C -o resources --xml-text '<primitive id=\"dummy\" class=\"ocf\" provider=\"pacemaker\" type=\"Dummy\"/>'"
test_assert $CRM_EX_OK
export CIB_user=l33t-haxor
desc="$CIB_user: Create a resource meta attribute"
cmd="crm_resource -r dummy --meta -p target-role -v Stopped"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Query a resource meta attribute"
cmd="crm_resource -r dummy --meta -g target-role"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
desc="$CIB_user: Remove a resource meta attribute"
cmd="crm_resource -r dummy --meta -d target-role"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
export CIB_user=niceguy
desc="$CIB_user: Create a resource meta attribute"
cmd="crm_resource -r dummy --meta -p target-role -v Stopped"
test_assert $CRM_EX_OK
desc="$CIB_user: Query a resource meta attribute"
cmd="crm_resource -r dummy --meta -g target-role"
test_assert $CRM_EX_OK
desc="$CIB_user: Remove a resource meta attribute"
cmd="crm_resource -r dummy --meta -d target-role"
test_assert $CRM_EX_OK
desc="$CIB_user: Create a resource meta attribute"
cmd="crm_resource -r dummy --meta -p target-role -v Started"
test_assert $CRM_EX_OK
export CIB_user=badidea
desc="$CIB_user: Query configuration - implied deny"
cmd="cibadmin -Q"
test_assert $CRM_EX_OK 0
export CIB_user=betteridea
desc="$CIB_user: Query configuration - explicit deny"
cmd="cibadmin -Q"
test_assert $CRM_EX_OK 0
CIB_user=root cibadmin -Q > "$TMPXML"
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --delete --xml-text '<acls/>'
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql
export CIB_user=niceguy
desc="$CIB_user: Replace - remove acls"
cmd="cibadmin --replace --xml-file $TMPXML"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
CIB_user=root cibadmin -Q > "$TMPXML"
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -C -o resources --xml-text '<primitive id="dummy2" class="ocf" provider="pacemaker" type="Dummy"/>'
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql
desc="$CIB_user: Replace - create resource"
cmd="cibadmin --replace --xml-file $TMPXML"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
CIB_user=root cibadmin -Q > "$TMPXML"
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" crm_attribute -n enable-acl -v false
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql
desc="$CIB_user: Replace - modify attribute (deny)"
cmd="cibadmin --replace --xml-file $TMPXML"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
CIB_user=root cibadmin -Q > "$TMPXML"
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --replace --xml-text '<nvpair id="cib-bootstrap-options-enable-acl" name="enable-acl"/>'
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql
desc="$CIB_user: Replace - delete attribute (deny)"
cmd="cibadmin --replace --xml-file $TMPXML"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
CIB_user=root cibadmin -Q > "$TMPXML"
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '<primitive id="dummy" description="nothing interesting"/>'
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql
desc="$CIB_user: Replace - create attribute (deny)"
cmd="cibadmin --replace --xml-file $TMPXML"
test_assert $CRM_EX_INSUFFICIENT_PRIV 0
CIB_user=bob
CIB_user=root cibadmin -Q > "$TMPXML"
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '<primitive id="dummy" description="nothing interesting"/>'
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql
desc="$CIB_user: Replace - create attribute (allow)"
cmd="cibadmin --replace -o resources --xml-file $TMPXML"
test_assert $CRM_EX_OK 0
CIB_user=root cibadmin -Q > "$TMPXML"
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '<primitive id="dummy" description="something interesting"/>'
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql
desc="$CIB_user: Replace - modify attribute (allow)"
cmd="cibadmin --replace -o resources --xml-file $TMPXML"
test_assert $CRM_EX_OK 0
CIB_user=root cibadmin -Q > "$TMPXML"
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --replace -o resources --xml-text '<primitive id="dummy" class="ocf" provider="pacemaker" type="Dummy"/>'
CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql
desc="$CIB_user: Replace - delete attribute (allow)"
cmd="cibadmin --replace -o resources --xml-file $TMPXML"
test_assert $CRM_EX_OK 0
}
function test_acls() {
local SHADOWPATH
- local TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.acls.xml.XXXXXXXXXX)
+ local TMPXML
+
+ TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.acls.xml.XXXXXXXXXX)
export CIB_shadow_dir="${shadow_dir}"
$VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow --validate-with pacemaker-1.3 2>&1
export CIB_shadow=$shadow
cat <<EOF > "$TMPXML"
<acls>
<acl_user id="l33t-haxor">
<deny id="crook-nothing" xpath="/cib"/>
</acl_user>
<acl_user id="niceguy">
<role_ref id="observer"/>
</acl_user>
<acl_user id="bob">
<role_ref id="admin"/>
</acl_user>
<acl_role id="observer">
<read id="observer-read-1" xpath="/cib"/>
<write id="observer-write-1" xpath="//nvpair[@name=&apos;stonith-enabled&apos;]"/>
<write id="observer-write-2" xpath="//nvpair[@name=&apos;target-role&apos;]"/>
</acl_role>
<acl_role id="admin">
<read id="admin-read-1" xpath="/cib"/>
<write id="admin-write-1" xpath="//resources"/>
</acl_role>
</acls>
EOF
desc="Configure some ACLs"
cmd="cibadmin -M -o acls --xml-file $TMPXML"
test_assert $CRM_EX_OK
desc="Enable ACLs"
cmd="crm_attribute -n enable-acl -v true"
test_assert $CRM_EX_OK
desc="Set cluster option"
cmd="crm_attribute -n no-quorum-policy -v ignore"
test_assert $CRM_EX_OK
desc="New ACL"
cmd="cibadmin --create -o acls --xml-text '<acl_user id=\"badidea\"><read id=\"badidea-resources\" xpath=\"//meta_attributes\"/></acl_user>'"
test_assert $CRM_EX_OK
desc="Another ACL"
cmd="cibadmin --create -o acls --xml-text '<acl_user id=\"betteridea\"><read id=\"betteridea-resources\" xpath=\"//meta_attributes\"/></acl_user>'"
test_assert $CRM_EX_OK
desc="Updated ACL"
cmd="cibadmin --replace -o acls --xml-text '<acl_user id=\"betteridea\"><deny id=\"betteridea-nothing\" xpath=\"/cib\"/><read id=\"betteridea-resources\" xpath=\"//meta_attributes\"/></acl_user>'"
test_assert $CRM_EX_OK
test_acl_loop "$TMPXML"
printf "\n\n !#!#!#!#! Upgrading to latest CIB schema and re-testing !#!#!#!#!\n"
printf "\nUpgrading to latest CIB schema and re-testing\n" 1>&2
export CIB_user=root
desc="$CIB_user: Upgrade to latest CIB schema"
cmd="cibadmin --upgrade --force -V"
test_assert $CRM_EX_OK
SHADOWPATH="$(crm_shadow --file)"
# sed -i isn't portable :-(
cp -p "$SHADOWPATH" "${SHADOWPATH}.$$" # to keep permissions
sed -e 's/epoch=.2/epoch=\"6/g' -e 's/admin_epoch=.1/admin_epoch=\"0/g' \
"$SHADOWPATH" > "${SHADOWPATH}.$$"
mv -- "${SHADOWPATH}.$$" "$SHADOWPATH"
test_acl_loop "$TMPXML"
unset CIB_shadow_dir
rm -f "$TMPXML"
}
function test_validity() {
- local TMPGOOD=$(mktemp ${TMPDIR:-/tmp}/cts-cli.validity.good.xml.XXXXXXXXXX)
- local TMPBAD=$(mktemp ${TMPDIR:-/tmp}/cts-cli.validity.bad.xml.XXXXXXXXXX)
+ local TMPGOOD
+ local TMPBAD
+
+ TMPGOOD=$(mktemp ${TMPDIR:-/tmp}/cts-cli.validity.good.xml.XXXXXXXXXX)
+ TMPBAD=$(mktemp ${TMPDIR:-/tmp}/cts-cli.validity.bad.xml.XXXXXXXXXX)
export CIB_shadow_dir="${shadow_dir}"
$VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow --validate-with pacemaker-1.2 2>&1
export CIB_shadow=$shadow
export PCMK_trace_functions=apply_upgrade,update_validation,cli_config_update
export PCMK_stderr=1
cibadmin -C -o resources --xml-text '<primitive id="dummy1" class="ocf" provider="pacemaker" type="Dummy"/>'
cibadmin -C -o resources --xml-text '<primitive id="dummy2" class="ocf" provider="pacemaker" type="Dummy"/>'
cibadmin -C -o constraints --xml-text '<rsc_order id="ord_1-2" first="dummy1" first-action="start" then="dummy2"/>'
cibadmin -Q > "$TMPGOOD"
desc="Try to make resulting CIB invalid (enum violation)"
cmd="cibadmin -M -o constraints --xml-text '<rsc_order id=\"ord_1-2\" first=\"dummy1\" first-action=\"break\" then=\"dummy2\"/>'"
test_assert $CRM_EX_CONFIG
sed 's|"start"|"break"|' "$TMPGOOD" > "$TMPBAD"
desc="Run crm_simulate with invalid CIB (enum violation)"
cmd="crm_simulate -x $TMPBAD -S"
test_assert $CRM_EX_CONFIG 0
desc="Try to make resulting CIB invalid (unrecognized validate-with)"
cmd="cibadmin -M --xml-text '<cib validate-with=\"pacemaker-9999.0\"/>'"
test_assert $CRM_EX_CONFIG
sed 's|"pacemaker-1.2"|"pacemaker-9999.0"|' "$TMPGOOD" > "$TMPBAD"
desc="Run crm_simulate with invalid CIB (unrecognized validate-with)"
cmd="crm_simulate -x $TMPBAD -S"
test_assert $CRM_EX_CONFIG 0
desc="Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1)"
cmd="cibadmin -C -o configuration --xml-text '<tags/>'"
test_assert $CRM_EX_CONFIG
sed 's|</configuration>|<tags/></configuration>|' "$TMPGOOD" > "$TMPBAD"
desc="Run crm_simulate with invalid, but possibly recoverable CIB (valid with X.Y+1)"
cmd="crm_simulate -x $TMPBAD -S"
test_assert $CRM_EX_OK 0
sed 's|[ ][ ]*validate-with="[^"]*"||' "$TMPGOOD" > "$TMPBAD"
desc="Make resulting CIB valid, although without validate-with attribute"
cmd="cibadmin -R --xml-file $TMPBAD"
test_assert $CRM_EX_OK
desc="Run crm_simulate with valid CIB, but without validate-with attribute"
cmd="crm_simulate -x $TMPBAD -S"
test_assert $CRM_EX_OK 0
# this will just disable validation and accept the config, outputting
# validation errors
sed -e 's|[ ][ ]*validate-with="[^"]*"||' \
-e 's|\([ ][ ]*epoch="[^"]*\)"|\10"|' -e 's|"start"|"break"|' \
"$TMPGOOD" > "$TMPBAD"
desc="Make resulting CIB invalid, and without validate-with attribute"
cmd="cibadmin -R --xml-file $TMPBAD"
test_assert $CRM_EX_OK
desc="Run crm_simulate with invalid CIB, also without validate-with attribute"
cmd="crm_simulate -x $TMPBAD -S"
test_assert $CRM_EX_OK 0
unset CIB_shadow_dir
rm -f "$TMPGOOD" "$TMPBAD"
}
test_upgrade() {
- local TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX)
+ local TMPXML
+
+ TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX)
export CIB_shadow_dir="${shadow_dir}"
$VALGRIND_CMD crm_shadow --batch --force --create-empty $shadow --validate-with pacemaker-2.10 2>&1
export CIB_shadow=$shadow
desc="Set stonith-enabled=false"
cmd="crm_attribute -n stonith-enabled -v false"
test_assert $CRM_EX_OK
cat <<EOF > "$TMPXML"
<resources>
<primitive id="mySmartFuse" class="ocf" provider="experiment" type="SmartFuse">
<operations>
<op id="mySmartFuse-start" name="start" interval="0" timeout="40s"/>
<op id="mySmartFuse-monitor-inputpower" name="monitor" interval="30s">
<instance_attributes id="mySmartFuse-inputpower-instanceparams">
<nvpair id="mySmartFuse-inputpower-requires" name="requires" value="inputpower"/>
</instance_attributes>
</op>
<op id="mySmartFuse-monitor-outputpower" name="monitor" interval="2s">
<instance_attributes id="mySmartFuse-outputpower-instanceparams">
<nvpair id="mySmartFuse-outputpower-requires" name="requires" value="outputpower"/>
</instance_attributes>
</op>
</operations>
<instance_attributes id="mySmartFuse-params">
<nvpair id="mySmartFuse-params-ip" name="ip" value="192.0.2.10"/>
</instance_attributes>
<!-- a bit hairy but valid -->
<instance_attributes id-ref="mySmartFuse-outputpower-instanceparams"/>
</primitive>
</resources>
EOF
desc="Configure the initial resource"
cmd="cibadmin -M -o resources --xml-file $TMPXML"
test_assert $CRM_EX_OK
desc="Upgrade to latest CIB schema (trigger 2.10.xsl + the wrapping)"
cmd="cibadmin --upgrade --force -V -V"
test_assert $CRM_EX_OK
desc="Query a resource instance attribute (shall survive)"
cmd="crm_resource -r mySmartFuse -g requires"
test_assert $CRM_EX_OK
unset CIB_shadow_dir
rm -f "$TMPXML"
}
# Process command-line arguments
while [ $# -gt 0 ]; do
case "$1" in
-t)
tests="$2"
shift 2
;;
-V|--verbose)
verbose=1
shift
;;
-v|--valgrind)
export G_SLICE=always-malloc
VALGRIND_CMD="valgrind $VALGRIND_OPTS"
shift
;;
-s)
do_save=1
shift
;;
-p)
export PATH="$2:$PATH"
shift
;;
--help)
echo "$USAGE_TEXT"
exit $CRM_EX_OK
;;
*)
echo "error: unknown option $1"
echo
echo "$USAGE_TEXT"
exit $CRM_EX_USAGE
;;
esac
done
for t in $tests; do
case "$t" in
dates) ;;
tools) ;;
acls) ;;
validity) ;;
upgrade) ;;
*)
echo "error: unknown test $t"
echo
echo "$USAGE_TEXT"
exit $CRM_EX_USAGE
;;
esac
done
# Check whether we're running from source directory
SRCDIR=$(dirname $test_home)
if [ -x "$SRCDIR/tools/crm_simulate" ]; then
export PATH="$SRCDIR/tools:$PATH"
echo "Using local binaries from: $SRCDIR/tools"
if [ -x "$SRCDIR/xml" ]; then
export PCMK_schema_directory="$SRCDIR/xml"
echo "Using local schemas from: $PCMK_schema_directory"
fi
fi
for t in $tests; do
echo "Testing $t"
TMPFILE=$(mktemp ${TMPDIR:-/tmp}/cts-cli.$t.XXXXXXXXXX)
eval TMPFILE_$t="$TMPFILE"
test_$t > "$TMPFILE"
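# Strip volatile fields (timestamps, feature sets, source file/line
# references) so the output can be compared against the stored expected files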
sed -e 's/cib-last-written.*>/>/'\
-e 's/ last-run=\"[0-9]*\"//'\
-e 's/crm_feature_set="[^"]*" //'\
-e 's/validate-with="[^"]*" //'\
-e 's/Created new pacemaker-.* configuration/Created new pacemaker configuration/'\
-e 's/.*\(__xml_.*\)@.*\.c:[0-9][0-9]*)/\1/g' \
-e 's/.*\(unpack_.*\)@.*\.c:[0-9][0-9]*)/\1/g' \
-e 's/.*\(update_validation\)@.*\.c:[0-9][0-9]*)/\1/g' \
-e 's/.*\(apply_upgrade\)@.*\.c:[0-9][0-9]*)/\1/g' \
-e 's/ last-rc-change=\"[0-9]*\"//'\
-e 's|^/tmp/cts-cli\.validity\.bad.xml\.[^:]*:|validity.bad.xml:|'\
-e 's/^Entity: line [0-9][0-9]*: //'\
-e 's/\(validation ([0-9][0-9]* of \)[0-9][0-9]*\().*\)/\1X\2/' \
"$TMPFILE" > "${TMPFILE}.$$"
mv -- "${TMPFILE}.$$" "$TMPFILE"
if [ $do_save -eq 1 ]; then
cp "$TMPFILE" $test_home/cli/regression.$t.exp
fi
done
rm -rf "${shadow_dir}"
failed=0
if [ $verbose -eq 1 ]; then
echo -e "\n\nResults"
fi
for t in $tests; do
eval TMPFILE="\$TMPFILE_$t"
if [ $verbose -eq 1 ]; then
diff -wu $test_home/cli/regression.$t.exp "$TMPFILE"
else
diff -w $test_home/cli/regression.$t.exp "$TMPFILE" >/dev/null 2>&1
fi
if [ $? -ne 0 ]; then
failed=1
fi
done
echo -e "\n\nSummary"
for t in $tests; do
eval TMPFILE="\$TMPFILE_$t"
- grep -e "^*" "$TMPFILE"
+ grep -e '^\*' "$TMPFILE"
done
if [ $num_errors -ne 0 ]; then
echo "$num_errors tests failed; see output in:"
for t in $tests; do
eval TMPFILE="\$TMPFILE_$t"
echo " $TMPFILE"
done
exit $CRM_EX_ERROR
elif [ $failed -eq 1 ]; then
echo "$num_passed tests passed but output was unexpected; see output in:"
for t in $tests; do
eval TMPFILE="\$TMPFILE_$t"
echo " $TMPFILE"
done
exit $CRM_EX_DIGEST
else
echo $num_passed tests passed
for t in $tests; do
eval TMPFILE="\$TMPFILE_$t"
rm -f "$TMPFILE"
done
crm_shadow --force --delete $shadow >/dev/null 2>&1
exit $CRM_EX_OK
fi
diff --git a/cts/cts-coverage.in b/cts/cts-coverage.in
index 0fdfe918fd..ba831810a2 100644
--- a/cts/cts-coverage.in
+++ b/cts/cts-coverage.in
@@ -1,62 +1,68 @@
#!@BASH_PATH@
+#
+# Copyright 2012-2018 Andrew Beekhof <andrew@beekhof.net>
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
-start=$PWD
-test_home=`dirname $0`
+start="$PWD"
+test_home=$(dirname "$0")
test_dir="@datadir@/@PACKAGE@/tests"
if [ "$test_home" != "$test_dir" ]; then
# Running against the source tree
- GCOV_BASE=@abs_top_srcdir@
+ GCOV_BASE="@abs_top_srcdir@"
test_dir="@abs_top_srcdir@/cts"
- cd @abs_top_srcdir@
+ cd "@abs_top_srcdir@" || exit 1
grep with-gcov config.log
- if [ $? = 0 ]; then
+ if [ $? -eq 0 ]; then
echo "Pacemaker was built with gcov support"
else
echo "Re-building with gcov support"
last=`grep --color=never "$.*configure" config.log | tail -n 1 | sed s:.*configure:./configure: | sed s:--no-create:--with-gcov:`
eval $last
fi
#sudo make core core-install
else
GCOV_BASE=@localstatedir@/lib/pacemaker/gcov/
mkdir -p $GCOV_BASE
export GCOV_PREFIX_STRIP=4
export GCOV_PREFIX=$GCOV_BASE
top=`find / -name crm_internal.h 2>/dev/null | grep debug | head -n 1`
if [ "x$top" = x ]; then
echo "Could not locate the pacemaker headers"
exit 1
fi
- cd `dirname $top`
- cd ..
+ cd "$(dirname "$top")" || exit 1
+ cd .. || exit 1
echo "Creating the directory structure in $GCOV_BASE from $PWD"
# The .gcno files will already be there for sources,
# but we still need to create the include/ subtree
find . -type d -exec mkdir -p $GCOV_BASE/\{\} \;
echo "Now linking the source files into place"
find . -type f -name "*.c" -exec ln -s $PWD/\{\} $GCOV_BASE\{\} \;
find . -type f -name "*.h" -exec ln -s $PWD/\{\} $GCOV_BASE\{\} \;
find . -type f -name "*.debug" -exec ln -s $PWD/\{\} $GCOV_BASE\{\} \;
fi
-cd $start
+cd "$start" || exit 1
lcov -d $GCOV_BASE -z
# Run all active regression tests
$test_dir/cts-regression
lcov -d $GCOV_BASE -c -o pacemaker.info
rm -rf html
mkdir html
genhtml -o html pacemaker.info
diff --git a/cts/cts-regression.in b/cts/cts-regression.in
index d458a5e989..19d8612a73 100755
--- a/cts/cts-regression.in
+++ b/cts/cts-regression.in
@@ -1,215 +1,214 @@
#!@BASH_PATH@
#
# cts-regression
#
# Convenience wrapper for running any of the Pacemaker regression tests
#
# Copyright 2012-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
USAGE_TEXT="Usage: cts-regression [<options>] [<test> ...]
Options:
--help Display this text, then exit
-V, --verbose Increase test verbosity
-v, --valgrind Run test commands under valgrind
Tests (default tests are 'scheduler cli'):
scheduler Action scheduler
cli Command-line tools
exec Local resource agent executor
pacemaker_remote Resource agent executor in remote mode
fencing Fencer
all Synonym for 'scheduler cli exec fencing'"
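# Example invocation (hypothetical): run the scheduler and CLI suites with
# the test commands under valgrind:
#   cts-regression -v scheduler cli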
# If readlink supports -e (i.e. GNU), use it
readlink -e / >/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
- test_home="$(dirname $(readlink -e $0))"
+ test_home="$(dirname "$(readlink -e "$0")")"
else
- test_home="$(dirname $0)"
+ test_home="$(dirname "$0")"
fi
valgrind=""
verbose=""
tests=""
# These constants must track crm_exit_t values
CRM_EX_OK=0
CRM_EX_ERROR=1
CRM_EX_NOT_INSTALLED=5
CRM_EX_USAGE=64
function info() {
printf "$*\n"
}
function error() {
printf " * ERROR: $*\n"
}
function run_as_root() {
CMD="$1"
shift
- ARGS="$@"
+ ARGS="$*" # assumes arguments don't need quoting
# Test might not be executable if run from source directory
chmod a+x $CMD
CMD="$CMD $ARGS $verbose"
if [ $EUID -eq 0 ]; then
$CMD
elif [ -z $TRAVIS ]; then
# sudo doesn't work in buildbot, su doesn't work in travis
echo "Enter the root password..."
su root -c "$CMD"
else
echo "Enter the root password if prompted..."
sudo -- $CMD
fi
}
add_test() {
local TEST="$1"
case "$TEST" in
scheduler|exec|pacemaker_remote|fencing|cli)
if [[ ! $tests =~ $TEST ]]; then
tests="$tests $TEST"
fi
;;
*)
error "unknown test: $TEST"
echo
echo "$USAGE_TEXT"
exit $CRM_EX_USAGE
;;
esac
}
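# Note: the "[[ $tests =~ $TEST ]]" duplicate check above is an unanchored
# regex match; it is safe here only because no known test name is a
# substring of another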
run_test() {
local t="$1"
info "Executing the $t regression tests"
info "============================================================"
case $t in
scheduler)
if [ -x $test_home/cts-scheduler ]; then
$test_home/cts-scheduler $verbose $valgrind
rc=$?
else
error "scheduler regression test not found"
rc=$CRM_EX_NOT_INSTALLED
fi
;;
exec)
if [ -x $test_home/cts-exec ]; then
run_as_root $test_home/cts-exec
rc=$?
else
error "executor regression test not found"
rc=$CRM_EX_NOT_INSTALLED
fi
;;
pacemaker_remote)
if [ -x $test_home/cts-exec ]; then
run_as_root $test_home/cts-exec -R
rc=$?
else
error "pacemaker_remote regression test not found"
rc=$CRM_EX_NOT_INSTALLED
fi
;;
fencing)
if [ -x $test_home/cts-fencing ]; then
run_as_root $test_home/cts-fencing
rc=$?
else
error "fencing regression test not found"
rc=$CRM_EX_NOT_INSTALLED
fi
;;
cli)
if [ -x $test_home/cts-cli ]; then
$test_home/cts-cli $verbose $valgrind
rc=$?
else
error "cli regression test not found"
rc=$CRM_EX_NOT_INSTALLED
fi
;;
esac
info "============================================================"
info ""
info ""
return $rc
}
run_tests() {
- TESTS="$@"
local TEST
local TEST_RC
local FAILED
FAILED=""
- for TEST in $TESTS; do
+ for TEST in "$@"; do
run_test $TEST
TEST_RC=$?
if [ $TEST_RC -ne 0 ]; then
info "$TEST regression tests failed ($TEST_RC)"
FAILED="$FAILED $TEST"
fi
done
if [ -n "$FAILED" ]; then
error "failed regression tests: $FAILED"
return $CRM_EX_ERROR
fi
return $CRM_EX_OK
}
while [ $# -gt 0 ] ; do
case "$1" in
--help)
echo "$USAGE_TEXT"
exit $CRM_EX_OK
;;
-V|--verbose)
verbose="-V"
shift
;;
-v|--valgrind)
valgrind="-v"
shift
;;
scheduler|exec|pacemaker_remote|fencing|cli)
add_test $1
shift
;;
all)
add_test scheduler
add_test cli
add_test exec
add_test fencing
shift
;;
*)
error "unknown option: $1"
echo
echo "$USAGE_TEXT"
exit $CRM_EX_USAGE
;;
esac
done
if [ -z "$tests" ]; then
add_test scheduler
add_test cli
fi
run_tests $tests
diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
index 9a6fb6b3a3..d3c1778343 100644
--- a/cts/cts-scheduler.in
+++ b/cts/cts-scheduler.in
@@ -1,1292 +1,1294 @@
#!@BASH_PATH@
#
# Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
USAGE_TEXT="Usage: cts-scheduler [<options>]
Options:
--help Display this text, then exit
-V, --verbose Display any differences from expected output
--run TEST Run only single specified test
--update Update expected results with actual results
-b, --binary PATH Specify path to crm_simulate
-i, --io-dir PATH Specify path to regression test data directory
-v, --valgrind Run all commands under valgrind
--valgrind-dhat Run all commands under valgrind with heap analyzer
--valgrind-skip-output If running under valgrind, don't display output
--testcmd-options Additional options for command under test"
SBINDIR="@sbindir@"
BUILDDIR="@abs_top_builddir@"
CRM_SCHEMA_DIRECTORY="@CRM_SCHEMA_DIRECTORY@"
# If readlink supports -e (i.e. GNU), use it
readlink -e / >/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
- test_home="$(dirname $(readlink -e $0))"
+ test_home="$(dirname "$(readlink -e "$0")")"
else
- test_home="$(dirname $0)"
+ test_home="$(dirname "$0")"
fi
io_dir="$test_home/scheduler"
failed="$test_home/.regression.failed.diff"
test_binary=
testcmd_options=
single_test=
verbose=0
num_failed=0
num_tests=0
VALGRIND_CMD=""
VALGRIND_OPTS="-q
--gen-suppressions=all
--log-file=%q{valgrind_output}
--time-stamp=yes
--trace-children=no
--show-reachable=no
--leak-check=full
--num-callers=20
--suppressions=$test_home/valgrind-pcmk.suppressions"
VALGRIND_DHAT_OPTS="--tool=exp-dhat
--log-file=%q{valgrind_output}
--time-stamp=yes
--trace-children=no
--show-top-n=100
--num-callers=4"
diff_opts="--ignore-all-space --ignore-blank-lines -u -N"
# These constants must track crm_exit_t values
CRM_EX_OK=0
CRM_EX_ERROR=1
CRM_EX_NOT_INSTALLED=5
CRM_EX_USAGE=64
CRM_EX_NOINPUT=66
EXITCODE=$CRM_EX_OK
function info() {
printf "$*\n"
}
function error() {
printf " * ERROR: $*\n"
}
function failed() {
printf " * FAILED: $*\n"
}
function show_test() {
name=$1; shift
printf " Test %-25s $*\n" "$name:"
}
# Normalize scheduler output for comparison
normalize() {
for NORMALIZE_FILE in "$@"; do
# sed -i is not portable :-(
sed -e 's/crm_feature_set="[^"]*"//' \
-e 's/batch-limit="[0-9]*"//' \
"$NORMALIZE_FILE" > "${NORMALIZE_FILE}.$$"
mv -- "${NORMALIZE_FILE}.$$" "$NORMALIZE_FILE"
done
}
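# Usage sketch: do_test below calls 'normalize "$expected" "$output"' to
# rewrite both files in place before diffing them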
info "Test home is:\t$test_home"
create_mode="false"
while [ $# -gt 0 ] ; do
case "$1" in
-V|--verbose)
verbose=1
shift
;;
-v|--valgrind)
export G_SLICE=always-malloc
VALGRIND_CMD="valgrind $VALGRIND_OPTS"
shift
;;
--valgrind-dhat)
VALGRIND_CMD="valgrind $VALGRIND_DHAT_OPTS"
shift
;;
--valgrind-skip-output)
VALGRIND_SKIP_OUTPUT=1
shift
;;
--update)
create_mode="true"
shift
;;
--run)
single_test=$(basename "$2" ".xml")
shift 2
+ break # any remaining arguments will be passed to test command
;;
-b|--binary)
test_binary="$2"
shift 2
;;
-i|--io-dir)
io_dir="$2"
shift 2
;;
--help)
echo "$USAGE_TEXT"
exit $CRM_EX_OK
;;
--testcmd-options)
testcmd_options=$2
shift 2
;;
*)
error "unknown option: $1"
exit $CRM_EX_USAGE
;;
esac
done
if [ -z "$PCMK_schema_directory" ]; then
if [ -d "$BUILDDIR/xml" ]; then
export PCMK_schema_directory="$BUILDDIR/xml"
elif [ -d "$CRM_SCHEMA_DIRECTORY" ]; then
export PCMK_schema_directory="$CRM_SCHEMA_DIRECTORY"
fi
fi
if [ -z "$test_binary" ]; then
if [ -x "$BUILDDIR/tools/crm_simulate" ]; then
test_binary="$BUILDDIR/tools/crm_simulate"
elif [ -x "$SBINDIR/crm_simulate" ]; then
test_binary="$SBINDIR/crm_simulate"
fi
fi
if [ ! -x "$test_binary" ]; then
error "Test binary $test_binary not found"
exit $CRM_EX_NOT_INSTALLED
fi
info "Test binary is:\t$test_binary"
if [ -n "$PCMK_schema_directory" ]; then
info "Schema home is:\t$PCMK_schema_directory"
fi
if [ "x$VALGRIND_CMD" != "x" ]; then
info "Activating memory testing with valgrind";
fi
info " "
test_cmd="$VALGRIND_CMD $test_binary $testcmd_options"
#echo $test_cmd
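# Presumably so non-root users get a writable shadow CIB location: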
-if [ `whoami` != root ]; then
+if [ "$(whoami)" != "root" ]; then
declare -x CIB_shadow_dir=/tmp
fi
do_test() {
did_fail=0
expected_rc=0
num_tests=$(( $num_tests + 1 ))
base=$1; shift
name=$1; shift
input=$io_dir/${base}.xml
output=$io_dir/${base}.out
expected=$io_dir/${base}.exp
- dot_png=$io_dir/${base}.png
dot_expected=$io_dir/${base}.dot
dot_output=$io_dir/${base}.pe.dot
scores=$io_dir/${base}.scores
score_output=$io_dir/${base}.scores.pe
stderr_expected=$io_dir/${base}.stderr
stderr_output=$io_dir/${base}.stderr.pe
summary=$io_dir/${base}.summary
summary_output=$io_dir/${base}.summary.pe
valgrind_output=$io_dir/${base}.valgrind
export valgrind_output
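# A test may pass "--rc <code>" as its first extra argument to declare an
# expected nonzero exit status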
if [ "x$1" = "x--rc" ]; then
expected_rc=$2
shift; shift;
fi
show_test "$base" "$name"
if [ ! -f $input ]; then
error "No input";
did_fail=1
num_failed=$(( $num_failed + 1 ))
return $CRM_EX_NOINPUT;
fi
- if [ "$create_mode" != "true" -a ! -f $expected ]; then
+ if [ "$create_mode" != "true" ] && [ ! -f "$expected" ]; then
error "no stored output";
return $CRM_EX_NOINPUT;
fi
# ../admin/crm_verify -X $input
if [ ! -z "$single_test" ]; then
- echo CIB_shadow_dir=$io_dir $test_cmd -x $input -D $dot_output -G $output -S $*
- CIB_shadow_dir=$io_dir $test_cmd -x $input -D $dot_output -G $output -S $* 2>&1 | tee $summary_output
+ echo "CIB_shadow_dir=\"$io_dir\" $test_cmd -x \"$input\" -D \"$dot_output\" -G \"$output\" -S" "$@"
+ CIB_shadow_dir="$io_dir" $test_cmd -x "$input" -D "$dot_output" \
+ -G "$output" -S "$@" 2>&1 | tee "$summary_output"
else
- CIB_shadow_dir=$io_dir $test_cmd -x $input -S &> $summary_output
+ CIB_shadow_dir="$io_dir" $test_cmd -x "$input" -S &> "$summary_output"
fi
- CIB_shadow_dir=$io_dir $test_cmd -x $input -D $dot_output -G $output -SQ -s $* 2> $stderr_output > $score_output
+ CIB_shadow_dir="$io_dir" $test_cmd -x "$input" -D "$dot_output" \
+ -G "$output" -SQ -s "$@" 2> "$stderr_output" > "$score_output"
rc=$?
if [ $rc -ne $expected_rc ]; then
failed "Test returned: $rc";
did_fail=1
- echo "CIB_shadow_dir=$io_dir $test_cmd -x $input -D $dot_output -G $output -SQ -s $*"
+ echo "CIB_shadow_dir=\"$io_dir\" $test_cmd -x \"$input\" -D \"$dot_output\" -G \"$output\" -SQ -s" "$@"
fi
if [ -z "$VALGRIND_SKIP_OUTPUT" ]; then
if [ -s "${valgrind_output}" ]; then
error "Valgrind reported errors";
did_fail=1
cat ${valgrind_output}
fi
rm -f ${valgrind_output}
fi
if [ -s core ]; then
error "Core-file detected: core.${base}";
did_fail=1
rm -f $test_home/core.$base
mv core $test_home/core.$base
fi
if [ -e "$stderr_expected" ]; then
diff $diff_opts $stderr_expected $stderr_output >/dev/null
rc2=$?
if [ $rc2 -ne 0 ]; then
failed "stderr changed";
diff $diff_opts $stderr_expected $stderr_output 2>/dev/null >> $failed
echo "" >> $failed
did_fail=1
fi
elif [ -s "$stderr_output" ]; then
error "Output was written to stderr"
did_fail=1
cat $stderr_output
fi
rm -f $stderr_output
if [ ! -s $output ]; then
error "No graph produced";
did_fail=1
num_failed=$(( $num_failed + 1 ))
rm -f $output
return $CRM_EX_ERROR;
fi
if [ ! -s $dot_output ]; then
error "No dot-file summary produced";
did_fail=1
num_failed=$(( $num_failed + 1 ))
rm -f $output
return $CRM_EX_ERROR;
else
echo "digraph \"g\" {" > $dot_output.sort
- LC_ALL=POSIX sort -u $dot_output | grep -v -e ^}$ -e digraph >> $dot_output.sort
+ LC_ALL=POSIX sort -u $dot_output | grep -v -e '^}$' -e digraph >> $dot_output.sort
echo "}" >> $dot_output.sort
mv -f $dot_output.sort $dot_output
fi
if [ ! -s $score_output ]; then
error "No allocation scores produced";
did_fail=1
num_failed=$(( $num_failed + 1 ))
rm $output
return $CRM_EX_ERROR;
else
LC_ALL=POSIX sort $score_output > $score_output.sorted
mv -f $score_output.sorted $score_output
fi
if [ "$create_mode" = "true" ]; then
cp "$output" "$expected"
cp "$dot_output" "$dot_expected"
cp "$score_output" "$scores"
cp "$summary_output" "$summary"
info " Updated expected outputs"
fi
diff $diff_opts $summary $summary_output >/dev/null
rc2=$?
if [ $rc2 -ne 0 ]; then
failed "summary changed";
diff $diff_opts $summary $summary_output 2>/dev/null >> $failed
echo "" >> $failed
did_fail=1
fi
diff $diff_opts $dot_expected $dot_output >/dev/null
rc=$?
if [ $rc -ne 0 ]; then
failed "dot-file summary changed";
diff $diff_opts $dot_expected $dot_output 2>/dev/null >> $failed
echo "" >> $failed
did_fail=1
else
rm -f $dot_output
fi
normalize "$expected" "$output"
diff $diff_opts $expected $output >/dev/null
rc2=$?
if [ $rc2 -ne 0 ]; then
failed "xml-file changed";
diff $diff_opts $expected $output 2>/dev/null >> $failed
echo "" >> $failed
did_fail=1
fi
diff $diff_opts $scores $score_output >/dev/null
rc=$?
if [ $rc -ne 0 ]; then
failed "scores-file changed";
diff $diff_opts $scores $score_output 2>/dev/null >> $failed
echo "" >> $failed
did_fail=1
fi
rm -f $output $score_output $summary_output
if [ $did_fail -eq 1 ]; then
num_failed=$(( $num_failed + 1 ))
return $CRM_EX_ERROR
fi
return $CRM_EX_OK
}
function test_results {
if [ $num_failed -ne 0 ]; then
if [ -s "$failed" ]; then
if [ $verbose -eq 1 ]; then
error "Results of $num_failed failed tests (out of $num_tests)...."
cat $failed
else
error "Results of $num_failed failed tests (out of $num_tests) are in $failed...."
error "Use $0 -V to display them automatically."
fi
else
error "$num_failed (of $num_tests) tests failed (no diff results)"
rm $failed
fi
EXITCODE=$CRM_EX_ERROR
fi
}
# zero out the error log
-> $failed
+true > $failed
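# ("true > file" truncates the file just like a bare "> file", but spelling
# out the no-op command is clearer and satisfies linters that flag a bare
# redirection)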
if [ -n "$single_test" ]; then
- do_test $single_test "Single shot" $*
+ do_test "$single_test" "Single shot" "$@"
TEST_RC=$?
- cat $failed
+ cat "$failed"
exit $TEST_RC
fi
DO_VERSIONED_TESTS=0
create_mode=true
# info Creating the following tests from $io_dir
# do_test order-expired-failure "Order failcount cleanup after remote fencing"
create_mode=false
info Performing the following tests from $io_dir
echo ""
do_test simple1 "Offline "
do_test simple2 "Start "
do_test simple3 "Start 2 "
do_test simple4 "Start Failed"
do_test simple6 "Stop Start "
do_test simple7 "Shutdown "
#do_test simple8 "Stonith "
#do_test simple9 "Lower version"
#do_test simple10 "Higher version"
do_test simple11 "Priority (ne)"
do_test simple12 "Priority (eq)"
do_test simple8 "Stickiness"
echo ""
do_test group1 "Group "
do_test group2 "Group + Native "
do_test group3 "Group + Group "
do_test group4 "Group + Native (nothing)"
do_test group5 "Group + Native (move) "
do_test group6 "Group + Group (move) "
do_test group7 "Group colocation"
do_test group13 "Group colocation (can't run)"
do_test group8 "Group anti-colocation"
do_test group9 "Group recovery"
do_test group10 "Group partial recovery"
do_test group11 "Group target_role"
do_test group14 "Group stop (graph terminated)"
do_test group15 "Negative group colocation"
do_test bug-1573 "Partial stop of a group with two children"
do_test bug-1718 "Mandatory group ordering - Stop group_FUN"
do_test bug-lf-2613 "Move group on failure"
do_test bug-lf-2619 "Move group on clone failure"
do_test group-fail "Ensure stop order is preserved for partially active groups"
do_test group-unmanaged "No need to restart r115 because r114 is unmanaged"
do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails"
do_test group-dependents "Account for the location preferences of things colocated with a group"
echo ""
do_test rsc_dep1 "Must not "
do_test rsc_dep3 "Must "
do_test rsc_dep5 "Must not 3 "
do_test rsc_dep7 "Must 3 "
do_test rsc_dep10 "Must (but can't)"
do_test rsc_dep2 "Must (running) "
do_test rsc_dep8 "Must (running : alt) "
do_test rsc_dep4 "Must (running + move)"
do_test asymmetric "Asymmetric - require explicit location constraints"
echo ""
do_test orphan-0 "Orphan ignore"
do_test orphan-1 "Orphan stop"
do_test orphan-2 "Orphan stop, remove failcount"
echo ""
do_test params-0 "Params: No change"
do_test params-1 "Params: Changed"
do_test params-2 "Params: Resource definition"
do_test params-4 "Params: Reload"
do_test params-5 "Params: Restart based on probe digest"
do_test novell-251689 "Resource definition change + target_role=stopped"
do_test bug-lf-2106 "Restart all anonymous clone instances after config change"
do_test params-6 "Params: Detect reload in previously migrated resource"
do_test nvpair-id-ref "Support id-ref in nvpair with optional name"
do_test not-reschedule-unneeded-monitor "Do not reschedule unneeded monitors while resource definitions have changed"
do_test reload-becomes-restart "Cancel reload if restart becomes required"
echo ""
do_test target-0 "Target Role : baseline"
do_test target-1 "Target Role : master"
do_test target-2 "Target Role : invalid"
echo ""
do_test base-score "Set a node's default score for all nodes"
echo ""
do_test date-1 "Dates" -t "2005-020"
do_test date-2 "Date Spec - Pass" -t "2005-020T12:30"
do_test date-3 "Date Spec - Fail" -t "2005-020T11:30"
do_test origin "Timing of recurring operations" -t "2014-05-07 00:28:00"
do_test probe-0 "Probe (anon clone)"
do_test probe-1 "Pending Probe"
do_test probe-2 "Correctly re-probe cloned groups"
do_test probe-3 "Probe (pending node)"
do_test probe-4 "Probe (pending node + stopped resource)"
do_test standby "Standby"
do_test comments "Comments"
echo ""
do_test one-or-more-0 "Everything starts"
do_test one-or-more-1 "Nothing starts because of A"
do_test one-or-more-2 "D can start because of C"
do_test one-or-more-3 "D cannot start because of B and C"
do_test one-or-more-4 "D cannot start because of target-role"
do_test one-or-more-5 "Start A and F even though C and D are stopped"
do_test one-or-more-6 "Leave A running even though B is stopped"
do_test one-or-more-7 "Leave A running even though C is stopped"
do_test bug-5140-require-all-false "Allow basegrp:0 to stop"
do_test clone-require-all-1 "clone B starts node 3 and 4"
do_test clone-require-all-2 "clone B remains stopped everywhere"
do_test clone-require-all-3 "clone B stops everywhere because A stops everywhere"
do_test clone-require-all-4 "clone B remains on node 3 and 4 with only one instance of A remaining."
do_test clone-require-all-5 "clone B starts on node 1 3 and 4"
do_test clone-require-all-6 "clone B remains active after shutting down instances of A"
do_test clone-require-all-7 "clone A and B both start at the same time. all instances of A start before B."
do_test clone-require-all-no-interleave-1 "C starts everywhere after A and B"
do_test clone-require-all-no-interleave-2 "C starts on nodes 1, 2, and 4 with only one active instance of B"
do_test clone-require-all-no-interleave-3 "C remains active when instance of B is stopped on one node and started on another."
do_test one-or-more-unrunnable-instances "Avoid dependencies on instances that won't ever be started"
echo ""
do_test order1 "Order start 1 "
do_test order2 "Order start 2 "
do_test order3 "Order stop "
do_test order4 "Order (multiple) "
do_test order5 "Order (move) "
do_test order6 "Order (move w/ restart) "
do_test order7 "Order (mandatory) "
do_test order-optional "Order (score=0) "
do_test order-required "Order (score=INFINITY) "
do_test bug-lf-2171 "Prevent group start when clone is stopped"
do_test order-clone "Clone ordering should be able to prevent startup of dependent clones"
do_test order-sets "Ordering for resource sets"
do_test order-serialize "Serialize resources without inhibiting migration"
do_test order-serialize-set "Serialize a set of resources without inhibiting migration"
do_test clone-order-primitive "Order clone start after a primitive"
do_test clone-order-16instances "Verify ordering of 16 cloned resources"
do_test order-optional-keyword "Order (optional keyword)"
do_test order-mandatory "Order (mandatory keyword)"
do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones"
do_test ordered-set-basic-startup "Constraint set with default order settings."
do_test ordered-set-natural "Allow natural set ordering"
do_test order-wrong-kind "Order (error)"
echo ""
do_test coloc-loop "Colocation - loop"
do_test coloc-many-one "Colocation - many-to-one"
do_test coloc-list "Colocation - many-to-one with list"
do_test coloc-group "Colocation - groups"
do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation"
do_test coloc-attr "Colocation based on node attributes"
do_test coloc-negative-group "Negative colocation with a group"
do_test coloc-intra-set "Intra-set colocation"
do_test bug-lf-2435 "Colocation sets with a negative score"
do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependent must stop"
do_test coloc_fp_logic "Verify floating point calculations in colocation are working"
do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc."
do_test colo_slave_w_native "cl#5070 - Verify promotion order is affected when colocating slave to native rsc."
do_test anti-colocation-order "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node"
do_test anti-colocation-master "Organize order of actions for master resources in anti-colocations"
do_test anti-colocation-slave "Organize order of actions for slave resources in anti-colocations"
do_test enforce-colo1 "Always enforce B with A INFINITY."
do_test complex_enforce_colo "Always enforce B with A INFINITY. (make sure heat-engine stops)"
echo ""
do_test rsc-sets-seq-true "Resource Sets - sequential=false"
do_test rsc-sets-seq-false "Resource Sets - sequential=true"
do_test rsc-sets-clone "Resource Sets - Clone"
do_test rsc-sets-master "Resource Sets - Master"
do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)"
#echo ""
#do_test agent1 "version: lt (empty)"
#do_test agent2 "version: eq "
#do_test agent3 "version: gt "
echo ""
do_test attrs1 "string: eq (and) "
do_test attrs2 "string: lt / gt (and)"
do_test attrs3 "string: ne (or) "
do_test attrs4 "string: exists "
do_test attrs5 "string: not_exists "
do_test attrs6 "is_dc: true "
do_test attrs7 "is_dc: false "
do_test attrs8 "score_attribute "
do_test per-node-attrs "Per node resource parameters"
echo ""
do_test mon-rsc-1 "Schedule Monitor - start"
do_test mon-rsc-2 "Schedule Monitor - move "
do_test mon-rsc-3 "Schedule Monitor - pending start "
do_test mon-rsc-4 "Schedule Monitor - move/pending start"
echo ""
do_test rec-rsc-0 "Resource Recover - no start "
do_test rec-rsc-1 "Resource Recover - start "
do_test rec-rsc-2 "Resource Recover - monitor "
do_test rec-rsc-3 "Resource Recover - stop - ignore"
do_test rec-rsc-4 "Resource Recover - stop - block "
do_test rec-rsc-5 "Resource Recover - stop - fence "
do_test rec-rsc-6 "Resource Recover - multiple - restart"
do_test rec-rsc-7 "Resource Recover - multiple - stop "
do_test rec-rsc-8 "Resource Recover - multiple - block "
do_test rec-rsc-9 "Resource Recover - group/group"
do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor"
do_test stop-failure-no-quorum "Stop failure without quorum"
do_test stop-failure-no-fencing "Stop failure without fencing available"
do_test stop-failure-with-fencing "Stop failure with fencing available"
do_test multiple-active-block-group "Support of multiple-active=block for resource groups"
do_test multiple-monitor-one-failed "Consider resource failed if any of the configured monitor operations failed"
echo ""
do_test quorum-1 "No quorum - ignore"
do_test quorum-2 "No quorum - freeze"
do_test quorum-3 "No quorum - stop "
do_test quorum-4 "No quorum - start anyway"
do_test quorum-5 "No quorum - start anyway (group)"
do_test quorum-6 "No quorum - start anyway (clone)"
do_test bug-cl-5212 "No promotion with no-quorum-policy=freeze"
do_test suicide-needed-inquorate "no-quorum-policy=suicide: suicide necessary"
do_test suicide-not-needed-initial-quorum "no-quorum-policy=suicide: suicide not necessary at initial quorum"
do_test suicide-not-needed-never-quorate "no-quorum-policy=suicide: suicide not necessary if never quorate"
do_test suicide-not-needed-quorate "no-quorum-policy=suicide: suicide necessary if quorate"
echo ""
do_test rec-node-1 "Node Recover - Startup - no fence"
do_test rec-node-2 "Node Recover - Startup - fence "
do_test rec-node-3 "Node Recover - HA down - no fence"
do_test rec-node-4 "Node Recover - HA down - fence "
do_test rec-node-5 "Node Recover - CRM down - no fence"
do_test rec-node-6 "Node Recover - CRM down - fence "
do_test rec-node-7 "Node Recover - no quorum - ignore "
do_test rec-node-8 "Node Recover - no quorum - freeze "
do_test rec-node-9 "Node Recover - no quorum - stop "
do_test rec-node-10 "Node Recover - no quorum - stop w/fence"
do_test rec-node-11 "Node Recover - CRM down w/ group - fence "
do_test rec-node-12 "Node Recover - nothing active - fence "
do_test rec-node-13 "Node Recover - failed resource + shutdown - fence "
do_test rec-node-15 "Node Recover - unknown lrm section"
do_test rec-node-14 "Serialize all stonith actions"
echo ""
do_test multi1 "Multiple Active (stop/start)"
echo ""
do_test migrate-begin "Normal migration"
do_test migrate-success "Completed migration"
do_test migrate-partial-1 "Completed migration, missing stop on source"
do_test migrate-partial-2 "Successful migrate_to only"
do_test migrate-partial-3 "Successful migrate_to only, target down"
do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from"
do_test bug-5186-partial-migrate "Handle partial migration when src node loses membership"
do_test migrate-fail-2 "Failed migrate_from"
do_test migrate-fail-3 "Failed migrate_from + stop on source"
do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target"
do_test migrate-fail-5 "Failed migrate_from + stop on source and target"
do_test migrate-fail-6 "Failed migrate_to"
do_test migrate-fail-7 "Failed migrate_to + stop on source"
do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target"
do_test migrate-fail-9 "Failed migrate_to + stop on source and target"
do_test migrate-stop "Migration in a stopping stack"
do_test migrate-start "Migration in a starting stack"
do_test migrate-stop_start "Migration in a restarting stack"
do_test migrate-stop-complex "Migration in a complex stopping stack"
do_test migrate-start-complex "Migration in a complex starting stack"
do_test migrate-stop-start-complex "Migration in a complex moving stack"
do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown"
do_test migrate-1 "Migrate (migrate)"
do_test migrate-2 "Migrate (stable)"
do_test migrate-3 "Migrate (failed migrate_to)"
do_test migrate-4 "Migrate (failed migrate_from)"
do_test novell-252693 "Migration in a stopping stack"
do_test novell-252693-2 "Migration in a starting stack"
do_test novell-252693-3 "Non-Migration in a starting and stopping stack"
do_test bug-1820 "Migration in a group"
do_test bug-1820-1 "Non-migration in a group"
do_test migrate-5 "Primitive migration with a clone"
do_test migrate-fencing "Migration after Fencing"
do_test migrate-both-vms "Migrate two VMs that have no colocation"
do_test migration-behind-migrating-remote "Migrate resource behind migrating remote connection"
do_test 1-a-then-bm-move-b "Advanced migrate logic. A then B. migrate B."
do_test 2-am-then-b-move-a "Advanced migrate logic, A then B, migrate A without stopping B"
do_test 3-am-then-bm-both-migrate "Advanced migrate logic. A then B. migrate both"
do_test 4-am-then-bm-b-not-migratable "Advanced migrate logic, A then B, B not migratable"
do_test 5-am-then-bm-a-not-migratable "Advanced migrate logic. A then B. move both, a not migratable"
do_test 6-migrate-group "Advanced migrate logic, migrate a group"
do_test 7-migrate-group-one-unmigratable "Advanced migrate logic, migrate group mixed with allow-migrate true/false"
do_test 8-am-then-bm-a-migrating-b-stopping "Advanced migrate logic, A then B, A migrating, B stopping"
do_test 9-am-then-bm-b-migrating-a-stopping "Advanced migrate logic, A then B, B migrate, A stopping"
do_test 10-a-then-bm-b-move-a-clone "Advanced migrate logic, A clone then B, migrate B while stopping A"
do_test 11-a-then-bm-b-move-a-clone-starting "Advanced migrate logic, A clone then B, B moving while A is start/stopping"
do_test a-promote-then-b-migrate "A promote then B start. migrate B"
do_test a-demote-then-b-migrate "A demote then B stop. migrate B"
if [ $DO_VERSIONED_TESTS -eq 1 ]; then
do_test migrate-versioned "Disable migration for versioned resources"
fi
#echo ""
#do_test complex1 "Complex "
do_test bug-lf-2422 "Dependency on partially active group - stop ocfs:*"
echo ""
do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node"
do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones"
do_test clone-anon-failcount "Merge failcounts for anonymous clones"
do_test inc0 "Incarnation start"
do_test inc1 "Incarnation start order"
do_test inc2 "Incarnation silent restart, stop, move"
do_test inc3 "Inter-incarnation ordering, silent restart, stop, move"
do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)"
do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)"
do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)"
do_test inc7 "Clone colocation"
do_test inc8 "Clone anti-colocation"
do_test inc9 "Non-unique clone"
do_test inc10 "Non-unique clone (stop)"
do_test inc11 "Primitive colocation with clones"
do_test inc12 "Clone shutdown"
do_test cloned-group "Make sure only the correct number of cloned groups are started"
do_test cloned-group-stop "Ensure stopping qpidd also stops glance and cinder"
do_test clone-no-shuffle "Don't prioritize allocation of instances that must be moved"
do_test clone-max-zero "Orphan processing with clone-max=0"
do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node"
do_test bug-lf-2160 "Don't shuffle clones due to colocation"
do_test bug-lf-2213 "clone-node-max enforcement for cloned groups"
do_test bug-lf-2153 "Clone ordering constraints"
do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable"
do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone"
do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)"
do_test clone-colocate-instance-2 "Colocation with a specific clone instance"
do_test clone-order-instance "Ordering with specific clone instances"
do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation"
do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups"
do_test bug-lf-2544 "Balanced clone placement"
do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0"
do_test bug-lf-2574 "Avoid clone shuffle"
do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start"
do_test bug-cl-5168 "Don't shuffle clones"
do_test bug-cl-5170 "Prevent clone from starting with on-fail=block"
do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block"
do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)"
do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)"
do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)"
do_test rebalance-unique-clones "Rebalance unique clone instances with no stickiness"
do_test clone-requires-quorum-recovery "Clone with requires=quorum on failed node needing recovery"
do_test clone-requires-quorum "Clone with requires=quorum with presumed-inactive instance on failed node"
echo ""
do_test cloned_start_one "order first clone then clone... first clone_min=2"
do_test cloned_start_two "order first clone then clone... first clone_min=2"
do_test cloned_stop_one "order first clone then clone... first clone_min=2"
do_test cloned_stop_two "order first clone then clone... first clone_min=2"
do_test clone_min_interleave_start_one "order first clone then clone... first clone_min=2 and then has interleave=true"
do_test clone_min_interleave_start_two "order first clone then clone... first clone_min=2 and then has interleave=true"
do_test clone_min_interleave_stop_one "order first clone then clone... first clone_min=2 and then has interleave=true"
do_test clone_min_interleave_stop_two "order first clone then clone... first clone_min=2 and then has interleave=true"
do_test clone_min_start_one "order first clone then primitive... first clone_min=2"
do_test clone_min_start_two "order first clone then primitive... first clone_min=2"
do_test clone_min_stop_all "order first clone then primitive... first clone_min=2"
do_test clone_min_stop_one "order first clone then primitive... first clone_min=2"
do_test clone_min_stop_two "order first clone then primitive... first clone_min=2"
echo ""
do_test unfence-startup "Clean unfencing"
do_test unfence-definition "Unfencing when the agent changes"
do_test unfence-parameters "Unfencing when the agent parameters change"
do_test unfence-device "Unfencing when a cluster has only fence devices"
echo ""
do_test master-0 "Stopped -> Slave"
do_test master-1 "Stopped -> Promote"
do_test master-2 "Stopped -> Promote : notify"
do_test master-3 "Stopped -> Promote : master location"
do_test master-4 "Started -> Promote : master location"
do_test master-5 "Promoted -> Promoted"
do_test master-6 "Promoted -> Promoted (2)"
do_test master-7 "Promoted -> Fenced"
do_test master-8 "Promoted -> Fenced -> Moved"
do_test master-9 "Stopped + Promotable + No quorum"
do_test master-10 "Stopped -> Promotable : notify with monitor"
do_test master-11 "Stopped -> Promote : colocation"
do_test novell-239082 "Demote/Promote ordering"
do_test novell-239087 "Stable master placement"
do_test master-12 "Promotion based solely on rsc_location constraints"
do_test master-13 "Include preferences of colocated resources when placing master"
do_test master-demote "Ordering when actions depend on demoting a slave resource"
do_test master-ordering "Prevent resources from starting that need a master"
do_test bug-1765 "Master-Master Colocation (don't stop the slaves)"
do_test master-group "Promotion of cloned groups"
do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily"
do_test master-failed-demote "Don't retry failed demote actions"
do_test master-failed-demote-2 "Don't retry failed demote actions (notify=false)"
do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does"
do_test master-reattach "Re-attach to a running master"
do_test master-allow-start "Don't include master score if it would prevent allocation"
do_test master-colocation "Allow master instance placement to be influenced by colocation constraints"
do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly"
do_test master-role "Prevent target-role from promoting more than master-max instances"
do_test bug-lf-2358 "Master-Master anti-colocation"
do_test master-promotion-constraint "Mandatory master colocation constraints"
do_test unmanaged-master "Ensure role is preserved for unmanaged resources"
do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters"
do_test master-demote-2 "Demote does not clear past failure"
do_test master-move "Move master based on failure of colocated group"
do_test master-probed-score "Observe the promotion score of probed resources"
do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint"
do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint"
do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by order constraint"
do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint"
do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion."
do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive"
do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score"
do_test master-demote-block "Block promotion if demote fails with on-fail=block"
do_test master-dependent-ban "Don't stop instances from being active because a dependent is banned from that host"
do_test master-stop "Stop instances due to location constraint with role=Started"
do_test master-partially-demoted-group "Allow partially demoted group to finish demoting"
do_test bug-cl-5213 "Ensure role colocation with -INFINITY is enforced"
do_test bug-cl-5219 "Allow unrelated resources with a common colocation target to remain promoted"
do_test master-asymmetrical-order "Fix the behaviors of multi-state resources with asymmetrical ordering"
do_test master-notify "Master promotion with notifies"
do_test master-score-startup "Use permanent master scores without LRM history"
do_test failed-demote-recovery "Recover resource in slave role after demote fails"
do_test failed-demote-recovery-master "Recover resource in master role after demote fails"
echo ""
do_test history-1 "Correctly parse stateful-1 resource state"
echo ""
do_test managed-0 "Managed (reference)"
do_test managed-1 "Not managed - down "
do_test managed-2 "Not managed - up "
do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource"
do_test bug-5028-detach "Ensure detach still works"
do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack"
do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged "
do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged "
do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged "
do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged "
do_test unmanaged-block-restart "Block restart of resources if any dependent resource in a group is unmanaged"
echo ""
do_test interleave-0 "Interleave (reference)"
do_test interleave-1 "coloc - not interleaved"
do_test interleave-2 "coloc - interleaved "
do_test interleave-3 "coloc - interleaved (2)"
do_test interleave-pseudo-stop "Interleaved clone during stonith"
do_test interleave-stop "Interleaved clone during stop"
do_test interleave-restart "Interleaved clone during dependency restart"
echo ""
do_test notify-0 "Notify reference"
do_test notify-1 "Notify simple"
do_test notify-2 "Notify simple, confirm"
do_test notify-3 "Notify move, confirm"
do_test novell-239079 "Notification priority"
#do_test notify-2 "Notify - 764"
do_test notifs-for-unrunnable "Don't schedule notifications for an unrunnable action"
echo ""
do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition"
do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1"
do_test 696 "OSDL #696 - CRM starts stonith RA without monitor"
do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop"
do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3"
do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1"
do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id"
do_test 829 "OSDL #829"
do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted"
do_test 994-2 "OSDL #994 - with a dependent resource"
do_test 1360 "OSDL #1360 - Clone stickiness"
do_test 1484 "OSDL #1484 - on_fail=stop"
do_test 1494 "OSDL #1494 - Clone stability"
do_test unrunnable-1 "Unrunnable"
do_test unrunnable-2 "Unrunnable 2"
do_test stonith-0 "Stonith loop - 1"
do_test stonith-1 "Stonith loop - 2"
do_test stonith-2 "Stonith loop - 3"
do_test stonith-3 "Stonith startup"
do_test stonith-4 "Stonith node state"
do_test bug-1572-1 "Recovery of groups depending on master/slave"
do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted"
do_test bug-1685 "Depends-on-master ordering"
do_test bug-1822 "Don't promote partially active groups"
do_test bug-pm-11 "New resource added to a m/s group"
do_test bug-pm-12 "Recover only the failed portion of a cloned group"
do_test bug-n-387749 "Don't shuffle clone instances"
do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped"
do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node"
do_test bug-lf-1920 "Correctly handle probes that find active resources"
do_test bnc-515172 "Location constraint with multiple expressions"
do_test colocate-primitive-with-clone "Optional colocation with a clone"
do_test use-after-free-merge "Use-after-free in native_merge_weights"
do_test bug-lf-2551 "STONITH ordering for stop"
do_test bug-lf-2606 "Stonith implies demote"
do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults"
do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering"
do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false"
do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false"
do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false"
do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts."
do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false"
do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false."
do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false."
do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false."
do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false."
do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false"
do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true"
do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources."
do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases"
do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload"
do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change."
do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart"
do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed."
do_test failcount "Ensure failcounts are correctly expired"
do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present"
do_test per-op-failcount "Ensure per-operation failcount is handled and not passed to fence agent"
do_test on-fail-ignore "Ensure on-fail=ignore works even beyond migration-threshold"
do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart"
do_test monitor-onfail-stop "bug-5058 - Monitor failure with on-fail set to stop"
do_test bug-5059 "No need to restart p_stateful1:*"
do_test bug-5069-op-enabled "Test on-fail=ignore with failure when monitor is enabled."
do_test bug-5069-op-disabled "Test on-fail=ignore with failure when monitor is disabled."
do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections"
do_test expire-non-blocked-failure "Ignore failure-timeout only if the failed operation has on-fail=block"
do_test asymmetrical-order-move "Respect asymmetrical ordering when trying to move resources"
do_test start-then-stop-with-unfence "Avoid graph loop with start-then-stop constraint plus unfencing"
do_test order-expired-failure "Order failcount cleanup after remote fencing"
do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc."
do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith."
do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group"
do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group"
do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)."
do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)"
do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group."
do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs."
do_test multiply-active-stonith "Multiply active stonith"
do_test probe-timeout "cl#5099 - Default probe timeout"
do_test concurrent-fencing "Allow performing fencing operations in parallel"
echo ""
do_test systemhealth1 "System Health () #1"
do_test systemhealth2 "System Health () #2"
do_test systemhealth3 "System Health () #3"
do_test systemhealthn1 "System Health (None) #1"
do_test systemhealthn2 "System Health (None) #2"
do_test systemhealthn3 "System Health (None) #3"
do_test systemhealthm1 "System Health (Migrate On Red) #1"
do_test systemhealthm2 "System Health (Migrate On Red) #2"
do_test systemhealthm3 "System Health (Migrate On Red) #3"
do_test systemhealtho1 "System Health (Only Green) #1"
do_test systemhealtho2 "System Health (Only Green) #2"
do_test systemhealtho3 "System Health (Only Green) #3"
do_test systemhealthp1 "System Health (Progressive) #1"
do_test systemhealthp2 "System Health (Progressive) #2"
do_test systemhealthp3 "System Health (Progressive) #3"
echo ""
do_test utilization "Placement Strategy - utilization"
do_test minimal "Placement Strategy - minimal"
do_test balanced "Placement Strategy - balanced"
echo ""
do_test placement-stickiness "Optimized Placement Strategy - stickiness"
do_test placement-priority "Optimized Placement Strategy - priority"
do_test placement-location "Optimized Placement Strategy - location"
do_test placement-capacity "Optimized Placement Strategy - capacity"
echo ""
do_test utilization-order1 "Utilization Order - Simple"
do_test utilization-order2 "Utilization Order - Complex"
do_test utilization-order3 "Utilization Order - Migrate"
do_test utilization-order4 "Utilization Order - Live Migration (bnc#695440)"
do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3"
do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)"
do_test load-stopped-loop-2 "cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering"
echo ""
do_test colocated-utilization-primitive-1 "Colocated Utilization - Primitive"
do_test colocated-utilization-primitive-2 "Colocated Utilization - Choose the most capable node"
do_test colocated-utilization-group "Colocated Utilization - Group"
do_test colocated-utilization-clone "Colocated Utilization - Clone"
do_test utilization-check-allowed-nodes "Only check the capacities of the nodes that can run the resource"
echo ""
do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources"
do_test node-maintenance-1 "cl#5128 - Node maintenance"
do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)"
do_test shutdown-maintenance-node "Do not fence a maintenance node if it shuts down cleanly"
do_test rsc-maintenance "Per-resource maintenance"
echo ""
do_test not-installed-agent "The resource agent is missing"
do_test not-installed-tools "Something the resource agent needs is missing"
echo ""
do_test stopped-monitor-00 "Stopped Monitor - initial start"
do_test stopped-monitor-01 "Stopped Monitor - failed started"
do_test stopped-monitor-02 "Stopped Monitor - started multi-up"
do_test stopped-monitor-03 "Stopped Monitor - stop started"
do_test stopped-monitor-04 "Stopped Monitor - failed stop"
do_test stopped-monitor-05 "Stopped Monitor - start unmanaged"
do_test stopped-monitor-06 "Stopped Monitor - unmanaged multi-up"
do_test stopped-monitor-07 "Stopped Monitor - start unmanaged multi-up"
do_test stopped-monitor-08 "Stopped Monitor - migrate"
do_test stopped-monitor-09 "Stopped Monitor - unmanage started"
do_test stopped-monitor-10 "Stopped Monitor - unmanaged started multi-up"
do_test stopped-monitor-11 "Stopped Monitor - stop unmanaged started"
-do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")"
+do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (target-role=Stopped)"
do_test stopped-monitor-20 "Stopped Monitor - initial stop"
do_test stopped-monitor-21 "Stopped Monitor - stopped single-up"
do_test stopped-monitor-22 "Stopped Monitor - stopped multi-up"
do_test stopped-monitor-23 "Stopped Monitor - start stopped"
do_test stopped-monitor-24 "Stopped Monitor - unmanage stopped"
do_test stopped-monitor-25 "Stopped Monitor - unmanaged stopped multi-up"
do_test stopped-monitor-26 "Stopped Monitor - start unmanaged stopped"
-do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role="Started")"
+do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role=Started)"
do_test stopped-monitor-30 "Stopped Monitor - new node started"
do_test stopped-monitor-31 "Stopped Monitor - new node stopped"
echo ""
# This is a combo test to check:
# - probe timeout defaults to the minimum-interval monitor's
# - duplicate recurring operations are ignored
# - if timeout spec is bad, the default timeout is used
# - failure is blocked with on-fail=block even if ISO8601 interval is specified
# - started/stopped role monitors are started/stopped on right nodes
do_test intervals "Recurring monitor interval handling"
echo""
do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)"
do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)"
do_test ticket-primitive-3 "Ticket - Primitive (loss-policy=stop, revoked)"
do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)"
do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)"
do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)"
do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)"
do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)"
do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)"
do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)"
do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)"
do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)"
do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)"
do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)"
do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)"
do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)"
do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)"
do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)"
do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)"
do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)"
do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)"
do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)"
do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)"
do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)"
echo""
do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)"
do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)"
do_test ticket-group-3 "Ticket - Group (loss-policy=stop, revoked)"
do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)"
do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)"
do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)"
do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)"
do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)"
do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)"
do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)"
do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)"
do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)"
do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)"
do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)"
do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)"
do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)"
do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)"
do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)"
do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)"
do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)"
do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)"
do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)"
do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)"
do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)"
echo""
do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)"
do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)"
do_test ticket-clone-3 "Ticket - Clone (loss-policy=stop, revoked)"
do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)"
do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)"
do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)"
do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)"
do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)"
do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)"
do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)"
do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)"
do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)"
do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)"
do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)"
do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)"
do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)"
do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)"
do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)"
do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)"
do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)"
do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)"
do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)"
do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)"
do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)"
echo""
do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)"
do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)"
do_test ticket-master-3 "Ticket - Master (loss-policy=stop, revoked)"
do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)"
do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)"
do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)"
do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)"
do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)"
do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)"
do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)"
do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)"
do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)"
do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)"
do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)"
do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)"
do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)"
do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)"
do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)"
do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)"
do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)"
do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)"
do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)"
do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)"
do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)"
echo ""
do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)"
do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)"
do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)"
do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)"
do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)"
do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)"
do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)"
do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)"
do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)"
do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)"
do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)"
do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)"
do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)"
do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)"
do_test cluster-specific-params "Cluster-specific instance attributes based on rules"
do_test site-specific-params "Site-specific instance attributes based on rules"
echo ""
do_test template-1 "Template - 1"
do_test template-2 "Template - 2"
do_test template-3 "Template - 3 (merge operations)"
do_test template-coloc-1 "Template - Colocation 1"
do_test template-coloc-2 "Template - Colocation 2"
do_test template-coloc-3 "Template - Colocation 3"
do_test template-order-1 "Template - Order 1"
do_test template-order-2 "Template - Order 2"
do_test template-order-3 "Template - Order 3"
do_test template-ticket "Template - Ticket"
do_test template-rsc-sets-1 "Template - Resource Sets 1"
do_test template-rsc-sets-2 "Template - Resource Sets 2"
do_test template-rsc-sets-3 "Template - Resource Sets 3"
do_test template-rsc-sets-4 "Template - Resource Sets 4"
do_test template-clone-primitive "Cloned primitive from template"
do_test template-clone-group "Cloned group from template"
do_test location-sets-templates "Resource sets and templates - Location"
do_test tags-coloc-order-1 "Tags - Colocation and Order (Simple)"
do_test tags-coloc-order-2 "Tags - Colocation and Order (Resource Sets with Templates)"
do_test tags-location "Tags - Location"
do_test tags-ticket "Tags - Ticket"
echo ""
do_test container-1 "Container - initial"
do_test container-2 "Container - monitor failed"
do_test container-3 "Container - stop failed"
do_test container-4 "Container - reached migration-threshold"
do_test container-group-1 "Container in group - initial"
do_test container-group-2 "Container in group - monitor failed"
do_test container-group-3 "Container in group - stop failed"
do_test container-group-4 "Container in group - reached migration-threshold"
do_test container-is-remote-node "Place resource within container when container is remote-node"
do_test bug-rh-1097457 "Kill user defined container/contents ordering"
do_test bug-cl-5247 "Graph loop when recovering m/s resource in a container"
do_test bundle-order-startup "Bundle startup ordering"
do_test bundle-order-partial-start "Bundle startup ordering when some dependencies are already running"
do_test bundle-order-partial-start-2 "Bundle startup ordering when some dependencies and the container are already running"
do_test bundle-order-stop "Bundle stop ordering"
do_test bundle-order-partial-stop "Bundle stop ordering when some dependencies are already stopped"
do_test bundle-order-stop-on-remote "Stop nested resource after bringing up the connection"
do_test bundle-order-startup-clone "Prevent startup because bundle isn't promoted"
do_test bundle-order-startup-clone-2 "Bundle startup with clones"
do_test bundle-order-stop-clone "Stop bundle because clone is stopping"
do_test bundle-nested-colocation "Colocation of nested connection resources"
do_test bundle-order-fencing "Order pseudo bundle fencing after parent node fencing if both are happening"
do_test bundle-probe-order-1 "order 1"
do_test bundle-probe-order-2 "order 2"
do_test bundle-probe-order-3 "order 3"
do_test bundle-probe-remotes "Ensure remotes get probed too"
do_test bundle-replicas-change "Change bundle from 1 replica to multiple"
echo ""
do_test whitebox-fail1 "Fail whitebox container rsc."
do_test whitebox-fail2 "Fail cluster connection to guest node"
do_test whitebox-fail3 "Failed containers should not run nested on remote nodes."
do_test whitebox-start "Start whitebox container with resources assigned to it"
do_test whitebox-stop "Stop whitebox container with resources assigned to it"
do_test whitebox-move "Move whitebox container with resources assigned to it"
do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource"
do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established"
do_test whitebox-ms-ordering-move "Stop/Start cycle within a moving container"
do_test whitebox-orphaned "Properly shutdown orphaned whitebox container"
do_test whitebox-orphan-ms "Properly tear down orphan ms resources on remote-nodes"
do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start."
do_test whitebox-migrate1 "Migrate both container and connection resource"
do_test whitebox-imply-stop-on-fence "Imply stop action on container node rsc when host node is fenced"
do_test whitebox-nested-group "Verify guest remote-node works nested in a group"
do_test guest-node-host-dies "Verify guest node is recovered if host goes away"
echo ""
do_test remote-startup-probes "Baremetal remote-node startup probes"
do_test remote-startup "Start up a newly discovered remote-node with no status."
do_test remote-fence-unclean "Fence unclean baremetal remote-node"
do_test remote-fence-unclean2 "Fence baremetal remote-node after cluster node fails and connection can not be recovered"
do_test remote-fence-unclean-3 "Probe failed remote nodes (triggers fencing)"
do_test remote-move "Move remote-node connection resource"
do_test remote-disable "Disable a baremetal remote-node"
do_test remote-probe-disable "Probe then stop a baremetal remote-node"
do_test remote-orphaned "Properly shutdown orphaned connection resource"
do_test remote-orphaned2 "Verify we can handle orphaned remote connections with active resources on the remote"
do_test remote-recover "Recover connection resource after cluster-node fails."
do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section"
do_test remote-partial-migrate "Make sure partial migrations are handled before ops on the remote node."
do_test remote-partial-migrate2 "Make sure partial migration target is preferred for remote connection."
do_test remote-recover-fail "Make sure start failure causes fencing if rsc are active on remote."
do_test remote-start-fail "Make sure a start failure does not result in fencing if no active resources are on remote."
do_test remote-unclean2 "Make sure monitor failure always results in fencing, even if no rsc are active on remote."
do_test remote-fence-before-reconnect "Fence before clearing recurring monitor failure"
do_test remote-recovery "Recover remote connections before attempting demotion"
do_test remote-recover-connection "Optimistic recovery of only the connection"
do_test remote-recover-all "Fencing when the connection has no home"
do_test remote-recover-no-resources "Fencing when the connection has no home and no active resources"
do_test remote-recover-unknown "Fencing when the connection has no home and the remote has no operation history"
do_test remote-reconnect-delay "Waiting for remote reconnect interval to expire"
do_test remote-connection-unrecoverable "Remote connection host must be fenced, with connection unrecoverable"
echo ""
do_test resource-discovery "Exercises resource-discovery location constraint option."
do_test rsc-discovery-per-node "Disable resource discovery per node"
if [ $DO_VERSIONED_TESTS -eq 1 ]; then
echo ""
do_test versioned-resources "Start resources with #ra-version rules"
do_test restart-versioned "Restart resources on #ra-version change"
do_test reload-versioned "Reload resources on #ra-version change"
echo ""
do_test versioned-operations-1 "Use #ra-version to configure operations of native resources"
do_test versioned-operations-2 "Use #ra-version to configure operations of stonith resources"
do_test versioned-operations-3 "Use #ra-version to configure operations of master/slave resources"
do_test versioned-operations-4 "Use #ra-version to configure operations of groups of the resources"
fi
echo ""
test_results
exit $EXITCODE
diff --git a/cts/cts-support.in b/cts/cts-support.in
index f3ab7924b4..ca87ff7a41 100644
--- a/cts/cts-support.in
+++ b/cts/cts-support.in
@@ -1,128 +1,128 @@
#!/bin/sh
#
# Installer for support files needed by Pacemaker's Cluster Test Suite
#
# Copyright 2018 Red Hat, Inc.
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
USAGE_TEXT="Usage: $0 <install|uninstall|--help>"
HELP_TEXT="$USAGE_TEXT
Commands (must be run as root):
install Install support files needed by Pacemaker CTS
uninstall Remove support files needed by Pacemaker CTS"
# These constants must track crm_exit_t values
CRM_EX_OK=0
CRM_EX_ERROR=1
CRM_EX_USAGE=64
UNIT_DIR="@systemdunitdir@"
LIBEXEC_DIR="@libexecdir@/pacemaker"
INIT_DIR="@INITDIR@"
DATA_DIR="@datadir@/pacemaker/tests/cts"
UPSTART_DIR="/etc/init"
DUMMY_DAEMON="pacemaker-cts-dummyd"
DUMMY_DAEMON_UNIT="pacemaker-cts-dummyd@.service"
LSB_DUMMY="LSBDummy"
UPSTART_DUMMY="pacemaker-cts-dummyd.conf"
# If the install directory doesn't exist, assume we're in a build directory.
if [ ! -d "$DATA_DIR" ]; then
# If readlink supports -e (i.e. GNU), use it.
readlink -e / >/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
- DATA_DIR="$(dirname $(readlink -e $0))"
+ DATA_DIR="$(dirname "$(readlink -e "$0")")"
else
- DATA_DIR="$(dirname $0)"
+ DATA_DIR="$(dirname "$0")"
fi
fi
usage() {
- echo "Error: $@"
+ echo "Error:" "$@"
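# Passing "$@" as separate arguments avoids mixing a string and an array in
# one word, which shellcheck flags as SC2145.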
echo "$USAGE_TEXT"
exit $CRM_EX_USAGE
}
must_be_root() {
if ! [ "$(id -u)" = "0" ]; then
usage "this command must be run as root"
return $CRM_EX_ERROR
fi
return $CRM_EX_OK
}
support_uninstall() {
must_be_root || return $CRM_EX_ERROR
if [ -e "$UNIT_DIR/$DUMMY_DAEMON_UNIT" ]; then
echo "Removing $UNIT_DIR/$DUMMY_DAEMON_UNIT ..."
rm -f "$UNIT_DIR/$DUMMY_DAEMON_UNIT"
systemctl daemon-reload # Ignore failure
fi
for FILE in \
"$LIBEXEC_DIR/$DUMMY_DAEMON" \
"$UPSTART_DIR/$UPSTART_DUMMY" \
"$INIT_DIR/$LSB_DUMMY"
do
if [ -e "$FILE" ]; then
echo "Removing $FILE ..."
rm -f "$FILE"
fi
done
return $CRM_EX_OK
}
support_install() {
support_uninstall || return $CRM_EX_ERROR
- cd "$DATA_DIR"
+ cd "$DATA_DIR" || return $CRM_EX_ERROR
if [ -d "$UNIT_DIR" ]; then
echo "Installing $DUMMY_DAEMON ..."
mkdir -p "$LIBEXEC_DIR"
install -m 0755 "$DUMMY_DAEMON" "$LIBEXEC_DIR" || return $CRM_EX_ERROR
echo "Installing $DUMMY_DAEMON_UNIT ..."
install -m 0644 "$DUMMY_DAEMON_UNIT" "$UNIT_DIR" || return $CRM_EX_ERROR
systemctl daemon-reload # Ignore failure
fi
echo "Installing $LSB_DUMMY to $INIT_DIR ..."
mkdir -p "$INIT_DIR"
install -m 0755 "$LSB_DUMMY" "$INIT_DIR" || return $CRM_EX_ERROR
if [ -d "$UPSTART_DIR" -a -f "$UPSTART_DUMMY" ]; then
echo "Installing $UPSTART_DUMMY to $UPSTART_DIR ..."
install -m 0644 "$UPSTART_DUMMY" "$UPSTART_DIR" || return $CRM_EX_ERROR
fi
return $CRM_EX_OK
}
COMMAND=""
while [ $# -gt 0 ] ; do
case "$1" in
--help)
echo "$HELP_TEXT"
exit $CRM_EX_OK
;;
install|uninstall)
COMMAND="$1"
shift
;;
*)
usage "unknown option '$1'"
;;
esac
done
case "$COMMAND" in
install) support_install ;;
uninstall) support_uninstall ;;
*) usage "must specify command" ;;
esac
diff --git a/cts/cts.in b/cts/cts.in
index 8b5074991f..eb908dc88b 100755
--- a/cts/cts.in
+++ b/cts/cts.in
@@ -1,328 +1,328 @@
#!@BASH_PATH@
#
# Copyright 2012-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
if [ -e $PWD/cts/CTSlab.py ]; then
cts_root=$PWD/cts
elif [ -e $PWD/CTSlab.py ]; then
cts_root=$PWD
else
cts_root=`dirname $0`
fi
logfile=0
summary=0
verbose=0
watch=0
saved=0
tests=""
install=0
clean=0
build=0
kill=0
run=0
boot=0
setup=0
target=rhel-7
cmd=""
trace=""
custom_log=""
patterns="-e CTS:"
helpmsg=$(cat <<EOF
Usage: %s [options] {[{init|local-init|setup} [TARGET]] | [OTHER-CMDS]}
[--]help, -h show help screen and exit
-x turn on debugging
-a show relevant screen sessions and exit
-c,-g CLUSTER_NAME set the cluster name
-S show summary from the last CTS run
-s show summary for the current log (see -l)
-v increase verbosity
-p (currently unused)
-e PATTERN grep pattern to apply when 'summary' or 'watch' requested
-l print the filename of the log that would be operated on
-w continuous (filtered) monitoring of the log file
-f,-sf FILE show summary for the provided log
-t TEST, [0-9]* add a test to the working set
[--]build [???] request building Pacemaker
[--]kill request termination of cluster software
[--]run request CTS run (passing remaining arguments through)
[--]boot, start request CTS run (with --boot option)
[--]clean request cleaning up after CTS run
[--]install, --inst request installing packages to get ready to run CTS
[--]setup request initialization to get ready to run CTS
trace-ls, tls list traced functions
trace-add, tadd FUNC add a function to the list of traced ones
trace-rm, trm FUNC remove a function from the list of traced ones
trace-set, tset FUNC set function(s) as the only ones to be traced
(f|fedora|r|rhel).* specify target distro
init, local-init [local] initialize CTS environment
--wget [local] download up-to-date CTS helpers
-- delimits tests that follow
EOF
)
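# Illustrative invocations (assuming a dsh group named "mycluster" exists):
#   cts -g mycluster run      # kick off a 500-iteration CTS run on that cluster
#   cts -g mycluster -w       # continuously watch the filtered cluster log
#   cts -S                    # summarize the most recent saved CTS run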
# Modify an uninstalled source checkout to allow running CTS there
local_init() {
local_root="$(dirname "$cts_root")"
if [ ! -r "$cts_root/CTSvars.py.in" ]; then
echo "$local_root does not appear to be a source code directory"
exit 1
elif [ ! -r "$cts_root/CTSvars.py" ]; then
echo "You must run configure and make first"
exit 1
fi
# Update CTS_home and Fencing_Home in CTSvars.py
sed -e "s:@datadir@/@PACKAGE@/tests/cts:$local_root/cts:" \
"$cts_root/CTSvars.py" > "$cts_root/CTSvars.py.$$"
mv -- "$cts_root/CTSvars.py.$$" "$cts_root/CTSvars.py"
files="extra/cluster-init extra/cluster-helper extra/cluster-clean"
for f in $files; do
cp "$local_root/$f" "$cts_root/"
done
# Update report_data in a local copy of crm_report
sed -e "s:@datadir@/@PACKAGE@:$local_root/tools:" \
"$local_root/tools/crm_report" > "$cts_root/crm_report"
chmod +x "$cts_root/crm_report"
# Install the necessary helpers to system locations (must be done as root)
"$cts_root/cts-support" install
echo "Make sure you add $cts_root to your PATH and set a value for \$cluster_name in .bashrc"
exit 0
}
while true; do
case $1 in
-h|--help|help) printf "${helpmsg}\n" "$0"; exit;;
-x) set -x; shift;;
-a)
screen -ls | grep cts
exit 0;;
-c|-g) cluster_name=$2; shift; shift;;
-S) summary=1; saved=1; shift;;
-s) summary=1; shift;;
-v) verbose=`expr $verbose + 1`; shift;;
-p) shift;;
-e) patterns="$patterns -e `echo $2 | sed 's/ /\\\W/g'`"; shift; shift;;
-l) logfile=1; shift;;
-w) watch=1; shift;;
-f|-sf) summary=1; custom_log=$2; shift; shift;;
-t) tests="$tests $2"; shift; shift;;
[0-9]*) tests="$tests $1"; shift;;
--build|build) build=1; shift;;
--kill|kill) kill=1; shift; break;;
--run|run) run=1; shift; break;;
--boot|boot|start) boot=1; clean=1; shift; break;;
--clean|clean) clean=1; shift;;
--inst|--install|install) install=1; clean=1; shift;;
--setup|setup) setup=1; shift;;
trace-ls|tls) cmd=$1; shift;;
trace-add|tadd|trace-rm|trm|trace-set|tset) cmd=$1; trace=$2; shift; shift;;
- f*|fedora*)
+ f*)
target="fedora-`echo $1 | sed -e s/fedora// -e s/-// -e s/f//`"
shift;;
r|rhel) target="rhel-7"; shift;;
- r*|rhel*)
+ r*)
target="rhel-`echo $1 | sed -e s/rhel// -e s/-// -e s/r//`"
shift;;
init|local-init) local_init ;;
--wget)
files="cluster-helper cluster-init cluster-clean"
for f in $files; do
rm -f $cts_root/$f
echo "Downloading helper script $f from GitHub"
wget -O $cts_root/$f https://raw.github.com/ClusterLabs/pacemaker/master/extra/$f
chmod +x $cts_root/$f
done
shift
;;
--) shift; tests="$tests $*"; break;;
"") break;;
*) echo "Unknown argument: $1"; exit 1;;
esac
done
# Add the location of this script
export PATH="$PATH:$cts_root"
which cluster-helper &>/dev/null
if [ $? != 0 ]; then
echo $0 needs the cluster-helper script to be in your path
echo You can obtain it from: https://raw.github.com/ClusterLabs/pacemaker/master/extra/cluster-helper
exit 1
fi
which cluster-clean &>/dev/null
if [ $? != 0 ]; then
echo $0 needs the cluster-clean script to be in your path
echo You can obtain it from: https://raw.github.com/ClusterLabs/pacemaker/master/extra/cluster-clean
exit 1
fi
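# Two separate [ ] commands joined by || replace the obsolescent "-o" test
# operator, whose result POSIX leaves unspecified once a test has >4 arguments.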
-if [ "x$cluster_name" = x -o "x$cluster_name" = xpick ]; then
+if [ "x$cluster_name" = x ] || [ "x$cluster_name" = xpick ]; then
clusters=`ls -1 ~/.dsh/group/[a-z]+[0-9] | sed s/.*group.// | tr '\n' ' ' `
echo "custom) interactively define a cluster"
for i in $clusters; do
echo "$i) `cluster-helper --list short -g $i`"
done
read -p "Choose a cluster [custom]: " cluster_name
echo
fi
if [ -z $cluster_name ]; then
cluster_name=custom
fi
case $cluster_name in
custom)
read -p "Cluster name: " cluster_name
read -p "Cluster hosts: " cluster_hosts
read -p "Cluster log file: " cluster_log
cluster-helper add -g "$cluster_name" -w "$cluster_hosts"
;;
*)
cluster_hosts=`cluster-helper --list short -g $cluster_name`
cluster_log=~/cluster-$cluster_name.log;
;;
esac
if [ x$cmd != x ]; then
config=/etc/sysconfig/pacemaker
case $cmd in
trace-ls|tls)
cluster-helper -g $cluster_name -- grep PCMK_trace_functions $config
;;
trace-add|tadd)
echo "Adding $trace to PCMK_trace_functions"
cluster-helper -g $cluster_name -- sed -i "s/.*PCMK_trace_functions=/PCMK_trace_functions=$trace,/" $config
;;
trace-rm|trm)
echo "Removing $trace from PCMK_trace_functions"
cluster-helper -g $cluster_name -- sed -i "s/.*PCMK_trace_functions=\\\\\\(.*\\\\\\)$trace,\\\\\\(.*\\\\\\)/PCMK_trace_functions=\\\\\\1\\\\\\2/" $config
;;
trace-set|tset)
echo "Setting PCMK_trace_functions to '$trace'"
cluster-helper -g $cluster_name -- sed -i "s/.*PCMK_trace_functions.*/PCMK_trace_functions=$trace/" $config
;;
esac
exit 0
fi
-if [ $build = 1 -a $run = 1 ]; then
+if [ $build = 1 ] && [ $run = 1 ]; then
install=1
clean=1
fi
if [ $build = 1 ]; then
which build-pcmk
if [ $? != 0 ]; then
echo "You'll need to write/obtain build-pcmk in order to build pacemaker from here. Skipping"
else
build-pcmk r7
rc=$?
if [ $rc != 0 ]; then
echo "Build failed: $rc"
exit $rc
fi
fi
fi
if [ $clean = 1 ]; then
rm -f $cluster_log; cluster-clean -g $cluster_name --kill
elif [ $kill = 1 ]; then
cluster-clean -g $cluster_name --kill-only
exit 0
fi
if [ $install = 1 ]; then
cluster-helper -g $cluster_name -- yum install -y pacemaker pacemaker-debuginfo pacemaker-cts libqb libqb-debuginfo
fi
if [ $setup = 1 ]; then
cluster-init -g $cluster_name $target -u --test
exit 0
elif [ $boot = 1 ]; then
$cts_root/CTSlab.py -r -c -g $cluster_name --boot
rc=$?
if [ $rc = 0 ]; then
echo "The cluster is ready..."
fi
exit $rc
elif [ $run = 1 ]; then
$cts_root/CTSlab.py -r -c -g $cluster_name 500 "$@"
exit $?
elif [ $clean = 1 ]; then
exit 0
fi
screen -ls | grep cts-$cluster_name &>/dev/null
active=$?
if [ ! -z $custom_log ]; then
cluster_log=$custom_log
fi
-if [ "x$tests" != x -a "x$tests" != "x " ]; then
+if [ "x$tests" != x ] && [ "x$tests" != "x " ]; then
for t in $tests; do
echo "crm_report --cts-log $cluster_log -d -T $t"
crm_report --cts-log $cluster_log -d -T $t
done
elif [ $logfile = 1 ]; then
echo $cluster_log
elif [ $summary = 1 ]; then
files=$cluster_log
if [ $saved = 1 ]; then
files=`ls -1tr ~/CTS-*/cluster-log.txt`
fi
for f in $files; do
echo $f
case $verbose in
0) cat -n $f | grep $patterns | grep -v "CTS: debug:"
;;
1) cat -n $f | grep $patterns | grep -v "CTS:.* cmd:"
;;
*) cat -n $f | grep $patterns
;;
esac
echo ""
done
elif [ $watch = 1 ]; then
case $verbose in
0) tail -F $cluster_log | grep $patterns | grep -v "CTS: debug:"
;;
1) tail -F $cluster_log | grep $patterns | grep -v "CTS:.* cmd:"
;;
*) tail -F $cluster_log | grep $patterns
;;
esac
elif [ $active = 0 ]; then
screen -x cts-$cluster_name
else
touch $cluster_log
# . ~/.bashrc
export cluster_name cluster_hosts cluster_log
screen -S cts-$cluster_name bash
fi
diff --git a/cts/lxc_autogen.sh.in b/cts/lxc_autogen.sh.in
index 1479296af8..62c62d960e 100644
--- a/cts/lxc_autogen.sh.in
+++ b/cts/lxc_autogen.sh.in
@@ -1,532 +1,532 @@
#!@BASH_PATH@
#
# Copyright 2013-2018 David Vossel <davidvossel@gmail.com>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
containers="2"
download=0
share_configs=0
# different from the default libvirt network, in case this is run nested in a KVM instance
addr="192.168.123.1"
restore=0
restore_pcmk=0
restore_all=0
generate=0
key_gen=0
cib=0
anywhere=0
add_master=0
verify=0
working_dir="@CRM_CONFIG_CTS@/lxc"
run_dirs="/run /var/run /usr/var/run"
SSH_CMD_OPTS="
-o StrictHostKeyChecking=no
-o ConnectTimeout=30
-o BatchMode=yes
-l root
-T
"
# must be on one line b/c used inside quotes
SSH_RSYNC_OPTS="-o UserKnownHostsFile=/dev/null -o BatchMode=yes -o StrictHostKeyChecking=no"
function helptext() {
echo "lxc_autogen.sh - A tool for generating libvirt lxc containers for testing purposes."
echo ""
echo "Usage: lxc-autogen [options]"
echo ""
echo "Options:"
echo "-g, --generate Generate libvirt lxc environment in the directory this script is run from."
echo "-k, --key-gen Generate pacemaker remote key only."
echo "-r, --restore-libvirt Restore the default network, and libvirt config to before this script ran."
echo "-p, --restore-cib Remove cib entries this script generated."
echo "-R, --restore-all Restore both libvirt and cib plus clean working directory. This will leave libvirt xml files though so rsc can be stopped properly."
echo ""
echo "-A, --allow-anywhere Allow the containers to live anywhere in the cluster"
echo "-a, --add-cib Add remote-node entries for each lxc instance into the cib"
echo "-m, --add-master Add master resource shared between remote-nodes"
echo "-d, --download-agent Download and install the latest VirtualDomain agent."
echo "-s, --share-configs Synchronize on all known cluster nodes"
echo "-c, --containers Specify the number of containers to generate, defaults to $containers. Used with -g"
echo "-n, --network What network to override default libvirt network to. Example: -n 192.168.123.1. Used with -g"
echo "-v, --verify Verify environment is capable of running lxc"
echo ""
exit $1
}
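# Typical lifecycle (illustrative, using the options documented above):
#   lxc_autogen.sh -g -c 2    # generate two containers in the working directory
#   lxc_autogen.sh -a -m      # add remote-node entries plus the shared master to the cib
#   lxc_autogen.sh -R         # remove the cib entries and restore libvirt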
while true ; do
case "$1" in
--help|-h|-\?) helptext 0;;
-c|--containers) containers="$2"; shift; shift;;
-d|--download-agent) download=1; shift;;
-s|--share-configs) share_configs=1; shift;;
-n|--network) addr="$2"; shift; shift;;
-r|--restore-libvirt) restore=1; shift;;
-p|--restore-cib) restore_pcmk=1; shift;;
-R|--restore-all)
restore_all=1
restore=1
restore_pcmk=1
shift;;
-g|--generate) generate=1; key_gen=1; shift;;
-k|--key-gen) key_gen=1; shift;;
-a|--add-cib) cib=1; shift;;
-A|--allow-anywhere) anywhere=1; shift;;
-m|--add-master) add_master=1; shift;;
-v|--verify) verify=1; shift;;
"") break;;
*) helptext 1;;
esac
done
if [ $verify -eq 1 ]; then
# verify virsh tool is available and that
# we can connect to lxc driver.
virsh -c lxc:/// list --all > /dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Could not connect 'virsh -c lxc:///' check that libvirt lxc driver is installed"
# yum install -y libvirt-daemon-driver-lxc libvirt-daemon-lxc libvirt-login-shell
exit 1
fi
cat /etc/selinux/config | grep -e "SELINUX.*=.*permissive" -e "SELINUX.*=.*enforcing" > /dev/null 2>&1
if [ $? -ne 0 ]; then
echo "/etc/selinux/config must have SELINUX set to permissive or enforcing mode."
exit 1
fi
ps x > /tmp/lxc-autogen-libvirt-test.txt
grep "libvirtd" /tmp/lxc-autogen-libvirt-test.txt
if [ $? -ne 0 ]; then
rm -f /tmp/lxc-autogen-libvirt-test.txt
echo "libvirtd isn't up."
exit 1
fi
rm -f /tmp/lxc-autogen-libvirt-test.txt
which rsync > /dev/null 2>&1
if [ $? -ne 0 ]; then
echo "rsync is required"
fi
which pacemaker-remoted > /dev/null 2>&1
if [ $? -ne 0 ]; then
echo "pacemaker-remoted is required"
fi
fi
# Strip the last octet off addr (e.g. 192.168.123.1 -> 192.168.123)
addr=$(echo $addr | awk -F. '{print $1"."$2"."$3}')
this_node()
{
crm_node -n
}
other_nodes()
{
crm_node -l | awk "\$2 != \"$(this_node)\" {print \$2}"
}
make_directory()
{
# argument must be full path
DIR="$1"
mkdir -p "$DIR"
if [ $share_configs -eq 1 ]; then
for node in $(other_nodes); do
ssh $SSH_CMD_OPTS $node mkdir -p "$DIR"
done
fi
}
sync_file()
{
TARGET="$1"
if [ $share_configs -eq 1 ]; then
for node in $(other_nodes); do
rsync -ave "ssh $SSH_RSYNC_OPTS" "$TARGET" "${node}:${TARGET}"
done
fi
}
download_agent()
{
wget https://raw.github.com/ClusterLabs/resource-agents/master/heartbeat/VirtualDomain
chmod 755 VirtualDomain
mv -f VirtualDomain /usr/lib/ocf/resource.d/heartbeat/VirtualDomain
sync_file /usr/lib/ocf/resource.d/heartbeat/VirtualDomain
}
set_network()
{
rm -f cur_network.xml
cat << END >> cur_network.xml
<network>
<name>default</name>
<uuid>41ebdb84-7134-1111-a136-91f0f1119225</uuid>
<forward mode='nat'/>
<bridge name='virbr0' stp='on' delay='0' />
<mac address='52:54:00:A8:12:35'/>
<ip address='$addr.1' netmask='255.255.255.0'>
<dhcp>
<range start='$addr.2' end='$addr.254' />
</dhcp>
</ip>
</network>
END
sync_file ${working_dir}/cur_network.xml
}
distribute_configs()
{
for node in $(other_nodes); do
rsync -ave "ssh $SSH_RSYNC_OPTS" ${working_dir}/lxc*.xml ${node}:${working_dir}
rsync -ave "ssh $SSH_RSYNC_OPTS" ${working_dir}/lxc*-filesystem ${node}:${working_dir}
done
}
start_network()
{
NODE="$1"
ssh $SSH_CMD_OPTS $NODE <<-EOF
cd $working_dir
virsh net-info default >/dev/null 2>&1
if [ \$? -eq 0 ]; then
if [ ! -f restore_default.xml ]; then
virsh net-dumpxml default > restore_default.xml
fi
virsh net-destroy default
virsh net-undefine default
fi
virsh net-define cur_network.xml
virsh net-start default
virsh net-autostart default
EOF
}
start_network_all()
{
- start_network $(this_node)
+ start_network "$(this_node)"
if [ $share_configs -eq 1 ]; then
for node in $(other_nodes); do
- start_network $node
+ start_network "$node"
done
fi
}
add_hosts_entry()
{
IP="$1"
HNAME="$2"
echo $IP $HNAME >>/etc/hosts
if [ $share_configs -eq 1 ]; then
for node in $(other_nodes); do
ssh $SSH_CMD_OPTS $node "echo $IP $HNAME >>/etc/hosts"
done
fi
}
generate_key()
{
if [ ! -e /etc/pacemaker/authkey ]; then
make_directory /etc/pacemaker
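# 4 KiB of random data; the cluster nodes and pacemaker-remoted inside each
# container must share this key for remote connections to authenticate.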
dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1
sync_file /etc/pacemaker/authkey
fi
}
generate()
{
set_network
# Generate libvirt domains in xml
for (( c=1; c <= $containers; c++ ))
do
# Clean any previous definition
rm -rf lxc$c.xml lxc$c-filesystem
# Create a basic filesystem with run directories
for dir in $run_dirs; do
mkdir -p lxc$c-filesystem/$dir
done
# Create libvirt definition
suffix=$((10 + $c))
prefix=$(echo $addr | awk -F. '{print $1"."$2}')
subnet=$(echo $addr | awk -F. '{print $3}')
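# The host part starts at .11 (10 + container index); if it runs past 255,
# carry into the next /24 subnet so each container still gets a unique address.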
while [ $suffix -gt 255 ]; do
subnet=$(($subnet + 1))
suffix=$(($suffix - 255))
done
cip=$prefix.$subnet.$suffix
cat << END >> lxc$c.xml
<domain type='lxc'>
<name>lxc$c</name>
<memory unit='KiB'>200704</memory>
<os>
<type>exe</type>
<init>$working_dir/lxc$c-filesystem/launch-helper</init>
</os>
<devices>
<console type='pty'/>
<filesystem type='ram'>
<source usage='150528'/>
<target dir='/dev/shm'/>
</filesystem>
END
for dir in $run_dirs; do
cat << END >> lxc$c.xml
<filesystem type='mount'>
<source dir='$working_dir/lxc$c-filesystem${dir}'/>
<target dir='$dir'/>
</filesystem>
END
done
cat << END >> lxc$c.xml
<interface type='network'>
<mac address='52:54:$(($RANDOM % 9))$(($RANDOM % 9)):$(($RANDOM % 9))$(($RANDOM % 9)):$(($RANDOM % 9))$(($RANDOM % 9)):$(($RANDOM % 9))$(($RANDOM % 9))'/>
<source network='default'/>
</interface>
</devices>
</domain>
END
# Create CIB definition
rm -f container$c.cib
cat << END >> container$c.cib
<primitive class="ocf" id="container$c" provider="heartbeat" type="VirtualDomain">
<instance_attributes id="container$c-instance_attributes">
<nvpair id="container$c-instance_attributes-force_stop" name="force_stop" value="true"/>
<nvpair id="container$c-instance_attributes-hypervisor" name="hypervisor" value="lxc:///"/>
<nvpair id="container$c-instance_attributes-config" name="config" value="$working_dir/lxc$c.xml"/>
</instance_attributes>
<utilization id="container$c-utilization">
<nvpair id="container$c-utilization-cpu" name="cpu" value="1"/>
<nvpair id="container$c-utilization-hv_memory" name="hv_memory" value="100"/>
</utilization>
<meta_attributes id="container$c-meta_attributes">
<nvpair id="container$c-meta_attributes-remote-node" name="remote-node" value="lxc$c"/>
</meta_attributes>
</primitive>
END
# Create container init
rm -f lxc$c-filesystem/launch-helper
cat << END >> lxc$c-filesystem/launch-helper
#!@BASH_PATH@
ip -f inet addr add $cip/24 dev eth0
ip link set eth0 up
ip route add default via $addr.1
hostname lxc$c
df > $working_dir/lxc$c-filesystem/disk_usage.txt
export PCMK_debugfile=@CRM_LOG_DIR@/pacemaker_remote_lxc$c.log
/usr/sbin/pacemaker-remoted
END
chmod 711 lxc$c-filesystem/launch-helper
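# launch-helper is the container's init process: it configures eth0 with the
# static address computed above, routes via the host bridge ($addr.1), sets
# the hostname, then starts pacemaker-remoted so the container can join the
# cluster as a remote node.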
add_hosts_entry $cip lxc$c
done
# Create CIB fragment for a master-slave resource
rm -f lxc-ms.cib
cat << END >> lxc-ms.cib
<master id="lxc-ms-master">
<primitive class="ocf" id="lxc-ms" provider="pacemaker" type="Stateful">
<instance_attributes id="lxc-ms-instance_attributes"/>
<operations>
<op id="lxc-ms-monitor-interval-10s" interval="10s" name="monitor"/>
</operations>
</primitive>
<meta_attributes id="lxc-ms-meta_attributes">
<nvpair id="lxc-ms-meta_attributes-master-max" name="master-max" value="1"/>
<nvpair id="lxc-ms-meta_attributes-clone-max" name="clone-max" value="$containers"/>
</meta_attributes>
</master>
END
}
apply_cib_master()
{
cibadmin -Q > cur.cib
export CIB_file=cur.cib
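# While CIB_file is set, cibadmin operates on this local copy rather than
# the live cluster; all changes are then pushed back at once with the
# --replace call below.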
cibadmin -o resources -Mc -x lxc-ms.cib
for tmp in $(ls lxc*.xml | sed -e 's/\.xml//g'); do
echo "<rsc_location id=\"lxc-ms-location-${tmp}\" node=\"${tmp}\" rsc=\"lxc-ms-master\" score=\"INFINITY\"/>" > tmp_constraint
cibadmin -o constraints -Mc -x tmp_constraint
done
# Make sure the version changes even if the content doesn't
cibadmin -B
unset CIB_file
cibadmin --replace -o configuration --xml-file cur.cib
rm -f cur.cib
}
apply_cib_entries()
{
cibadmin -Q > cur.cib
export CIB_file=cur.cib
- for tmp in $(ls container*.cib); do
+ for tmp in container*.cib; do
cibadmin -o resources -Mc -x $tmp
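# Extract the remote-node name (e.g. lxc1) from the nvpair so the location
# constraints below can reference the container's node entry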
remote_node=$(cat ${tmp} | grep remote-node | sed -n -e 's/^.*value=\"\(.*\)\".*/\1/p')
if [ $anywhere -eq 0 ]; then
tmp=$(echo $tmp | sed -e 's/\.cib//g')
- crm_resource -M -r $tmp -H $(this_node)
+ crm_resource -M -r "$tmp" -H "$(this_node)"
fi
echo "<rsc_location id=\"lxc-ping-location-${remote_node}\" node=\"${remote_node}\" rsc=\"Connectivity\" score=\"-INFINITY\"/>" > tmp_constraint
# It's fine if applying this constraint fails; it only helps CTS when the
# connectivity resources are in use, since those resources fail the remote nodes.
cibadmin -o constraints -Mc -x tmp_constraint > /dev/null 2>&1
for rsc in $(crm_resource -l | grep rsc_ ); do
echo "<rsc_location id=\"lxc-${rsc}-location-${remote_node}\" node=\"${remote_node}\" rsc=\"${rsc}\" score=\"-INFINITY\"/>" > tmp_constraint
cibadmin -o constraints -Mc -x tmp_constraint > /dev/null 2>&1
done
rm -f tmp_constraint
done
# Make sure the version changes even if the content doesn't
cibadmin -B
unset CIB_file
cibadmin --replace -o configuration --xml-file cur.cib
rm -f cur.cib
}
restore_cib()
{
cibadmin -Q > cur.cib
export CIB_file=cur.cib
for tmp in $(ls lxc*.xml | sed -e 's/\.xml//g'); do
echo "<rsc_location id=\"lxc-ms-location-${tmp}\" node=\"${tmp}\" rsc=\"lxc-ms-master\" score=\"INFINITY\"/>" > tmp_constraint
cibadmin -o constraints -D -x tmp_constraint
echo "<rsc_location id=\"lxc-ping-location-${tmp}\" node=\"${tmp}\" rsc=\"Connectivity\" score=\"-INFINITY\"/>" > tmp_constraint
cibadmin -o constraints -D -x tmp_constraint
for rsc in $(crm_resource -l | grep rsc_ ); do
echo "<rsc_location id=\"lxc-${rsc}-location-${tmp}\" node=\"${tmp}\" rsc=\"${rsc}\" score=\"-INFINITY\"/>" > tmp_constraint
cibadmin -o constraints -D -x tmp_constraint
done
rm -f tmp_constraint
done
cibadmin -o resources -D -x lxc-ms.cib
- for tmp in $(ls container*.cib); do
+ for tmp in container*.cib; do
tmp=$(echo $tmp | sed -e 's/\.cib//g')
- crm_resource -U -r $tmp -H $(this_node)
- crm_resource -D -r $tmp -t primitive
+ crm_resource -U -r "$tmp" -H "$(this_node)"
+ crm_resource -D -r "$tmp" -t primitive
done
# Make sure the version changes even if the content doesn't
cibadmin -B
unset CIB_file
cibadmin --replace -o configuration --xml-file cur.cib
rm -f cur.cib
# Allow the cluster to stabilize before continuing
crm_resource --wait
# Purge nodes from caches and CIB status section
for tmp in $(ls lxc*.xml | sed -e 's/\.xml//g'); do
crm_node --force --remove $tmp
done
}
restore_network()
{
NODE="$1"
ssh $SSH_CMD_OPTS $NODE <<-EOF
cd $working_dir
for tmp in \$(ls lxc*.xml | sed -e 's/\.xml//g'); do
virsh -c lxc:/// destroy \$tmp >/dev/null 2>&1
virsh -c lxc:/// undefine \$tmp >/dev/null 2>&1
sed -i.bak "/...\....\....\..* \${tmp}/d" /etc/hosts
done
virsh net-destroy default >/dev/null 2>&1
virsh net-undefine default >/dev/null 2>&1
if [ -f restore_default.xml ]; then
virsh net-define restore_default.xml
virsh net-start default
rm restore_default.xml
fi
EOF
echo "Containers destroyed and default network restored on $NODE"
}
restore_libvirt()
{
- restore_network $(this_node)
+ restore_network "$(this_node)"
if [ $share_configs -eq 1 ]; then
for node in $(other_nodes); do
restore_network $node
done
fi
}
restore_files()
{
- ls | grep -v "lxc.\.xml" | xargs rm -rf
+ find . -maxdepth 1 -not -name "lxc*.xml" -a -not -name . -exec rm -rf "{}" ";"
if [ $share_configs -eq 1 ]; then
for node in $(other_nodes); do
ssh $SSH_CMD_OPTS $node rm -rf \
$working_dir/lxc*-filesystem \
$working_dir/cur_network.xml
done
fi
}
make_directory $working_dir
-cd $working_dir
+cd $working_dir || exit 1
# Generate files as requested
if [ $download -eq 1 ]; then
download_agent
fi
if [ $key_gen -eq 1 ]; then
generate_key
fi
if [ $generate -eq 1 ]; then
generate
fi
if [ $share_configs -eq 1 ]; then
distribute_configs
fi
if [ $generate -eq 1 ]; then
start_network_all
fi
# Update cluster as requested
if [ $cib -eq 1 ]; then
apply_cib_entries
fi
if [ $add_master -eq 1 ]; then
apply_cib_master
fi
# Restore original state as requested
if [ $restore_pcmk -eq 1 ]; then
restore_cib
fi
if [ $restore -eq 1 ]; then
restore_libvirt
fi
if [ $restore_all -eq 1 ]; then
restore_files
fi
diff --git a/cts/scheduler/origin.exp b/cts/scheduler/origin.exp
index 780be47616..cda2b5808d 100644
--- a/cts/scheduler/origin.exp
+++ b/cts/scheduler/origin.exp
@@ -1,11 +1,11 @@
<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY" transition_id="0">
<synapse id="0">
<action_set>
<rsc_op id="4" operation="monitor" operation_key="resD_monitor_3600000" on_node="node1" on_node_uuid="node1">
<primitive id="resD" class="ocf" provider="heartbeat" type="Dummy"/>
- <attributes CRM_meta_interval="3600000" CRM_meta_interval_origin="2014-06-01 00:35:00" CRM_meta_name="monitor" CRM_meta_on_node="node1" CRM_meta_on_node_uuid="node1" CRM_meta_start_delay="2100000" CRM_meta_timeout="60000" />
+ <attributes CRM_meta_interval="3600000" CRM_meta_interval_origin="2014-06-01 00:35:00" CRM_meta_name="monitor" CRM_meta_on_node="node1" CRM_meta_on_node_uuid="node1" CRM_meta_start_delay="420000" CRM_meta_timeout="60000" />
</rsc_op>
</action_set>
<inputs/>
</synapse>
</transition_graph>
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 167c66e131..9748a3a6ec 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1,2675 +1,2675 @@
/*
* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
#include <controld_fsa.h>
#include <controld_messages.h>
#include <controld_callbacks.h>
#include <controld_lrm.h>
#include <regex.h>
#include <crm/pengine/rules.h>
#define START_DELAY_THRESHOLD 5 * 60 * 1000
#define MAX_LRM_REG_FAILS 30
#define s_if_plural(i) (((i) == 1)? "" : "s")
struct delete_event_s {
int rc;
const char *rsc;
lrm_state_t *lrm_state;
};
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
const char *user_name);
static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
const char *rsc_id, const char *operation);
static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
xmlNode * msg, xmlNode * request);
void send_direct_ack(const char *to_host, const char *to_sys,
lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
static void
lrm_connection_destroy(void)
{
if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("Connection to executor failed");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
} else {
crm_info("Disconnected from executor");
}
}
static char *
make_stop_id(const char *rsc, int call_id)
{
return crm_strdup_printf("%s:%d", rsc, call_id);
}
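/* Resource parameters arrive in one flat hash table; meta-attribute keys
 * carry the CRM_META ("CRM_meta") prefix, so these two helpers split the
 * table into instance parameters and meta-attributes respectively.
 */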
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") == NULL) {
g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
}
}
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") != NULL) {
g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
}
}
/*!
* \internal
* \brief Remove a recurring operation from a resource's history
*
* \param[in,out] history Resource history to modify
* \param[in] op Operation to remove
*
* \return TRUE if the operation was found and removed, FALSE otherwise
*/
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_event_data_t *existing = iter->data;
if ((op->interval_ms == existing->interval_ms)
&& crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
&& safe_str_eq(op->op_type, existing->op_type)) {
history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
lrmd_free_event(existing);
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Free all recurring operations in resource history
*
* \param[in,out] history Resource history to modify
*/
static void
history_free_recurring_ops(rsc_history_t *history)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_free_event(iter->data);
}
g_list_free(history->recurring_op_list);
history->recurring_op_list = NULL;
}
/*!
* \internal
* \brief Free resource history
*
* \param[in,out] history Resource history to free
*/
void
history_free(gpointer data)
{
rsc_history_t *history = (rsc_history_t*)data;
if (history->stop_params) {
g_hash_table_destroy(history->stop_params);
}
/* Don't need to free history->rsc.id because it's set to history->id */
free(history->rsc.type);
free(history->rsc.standard);
free(history->rsc.provider);
lrmd_free_event(history->failed);
lrmd_free_event(history->last);
free(history->id);
history_free_recurring_ops(history);
free(history);
}
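/* Maintain the in-memory history for one resource: the last non-recurring
 * result, the last failure, the parameters a subsequent stop would need,
 * and the list of active recurring operations.
 */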
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
return;
}
if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
return;
}
crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
if (entry == NULL && rsc) {
entry = calloc(1, sizeof(rsc_history_t));
entry->id = strdup(op->rsc_id);
g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = strdup(rsc->type);
entry->rsc.standard = strdup(rsc->standard);
if (rsc->provider) {
entry->rsc.provider = strdup(rsc->provider);
} else {
entry->rsc.provider = NULL;
}
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
entry->last_callid = op->call_id;
target_rc = rsc_op_expected_rc(op);
if (op->op_status == PCMK_LRM_OP_CANCELLED) {
if (op->interval_ms > 0) {
crm_trace("Removing cancelled recurring op: " CRM_OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
history_remove_recurring_op(entry, op);
return;
} else {
crm_trace("Skipping " CRM_OP_FMT " rc=%d, status=%d",
op->rsc_id, op->op_type, op->interval_ms, op->rc,
op->op_status);
}
} else if (did_rsc_op_fail(op, target_rc)) {
/* Store failed monitors here, otherwise the block below will cause them
* to be forgotten when a stop happens.
*/
if (entry->failed) {
lrmd_free_event(entry->failed);
}
entry->failed = lrmd_copy_event(op);
} else if (op->interval_ms == 0) {
if (entry->last) {
lrmd_free_event(entry->last);
}
entry->last = lrmd_copy_event(op);
if (op->params &&
(safe_str_eq(CRMD_ACTION_START, op->op_type) ||
safe_str_eq("reload", op->op_type) ||
safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {
if (entry->stop_params) {
g_hash_table_destroy(entry->stop_params);
}
entry->stop_params = crm_str_table_new();
g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
}
}
if (op->interval_ms > 0) {
/* Ensure there are no duplicates */
history_remove_recurring_op(entry, op);
crm_trace("Adding recurring op: " CRM_OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
} else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
crm_trace("Dropping %d recurring ops because of: " CRM_OP_FMT,
g_list_length(entry->recurring_op_list), op->rsc_id,
op->op_type, op->interval_ms);
history_free_recurring_ops(entry);
}
}
/*!
* \internal
* \brief Send a direct OK ack for a resource task
*
* \param[in] lrm_state LRM connection
* \param[in] input Input message being ack'ed
* \param[in] rsc_id ID of affected resource
* \param[in] rsc Affected resource (if available)
* \param[in] task Operation task being ack'ed
* \param[in] ack_host Name of host to send ack to
* \param[in] ack_sys IPC system name to ack
*/
static void
send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input,
const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task,
const char *ack_host, const char *ack_sys)
{
lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
op->rc = PCMK_OCF_OK;
op->op_status = PCMK_LRM_OP_DONE;
send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id);
lrmd_free_event(op);
}
void
lrm_op_callback(lrmd_event_data_t * op)
{
const char *nodename = NULL;
lrm_state_t *lrm_state = NULL;
CRM_CHECK(op != NULL, return);
/* determine the node name for this connection. */
nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname;
if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) {
/* If this is the local executor IPC connection, set the right bits in the
* controller when the connection goes down.
*/
lrm_connection_destroy();
return;
} else if (op->type != lrmd_event_exec_complete) {
/* we only need to process execution results */
return;
}
lrm_state = lrm_state_find(nodename);
CRM_ASSERT(lrm_state != NULL);
process_lrm_event(lrm_state, op, NULL);
}
/* A_LRM_CONNECT */
void
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* This only pertains to local executor connections. Remote connections are
* handled as resources within the scheduler. Connecting and disconnecting
* from remote executor instances is handled differently.
*/
lrm_state_t *lrm_state = NULL;
if(fsa_our_uname == NULL) {
return; /* Nothing to do */
}
lrm_state = lrm_state_find_or_create(fsa_our_uname);
if (lrm_state == NULL) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (action & A_LRM_DISCONNECT) {
if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
if (action == A_LRM_DISCONNECT) {
crmd_fsa_stall(FALSE);
return;
}
}
clear_bit(fsa_input_register, R_LRM_CONNECTED);
crm_info("Disconnecting from the executor");
lrm_state_disconnect(lrm_state);
lrm_state_reset_tables(lrm_state, FALSE);
crm_notice("Disconnected from the executor");
}
if (action & A_LRM_CONNECT) {
int ret = pcmk_ok;
crm_debug("Connecting to the executor");
ret = lrm_state_ipc_connect(lrm_state);
if (ret != pcmk_ok) {
if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
crm_warn("Failed to connect to the executor %d time%s (%d max)",
lrm_state->num_lrm_register_fails,
s_if_plural(lrm_state->num_lrm_register_fails),
MAX_LRM_REG_FAILS);
crm_timer_start(wait_timer);
crmd_fsa_stall(FALSE);
return;
}
}
if (ret != pcmk_ok) {
crm_err("Failed to connect to the executor the max allowed %d time%s",
lrm_state->num_lrm_register_fails,
s_if_plural(lrm_state->num_lrm_register_fails));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
set_bit(fsa_input_register, R_LRM_CONNECTED);
crm_info("Connection to the executor established");
}
if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
const char *when = "lrm disconnect";
GHashTableIter gIter;
const char *key = NULL;
rsc_history_t *entry = NULL;
struct recurring_op_s *pending = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
when = "shutdown";
} else if (is_set(fsa_input_register, R_SHUTDOWN)) {
when = "shutdown... waiting";
}
if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
guint removed = g_hash_table_foreach_remove(
lrm_state->pending_ops, stop_recurring_actions, lrm_state);
guint nremaining = g_hash_table_size(lrm_state->pending_ops);
if (removed || nremaining) {
crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
removed, s_if_plural(removed), when, nremaining);
}
}
if (lrm_state->pending_ops) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
/* Ignore recurring actions in the shutdown calculations */
if (pending->interval_ms == 0) {
counter++;
}
}
}
if (counter > 0) {
do_crm_log(log_level, "%d pending executor operation%s at %s",
counter, s_if_plural(counter), when);
if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
}
} else {
rc = FALSE;
}
return rc;
}
if (lrm_state->resource_history == NULL) {
return rc;
}
if (is_set(fsa_input_register, R_SHUTDOWN)) {
/* At this point we're not waiting, we're just shutting down */
when = "shutdown";
}
counter = 0;
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
if (is_rsc_active(lrm_state, entry->id) == FALSE) {
continue;
}
counter++;
if (log_level == LOG_ERR) {
crm_info("Found %s active at %s", entry->id, when);
} else {
crm_trace("Found %s active at %s", entry->id, when);
}
if (lrm_state->pending_ops) {
GHashTableIter hIter;
g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
crm_notice("%sction %s (%s) incomplete at %s",
pending->interval_ms == 0 ? "A" : "Recurring a",
key, pending->op_key, when);
}
}
}
}
if (counter) {
crm_err("%d resource%s active at %s",
counter, (counter == 1)? " was" : "s were", when);
}
return rc;
}
static char *
build_parameter_list(const lrmd_event_data_t *op,
const struct ra_metadata_s *metadata,
xmlNode *result, enum ra_param_flags_e param_type,
bool invert_for_xml)
{
int len = 0;
int max = 0;
char *list = NULL;
GList *iter = NULL;
/* Newer resource agents support the "private" parameter attribute to
* indicate sensitive parameters. For backward compatibility with older
* agents, this list is used if the agent doesn't specify any as "private".
*/
const char *secure_terms[] = {
"password",
"passwd",
"user",
};
if (is_not_set(metadata->ra_flags, ra_uses_private)
&& (param_type == ra_param_private)) {
max = DIMOF(secure_terms);
}
for (iter = metadata->ra_params; iter != NULL; iter = iter->next) {
struct ra_param_s *param = (struct ra_param_s *) iter->data;
bool accept = FALSE;
if (is_set(param->rap_flags, param_type)) {
accept = TRUE;
} else if (max) {
for (int lpc = 0; lpc < max; lpc++) {
if (safe_str_eq(secure_terms[lpc], param->rap_name)) {
accept = TRUE;
break;
}
}
}
if (accept) {
int start = len;
crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
len += strlen(param->rap_name) + 2; // include spaces around
list = realloc_safe(list, len + 1); // include null terminator
// spaces before and after make parsing simpler
sprintf(list + start, " %s ", param->rap_name);
} else {
crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
}
if (result && (invert_for_xml? !accept : accept)) {
const char *v = g_hash_table_lookup(op->params, param->rap_name);
if (v != NULL) {
crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
crm_xml_add(result, param->rap_name, v);
}
}
}
return list;
}
static void
append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
char *list = NULL;
char *digest = NULL;
xmlNode *restart = NULL;
CRM_LOG_ASSERT(op->params != NULL);
if (op->interval_ms > 0) {
/* monitors are not reloadable */
return;
}
if (is_set(metadata->ra_flags, ra_supports_reload)) {
restart = create_xml_node(NULL, XML_TAG_PARAMS);
/* Add any parameters with unique="1" to the "op-force-restart" list.
*
* (Currently, we abuse "unique=0" to indicate reloadability. This is
* nonstandard and should eventually be replaced once the OCF standard
* is updated with something better.)
*/
list = build_parameter_list(op, metadata, restart, ra_param_unique,
FALSE);
} else {
/* Resource does not support reloads */
return;
}
digest = calculate_operation_digest(restart, version);
/* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload,
* no matter if it actually supports any parameters with unique="1"). */
crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: "");
crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
crm_trace("%s: %s, %s", op->rsc_id, digest, list);
crm_log_xml_trace(restart, "restart digest source");
free_xml(restart);
free(digest);
free(list);
}
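/* The scheduler later compares op-restart-digest against a digest of the
 * currently configured restart-type parameters; if only reloadable
 * parameters changed, the resource can be reloaded instead of restarted.
 */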
static void
append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
char *list = NULL;
char *digest = NULL;
xmlNode *secure = NULL;
CRM_LOG_ASSERT(op->params != NULL);
/*
* To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the
* secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on
* the insecure ones
*/
secure = create_xml_node(NULL, XML_TAG_PARAMS);
list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE);
if (list != NULL) {
digest = calculate_operation_digest(secure, version);
crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list);
crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
crm_trace("%s: %s, %s", op->rsc_id, digest, list);
crm_log_xml_trace(secure, "secure digest source");
} else {
crm_trace("%s: no secure parameters", op->rsc_id);
}
free_xml(secure);
free(digest);
free(list);
}
static gboolean
build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
const char *node_name, const char *src)
{
int target_rc = 0;
xmlNode *xml_op = NULL;
struct ra_metadata_s *metadata = NULL;
const char *caller_version = NULL;
lrm_state_t *lrm_state = NULL;
if (op == NULL) {
return FALSE;
}
target_rc = rsc_op_expected_rc(op);
/* There is a small risk in formerly mixed-version clusters that the update
 * we build here will be sub-optimal.
 *
 * However, with our upgrade policy, the update we send should still be
 * completely supported anyway.
 */
caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
CRM_LOG_ASSERT(caller_version != NULL);
if(caller_version == NULL) {
caller_version = CRM_FEATURE_SET;
}
crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG);
if (xml_op == NULL) {
return TRUE;
}
if ((rsc == NULL) || (op->params == NULL)
|| !crm_op_needs_metadata(rsc->standard, op->op_type)) {
crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
op->op_type, op->rsc_id, op->params, rsc);
return TRUE;
}
lrm_state = lrm_state_find(node_name);
if (lrm_state == NULL) {
crm_warn("Cannot calculate digests for operation " CRM_OP_FMT
" because we have no connection to executor for %s",
op->rsc_id, op->op_type, op->interval_ms, node_name);
return TRUE;
}
metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
if (metadata == NULL) {
/* For now, we always collect resource agent meta-data via a local,
* synchronous, direct execution of the agent. This has multiple issues:
* the executor should execute agents, not the controller; meta-data for
* Pacemaker Remote nodes should be collected on those nodes, not
* locally; and the meta-data call shouldn't eat into the timeout of the
* real action being performed.
*
* These issues are planned to be addressed by having the scheduler
* schedule a meta-data cache check at the beginning of each transition.
* Once that is working, this block will only be a fallback in case the
* initial collection fails.
*/
char *metadata_str = NULL;
int rc = lrm_state_get_metadata(lrm_state, rsc->standard,
rsc->provider, rsc->type,
&metadata_str, 0);
if (rc != pcmk_ok) {
crm_warn("Failed to get metadata for %s (%s:%s:%s)",
rsc->id, rsc->standard, rsc->provider, rsc->type);
return TRUE;
}
metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
metadata_str);
free(metadata_str);
if (metadata == NULL) {
crm_warn("Failed to update metadata for %s (%s:%s:%s)",
rsc->id, rsc->standard, rsc->provider, rsc->type);
return TRUE;
}
}
#if ENABLE_VERSIONED_ATTRS
crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version);
#endif
crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type);
append_restart_list(op, metadata, xml_op, caller_version);
append_secure_list(op, metadata, xml_op, caller_version);
return TRUE;
}
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
rsc_history_t *entry = NULL;
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->last == NULL) {
return FALSE;
}
crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
entry->last->interval_ms, entry->last->rc);
if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) {
return FALSE;
} else if (entry->last->rc == PCMK_OCF_OK
&& safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
/* a stricter check is too complex...
* leave that to the PE
*/
return FALSE;
} else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
return FALSE;
} else if ((entry->last->interval_ms == 0)
&& (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
/* Badly configured resources can't be reliably stopped */
return FALSE;
}
return TRUE;
}
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
GHashTableIter iter;
rsc_history_t *entry = NULL;
g_hash_table_iter_init(&iter, lrm_state->resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
GList *gIter = NULL;
xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
if (entry->last && entry->last->params) {
const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
if (container) {
crm_trace("Resource %s is a part of container resource %s", entry->id, container);
crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
}
}
build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__);
build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__);
for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__);
}
}
return FALSE;
}
static xmlNode *
do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
{
xmlNode *xml_state = NULL;
xmlNode *xml_data = NULL;
xmlNode *rsc_list = NULL;
crm_node_t *peer = NULL;
peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return NULL);
xml_state = create_node_state_update(peer, update_flags, NULL,
__FUNCTION__);
if (xml_state == NULL) {
return NULL;
}
xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
/* Build a list of active (not always running) resources */
build_active_RAs(lrm_state, rsc_list);
crm_log_xml_trace(xml_state, "Current executor state");
return xml_state;
}
xmlNode *
do_lrm_query(gboolean is_replace, const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (!lrm_state) {
crm_err("Could not find executor state for node %s", node_name);
return NULL;
}
return do_lrm_query_internal(lrm_state,
node_update_cluster|node_update_peer);
}
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
lrmd_event_data_t *op = NULL;
const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "localhost"), rsc_id,
((rc == pcmk_ok)? "" : " not"));
op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
if (rc == pcmk_ok) {
op->op_status = PCMK_LRM_OP_DONE;
op->rc = PCMK_OCF_OK;
} else {
op->op_status = PCMK_LRM_OP_ERROR;
op->rc = PCMK_OCF_UNKNOWN_ERROR;
}
send_direct_ack(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
/* this isn't expected - trigger a new transition */
time_t now = time(NULL);
char *now_s = crm_itoa(now);
crm_debug("Triggering a refresh after %s deleted %s from the executor",
from_sys, rsc_id);
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
"last-lrm-refresh", now_s, FALSE, NULL, NULL);
free(now_s);
}
}
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
struct delete_event_s *event = user_data;
struct pending_deletion_op_s *op = value;
if (crm_str_eq(event->rsc, op->rsc, TRUE)) {
notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
return TRUE;
}
return FALSE;
}
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
const char *rsc = user_data;
struct recurring_op_s *pending = value;
if (crm_str_eq(rsc, pending->rsc_id, TRUE)) {
crm_info("Removing op %s:%d for deleted resource %s",
pending->op_key, pending->call_id, rsc);
return TRUE;
}
return FALSE;
}
/*
* Remove the rsc from the CIB
*
* Avoids refreshing the entire LRM section of this host
*/
#define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
static int
delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
const char *user_name)
{
char *rsc_xpath = NULL;
int rc = pcmk_ok;
CRM_CHECK(rsc_id != NULL, return -ENXIO);
rsc_xpath = crm_strdup_printf(rsc_template, lrm_state->node_name, rsc_id);
rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath,
NULL, NULL, call_options | cib_xpath, user_name);
free(rsc_xpath);
return rc;
}
static void
delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
GHashTableIter * rsc_gIter, int rc, const char *user_name)
{
struct delete_event_s event;
CRM_CHECK(rsc_id != NULL, return);
if (rc == pcmk_ok) {
char *rsc_id_copy = strdup(rsc_id);
if (rsc_gIter)
g_hash_table_iter_remove(rsc_gIter);
else
g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
crm_debug("sync: Sending delete op for %s", rsc_id_copy);
delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);
g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
free(rsc_id_copy);
}
if (input) {
notify_deleted(lrm_state, input, rsc_id, rc);
}
event.rc = rc;
event.rsc = rsc_id;
event.lrm_state = lrm_state;
g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
}
/*!
* \internal
* \brief Erase an LRM history entry from the CIB, given the operation data
*
* \param[in] lrm_state LRM state of the desired node
* \param[in] op Operation whose history should be deleted
*/
static void
erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op)
{
xmlNode *xml_top = NULL;
CRM_CHECK(op != NULL, return);
xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
if (op->interval_ms > 0) {
char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);
/* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
crm_xml_add(xml_top, XML_ATTR_ID, op_id);
free(op_id);
}
crm_debug("Erasing resource operation history for " CRM_OP_FMT " (call=%d)",
op->rsc_id, op->op_type, op->interval_ms, op->call_id);
fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top,
cib_quorum_override);
crm_log_xml_trace(xml_top, "op:cancel");
free_xml(xml_top);
}
/* Define xpath to find LRM resource history entry by node and resource */
#define XPATH_HISTORY \
"/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
"/" XML_LRM_TAG_RSC_OP
/* ... and also by operation key */
#define XPATH_HISTORY_ID XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s']"
/* ... and also by operation key and operation call ID */
#define XPATH_HISTORY_CALL XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
/* ... and also by operation key and original operation key */
#define XPATH_HISTORY_ORIG XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
/*!
* \internal
* \brief Erase an LRM history entry from the CIB, given operation identifiers
*
* \param[in] lrm_state LRM state of the node to clear history for
* \param[in] rsc_id Name of resource to clear history for
* \param[in] key Operation key of operation to clear history for
* \param[in] orig_op If specified, delete only if it has this original op
* \param[in] call_id If specified, delete entry only if it has this call ID
*/
static void
erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id,
const char *key, const char *orig_op, int call_id)
{
char *op_xpath = NULL;
CRM_CHECK((rsc_id != NULL) && (key != NULL), return);
if (call_id > 0) {
op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL,
lrm_state->node_name, rsc_id, key,
call_id);
} else if (orig_op) {
op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG,
lrm_state->node_name, rsc_id, key,
orig_op);
} else {
op_xpath = crm_strdup_printf(XPATH_HISTORY_ID,
lrm_state->node_name, rsc_id, key);
}
crm_debug("Erasing resource operation history for %s on %s (call=%d)",
key, rsc_id, call_id);
fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL,
cib_quorum_override | cib_xpath);
free(op_xpath);
}
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
    if ((entry == NULL) || (entry->failed == NULL)) {
        return FALSE;
    }
if (op == NULL) {
return TRUE;
}
return (safe_str_eq(op, entry->failed->op_type)
&& (interval_ms == entry->failed->interval_ms));
}
/*!
* \internal
* \brief Clear a resource's last failure
*
* Erase a resource's last failure on a particular node from both the
* LRM resource history in the CIB, and the resource history remembered
* for the LRM state.
*
* \param[in] rsc_id Resource name
* \param[in] node_name Node name
* \param[in] operation If specified, only clear if matching this operation
* \param[in] interval_ms If operation is specified, it has this interval
*/
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
const char *operation, guint interval_ms)
{
char *op_key = NULL;
char *orig_op_key = NULL;
lrm_state_t *lrm_state = NULL;
lrm_state = lrm_state_find(node_name);
if (lrm_state == NULL) {
return;
}
/* Erase from CIB */
op_key = generate_op_key(rsc_id, "last_failure", 0);
if (operation) {
orig_op_key = generate_op_key(rsc_id, operation, interval_ms);
}
erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0);
free(op_key);
free(orig_op_key);
/* Remove from memory */
if (lrm_state->resource_history) {
rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
rsc_id);
if (last_failed_matches_op(entry, operation, interval_ms)) {
lrmd_free_event(entry->failed);
entry->failed = NULL;
}
}
}
/* Returns: gboolean - cancellation is in progress */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
int rc = pcmk_ok;
char *local_key = NULL;
struct recurring_op_s *pending = NULL;
CRM_CHECK(op != 0, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
if (key == NULL) {
local_key = make_stop_id(rsc_id, op);
key = local_key;
}
pending = g_hash_table_lookup(lrm_state->pending_ops, key);
if (pending) {
if (remove && pending->remove == FALSE) {
pending->remove = TRUE;
crm_debug("Scheduling %s for removal", key);
}
if (pending->cancelled) {
crm_debug("Operation %s already cancelled", key);
free(local_key);
return FALSE;
}
pending->cancelled = TRUE;
} else {
crm_info("No pending op found for %s", key);
free(local_key);
return FALSE;
}
crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
pending->interval_ms);
if (rc == pcmk_ok) {
crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
free(local_key);
return TRUE;
}
crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
/* The caller needs to make sure the entry is
* removed from the pending_ops list
*
* Usually by returning TRUE inside the worker function
* supplied to g_hash_table_foreach_remove()
*
* Not removing the entry from pending_ops will block
* the node from shutting down
*/
free(local_key);
return FALSE;
}
struct cancel_data {
gboolean done;
gboolean remove;
const char *key;
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct cancel_data *data = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if (crm_str_eq(op->op_key, data->key, TRUE)) {
data->done = TRUE;
remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
}
return remove;
}
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
guint removed = 0;
struct cancel_data data;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(key != NULL, return FALSE);
data.key = key;
data.rsc = rsc;
data.done = FALSE;
data.remove = remove;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data);
crm_trace("Removed %u op cache entries, new size: %u",
removed, g_hash_table_size(lrm_state->pending_ops));
return data.done;
}
/*!
* \internal
* \brief Retrieve resource information from LRM
*
* \param[in] lrm_state LRM connection to use
* \param[in] rsc_xml XML containing resource configuration
* \param[in] do_create If true, register resource with LRM if not already
* \param[out] rsc_info Where to store resource information obtained from LRM
*
* \retval pcmk_ok Success (and rsc_info holds newly allocated result)
* \retval -EINVAL Required information is missing from arguments
* \retval -ENOTCONN No active connection to LRM
* \retval -ENODEV Resource not found
* \retval -errno Error communicating with executor when registering resource
*
* \note Caller is responsible for freeing result on success.
*/
static int
get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create,
lrmd_rsc_info_t **rsc_info)
{
const char *id = ID(rsc_xml);
CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
CRM_CHECK(id, return -EINVAL);
if (lrm_state_is_connected(lrm_state) == FALSE) {
return -ENOTCONN;
}
crm_trace("Retrieving resource information for %s from the executor", id);
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
// If resource isn't known by ID, try clone name, if provided
if (!*rsc_info) {
const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);
if (long_id) {
*rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
}
}
if ((*rsc_info == NULL) && do_create) {
const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
int rc;
crm_trace("Registering resource %s with the executor", id);
rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
lrmd_opt_drop_recurring);
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Could not register resource %s with the executor on %s: %s "
CRM_XS " rc=%d",
id, lrm_state->node_name, pcmk_strerror(rc), rc);
/* Register this as an internal error if this involves the local
* executor. Otherwise, we're likely dealing with an unresponsive
* remote node, which is not an FSA failure.
*/
if (lrm_state_is_local(lrm_state) == TRUE) {
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
return rc;
}
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
}
return *rsc_info? pcmk_ok : -ENODEV;
}
static void
delete_resource(lrm_state_t * lrm_state,
const char *id,
lrmd_rsc_info_t * rsc,
GHashTableIter * gIter,
const char *sys,
const char *host,
const char *user,
ha_msg_input_t * request,
gboolean unregister)
{
int rc = pcmk_ok;
crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
if (rsc && unregister) {
rc = lrm_state_unregister_rsc(lrm_state, id, 0);
}
if (rc == pcmk_ok) {
crm_trace("Resource '%s' deleted", id);
} else if (rc == -EINPROGRESS) {
crm_info("Deletion of resource '%s' pending", id);
if (request) {
struct pending_deletion_op_s *op = NULL;
char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
op = calloc(1, sizeof(struct pending_deletion_op_s));
op->rsc = strdup(rsc->id);
op->input = copy_ha_msg_input(request);
g_hash_table_insert(lrm_state->deletion_ops, ref, op);
}
return;
} else {
crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
id, sys, user ? user : "internal", host, rc);
}
delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
}
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
int call_id = 999999999;
rsc_history_t *entry = NULL;
if(lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
    /* Make sure the call ID is greater than the last successful operation's;
     * otherwise the failure will not trigger recovery of the resource, because
     * it could appear that the failure occurred before the successful start. */
if (entry) {
call_id = entry->last_callid + 1;
}
if (call_id < 0) {
call_id = 1;
}
return call_id;
}
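/* Synthesize timing and status for a result that never actually ran: stamp
 * it with the current time and a call ID newer than the last real operation
 * so it sorts after genuine history entries.
 */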
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
enum ocf_exitcode op_exitcode)
{
op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
op->op_status = op_status;
op->rc = op_exitcode;
}
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node)
{
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_info("Clearing resource history on node %s", lrm_state->node_name);
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
/* only unregister the resource during a reprobe if it is not a remote connection
* resource. otherwise unregistering the connection will terminate remote-node
* membership */
gboolean unregister = TRUE;
if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
if (remote_lrm_state) {
/* when forcing a reprobe, make sure to clear remote node before
* clearing the remote node's connection resource */
force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE);
}
unregister = FALSE;
}
delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
user_name, NULL, unregister);
}
/* Now delete the copy in the CIB */
erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
/* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE
* would result in the scheduler sending us back here again
*/
update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
}
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc)
{
lrmd_event_data_t *op = NULL;
lrmd_rsc_info_t *rsc_info = NULL;
const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
/* @TODO Should we do something else, like direct ack? */
crm_info("Can't fake %s failure (%d) on %s without resource configuration",
crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
target_node);
return;
} else if(operation == NULL) {
/* This probably came from crm_resource -C, nothing to do */
crm_info("Can't fake %s failure (%d) on %s without operation",
ID(xml_rsc), rc, target_node);
return;
}
op = construct_op(lrm_state, action, ID(xml_rsc), operation);
if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK);
} else {
fake_op_status(lrm_state, op, PCMK_LRM_OP_ERROR, rc);
}
crm_info("Faking " CRM_OP_FMT " result (%d) on %s",
op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
/* Process the result as if it came from the LRM, if possible
* (i.e. resource info can be obtained from the lrm_state).
*/
if (lrm_state) {
rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
}
if (rsc_info) {
lrmd_free_rsc_info(rsc_info);
process_lrm_event(lrm_state, op, NULL);
} else if (controld_action_is_recordable(op->op_type)) {
/* If we can't process the result normally, at least write it to the CIB
* if possible, so the scheduler can act on it.
*/
const char *standard = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER);
const char *type = crm_element_value(xml_rsc, XML_ATTR_TYPE);
if (standard && type) {
rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
do_update_resource(target_node, rsc_info, op);
lrmd_free_rsc_info(rsc_info);
} else {
// @TODO Should we direct ack?
crm_info("Can't fake %s failure (%d) on %s without resource standard and type",
crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
target_node);
}
}
lrmd_free_event(op);
}
/*!
* \internal
* \brief Get target of an LRM operation
*
* \param[in] xml LRM operation data XML
*
* \return LRM operation target node name (local node or Pacemaker Remote node)
*/
static const char *
lrm_op_target(xmlNode *xml)
{
const char *target = NULL;
if (xml) {
target = crm_element_value(xml, XML_LRM_ATTR_TARGET);
}
if (target == NULL) {
target = fsa_our_uname;
}
return target;
}
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
lrmd_event_data_t *op = NULL;
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(xml_rsc != NULL, return);
/* The executor simply executes operations and reports the results, without
* any concept of success or failure, so to fail a resource, we must fake
* what a failure looks like.
*
* To do this, we create a fake executor operation event for the resource,
* and pass that event to the executor client callback so it will be
* processed as if it came from the executor.
*/
op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);
free((char*) op->user_data);
op->user_data = NULL;
op->interval_ms = 0;
#if ENABLE_ACL
if (user_name && is_privileged(user_name) == FALSE) {
crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
lrmd_free_event(op);
return;
}
#endif
if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
crm_info("Failing resource %s...", rsc->id);
process_lrm_event(lrm_state, op, NULL);
op->op_status = PCMK_LRM_OP_DONE;
op->rc = PCMK_OCF_OK;
lrmd_free_rsc_info(rsc);
} else {
crm_info("Cannot find/create resource in order to fail it...");
crm_log_xml_warn(xml, "bad input");
}
send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
lrmd_free_event(op);
}
static void
handle_refresh_op(lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
int rc = pcmk_ok;
xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);
fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
crm_info("Forced a local resource history refresh: call=%d", rc);
if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) {
xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host,
from_sys, CRM_SYSTEM_LRMD,
fsa_our_uuid);
crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(reply);
}
free_xml(fragment);
}
static void
handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
{
xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all);
xmlNode *reply = create_reply(msg, data);
if (relay_message(reply, TRUE) == FALSE) {
crm_err("Unable to route reply");
crm_log_xml_err(reply, "reply");
}
free_xml(reply);
free_xml(data);
}
static void
handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node)
{
crm_notice("Forcing the status of all resources to be redetected");
force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);
if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys)
&& safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) {
xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
from_sys, CRM_SYSTEM_LRMD,
fsa_our_uuid);
crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(reply);
}
}
static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
const char *interval_ms_s = NULL;
gboolean in_progress = FALSE;
xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
CRM_CHECK(params != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
interval_ms_s = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(interval_ms_s != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
op_task = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(op_task != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
call_id = crm_element_value(params, meta_key);
free(meta_key);
op_key = generate_op_key(rsc->id, op_task, crm_parse_ms(interval_ms_s));
crm_debug("Scheduler requested op %s (call=%s) be cancelled",
op_key, (call_id? call_id : "NA"));
call = crm_parse_int(call_id, "0");
if (call == 0) {
// Normal case when the scheduler cancels a recurring op
in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
} else {
// Normal case when the scheduler cancels an orphan op
in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
}
// Acknowledge cancellation operation if for a remote connection resource
if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
char *op_id = make_stop_id(rsc->id, call);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
crm_info("Nothing known about operation %d for %s", call, op_key);
}
erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
/* needed at least for cancellation of a remote operation */
g_hash_table_remove(lrm_state->pending_ops, op_id);
free(op_id);
} else {
/* No ack is needed since abcdaa8, but peers with older versions
* in a rolling upgrade need one. We didn't bump the feature set
* at that commit, so we can only compare against the previous
* CRM version (3.0.8). If any peers have feature set 3.0.9 but
* not abcdaa8, they will time out waiting for the ack (no
* released versions of Pacemaker are affected).
*/
const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
if (compare_version(peer_version, "3.0.8") <= 0) {
crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
op_key, from_host, peer_version);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
}
}
free(op_key);
return TRUE;
}
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
bool crm_rsc_delete, const char *user_name)
{
gboolean unregister = TRUE;
#if ENABLE_ACL
int cib_rc = delete_rsc_status(lrm_state, rsc->id,
cib_dryrun|cib_sync_call, user_name);
if (cib_rc != pcmk_ok) {
lrmd_event_data_t *op = NULL;
crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s"
CRM_XS " rc=%d",
rsc->id, from_sys, (user_name? user_name : "unknown"),
from_host, pcmk_strerror(cib_rc), cib_rc);
op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE);
op->op_status = PCMK_LRM_OP_ERROR;
if (cib_rc == -EACCES) {
op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
} else {
op->rc = PCMK_OCF_UNKNOWN_ERROR;
}
send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
lrmd_free_event(op);
return;
}
#endif
if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
unregister = FALSE;
}
delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host,
user_name, input, unregister);
}
/* A_LRM_INVOKE */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
lrm_state_t *lrm_state = NULL;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
const char *target_node = NULL;
gboolean is_remote_node = FALSE;
bool crm_rsc_delete = FALSE;
target_node = lrm_op_target(input->xml);
is_remote_node = safe_str_neq(target_node, fsa_our_uname);
lrm_state = lrm_state_find(target_node);
if ((lrm_state == NULL) && is_remote_node) {
crm_err("Failing action because local node has never had connection to remote node %s",
target_node);
synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED);
return;
}
CRM_ASSERT(lrm_state != NULL);
#if ENABLE_ACL
user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
crm_trace("Executor command from user '%s'", user_name);
#endif
crm_op = crm_element_value(input->msg, F_CRM_TASK);
from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
}
crm_trace("Executor %s command from %s", crm_op, from_sys);
if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
crm_rsc_delete = TRUE; // Only crm_resource uses this op
operation = CRMD_ACTION_DELETE;
} else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
from_sys);
return;
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
}
if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
handle_refresh_op(lrm_state, user_name, from_host, from_sys);
} else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
handle_query_op(input->msg, lrm_state);
} else if (safe_str_eq(operation, CRM_OP_PROBED)) {
update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
user_name, is_remote_node);
} else if (safe_str_eq(operation, CRM_OP_REPROBE)
|| safe_str_eq(crm_op, CRM_OP_REPROBE)) {
handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
is_remote_node);
} else if (operation != NULL) {
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE);
int rc;
// We can't return anything meaningful without a resource ID
CRM_CHECK(xml_rsc && ID(xml_rsc), return);
rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
if (rc == -ENOTCONN) {
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_OCF_CONNECTION_DIED);
return;
} else if ((rc < 0) && !create_rsc) {
/* Delete of malformed or nonexistent resource
* (deleting something that does not exist is a success)
*/
crm_notice("Not registering resource '%s' for a %s event "
CRM_XS " get-rc=%d (%s) transition-key=%s",
ID(xml_rsc), operation,
rc, pcmk_strerror(rc), ID(input->xml));
delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
user_name);
send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation,
from_host, from_sys);
return;
} else if (rc == -EINVAL) {
// Resource operation on malformed resource
crm_err("Invalid resource definition for %s", ID(xml_rsc));
crm_log_xml_warn(input->msg, "invalid resource");
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_OCF_NOT_CONFIGURED); // fatal error
return;
} else if (rc < 0) {
// Error communicating with the executor
crm_err("Could not register resource '%s' with executor: %s "
CRM_XS " rc=%d",
ID(xml_rsc), pcmk_strerror(rc), rc);
crm_log_xml_warn(input->msg, "failed registration");
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_OCF_INVALID_PARAM); // hard error
return;
}
if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
crm_log_xml_warn(input->xml, "Bad command");
}
} else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
crm_rsc_delete, user_name);
} else {
do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
}
lrmd_free_rsc_info(rsc);
} else {
crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
static lrmd_event_data_t *
construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
{
lrmd_event_data_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
const char *interval_ms_s = NULL;
GHashTable *params = NULL;
const char *transition = NULL;
CRM_ASSERT(rsc_id && operation);
op = calloc(1, sizeof(lrmd_event_data_t));
CRM_ASSERT(op != NULL);
op->type = lrmd_event_exec_complete;
op->op_type = strdup(operation);
op->op_status = PCMK_LRM_OP_PENDING;
op->rc = -1;
op->rsc_id = strdup(rsc_id);
op->interval_ms = 0;
op->timeout = 0;
op->start_delay = 0;
if (rsc_op == NULL) {
CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
op->user_data = NULL;
/* the stop_all_resources() case: by definition there is no DC
 * (or they'd be shutting us down), so we should put our own
 * version here
 */
op->params = crm_str_table_new();
g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
params = xml2list(rsc_op);
g_hash_table_remove(params, CRM_META "_op_target_rc");
op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
interval_ms_s = crm_meta_value(params, XML_LRM_ATTR_INTERVAL_MS);
op->interval_ms = crm_parse_ms(interval_ms_s);
op->timeout = crm_parse_int(op_timeout, "0");
op->start_delay = crm_parse_int(op_delay, "0");
#if ENABLE_VERSIONED_ATTRS
// Resolve any versioned parameters
if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA)
&& safe_str_neq(op->op_type, CRMD_ACTION_DELETE)
&& !is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
// Resource info *should* already be cached, so we don't get executor call
lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0);
struct ra_metadata_s *metadata;
metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
if (metadata) {
xmlNode *versioned_attrs = NULL;
GHashTable *hash = NULL;
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_iter_steal(&iter);
g_hash_table_replace(params, key, value);
}
g_hash_table_destroy(hash);
versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_replace(params, crm_meta_name(key), strdup(value));
if (safe_str_eq(key, XML_ATTR_TIMEOUT)) {
op->timeout = crm_parse_int(value, "0");
} else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) {
op->start_delay = crm_parse_int(value, "0");
}
}
g_hash_table_destroy(hash);
versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_iter_steal(&iter);
g_hash_table_replace(params, key, value);
}
g_hash_table_destroy(hash);
}
lrmd_free_rsc_info(rsc);
}
#endif
if (safe_str_neq(operation, RSC_STOP)) {
op->params = params;
} else {
rsc_history_t *entry = NULL;
if (lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* If we do not have stop parameters cached, use
* whatever we are given */
if (!entry || !entry->stop_params) {
op->params = params;
} else {
/* Copy the cached parameter list so that we stop the resource
* with the old attributes, not the new ones */
op->params = crm_str_table_new();
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
}
/* sanity */
if (op->timeout <= 0) {
op->timeout = op->interval_ms;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = strdup(transition);
if (op->interval_ms != 0) {
if (safe_str_eq(operation, CRMD_ACTION_START)
|| safe_str_eq(operation, CRMD_ACTION_STOP)) {
crm_err("Start and Stop actions cannot have an interval: %u",
op->interval_ms);
op->interval_ms = 0;
}
}
crm_trace("Constructed %s op for %s: interval=%u",
operation, rsc_id, op->interval_ms);
return op;
}
void
send_direct_ack(const char *to_host, const char *to_sys,
lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
{
xmlNode *reply = NULL;
xmlNode *update, *iter;
crm_node_t *peer = NULL;
CRM_CHECK(op != NULL, return);
if (op->rsc_id == NULL) {
CRM_ASSERT(rsc_id != NULL);
op->rsc_id = strdup(rsc_id);
}
if (to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
peer = crm_get_peer(0, fsa_our_uname);
update = create_node_state_update(peer, node_update_none, NULL,
__FUNCTION__);
iter = create_xml_node(update, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__);
reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
crm_log_xml_trace(update, "ACK Update");
crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s",
op->rsc_id, op->op_type, op->interval_ms, op->user_data,
crm_element_value(reply, XML_ATTR_REFERENCE));
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(update);
free_xml(reply);
}
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
gboolean res = TRUE;
GList *lrm_state_list = lrm_state_get_list();
GList *state_entry;
for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
lrm_state_t *lrm_state = state_entry->data;
if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
/* keep iterating through all even when false is returned */
res = FALSE;
}
}
set_bit(fsa_input_register, R_SENT_RSC_STOP);
g_list_free(lrm_state_list); lrm_state_list = NULL;
return res;
}
struct stop_recurring_action_s {
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct stop_recurring_action_s *event = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if ((op->interval_ms != 0)
&& crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
}
return remove;
}
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
lrm_state_t *lrm_state = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if (op->interval_ms != 0) {
crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
(const char *) key);
remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
}
return remove;
}
static void
record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op)
{
const char *record_pending = NULL;
CRM_CHECK(node_name != NULL, return);
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(op != NULL, return);
// Never record certain operation types as pending
if ((op->op_type == NULL) || (op->params == NULL)
|| !controld_action_is_recordable(op->op_type)) {
return;
}
// defaults to true
record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
if (record_pending && !crm_is_true(record_pending)) {
return;
}
op->call_id = -1;
op->op_status = PCMK_LRM_OP_PENDING;
op->rc = PCMK_OCF_UNKNOWN;
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
/* write a "pending" entry to the CIB, inhibit notification */
crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB",
op->rsc_id, op->op_type, op->interval_ms, node_name);
do_update_resource(node_name, rsc, op);
}
static void
do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
xmlNode * request)
{
int call_id = 0;
char *op_id = NULL;
lrmd_event_data_t *op = NULL;
lrmd_key_value_t *params = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
gboolean stop_recurring = FALSE;
bool send_nack = FALSE;
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(operation != NULL, return);
if (msg != NULL) {
transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
if (transition == NULL) {
crm_log_xml_err(msg, "Missing transition number");
}
}
op = construct_op(lrm_state, msg, rsc->id, operation);
CRM_CHECK(op != NULL, return);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
&& (op->interval_ms == 0)
&& strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {
/* pcmk remote connections are a special use case.
* We never ever want to stop monitoring a connection resource until
* the entire migration has completed. If the connection is unexpectedly
* severed, even during a migration, this is an event we must detect. */
stop_recurring = FALSE;
} else if ((op->interval_ms == 0)
&& strcmp(operation, CRMD_ACTION_STATUS) != 0
&& strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {
/* stop any previous monitor operations before changing the resource state */
stop_recurring = TRUE;
}
if (stop_recurring == TRUE) {
guint removed = 0;
struct stop_recurring_action_s data;
data.rsc = rsc;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(
lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);
if (removed) {
crm_debug("Stopped %u recurring operation%s in preparation for " CRM_OP_FMT,
removed, s_if_plural(removed),
rsc->id, operation, op->interval_ms);
}
}
/* now do the op */
crm_info("Performing key=%s op=" CRM_OP_FMT,
transition, rsc->id, operation, op->interval_ms);
if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) {
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
send_nack = TRUE;
} else if (fsa_state != S_NOT_DC
&& fsa_state != S_POLICY_ENGINE /* Recalculating */
&& fsa_state != S_TRANSITION_ENGINE
&& safe_str_neq(operation, CRMD_ACTION_STOP)) {
send_nack = TRUE;
}
if(send_nack) {
crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)",
operation, rsc->id, fsa_state2string(fsa_state),
is_set(fsa_input_register, R_SHUTDOWN)?"true":"false");
op->rc = CRM_DIRECT_NACK_RC;
op->op_status = PCMK_LRM_OP_ERROR;
send_direct_ack(NULL, NULL, rsc, op, rsc->id);
lrmd_free_event(op);
free(op_id);
return;
}
record_pending_op(lrm_state->node_name, rsc, op);
op_id = generate_op_key(rsc->id, op->op_type, op->interval_ms);
if (op->interval_ms > 0) {
/* cancel it so we can then restart it without conflict */
cancel_op_key(lrm_state, rsc, op_id, FALSE);
}
if (op->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, op->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
params = lrmd_key_value_add(params, key, value);
}
}
call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data,
op->interval_ms, op->timeout, op->start_delay,
params);
if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else if (call_id <= 0) {
crm_err("Operation %s on resource %s failed to execute on remote node %s: %d",
operation, rsc->id, lrm_state->node_name, call_id);
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);
process_lrm_event(lrm_state, op, NULL);
} else {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
struct recurring_op_s *pending = NULL;
pending = calloc(1, sizeof(struct recurring_op_s));
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
pending->call_id = call_id;
pending->interval_ms = op->interval_ms;
pending->op_type = strdup(operation);
pending->op_key = strdup(op_id);
pending->rsc_id = strdup(rsc->id);
pending->start_time = time(NULL);
- pending->user_data = strdup(op->user_data);
+ pending->user_data = op->user_data? strdup(op->user_data) : NULL;
g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
if ((op->interval_ms > 0)
&& (op->start_delay > START_DELAY_THRESHOLD)) {
char *uuid = NULL;
int dummy = 0, target_rc = 0;
crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
free(uuid);
op->rc = target_rc;
op->op_status = PCMK_LRM_OP_DONE;
send_direct_ack(NULL, NULL, rsc, op, rsc->id);
}
pending->params = op->params;
op->params = NULL;
}
free(op_id);
lrmd_free_event(op);
return;
}
int last_resource_update = 0;
static void
cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
switch (rc) {
case pcmk_ok:
case -pcmk_err_diff_failed:
case -pcmk_err_diff_resync:
crm_trace("Resource update %d complete: rc=%d", call_id, rc);
break;
default:
crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
}
if (call_id == last_resource_update) {
last_resource_update = 0;
trigger_fsa(fsa_source);
}
}
static int
do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
/*
<status>
<node_state id=uname>
<lrm>
<lrm_resources>
<lrm_resource id=...>
</...>
*/
int rc = pcmk_ok;
xmlNode *update, *iter = NULL;
int call_opt = crmd_cib_smart_opt();
const char *uuid = NULL;
CRM_CHECK(op != NULL, return 0);
iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
update = iter;
iter = create_xml_node(iter, XML_CIB_TAG_STATE);
if (safe_str_eq(node_name, fsa_our_uname)) {
uuid = fsa_our_uuid;
} else {
/* remote nodes uuid and uname are equal */
uuid = node_name;
crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
}
CRM_LOG_ASSERT(uuid != NULL);
if(uuid == NULL) {
rc = -EINVAL;
goto done;
}
crm_xml_add(iter, XML_ATTR_UUID, uuid);
crm_xml_add(iter, XML_ATTR_UNAME, node_name);
crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
iter = create_xml_node(iter, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, rsc, op, node_name, __FUNCTION__);
if (rsc) {
const char *container = NULL;
crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard);
crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
if (op->params) {
container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
}
if (container) {
crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
}
} else {
crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
goto cleanup;
}
crm_log_xml_trace(update, __FUNCTION__);
/* make it an asynchronous call and be done with it
*
* Best case:
* the resource state will be discovered during
* the next signup or election.
*
* Bad case:
* we are shutting down and there is no DC at the time,
* but then why were we shutting down then anyway?
* (probably because of an internal error)
*
* Worst case:
* we get shot for having resources "running" that really weren't
*
* the alternative however means blocking here for too long, which
* isn't acceptable
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
if (rc > 0) {
last_resource_update = rc;
}
done:
/* the return code is a call number, not an error code */
crm_trace("Sent resource state update message: %d for %s=%u on %s",
rc, op->op_type, op->interval_ms, op->rsc_id);
fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
cleanup:
free_xml(update);
return rc;
}
void
do_lrm_event(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
{
CRM_CHECK(FALSE, return);
}
static char *
unescape_newlines(const char *string)
{
char *pch = NULL;
char *ret = NULL;
static const char *escaped_newline = "\\n";
if (!string) {
return NULL;
}
ret = strdup(string);
pch = strstr(ret, escaped_newline);
while (pch != NULL) {
/* 2 chars for 2 chars, null-termination irrelevant */
memcpy(pch, "\n ", 2 * sizeof(char));
pch = strstr(pch, escaped_newline);
}
return ret;
}
gboolean
process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending)
{
char *op_id = NULL;
char *op_key = NULL;
int update_id = 0;
gboolean remove = FALSE;
gboolean removed = FALSE;
lrmd_rsc_info_t *rsc = NULL;
CRM_CHECK(op != NULL, return FALSE);
CRM_CHECK(op->rsc_id != NULL, return FALSE);
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
if(pending == NULL) {
remove = TRUE;
pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
}
if (op->op_status == PCMK_LRM_OP_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_MASTER:
// Leave it to the TE/scheduler to decide if this is an error
op->op_status = PCMK_LRM_OP_DONE;
break;
default:
/* Nothing to do */
break;
}
}
if (op->op_status != PCMK_LRM_OP_CANCELLED) {
if (controld_action_is_recordable(op->op_type)) {
update_id = do_update_resource(lrm_state->node_name, rsc, op);
} else {
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
}
} else if (op->interval_ms == 0) {
/* This will occur when "crm resource cleanup" is called while actions are in-flight */
crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id);
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
} else if (pending == NULL) {
/* We don't need to do anything for cancelled ops
* that are not in our pending op list. There are no
* transition actions waiting on these operations. */
} else if (op->user_data == NULL) {
/* At this point we have a pending entry, but no transition
* key present in the user_data field. Report this. */
crm_err("Op %s (call=%d): No user data", op_key, op->call_id);
} else if (pending->remove) {
/* The tengine canceled this op, we have been waiting for the cancel to finish. */
erase_lrm_history_by_op(lrm_state, op);
} else if (op->rsc_deleted) {
/* The tengine initiated this op, but it was cancelled outside of the
* tengine's control during a resource cleanup/re-probe request. The tengine
* must be alerted that this operation completed, otherwise the tengine
* will continue waiting for this update to occur until it is timed out.
* We don't want this update going to the cib though, so use a direct ack. */
crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id);
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
} else {
/* Before a stop is called, no need to direct ack */
crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id);
}
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if ((op->interval_ms == 0)
&& g_hash_table_remove(lrm_state->pending_ops, op_id)) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops));
} else if ((op->interval_ms != 0)
&& (op->op_status == PCMK_LRM_OP_CANCELLED)) {
removed = TRUE;
g_hash_table_remove(lrm_state->pending_ops, op_id);
}
switch (op->op_status) {
case PCMK_LRM_OP_CANCELLED:
crm_info("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, lrm_state->node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, (removed? "true" : "false"));
break;
case PCMK_LRM_OP_DONE:
do_crm_log((op->interval_ms? LOG_INFO : LOG_NOTICE),
"Result of %s operation for %s on %s: %d (%s) "
CRM_XS " call=%d key=%s confirmed=%s cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, lrm_state->node_name,
op->rc, services_ocf_exitcode_str(op->rc),
op->call_id, op_key, (removed? "true" : "false"),
update_id);
break;
case PCMK_LRM_OP_TIMEOUT:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s timeout=%dms",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, lrm_state->node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, op->timeout);
break;
default:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, lrm_state->node_name,
services_lrm_status_str(op->op_status), op->call_id, op_key,
(removed? "true" : "false"), op->op_status, update_id);
}
if (op->output) {
char *prefix =
crm_strdup_printf("%s-" CRM_OP_FMT ":%d", lrm_state->node_name,
op->rsc_id, op->op_type, op->interval_ms,
op->call_id);
if (op->rc) {
crm_log_output(LOG_NOTICE, prefix, op->output);
} else {
crm_log_output(LOG_DEBUG, prefix, op->output);
}
free(prefix);
}
if (safe_str_neq(op->op_type, RSC_METADATA)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (op->rc == PCMK_OCF_OK) {
char *metadata = unescape_newlines(op->output);
metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
mainloop_set_trigger(fsa_source);
update_history_cache(lrm_state, rsc, op);
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
return TRUE;
}
diff --git a/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt
index 096e9c10e3..345ccaa042 100644
--- a/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt
+++ b/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt
@@ -1,1450 +1,1454 @@
= Advanced Resource Types =
[[group-resources]]
== Groups - A Syntactic Shortcut ==
indexterm:[Group Resources]
indexterm:[Resource,Groups]
One of the most common elements of a cluster is a set of resources
that need to be located together, start sequentially, and stop in the
reverse order. To simplify this configuration, we support the concept
of groups.
.A group of two primitive resources
======
[source,XML]
-------
<group id="shortcut">
<primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat">
<instance_attributes id="params-public-ip">
<nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/>
</instance_attributes>
</primitive>
<primitive id="Email" class="lsb" type="exim"/>
</group>
-------
======
Although the example above contains only two resources, there is no
limit to the number of resources a group can contain. The example is
also sufficient to explain the fundamental properties of a group:
* Resources are started in the order in which they appear (+Public-IP+
first, then +Email+)
* Resources are stopped in the reverse of the order in which they appear
(+Email+ first, then +Public-IP+)
If a resource in the group can't run anywhere, then nothing after it
is allowed to run, either.
* If +Public-IP+ can't run anywhere, neither can +Email+;
* but if +Email+ can't run anywhere, this does not affect +Public-IP+
in any way
The group above is logically equivalent to writing:
.How the cluster sees a group resource
======
[source,XML]
-------
<configuration>
<resources>
<primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat">
<instance_attributes id="params-public-ip">
<nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/>
</instance_attributes>
</primitive>
<primitive id="Email" class="lsb" type="exim"/>
</resources>
<constraints>
<rsc_colocation id="xxx" rsc="Email" with-rsc="Public-IP" score="INFINITY"/>
<rsc_order id="yyy" first="Public-IP" then="Email"/>
</constraints>
</configuration>
-------
======
Obviously, as the group grows bigger, the reduction in configuration
effort can become significant.
Another (typical) example of a group is a DRBD volume, the filesystem
mount, an IP address, and an application that uses them.
=== Group Properties ===
.Properties of a Group Resource
[width="95%",cols="3m,5<",options="header",align="center"]
|=========================================================
|Field
|Description
|id
|A unique name for the group
indexterm:[id,Group Resource Property]
indexterm:[Resource,Group Property,id]
|=========================================================
=== Group Options ===
Groups inherit the +priority+, +target-role+, and +is-managed+ properties
from primitive resources. See <<s-resource-options>> for information about
those properties.
=== Group Instance Attributes ===
Groups have no instance attributes. However, any that are set for the group
object will be inherited by the group's children.
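For illustration only, a minimal sketch (with hypothetical ids, and a
hypothetical agent parameter) of an instance attribute set on the group
itself; each child then behaves as if the attribute had been set on it
directly:
.Instance attributes set on a group (inherited by its children)
======
[source,XML]
-------
<group id="shortcut">
<instance_attributes id="shortcut-attrs">
<!-- "example-option" is a hypothetical agent parameter -->
<nvpair id="shortcut-attrs-option" name="example-option" value="example-value"/>
</instance_attributes>
<primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"/>
<primitive id="Email" class="lsb" type="exim"/>
</group>
-------
======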
=== Group Contents ===
Groups may only contain a collection of cluster resources (see
<<primitive-resource>>). To refer to a child of a group resource, just use
the child's +id+ instead of the group's.
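As a quick sketch reusing the hypothetical +shortcut+ group from above,
a constraint may refer to the child +Email+ directly by its own +id+:
.Referring to a group child by its own id
======
[source,XML]
-------
<constraints>
<!-- Email is a child of the hypothetical "shortcut" group -->
<rsc_location id="email-prefers-node2" rsc="Email" node="node2" score="200"/>
</constraints>
-------
======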
=== Group Constraints ===
Although it is possible to reference a group's children in
constraints, it is usually preferable to reference the group itself.
.Some constraints involving groups
======
[source,XML]
-------
<constraints>
<rsc_location id="group-prefers-node1" rsc="shortcut" node="node1" score="500"/>
<rsc_colocation id="webserver-with-group" rsc="Webserver" with-rsc="shortcut"/>
<rsc_order id="start-group-then-webserver" first="Webserver" then="shortcut"/>
</constraints>
-------
======
=== Group Stickiness ===
indexterm:[resource-stickiness,Groups]
Stickiness, the measure of how much a resource wants to stay where it
is, is additive in groups. Every active resource of the group will
contribute its stickiness value to the group's total. So if the
default +resource-stickiness+ is 100, and a group has seven members,
five of which are active, then the group as a whole will prefer its
current location with a score of 500.
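As a minimal sketch (hypothetical ids), an explicit +resource-stickiness+
can be set in the group's meta-attributes, which the members then inherit:
.Setting stickiness via a group's meta-attributes
======
[source,XML]
-------
<group id="shortcut">
<meta_attributes id="shortcut-meta">
<!-- inherited by each member, so each active member contributes 100 -->
<nvpair id="shortcut-stickiness" name="resource-stickiness" value="100"/>
</meta_attributes>
<primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat"/>
<primitive id="Email" class="lsb" type="exim"/>
</group>
-------
======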
[[s-resource-clone]]
== Clones - Resources That Can Have Multiple Active Instances ==
indexterm:[Clone Resources]
indexterm:[Resource,Clones]
'Clone' resources are resources that can have more than one copy active at the
same time. This allows you, for example, to run a copy of a daemon on every
node. You can clone any primitive or group resource.
footnote:[
Of course, the service must support running multiple instances.
]
=== Anonymous versus Unique Clones ===
A clone resource is configured to be either 'anonymous' or 'globally unique'.
Anonymous clones are the simplest. These behave completely identically
everywhere they are running. Because of this, there can be only one instance of
an anonymous clone active per node.
The instances of globally unique clones are distinct entities. All instances
are launched identically, but one instance of the clone is not identical to any
other instance, whether running on the same node or a different node. As an
example, a cloned IP address can use special kernel functionality such that
each instance handles a subset of requests for the same IP address.
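As a hedged sketch of the cloned IP address case just described
(hypothetical ids; assumes the +ocf:heartbeat:IPaddr2+ agent):
.A globally unique clone
======
[source,XML]
-------
<clone id="ip-clone">
<meta_attributes id="ip-clone-meta">
<!-- each instance is a distinct entity handling a subset of requests -->
<nvpair id="ip-clone-unique" name="globally-unique" value="true"/>
<nvpair id="ip-clone-node-max" name="clone-node-max" value="2"/>
</meta_attributes>
<primitive id="cluster-ip" class="ocf" provider="heartbeat" type="IPaddr2">
<instance_attributes id="cluster-ip-attrs">
<nvpair id="cluster-ip-addr" name="ip" value="192.0.2.10"/>
</instance_attributes>
</primitive>
</clone>
-------
======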
[[s-resource-promotable]]
=== Promotable clones ===
indexterm:[Promotable Clone Resources]
indexterm:[Resource,Promotable]
If a clone is 'promotable', its instances can perform a special role that
Pacemaker will manage via the +promote+ and +demote+ actions of the resource
agent.
Services that support such a special role have various terms for the special
role and the default role: primary and secondary, master and replica,
controller and worker, etc. Pacemaker uses the terms 'master' and 'slave',
footnote:[
These are historical terms that will eventually be replaced, but the extensive
use of them and the need for backward compatibility makes it a long process.
You may see examples using a +master+ tag instead of a +clone+ tag with the
+promotable+ meta-attribute set to +true+; the +master+ tag is supported, but
deprecated, and will be removed in a future version. You may also see such
services referred to as 'multi-state' or 'stateful'; these mean the same thing
as 'promotable'.
]
but is agnostic to what the service calls them or what they do.
All that Pacemaker cares about is that an instance comes up in the default role
when started, and that the resource agent supports the +promote+ and +demote+
actions
to manage entering and exiting the special role.
=== Clone Properties ===
.Properties of a Clone Resource
[width="95%",cols="3m,5<",options="header",align="center"]
|=========================================================
|Field
|Description
|id
|A unique name for the clone
indexterm:[id,Clone Property]
indexterm:[Clone,Property,id]
|=========================================================
=== Clone Options ===
<<s-resource-options,Options>> inherited from primitive resources:
+priority, target-role, is-managed+
.Clone-specific configuration options
[width="95%",cols="1m,1,3<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|globally-unique
|false
|If +true+, each clone instance performs a distinct function
indexterm:[globally-unique,Clone Option]
indexterm:[Clone,Option,globally-unique]
|clone-max
|number of nodes in cluster
|The maximum number of clone instances that can be started across the entire
cluster
indexterm:[clone-max,Clone Option]
indexterm:[Clone,Option,clone-max]
|clone-node-max
|1
|If +globally-unique+ is +true+, the maximum number of clone instances that can
be started on a single node
indexterm:[clone-node-max,Clone Option]
indexterm:[Clone,Option,clone-node-max]
|clone-min
|0
|Require at least this number of clone instances to be runnable before allowing
resources depending on the clone to be runnable. A value of 0 means require
all clone instances to be runnable.
indexterm:[clone-min,Clone Option]
indexterm:[Clone,Option,clone-min]
|notify
|false
|Call the resource agent's +notify+ action for all active instances, before and
after starting or stopping any clone instance. The resource agent must support
this action. Allowed values: +false+, +true+
indexterm:[notify,Clone Option]
indexterm:[Clone,Option,notify]
|ordered
|false
|If +true+, clone instances must be started sequentially instead of in parallel.
Allowed values: +false+, +true+
indexterm:[ordered,Clone Option]
indexterm:[Clone,Option,ordered]
|interleave
|false
|When this clone is ordered relative to another clone, if this option is
+false+ (the default), the ordering is relative to 'all' instances of the
other clone, whereas if this option is +true+, the ordering is relative only
to instances on the same node.
Allowed values: +false+, +true+
indexterm:[interleave,Clone Option]
indexterm:[Clone,Option,interleave]
|promotable
|false
|If +true+, clone instances can perform a special role that Pacemaker will
manage via the resource agent's +promote+ and +demote+ actions. The resource
agent must support these actions.
Allowed values: +false+, +true+
indexterm:[promotable,Clone Option]
indexterm:[Clone,Option,promotable]
|promoted-max
|1
|If +promotable+ is +true+, the number of instances that can be promoted at one
time across the entire cluster
indexterm:[promoted-max,Clone Option]
indexterm:[Clone,Option,promoted-max]
|promoted-node-max
|1
|If +promotable+ is +true+ and +globally-unique+ is +false+, the number of
clone instances that can be promoted at one time on a single node
indexterm:[promoted-node-max,Clone Option]
indexterm:[Clone,Option,promoted-node-max]
|=========================================================
For backward compatibility, +master-max+ and +master-node-max+ are accepted as
aliases for +promoted-max+ and +promoted-node-max+, but are deprecated since
2.0.0, and support for them will be removed in a future version.
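To tie the options above together, here is a minimal sketch (ids, provider,
and agent names are hypothetical placeholders) of a promotable clone that
also caps its total and per-node instance counts:
.A promotable clone with explicit instance limits
======
[source,XML]
-------
<clone id="db-clone">
<meta_attributes id="db-clone-meta">
<nvpair id="db-clone-promotable" name="promotable" value="true"/>
<nvpair id="db-clone-promoted-max" name="promoted-max" value="1"/>
<nvpair id="db-clone-max" name="clone-max" value="3"/>
</meta_attributes>
<!-- "myCorp"/"myDB" stand in for an agent supporting promote/demote -->
<primitive id="db" class="ocf" provider="myCorp" type="myDB"/>
</clone>
-------
======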
=== Clone Contents ===
Clones must contain exactly one primitive or group resource.
.A clone that runs a web server on all nodes
====
[source,XML]
----
<clone id="apache-clone">
<primitive id="apache" class="lsb" type="apache">
<operations>
<op id="apache-monitor" name="monitor" interval="30"/>
</operations>
</primitive>
</clone>
----
====
[WARNING]
You should never reference the name of a clone's child (the primitive or group
resource being cloned). If you think you need to do this, you probably need to
re-evaluate your design.
=== Clone Instance Attributes ===
Clones have no instance attributes; however, any that are set here will be
inherited by the clone's child.
=== Clone Constraints ===
In most cases, a clone will have a single instance on each active cluster
node. If this is not the case, you can indicate which nodes the
cluster should preferentially assign copies to with resource location
constraints. These constraints are written no differently from those
for primitive resources except that the clone's +id+ is used.
.Some constraints involving clones
======
[source,XML]
-------
<constraints>
<rsc_location id="clone-prefers-node1" rsc="apache-clone" node="node1" score="500"/>
<rsc_colocation id="stats-with-clone" rsc="apache-stats" with="apache-clone"/>
<rsc_order id="start-clone-then-stats" first="apache-clone" then="apache-stats"/>
</constraints>
-------
======
Ordering constraints behave slightly differently for clones. In the
example above, +apache-stats+ will wait until all copies of +apache-clone+
that need to be started have done so before being started itself.
Only if _no_ copies can be started will +apache-stats+ be prevented
from being active. Additionally, the clone will wait for
+apache-stats+ to be stopped before stopping itself.
Colocation of a primitive or group resource with a clone means that
the resource can run on any node with an active instance of the clone.
The cluster will choose an instance based on where the clone is running and
the resource's own location preferences.
Colocation between clones is also possible. If one clone +A+ is colocated
with another clone +B+, the set of allowed locations for +A+ is limited to
nodes on which +B+ is (or will be) active. Placement is then performed
normally.
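For example (a sketch with hypothetical clone ids), limiting one clone to
nodes where another is active:
.Colocating one clone with another
======
[source,XML]
-------
<constraints>
<rsc_colocation id="clone-A-with-clone-B" rsc="A-clone"
with-rsc="B-clone" score="INFINITY"/>
</constraints>
-------
======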
==== Promotable Clone Constraints ====
For promotable clone resources, the +first-action+ and/or +then-action+ fields
for ordering constraints may be set to +promote+ or +demote+ to constrain the
master role, and colocation constraints may contain +rsc-role+ and/or
+with-rsc-role+ fields.
.Additional colocation constraint options for promotable clone resources
[width="95%",cols="1m,1,3<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|rsc-role
|Started
|An additional attribute of colocation constraints that specifies the
role that +rsc+ must be in. Allowed values: +Started+, +Master+,
+Slave+.
indexterm:[rsc-role,Colocation Constraints]
indexterm:[Constraints,Colocation,rsc-role]
|with-rsc-role
|Started
|An additional attribute of colocation constraints that specifies the
role that +with-rsc+ must be in. Allowed values: +Started+,
+Master+, +Slave+.
indexterm:[with-rsc-role,Colocation Constraints]
indexterm:[Constraints,Colocation,with-rsc-role]
|=========================================================
.Constraints involving promotable clone resources
======
[source,XML]
-------
<constraints>
<rsc_location id="db-prefers-node1" rsc="database" node="node1" score="500"/>
<rsc_colocation id="backup-with-db-slave" rsc="backup"
with-rsc="database" with-rsc-role="Slave"/>
<rsc_colocation id="myapp-with-db-master" rsc="myApp"
with-rsc="database" with-rsc-role="Master"/>
<rsc_order id="start-db-before-backup" first="database" then="backup"/>
<rsc_order id="promote-db-then-app" first="database" first-action="promote"
then="myApp" then-action="start"/>
</constraints>
-------
======
In the example above, +myApp+ will wait until one of the database
copies has been started and promoted to master before being started
itself on the same node. Only if no copies can be promoted will +myApp+ be
prevented from being active. Additionally, the cluster will wait for
+myApp+ to be stopped before demoting the database.
Colocation of a primitive or group resource with a promotable clone
resource means that it can run on any node with an active instance of
the promotable clone resource that has the specified role (+master+ or
+slave+). In the example above, the cluster will choose a location based on
where database is running as a +master+, and if there are multiple
+master+ instances it will also factor in +myApp+'s own location
preferences when deciding which location to choose.
Colocation with regular clones and other promotable clone resources is also
possible. In such cases, the set of allowed locations for the +rsc+
clone is (after role filtering) limited to nodes on which the
+with-rsc+ promotable clone resource is (or will be) in the specified role.
Placement is then performed as normal.
==== Using Promotable Clone Resources in Colocation Sets ====
.Additional colocation set options relevant to promotable clone resources
[width="95%",cols="1m,1,6<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|role
|Started
|The role that 'all members' of the set must be in. Allowed values: +Started+, +Master+,
+Slave+.
indexterm:[role,Colocation Constraints]
indexterm:[Constraints,Colocation,role]
|=========================================================
In the following example +B+'s master must be located on the same node as +A+'s master.
Additionally, resources +C+ and +D+ must be located on the same node as +A+'s
and +B+'s masters.
.Colocate C and D with A's and B's master instances
======
[source,XML]
-------
<constraints>
<rsc_colocation id="coloc-1" score="INFINITY" >
<resource_set id="colocated-set-example-1" sequential="true" role="Master">
<resource_ref id="A"/>
<resource_ref id="B"/>
</resource_set>
<resource_set id="colocated-set-example-2" sequential="true">
<resource_ref id="C"/>
<resource_ref id="D"/>
</resource_set>
</rsc_colocation>
</constraints>
-------
======
==== Using Promotable Clone Resources in Ordered Sets ====
.Additional ordered set options relevant to promotable clone resources
[width="95%",cols="1m,1,3<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|action
|value of +first-action+
|An additional attribute of ordering constraint sets that specifies the
action that applies to 'all members' of the set. Allowed
values: +start+, +stop+, +promote+, +demote+.
indexterm:[action,Ordering Constraints]
indexterm:[Constraints,Ordering,action]
|=========================================================
.Start C and D after first promoting A and B
======
[source,XML]
-------
<constraints>
<rsc_order id="order-1" score="INFINITY" >
<resource_set id="ordered-set-1" sequential="true" action="promote">
<resource_ref id="A"/>
<resource_ref id="B"/>
</resource_set>
<resource_set id="ordered-set-2" sequential="true" action="start">
<resource_ref id="C"/>
<resource_ref id="D"/>
</resource_set>
</rsc_order>
</constraints>
-------
======
In the above example, +B+ cannot be promoted to a master role until +A+ has
been promoted. Additionally, resources +C+ and +D+ must wait until +A+ and +B+
have been promoted before they can start.
[[s-clone-stickiness]]
=== Clone Stickiness ===
indexterm:[resource-stickiness,Clones]
To achieve a stable allocation pattern, clones are slightly sticky by
default. If no value for +resource-stickiness+ is provided, the clone
will use a value of 1. Being a small value, it causes minimal
disturbance to the score calculations of other resources but is enough
to prevent Pacemaker from needlessly moving copies around the cluster.
[NOTE]
====
For globally unique clones, this may result in multiple instances of the
clone staying on a single node, even after another eligible node becomes
active (for example, after being put into standby mode then made active again).
If you do not want this behavior, specify a +resource-stickiness+ of 0
for the clone temporarily and let the cluster adjust, then set it back
to 1 if you want the default behavior to apply again.
====
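A sketch of the workaround described in the note, temporarily zeroing the
clone's stickiness (hypothetical ids):
.Temporarily disabling stickiness for a clone
======
[source,XML]
-------
<clone id="apache-clone">
<meta_attributes id="apache-clone-meta">
<!-- set back to 1 (the clone default) once the cluster has rebalanced -->
<nvpair id="apache-clone-stickiness" name="resource-stickiness" value="0"/>
</meta_attributes>
<primitive id="apache" class="lsb" type="apache"/>
</clone>
-------
======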
=== Clone Resource Agent Requirements ===
Any resource can be used as an anonymous clone, as it requires no
additional support from the resource agent. Whether it makes sense to
do so depends on your resource and its resource agent.
==== Resource Agent Requirements for Globally Unique Clones ====
Globally unique clones require additional support in the resource agent. In
particular, it must only respond with +$\{OCF_SUCCESS}+ if the node has that
exact instance active. All other probes for instances of the clone should
result in +$\{OCF_NOT_RUNNING}+ (or one of the other OCF error codes if
they are failed).
Individual instances of a clone are identified by appending a colon and a
numerical offset, e.g. +apache:2+.
Resource agents can find out how many copies there are by examining
the +OCF_RESKEY_CRM_meta_clone_max+ environment variable and which
instance it is by examining +OCF_RESKEY_CRM_meta_clone+.
The resource agent must not make any assumptions (based on
+OCF_RESKEY_CRM_meta_clone+) about which numerical instances are active. In
particular, the list of active copies will not always be an unbroken
sequence, nor always start at 0.
==== Resource Agent Requirements for Promotable Clones ====
Promotable clone resources require two extra actions, +demote+ and +promote+,
which are responsible for changing the state of the resource. Like +start+ and
+stop+, they should return +$\{OCF_SUCCESS}+ if they completed successfully or
a relevant error code if they did not.
The states can mean whatever you wish, but when the resource is
started, it must come up in the mode called +slave+. From there the
cluster will decide which instances to promote to +master+.
In addition to the clone requirements for monitor actions, agents must
also _accurately_ report which state they are in. The cluster relies
on the agent to report its status (including role) accurately and does
not indicate to the agent what role it currently believes it to be in.
.Role implications of OCF return codes
[width="95%",cols="1,1<",options="header",align="center"]
|=========================================================
|Monitor Return Code
|Description
|OCF_NOT_RUNNING
|Stopped
indexterm:[Return Code,OCF_NOT_RUNNING]
|OCF_SUCCESS
|Running (Slave)
indexterm:[Return Code,OCF_SUCCESS]
|OCF_RUNNING_MASTER
|Running (Master)
indexterm:[Return Code,OCF_RUNNING_MASTER]
|OCF_FAILED_MASTER
|Failed (Master)
indexterm:[Return Code,OCF_FAILED_MASTER]
|Other
|Failed (Slave)
|=========================================================
==== Clone Notifications ====
If the clone has the +notify+ meta-attribute set to +true+, and the resource
agent supports the +notify+ action, Pacemaker will call the action when
appropriate, passing a number of extra variables which, when combined with
additional context, can be used to calculate the current state of the cluster
and what is about to happen to it.
.Environment variables supplied with Clone notify actions
[width="95%",cols="5,3<",options="header",align="center"]
|=========================================================
|Variable
|Description
|OCF_RESKEY_CRM_meta_notify_type
|Allowed values: +pre+, +post+
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,type]
indexterm:[type,Notification Environment Variable]
|OCF_RESKEY_CRM_meta_notify_operation
|Allowed values: +start+, +stop+
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,operation]
indexterm:[operation,Notification Environment Variable]
|OCF_RESKEY_CRM_meta_notify_start_resource
|Resources to be started
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,start_resource]
indexterm:[start_resource,Notification Environment Variable]
|OCF_RESKEY_CRM_meta_notify_stop_resource
|Resources to be stopped
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,stop_resource]
indexterm:[stop_resource,Notification Environment Variable]
|OCF_RESKEY_CRM_meta_notify_active_resource
|Resources that are running
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,active_resource]
indexterm:[active_resource,Notification Environment Variable]
|OCF_RESKEY_CRM_meta_notify_inactive_resource
|Resources that are not running
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,inactive_resource]
indexterm:[inactive_resource,Notification Environment Variable]
|OCF_RESKEY_CRM_meta_notify_start_uname
|Nodes on which resources will be started
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,start_uname]
indexterm:[start_uname,Notification Environment Variable]
|OCF_RESKEY_CRM_meta_notify_stop_uname
|Nodes on which resources will be stopped
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,stop_uname]
indexterm:[stop_uname,Notification Environment Variable]
|OCF_RESKEY_CRM_meta_notify_active_uname
|Nodes on which resources are running
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,active_uname]
indexterm:[active_uname,Notification Environment Variable]
|=========================================================
The variables come in pairs, such as
+OCF_RESKEY_CRM_meta_notify_start_resource+ and
+OCF_RESKEY_CRM_meta_notify_start_uname+ and should be treated as an
array of whitespace-separated elements.
+OCF_RESKEY_CRM_meta_notify_inactive_resource+ is an exception as the
matching +uname+ variable does not exist since inactive resources
are not running on any node.
Thus in order to indicate that +clone:0+ will be started on +sles-1+,
+clone:2+ will be started on +sles-3+, and +clone:3+ will be started
on +sles-2+, the cluster would set
.Notification variables
======
[source,Bash]
-------
OCF_RESKEY_CRM_meta_notify_start_resource="clone:0 clone:2 clone:3"
OCF_RESKEY_CRM_meta_notify_start_uname="sles-1 sles-3 sles-2"
-------
======
==== Interpretation of Notification Variables ====
.Pre-notification (stop):
* Active resources: +$OCF_RESKEY_CRM_meta_notify_active_resource+
* Inactive resources: +$OCF_RESKEY_CRM_meta_notify_inactive_resource+
* Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
.Post-notification (stop) / Pre-notification (start):
* Active resources
** +$OCF_RESKEY_CRM_meta_notify_active_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* Inactive resources
** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* Resources that were started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
.Post-notification (start):
* Active resources:
** +$OCF_RESKEY_CRM_meta_notify_active_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Inactive resources:
** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources that were started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
==== Extra Notifications for Promotable Clones ====
.Extra environment variables supplied for promotable clones
[width="95%",cols="5,3<",options="header",align="center"]
|=========================================================
|Variable
|Description
|_OCF_RESKEY_CRM_meta_notify_master_resource_
|Resources that are running in +Master+ mode
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,master_resource]
indexterm:[master_resource,Notification Environment Variable]
|_OCF_RESKEY_CRM_meta_notify_slave_resource_
|Resources that are running in +Slave+ mode
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,slave_resource]
indexterm:[slave_resource,Notification Environment Variable]
|_OCF_RESKEY_CRM_meta_notify_promote_resource_
|Resources to be promoted
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,promote_resource]
indexterm:[promote_resource,Notification Environment Variable]
|_OCF_RESKEY_CRM_meta_notify_demote_resource_
|Resources to be demoted
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,demote_resource]
indexterm:[demote_resource,Notification Environment Variable]
|_OCF_RESKEY_CRM_meta_notify_promote_uname_
|Nodes on which resources will be promoted
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,promote_uname]
indexterm:[promote_uname,Notification Environment Variable]
|_OCF_RESKEY_CRM_meta_notify_demote_uname_
|Nodes on which resources will be demoted
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,demote_uname]
indexterm:[demote_uname,Notification Environment Variable]
|_OCF_RESKEY_CRM_meta_notify_master_uname_
|Nodes on which resources are running in +Master+ mode
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,master_uname]
indexterm:[master_uname,Notification Environment Variable]
|_OCF_RESKEY_CRM_meta_notify_slave_uname_
|Nodes on which resources are running in +Slave+ mode
indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,slave_uname]
indexterm:[slave_uname,Notification Environment Variable]
|=========================================================
==== Interpretation of Promotable Notification Variables ====
.Pre-notification (demote):
* +Active+ resources: +$OCF_RESKEY_CRM_meta_notify_active_resource+
* +Master+ resources: +$OCF_RESKEY_CRM_meta_notify_master_resource+
* +Slave+ resources: +$OCF_RESKEY_CRM_meta_notify_slave_resource+
* Inactive resources: +$OCF_RESKEY_CRM_meta_notify_inactive_resource+
* Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+
* Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
.Post-notification (demote) / Pre-notification (stop):
* +Active+ resources: +$OCF_RESKEY_CRM_meta_notify_active_resource+
* +Master+ resources:
** +$OCF_RESKEY_CRM_meta_notify_master_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* +Slave+ resources: +$OCF_RESKEY_CRM_meta_notify_slave_resource+
* Inactive resources: +$OCF_RESKEY_CRM_meta_notify_inactive_resource+
* Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+
* Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* Resources that were demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
.Post-notification (stop) / Pre-notification (start)
* +Active+ resources:
** +$OCF_RESKEY_CRM_meta_notify_active_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* +Master+ resources:
** +$OCF_RESKEY_CRM_meta_notify_master_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* +Slave+ resources:
** +$OCF_RESKEY_CRM_meta_notify_slave_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* Inactive resources:
** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+
* Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* Resources that were demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
.Post-notification (start) / Pre-notification (promote)
* +Active+ resources:
** +$OCF_RESKEY_CRM_meta_notify_active_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+
* +Master+ resources:
** +$OCF_RESKEY_CRM_meta_notify_master_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* +Slave+ resources:
** +$OCF_RESKEY_CRM_meta_notify_slave_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Inactive resources:
** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+
* Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* Resources that were started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources that were demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
.Post-notification (promote):
* +Active+ resources:
** +$OCF_RESKEY_CRM_meta_notify_active_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+
* +Master+ resources:
** +$OCF_RESKEY_CRM_meta_notify_master_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_demote_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_promote_resource+
* +Slave+ resources:
** +$OCF_RESKEY_CRM_meta_notify_slave_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_promote_resource+
* Inactive resources:
** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+
** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+
** minus +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+
* Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
* Resources that were started: +$OCF_RESKEY_CRM_meta_notify_start_resource+
* Resources that were promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+
* Resources that were demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+
* Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+
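A notification handler can combine these variables to compute the membership
changes in flight. A minimal sketch in shell (the helper name is illustrative,
not part of any API):
.Computing the post-demote master set in a notify action
====
[source,Bash]
----
# Sketch only: instances listed in master_resource that also appear in
# demote_resource will no longer be masters once the demote completes.
masters_after_demote() {
    for rsc in $OCF_RESKEY_CRM_meta_notify_master_resource; do
        case " $OCF_RESKEY_CRM_meta_notify_demote_resource " in
            *" $rsc "*) ;;        # scheduled for demotion -- omit
            *) echo "$rsc" ;;     # remains a master afterwards
        esac
    done
}
----
====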
=== Monitoring Promotable Clone Resources ===
The usual monitor actions are insufficient to monitor a promotable clone
resource, because Pacemaker needs to verify not only that the resource is
active, but also that its actual role matches its intended one.
Define two monitoring actions: the usual one will cover the slave role,
and an additional one with +role="Master"+ will cover the master role.
.Monitoring both states of a promotable clone resource
======
[source,XML]
-------
<clone id="myMasterRsc">
<meta_attributes id="myMasterRsc-meta">
<nvpair name="promotable" value="true"/>
</meta_attributes>
<primitive id="myRsc" class="ocf" type="myApp" provider="myCorp">
<operations>
<op id="public-ip-slave-check" name="monitor" interval="60"/>
<op id="public-ip-master-check" name="monitor" interval="61" role="Master"/>
</operations>
</primitive>
</clone>
-------
======
[IMPORTANT]
===========
It is crucial that _every_ monitor operation has a different interval!
Pacemaker currently differentiates between operations
only by resource and interval, so if (for example) a promotable clone resource
had the same monitor interval for both roles, Pacemaker would ignore the
role when checking the status -- which would cause unexpected return
codes, and therefore unnecessary complications.
===========
[[s-promotion-scores]]
=== Determining Which Instance is Promoted ===
Pacemaker can choose a promotable clone instance to be promoted in one of two
ways:
* Promotion scores: These are node attributes set via the `crm_master` utility.
A resource agent that supports promotable clones would generally call
`crm_master` from its start action; the tool automatically detects both the
resource and host, and should be used to set a preference for being promoted.
Based on these scores, +promoted-max+, and +promoted-node-max+, the instance(s)
with the highest preference will be promoted (a sketch of such a call follows
the example below).
* Constraints: Location constraints can indicate which nodes are most preferred
as masters.
.Explicitly preferring node1 to be promoted to master
======
[source,XML]
-------
<rsc_location id="master-location" rsc="myMasterRsc">
<rule id="master-rule" score="100" role="Master">
<expression id="master-exp" attribute="#uname" operation="eq" value="node1"/>
</rule>
</rsc_location>
-------
======
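When promotion scores are used instead, the agent itself records the
preference. A minimal sketch, assuming a hypothetical agent whose launch
helper (`my_app_launch`) is defined elsewhere:
.Recording a promotion preference from an agent's start action
====
[source,Bash]
----
# Sketch only: crm_master infers the resource and node from the agent's
# environment; "-l reboot" keeps the score until the node next reboots.
my_app_start() {
    my_app_launch || return $OCF_ERR_GENERIC   # hypothetical start logic
    crm_master -l reboot -v 100                # preference for promotion
    return $OCF_SUCCESS
}
----
====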
[[s-resource-bundle]]
== Bundles - Isolated Environments ==
indexterm:[bundle]
indexterm:[Resource,bundle]
indexterm:[Docker,bundle]
indexterm:[rkt,bundle]
Pacemaker supports a special syntax for launching a
https://en.wikipedia.org/wiki/Operating-system-level_virtualization[container]
with any infrastructure it requires: the 'bundle'.
Pacemaker bundles support https://www.docker.com/[Docker] and
https://coreos.com/rkt/[rkt] container technologies.
footnote:[Docker is a trademark of Docker, Inc. No endorsement by or
association with Docker, Inc. is implied.]
.A bundle for a containerized web server
====
[source,XML]
----
<bundle id="httpd-bundle">
<docker image="pcmk:http" replicas="3"/>
<network ip-range-start="192.168.122.131"
host-netmask="24"
host-interface="eth0">
<port-mapping id="httpd-port" port="80"/>
</network>
<storage>
<storage-mapping id="httpd-syslog"
source-dir="/dev/log"
target-dir="/dev/log"
options="rw"/>
<storage-mapping id="httpd-root"
source-dir="/srv/html"
target-dir="/var/www/html"
options="rw"/>
<storage-mapping id="httpd-logs"
source-dir-root="/var/log/pacemaker/bundles"
target-dir="/etc/httpd/logs"
options="rw"/>
</storage>
<primitive class="ocf" id="httpd" provider="heartbeat" type="apache"/>
</bundle>
----
====
=== Bundle Properties ===
.Properties of a Bundle
[width="95%",cols="3m,5<",options="header",align="center"]
|=========================================================
|Field
|Description
|id
|A unique name for the bundle (required)
indexterm:[id,bundle]
indexterm:[bundle,Property,id]
|description
|Arbitrary text (not used by Pacemaker)
indexterm:[description,bundle]
indexterm:[bundle,Property,description]
|=========================================================
A bundle must contain exactly one +<docker>+ or +<rkt>+ element.
=== Docker Properties ===
Before configuring a Docker bundle in Pacemaker, the user must install Docker
and supply a fully configured Docker image on every node allowed to run the
bundle.
Pacemaker will create an implicit +ocf:heartbeat:docker+ resource to manage
a bundle's Docker container. The user must ensure that resource agent is
installed on every node allowed to run the bundle.
.Properties of a Bundle's Docker Element
[width="95%",cols="3m,4,5<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|image
|
|Docker image tag (required)
indexterm:[image,Docker]
indexterm:[Docker,Property,image]
|replicas
|Value of +promoted-max+ if that is positive, else 1
|A positive integer specifying the number of container instances to launch
indexterm:[replicas,Docker]
indexterm:[Docker,Property,replicas]
|replicas-per-host
|1
|A positive integer specifying the number of container instances allowed to run
on a single node
indexterm:[replicas-per-host,Docker]
indexterm:[Docker,Property,replicas-per-host]
|promoted-max
|0
|A non-negative integer that, if positive, indicates that the containerized
service should be treated as a promotable service, with this many replicas
allowed to run the service in the master role
indexterm:[promoted-max,Docker]
indexterm:[Docker,Property,promoted-max]
|network
|
|If specified, this will be passed to +docker run+ as the
https://docs.docker.com/engine/reference/run/#network-settings[network setting]
for the Docker container.
indexterm:[network,Docker]
indexterm:[Docker,Property,network]
|run-command
|`/usr/sbin/pacemaker-remoted` if the bundle contains a +primitive+, otherwise none
|This command will be run inside the container when launching it ("PID 1"). If
the bundle contains a +primitive+, this command 'must' start pacemaker-remoted
(but could, for example, be a script that does other things as well). If the
container image has a pre-2.0.0 version of Pacemaker, set this to
+/usr/sbin/pacemaker_remoted+ (note the underscore instead of the dash).
indexterm:[run-command,Docker]
indexterm:[Docker,Property,run-command]
|options
|
|Extra command-line options to pass to `docker run`
indexterm:[options,Docker]
indexterm:[Docker,Property,options]
|=========================================================
For backward compatibility, +masters+ is accepted as an alias for
+promoted-max+, but is deprecated since 2.0.0, and support for it will be
removed in a future version.
=== rkt Properties ===
Before configuring a rkt bundle in Pacemaker, the user must install rkt
and supply a fully configured container image on every node allowed to run the
bundle.
Pacemaker will create an implicit +ocf:heartbeat:rkt+ resource to manage
a bundle's rkt container. The user must ensure that resource agent is
installed on every node allowed to run the bundle.
.Properties of a Bundle's rkt Element
[width="95%",cols="3m,4,5<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|image
|
|Container image tag (required)
indexterm:[image,rkt]
indexterm:[rkt,Property,image]
|replicas
|Value of +promoted-max+ if that is positive, else 1
|A positive integer specifying the number of container instances to launch
indexterm:[replicas,rkt]
indexterm:[rkt,Property,replicas]
|replicas-per-host
|1
|A positive integer specifying the number of container instances allowed to run
on a single node
indexterm:[replicas-per-host,rkt]
indexterm:[rkt,Property,replicas-per-host]
|promoted-max
|0
|A non-negative integer that, if positive, indicates that the containerized
service should be treated as a promotable service, with this many replicas
allowed to run the service in the master role
indexterm:[promoted-max,rkt]
indexterm:[rkt,Property,promoted-max]
|network
|
|If specified, this will be passed to +rkt run+ as the
network setting for the rkt container.
indexterm:[network,rkt]
indexterm:[rkt,Property,network]
|run-command
|`/usr/sbin/pacemaker-remoted` if the bundle contains a +primitive+, otherwise none
|This command will be run inside the container when launching it ("PID 1"). If
the bundle contains a +primitive+, this command 'must' start pacemaker-remoted
(but could, for example, be a script that does other things as well). If the
container image has a pre-2.0.0 version of Pacemaker, set this to
+/usr/sbin/pacemaker_remoted+ (note the underscore instead of the dash).
indexterm:[run-command,rkt]
indexterm:[rkt,Property,run-command]
|options
|
|Extra command-line options to pass to `rkt run`
indexterm:[options,rkt]
indexterm:[rkt,Property,options]
|=========================================================
For backward compatibility, +masters+ is accepted as an alias for
+promoted-max+, but is deprecated since 2.0.0, and support for it will be
removed in a future version.
=== Bundle Network Properties ===
A bundle may optionally contain one +<network>+ element.
indexterm:[bundle,network]
.Properties of a Bundle's Network Element
[width="95%",cols="2m,1,4<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|add-host
|TRUE
|If TRUE, and +ip-range-start+ is used, Pacemaker will automatically ensure
that +/etc/hosts+ inside the containers has entries for each
<<s-resource-bundle-note-replica-names,replica name>> and its assigned IP.
indexterm:[add-host,network]
indexterm:[network,Property,add-host]
|ip-range-start
|
|If specified, Pacemaker will create an implicit +ocf:heartbeat:IPaddr2+
resource for each container instance, starting with this IP address,
using up to +replicas+ sequential addresses. These addresses can be used
from the host's network to reach the service inside the container, though
they are not visible within the container itself. Only IPv4 addresses are
currently supported.
indexterm:[ip-range-start,network]
indexterm:[network,Property,ip-range-start]
|host-netmask
|32
|If +ip-range-start+ is specified, the IP addresses are created with this
CIDR netmask (as a number of bits).
indexterm:[host-netmask,network]
indexterm:[network,Property,host-netmask]
|host-interface
|
|If +ip-range-start+ is specified, the IP addresses are created on this
host interface (by default, it will be determined from the IP address).
indexterm:[host-interface,network]
indexterm:[network,Property,host-interface]
|control-port
|3121
|If the bundle contains a +primitive+, the cluster will use this integer TCP
port for communication with Pacemaker Remote inside the container. Changing
this is useful when the container is unable to listen on the default port,
for example, when the container uses the host's network rather than
+ip-range-start+ (in which case +replicas-per-host+ must be 1), or when the
bundle may run on a Pacemaker Remote node that is already listening on the
default port. Any PCMK_remote_port environment variable set on the host or in
the container is ignored for bundle connections.
indexterm:[control-port,network]
indexterm:[network,Property,control-port]
|=========================================================
[[s-resource-bundle-note-replica-names]]
[NOTE]
====
Replicas are named by the bundle id plus a dash and an integer counter starting
with zero. For example, if a bundle named +httpd-bundle+ has +replicas=2+, its
containers will be named +httpd-bundle-0+ and +httpd-bundle-1+.
====
Additionally, a +<network>+ element may optionally contain one or more
+<port-mapping>+ elements.
indexterm:[bundle,network,port-mapping]
.Properties of a Bundle's Port-Mapping Element
[width="95%",cols="2m,1,4<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|id
|
|A unique name for the port mapping (required)
indexterm:[id,port-mapping]
indexterm:[port-mapping,Property,id]
|port
|
|If this is specified, connections to this TCP port number on the host network
(on the container's assigned IP address, if +ip-range-start+ is specified)
will be forwarded to the container network. Exactly one of +port+ or +range+
must be specified in a +port-mapping+.
indexterm:[port,port-mapping]
indexterm:[port-mapping,Property,port]
|internal-port
|value of +port+
|If +port+ and this are specified, connections to +port+ on the host's network
will be forwarded to this port on the container network.
indexterm:[internal-port,port-mapping]
indexterm:[port-mapping,Property,internal-port]
|range
|
|If this is specified, connections to these TCP port numbers (expressed as
'first_port'-'last_port') on the host network (on the container's assigned IP
address, if +ip-range-start+ is specified) will be forwarded to the same ports
in the container network. Exactly one of +port+ or +range+ must be specified
in a +port-mapping+.
indexterm:[range,port-mapping]
indexterm:[port-mapping,Property,range]
|=========================================================
[NOTE]
====
If the bundle contains a +primitive+, Pacemaker will automatically map the
+control-port+, so it is not necessary to specify that port in a
+port-mapping+.
====
=== Bundle Storage Properties ===
A bundle may optionally contain one +<storage>+ element. A +<storage>+ element
has no properties of its own, but may contain one or more +<storage-mapping>+
elements.
indexterm:[bundle,storage,storage-mapping]
.Properties of a Bundle's Storage-Mapping Element
[width="95%",cols="2m,1,4<",options="header",align="center"]
|=========================================================
|Field
|Default
|Description
|id
|
|A unique name for the storage mapping (required)
indexterm:[id,storage-mapping]
indexterm:[storage-mapping,Property,id]
|source-dir
|
|The absolute path on the host's filesystem that will be mapped into the
container. Exactly one of +source-dir+ and +source-dir-root+ must be specified
in a +storage-mapping+.
indexterm:[source-dir,storage-mapping]
indexterm:[storage-mapping,Property,source-dir]
|source-dir-root
|
|The start of a path on the host's filesystem that will be mapped into the
container, using a different subdirectory on the host for each container
instance. The subdirectory will be named the same as the
<<s-resource-bundle-note-replica-names,replica name>>.
Exactly one of +source-dir+ and +source-dir-root+ must be specified in a
+storage-mapping+.
indexterm:[source-dir-root,storage-mapping]
indexterm:[storage-mapping,Property,source-dir-root]
|target-dir
|
|The path name within the container where the host storage will be mapped
(required)
indexterm:[target-dir,storage-mapping]
indexterm:[storage-mapping,Property,target-dir]
|options
|
|File system mount options to use when mapping the storage
indexterm:[options,storage-mapping]
indexterm:[storage-mapping,Property,options]
|=========================================================
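To illustrate +source-dir-root+, the following sketch prints the per-replica
mappings that would result for the +httpd-bundle+ example above (the mount
wording is conceptual, not a literal command):
.Per-replica host directories with source-dir-root
====
[source,Bash]
----
# Sketch only: each replica gets its own host subdirectory, named after
# the replica, mounted at the common target-dir.
bundle=httpd-bundle
i=0
while [ "$i" -lt 3 ]; do    # replicas=3 in the example above
    echo "$bundle-$i: /var/log/pacemaker/bundles/$bundle-$i -> /etc/httpd/logs (rw)"
    i=$((i + 1))
done
----
====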
[NOTE]
====
Pacemaker does not define the behavior if the source directory does not already
exist on the host. However, it is expected that the container technology and/or
its resource agent will create the source directory in that case.
====
[NOTE]
====
If the bundle contains a +primitive+,
Pacemaker will automatically map the equivalent of
+source-dir=/etc/pacemaker/authkey target-dir=/etc/pacemaker/authkey+
and +source-dir-root=/var/log/pacemaker/bundles target-dir=/var/log+ into the
container, so it is not necessary to specify those paths in a
+storage-mapping+.
====
[IMPORTANT]
====
The +PCMK_authkey_location+ environment variable must not be set to anything
other than the default of `/etc/pacemaker/authkey` on any node in the cluster.
====
=== Bundle Primitive ===
A bundle may optionally contain one +<primitive>+ resource
(see <<s-resource-primitive>>). The primitive may have operations,
instance attributes and meta-attributes defined, as usual.
If a bundle contains a primitive resource, the container image must include
the Pacemaker Remote daemon, and at least one of +ip-range-start+ or
+control-port+ must be configured in the bundle. Pacemaker will create an
implicit +ocf:pacemaker:remote+ resource for the connection, launch
Pacemaker Remote within the container, and monitor and manage the primitive
resource via Pacemaker Remote.
If the bundle has more than one container instance (replica), the primitive
resource will function as an implicit clone (see <<s-resource-clone>>) --
a promotable clone if the bundle has +promoted-max+ greater than zero
(see <<s-resource-promotable>>).
[IMPORTANT]
====
Containers in bundles with a +primitive+ must have an accessible networking
environment, so that Pacemaker on the cluster nodes can contact
Pacemaker Remote inside the container. For example, the Docker option
`--net=none` should not be used with a +primitive+. The default (using a
distinct network space inside the container) works in combination with
+ip-range-start+. If the Docker option `--net=host` is used (making the
container share the host's network space), a unique +control-port+ should be
specified for each bundle. Any firewall must allow access to the
+control-port+.
====
[[s-bundle-attributes]]
=== Bundle Node Attributes ===
If the bundle has a +primitive+, the primitive's resource agent may want to set
node attributes such as <<s-promotion-scores,promotion scores>>. However, with
containers, it is not apparent which node should get the attribute.
If the container uses shared storage that is the same no matter which node the
container is hosted on, then it is appropriate to use the promotion score on the
bundle node itself.
On the other hand, if the container uses storage exported from the underlying host,
then it may be more appropriate to use the promotion score on the underlying host.
Since this depends on the particular situation, the
+container-attribute-target+ resource meta-attribute allows the user to specify
which approach to use. If it is set to +host+, then user-defined node attributes
will be checked on the underlying host. If it is anything else, the local node
(in this case the bundle node) is used as usual.
This only applies to user-defined attributes; the cluster will always check the
local node for cluster-defined attributes such as +#uname+.
If +container-attribute-target+ is +host+, the cluster will pass additional
environment variables to the primitive's resource agent that allow it to set
-node attributes appropriately: +container_attribute_target+ (identical to the
-meta-attribute value) and +physical_host+ (the name of the underlying host).
+node attributes appropriately: +CRM_meta_container_attribute_target+ (identical
+to the meta-attribute value) and +CRM_meta_physical_host+ (the name of the
+underlying host).
[NOTE]
====
-It is up to the resource agent to check for the additional variables and use
-them when setting node attributes.
+When called by a resource agent, the attrd_updater and crm_attribute commands
+will automatically check those environment variables and set attributes
+appropriately.
====
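In practice, a bundled agent can therefore set user-defined attributes in the
usual way and let the tools route the update. A minimal sketch (attribute name
and value are illustrative):
.Setting a user-defined node attribute from a bundled agent
====
[source,Bash]
----
# Sketch only: with container-attribute-target=host, attrd_updater itself
# consults the environment variables above and targets the physical host.
attrd_updater -n "my_app_generation" -U "42"
----
====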
=== Bundle Meta-Attributes ===
Any meta-attribute set on a bundle will be inherited by the bundle's
primitive and any resources implicitly created by Pacemaker for the bundle.
This includes options such as +priority+, +target-role+, and +is-managed+. See
<<s-resource-options>> for more information.
=== Limitations of Bundles ===
Restarting pacemaker while a bundle is unmanaged or the cluster is in
maintenance mode may cause the bundle to fail.
-Bundles may not be cloned or included in groups. This includes the bundle's
-primitive and any resources implicitly created by Pacemaker for the bundle.
+Bundles may not be explicitly cloned or included in groups. This includes the
+bundle's primitive and any resources implicitly created by Pacemaker for the
+bundle. (If +replicas+ is greater than 1, the bundle will behave like a clone
+implicitly.)
Bundles do not have instance attributes, utilization attributes, or operations,
though a bundle's primitive may have them.
A bundle with a primitive can run on a Pacemaker Remote node only if the bundle
uses a distinct +control-port+.
diff --git a/extra/resources/ClusterMon.in b/extra/resources/ClusterMon.in
index 04fb43f1e6..2cd2d57f23 100755
--- a/extra/resources/ClusterMon.in
+++ b/extra/resources/ClusterMon.in
@@ -1,276 +1,274 @@
#!@BASH_PATH@
#
#
# ClusterMon OCF RA.
# Starts crm_mon in background which logs cluster status as
# html to the specified file.
#
-# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée
+# Copyright 2004-2018 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
# OCF instance parameters:
# OCF_RESKEY_user
# OCF_RESKEY_pidfile
# OCF_RESKEY_update
# OCF_RESKEY_extra_options
# OCF_RESKEY_htmlfile
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ClusterMon" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This is a ClusterMon Resource Agent.
It outputs the current cluster status to an HTML file.
</longdesc>
<shortdesc lang="en">Runs crm_mon in the background, recording the cluster status to an HTML file</shortdesc>
<parameters>
<parameter name="user" unique="0">
<longdesc lang="en">
The user we want to run crm_mon as
</longdesc>
<shortdesc lang="en">The user we want to run crm_mon as</shortdesc>
<content type="string" default="root" />
</parameter>
<parameter name="update" unique="0">
<longdesc lang="en">
How frequently (in milliseconds) the cluster status should be updated.
For compatibility with old documentation, values less than 1000 are treated
as seconds.
</longdesc>
<shortdesc lang="en">Update interval in milliseconds</shortdesc>
<content type="integer" default="15000" />
</parameter>
<parameter name="extra_options" unique="0">
<longdesc lang="en">
Additional options to pass to crm_mon, e.g. "-n -r".
</longdesc>
<shortdesc lang="en">Extra options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="pidfile" unique="1">
<longdesc lang="en">
PID file location to ensure only one instance is running
</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid" />
</parameter>
<parameter name="htmlfile" unique="1" required="0">
<longdesc lang="en">
Location to write HTML output to.
</longdesc>
<shortdesc lang="en">HTML output</shortdesc>
<content type="string" default="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" depth="0" timeout="20" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
ClusterMon_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
ClusterMon_exit() {
if [ $1 != 0 ]; then
exit $OCF_ERR_GENERIC
else
exit $OCF_SUCCESS
fi
}
ClusterMon_start() {
- cmd_prefix=""
- cmd_suffix=""
if [ ! -z $OCF_RESKEY_user ]; then
su - $OCF_RESKEY_user -c "$CMON_CMD"
else
$CMON_CMD
fi
ClusterMon_exit $?
}
ClusterMon_stop() {
if [ -f $OCF_RESKEY_pidfile ]; then
pid=`cat $OCF_RESKEY_pidfile`
if [ ! -z $pid ]; then
kill -s 9 $pid
rm -f $OCF_RESKEY_pidfile
fi
fi
ClusterMon_exit 0
}
ClusterMon_monitor() {
if [ -f $OCF_RESKEY_pidfile ]; then
pid=`cat $OCF_RESKEY_pidfile`
if [ ! -z $pid ]; then
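# Mask every character of the launch command that appears in "crm_mon"
# (plus separators) before using it as the ps column header, so the
# header line itself can never match the grep below.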
str=$(echo "su - $OCF_RESKEY_user -c \"$CMON_CMD\"" | tr 'crmon, \t' 'xxxxxxxx')
ps -o "args=${str}" -p $pid 2>/dev/null | \
grep -qE "[c]rm_mon.*${OCF_RESKEY_pidfile}"
rc=$?
case $rc in
0) exit $OCF_SUCCESS;;
1) exit $OCF_NOT_RUNNING;;
*) exit $OCF_ERR_GENERIC;;
esac
fi
fi
exit $OCF_NOT_RUNNING
}
CheckOptions() {
while getopts Vi:nrh:cdp: OPTION
do
case $OPTION in
V|n|r|c|d);;
i) ocf_log warn "You should not have specified the -i option, since OCF_RESKEY_update is set already!";;
h) ocf_log warn "You should not have specified the -h option, since OCF_RESKEY_htmlfile is set already!";;
p) ocf_log warn "You should not have specified the -p option, since OCF_RESKEY_pidfile is set already!";;
*) return $OCF_ERR_ARGS;;
esac
done
if [ $? -ne 0 ]; then
return $OCF_ERR_ARGS
fi
# We should have eaten all options at this stage
shift $(($OPTIND -1))
if [ $# -gt 0 ]; then
false
else
true
fi
}
ClusterMon_validate() {
# Existence of the user
if [ ! -z $OCF_RESKEY_user ]; then
getent passwd "$OCF_RESKEY_user" >/dev/null
if [ $? -eq 0 ]; then
: Yes, user exists. We can further check his permission on crm_mon if necessary
else
ocf_log err "The user $OCF_RESKEY_user does not exist!"
exit $OCF_ERR_ARGS
fi
fi
# Pidfile better be an absolute path
case $OCF_RESKEY_pidfile in
/*) ;;
*) ocf_log warn "You should have pidfile($OCF_RESKEY_pidfile) of absolute path!" ;;
esac
# Check the update interval
if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then
:
else
ocf_log err "Invalid update interval $OCF_RESKEY_update. It should be positive integer!"
exit $OCF_ERR_ARGS
fi
if CheckOptions $OCF_RESKEY_extra_options; then
:
else
ocf_log err "Invalid options $OCF_RESKEY_extra_options!"
exit $OCF_ERR_ARGS
fi
# Htmlfile better be an absolute path
case $OCF_RESKEY_htmlfile in
/*) ;;
*) ocf_log warn "You should have htmlfile($OCF_RESKEY_htmlfile) of absolute path!" ;;
esac
echo "Validate OK"
return $OCF_SUCCESS
}
if [ $# -ne 1 ]; then
ClusterMon_usage
exit $OCF_ERR_ARGS
fi
: ${OCF_RESKEY_update:="15000"}
: ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"}
: ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"}
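# The update interval is configured in milliseconds, but crm_mon's -i option
# takes seconds; values below 1000 are already treated as seconds (see the
# "update" parameter description above).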
if [ ${OCF_RESKEY_update} -ge 1000 ]; then
OCF_RESKEY_update=$(( $OCF_RESKEY_update / 1000 ))
fi
CMON_CMD="${HA_SBIN_DIR}/crm_mon -p $OCF_RESKEY_pidfile -d -i $OCF_RESKEY_update $OCF_RESKEY_extra_options -h $OCF_RESKEY_htmlfile"
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) ClusterMon_start
;;
stop) ClusterMon_stop
;;
monitor) ClusterMon_monitor
;;
validate-all) ClusterMon_validate
;;
usage|help) ClusterMon_usage
exit $OCF_SUCCESS
;;
*) ClusterMon_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
diff --git a/extra/resources/Dummy b/extra/resources/Dummy
index 722b9108cd..bb311117e6 100755
--- a/extra/resources/Dummy
+++ b/extra/resources/Dummy
@@ -1,271 +1,269 @@
#!/bin/sh
#
# Dummy OCF RA. Does nothing but wait a few seconds, can be
# configured to fail occasionally.
#
# Copyright 2004-2018 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This source code is licensed under the GNU General Public License version 2
# (GPLv2) WITHOUT ANY WARRANTY.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Dummy" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This is a Dummy Resource Agent. It does absolutely nothing except
keep track of whether it's running or not.
Its purpose in life is for testing and to serve as a template for RA writers.
NB: Please pay attention to the timeouts specified in the actions
section below. They should be meaningful for the kind of resource
the agent manages. They should be the minimum advised timeouts,
but they cannot cover _all_ possible resource
instances. So, try to be neither overly generous nor too stingy,
but moderate. The minimum timeouts should never be below 10 seconds.
</longdesc>
<shortdesc lang="en">Example stateless resource agent</shortdesc>
<parameters>
<parameter name="state" unique="1">
<longdesc lang="en">
Location to store the resource state in.
</longdesc>
<shortdesc lang="en">State file</shortdesc>
<content type="string" default="${HA_VARRUN%%/}/Dummy-${OCF_RESOURCE_INSTANCE}.state" />
</parameter>
<parameter name="passwd" unique="1">
<longdesc lang="en">
Fake password field
</longdesc>
<shortdesc lang="en">Password</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="fake" unique="0">
<longdesc lang="en">
Fake attribute that can be changed to cause a reload
</longdesc>
<shortdesc lang="en">Fake attribute that can be changed to cause a reload</shortdesc>
<content type="string" default="dummy" />
</parameter>
<parameter name="op_sleep" unique="1">
<longdesc lang="en">
Number of seconds to sleep during operations. This can be used to test how
the cluster reacts to operation timeouts.
</longdesc>
<shortdesc lang="en">Operation sleep duration in seconds.</shortdesc>
<content type="string" default="0" />
</parameter>
<parameter name="fail_start_on" unique="0">
<longdesc lang="en">
Start actions will return failure if running on the host specified here, but
the resource will start successfully anyway (future monitor calls will find it
running). This can be used to test on-fail=ignore.
</longdesc>
<shortdesc lang="en">Report bogus start failure on specified host</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="envfile" unique="1">
<longdesc lang="en">
If this is set, the environment will be dumped to this file for every call.
</longdesc>
<shortdesc lang="en">Environment dump file</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0"/>
<action name="reload" timeout="20" />
<action name="migrate_to" timeout="20" />
<action name="migrate_from" timeout="20" />
<action name="validate-all" timeout="20" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# don't exit on TERM, to test that pacemaker-execd makes sure that we do exit
trap sigterm_handler TERM
sigterm_handler() {
ocf_log info "They use TERM to bring us down. No such luck."
return
}
dummy_usage() {
cat <<END
usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
dump_env() {
if [ "${OCF_RESKEY_envfile}" != "" ]; then
echo "### ${__OCF_ACTION} @ $(date) ###
$(env | sort)
###" >> "${OCF_RESKEY_envfile}"
fi
}
dummy_start() {
- local RETVAL
-
dummy_monitor
- RETVAL=$?
- if [ $RETVAL -eq $OCF_SUCCESS ]; then
+ DS_RETVAL=$?
+ if [ $DS_RETVAL -eq $OCF_SUCCESS ]; then
if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then
- RETVAL=$OCF_ERR_GENERIC
+ DS_RETVAL=$OCF_ERR_GENERIC
fi
- return $RETVAL
+ return $DS_RETVAL
fi
touch "${OCF_RESKEY_state}"
- RETVAL=$?
+ DS_RETVAL=$?
if [ "$(uname -n)" = "${OCF_RESKEY_fail_start_on}" ]; then
- RETVAL=$OCF_ERR_GENERIC
+ DS_RETVAL=$OCF_ERR_GENERIC
fi
- return $RETVAL
+ return $DS_RETVAL
}
dummy_stop() {
dummy_monitor --force
if [ $? -eq $OCF_SUCCESS ]; then
rm ${OCF_RESKEY_state}
fi
rm -f "${VERIFY_SERIALIZED_FILE}"
return $OCF_SUCCESS
}
dummy_monitor() {
# Monitor _MUST!_ differentiate correctly between running
# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
# That is THREE states, not just yes/no.
if [ $OCF_RESKEY_op_sleep -ne 0 ]; then
- if [ "$1" = "" -a -f "${VERIFY_SERIALIZED_FILE}" ]; then
+ if [ "$1" = "" ] && [ -f "${VERIFY_SERIALIZED_FILE}" ]; then
# two monitor ops have occurred at the same time.
# This verifies a condition in pacemaker-execd regression tests.
ocf_log err "$VERIFY_SERIALIZED_FILE exists already"
return $OCF_ERR_GENERIC
fi
touch "${VERIFY_SERIALIZED_FILE}"
sleep ${OCF_RESKEY_op_sleep}
rm "${VERIFY_SERIALIZED_FILE}"
fi
if [ -f "${OCF_RESKEY_state}" ]; then
# Multiple monitor levels are defined to support various tests
case "$OCF_CHECK_LEVEL" in
10)
# monitor level with delay, useful for testing timeouts
sleep 30
;;
20)
# monitor level that fails intermittently
- n=$(expr $(dd if=/dev/urandom bs=1 count=1 2>/dev/null | od | head -1 | cut -f2 -d' ') % 5)
+ n=$(expr "$(dd if=/dev/urandom bs=1 count=1 2>/dev/null | od | head -1 | cut -f2 -d' ')" % 5)
if [ $n -eq 1 ]; then
ocf_exit_reason "smoke detected near CPU fan"
return $OCF_ERR_GENERIC
fi
;;
30)
# monitor level that always fails
ocf_exit_reason "hyperdrive quota reached"
return $OCF_ERR_GENERIC
;;
*)
;;
esac
return $OCF_SUCCESS
fi
return $OCF_NOT_RUNNING
}
dummy_validate() {
# Is the state directory writable?
state_dir=`dirname "$OCF_RESKEY_state"`
touch "$state_dir/$$"
if [ $? -ne 0 ]; then
return $OCF_ERR_ARGS
fi
rm "$state_dir/$$"
return $OCF_SUCCESS
}
: ${OCF_RESKEY_fake=dummy}
: ${OCF_RESKEY_op_sleep=0}
: ${OCF_RESKEY_CRM_meta_interval=0}
: ${OCF_RESKEY_CRM_meta_globally_unique:="false"}
if [ -z "$OCF_RESKEY_state" ]; then
OCF_RESKEY_state="${HA_VARRUN%%/}/Dummy-${OCF_RESOURCE_INSTANCE}.state"
if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
# Strip off the trailing clone marker (note + is not portable in sed)
OCF_RESKEY_state=`echo $OCF_RESKEY_state | sed s/:[0-9][0-9]*\.state/.state/`
fi
fi
VERIFY_SERIALIZED_FILE="${OCF_RESKEY_state}.serialized"
dump_env
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) dummy_start;;
stop) dummy_stop;;
monitor) dummy_monitor;;
migrate_to) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
dummy_stop
;;
migrate_from) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}."
dummy_start
;;
reload) ocf_log err "Reloading..."
dummy_start
;;
validate-all) dummy_validate;;
usage|help) dummy_usage
exit $OCF_SUCCESS
;;
*) dummy_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/extra/resources/HealthSMART.in b/extra/resources/HealthSMART.in
index 9e91298a9e..e02060581e 100755
--- a/extra/resources/HealthSMART.in
+++ b/extra/resources/HealthSMART.in
@@ -1,328 +1,328 @@
#!@BASH_PATH@
#
#
# HealthSMART OCF RA. Checks the S.M.A.R.T. status of all given
# drives and writes the #health-smart status into the CIB
#
# Copyright (c) 2009 Michael Schwartzkopff, 2010 Matthew Richardson
#
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
#
SMARTCTL=/usr/sbin/smartctl
ATTRDUP=/usr/sbin/attrd_updater
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="HealthSMART" version="0.1">
<version>1.0</version>
<longdesc lang="en">
System health agent that checks the S.M.A.R.T. status of the given drives and
updates the #health-smart attribute.
</longdesc>
<shortdesc lang="en">SMART health status</shortdesc>
<parameters>
<parameter name="state" unique="1">
<longdesc lang="en">
Location to store the resource state in.
</longdesc>
<shortdesc lang="en">State file</shortdesc>
<content type="string" default="${HA_VARRUN%%/}/HealthSMART-${OCF_RESOURCE_INSTANCE}.state" />
</parameter>
<parameter name="drives" unique="0">
<longdesc lang="en">
The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda".
</longdesc>
<shortdesc lang="en">Drives to check</shortdesc>
<content type="string" default="/dev/sda" />
</parameter>
<parameter name="devices" unique="0">
<longdesc lang="en">
The device type(s) to assume for the drive(s) being tested as a SPACE separated list.
</longdesc>
<shortdesc lang="en">Device types</shortdesc>
<content type="string" />
</parameter>
<parameter name="temp_lower_limit" unique="0">
<longdesc lang="en">
Lower limit of the temperature in deg C of the drive(s). Below this limit the status will be red.
</longdesc>
<shortdesc lang="en">Lower limit for the red smart attribute</shortdesc>
<content type="string" default="0"/>
</parameter>
<parameter name="temp_upper_limit" unique="0">
<longdesc lang="en">
Upper limit of the temperature in deg C of the drive(s). If a drive reports
a temperature higher than this value, the status of #health-smart will be red.
</longdesc>
<shortdesc lang="en">Upper limit for red smart attribute</shortdesc>
<content type="string" default="60"/>
</parameter>
<parameter name="temp_warning" unique="0">
<longdesc lang="en">
Number of deg C below/above the upper/lower temp limits at which the status of #health-smart will change to yellow.
</longdesc>
<shortdesc lang="en">Deg C below/above the upper limits for yellow smart attribute</shortdesc>
<content type="string" default="5"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="10" />
<action name="stop" timeout="10" />
<action name="monitor" timeout="10" interval="10" start-delay="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="10" />
</actions>
</resource-agent>
END
}
#######################################################################
check_temperature() {
if [ $1 -lt ${lower_red_limit} ] ; then
ocf_log info "Drive ${DRIVE} ${DEVICE} too cold: ${1} C"
$ATTRDUP -n "#health-smart" -U "red" -d "5s"
return 1
fi
if [ $1 -gt ${upper_red_limit} ] ; then
ocf_log info "Drive ${DRIVE} ${DEVICE} too hot: ${1} C"
$ATTRDUP -n "#health-smart" -U "red" -d "5s"
return 1
fi
if [ $1 -lt ${lower_yellow_limit} ] ; then
ocf_log info "Drive ${DRIVE} ${DEVICE} quite cold: ${1} C"
$ATTRDUP -n "#health-smart" -U "yellow" -d "5s"
return 1
fi
if [ $1 -gt ${upper_yellow_limit} ] ; then
ocf_log info "Drive ${DRIVE} ${DEVICE} quite hot: ${1} C"
$ATTRDUP -n "#health-smart" -U "yellow" -d "5s"
return 1
fi
}
init_smart() {
#Set temperature defaults
if [ -z ${OCF_RESKEY_temp_warning} ]; then
yellow_threshold=5
else
yellow_threshold=${OCF_RESKEY_temp_warning}
fi
if [ -z ${OCF_RESKEY_temp_lower_limit} ] ; then
lower_red_limit=0
else
lower_red_limit=${OCF_RESKEY_temp_lower_limit}
fi
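# The yellow limits sit temp_warning deg C inside the respective red limits.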
lower_yellow_limit=$((${lower_red_limit}+${yellow_threshold}))
if [ -z ${OCF_RESKEY_temp_upper_limit} ] ; then
upper_red_limit=60
else
upper_red_limit=${OCF_RESKEY_temp_upper_limit}
fi
upper_yellow_limit=$((${upper_red_limit}-${yellow_threshold}))
#Set disk defaults
if [ -z "${OCF_RESKEY_drives}" ] ; then
DRIVES="/dev/sda"
else
DRIVES=${OCF_RESKEY_drives}
fi
#Test for presence of smartctl
if [ ! -x $SMARTCTL ] ; then
ocf_log err "${SMARTCTL} not installed."
exit $OCF_ERR_INSTALLED
fi
for DRIVE in $DRIVES; do
if [ "${OCF_RESKEY_devices}" ]; then
for DEVICE in ${OCF_RESKEY_devices}; do
$SMARTCTL -d $DEVICE -i ${DRIVE} | grep -q "SMART support is: Enabled"
if [ $? -ne "0" ] ; then
ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE}
exit $OCF_ERR_INSTALLED
fi
done
else
$SMARTCTL -i ${DRIVE} | grep -q "SMART support is: Enabled"
if [ $? -ne "0" ] ; then
ocf_log err "S.M.A.R.T. not enabled for drive "${DRIVE}
exit $OCF_ERR_INSTALLED
fi
fi
done
}
HealthSMART_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
HealthSMART_start() {
HealthSMART_monitor
if [ $? = $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
touch ${OCF_RESKEY_state}
}
HealthSMART_stop() {
HealthSMART_monitor
if [ $? = $OCF_SUCCESS ]; then
rm ${OCF_RESKEY_state}
fi
return $OCF_SUCCESS
}
HealthSMART_monitor() {
init_smart
# Monitor _MUST!_ differentiate correctly between running
# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
# That is THREE states, not just yes/no.
if [ -f ${OCF_RESKEY_state} ]; then
# Check overall S.M.A.R.T. status
for DRIVE in $DRIVES; do
if [ "${OCF_RESKEY_devices}" ]; then
for DEVICE in ${OCF_RESKEY_devices}; do
$SMARTCTL -d $DEVICE -H ${DRIVE} | grep -q "SMART overall-health self-assessment test result: PASSED"
if [ $? -ne "0" ]; then
$ATTRDUP -n "#health-smart" -U "red" -d "5s"
return $OCF_SUCCESS
fi
done
else
$SMARTCTL -H ${DRIVE} | grep -q "SMART overall-health self-assessment test result: PASSED"
if [ $? -ne "0" ]; then
$ATTRDUP -n "#health-smart" -U "red" -d "5s"
return $OCF_SUCCESS
fi
fi
# Check drive temperature(s)
if [ "${OCF_RESKEY_devices}" ]; then
for DEVICE in ${OCF_RESKEY_devices}; do
- check_temperature `$SMARTCTL -d $DEVICE -A ${DRIVE} | awk '/^194/ { print $10 }'`
- if [ $? != 0 ]; then
+ check_temperature "$("$SMARTCTL" -d "$DEVICE" -A "${DRIVE}" | awk '/^194/ { print $10 }')"
+ if [ $? -ne 0 ]; then
return $OCF_SUCCESS
fi
done
else
- check_temperature `$SMARTCTL -A ${DRIVE} | awk '/^194/ { print $10 }'`
- if [ $? != 0 ]; then
+ check_temperature "$("$SMARTCTL" -A "${DRIVE}" | awk '/^194/ { print $10 }')"
+ if [ $? -ne 0 ]; then
return $OCF_SUCCESS
fi
fi
done
$ATTRDUP -n "#health-smart" -U "green" -d "5s"
return $OCF_SUCCESS
fi
return $OCF_NOT_RUNNING
}
HealthSMART_validate() {
init_smart
# Is the state directory writable?
state_dir=`dirname "$OCF_RESKEY_state"`
touch "$state_dir/$$"
if [ $? != 0 ]; then
return $OCF_ERR_ARGS
fi
rm "$state_dir/$$"
return $OCF_SUCCESS
}
: ${OCF_RESKEY_CRM_meta_interval=0}
: ${OCF_RESKEY_CRM_meta_globally_unique:="true"}
if [ "x$OCF_RESKEY_state" = "x" ]; then
if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
state="${HA_VARRUN%%/}/HealthSMART-${OCF_RESOURCE_INSTANCE}.state"
# Strip off the trailing clone marker
OCF_RESKEY_state=`echo $state | sed s/:[0-9][0-9]*\.state/.state/`
else
OCF_RESKEY_state="${HA_VARRUN%%/}/HealthSMART-${OCF_RESOURCE_INSTANCE}.state"
fi
fi
case $__OCF_ACTION in
start) HealthSMART_start;;
stop) HealthSMART_stop;;
monitor) HealthSMART_monitor;;
validate-all) HealthSMART_validate;;
meta-data)
meta_data
exit $OCF_SUCCESS
;;
usage|help)
HealthSMART_usage
exit $OCF_SUCCESS
;;
*) HealthSMART_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/extra/resources/Stateful b/extra/resources/Stateful
index 6c2f1b6a6b..f1b7a40169 100755
--- a/extra/resources/Stateful
+++ b/extra/resources/Stateful
@@ -1,231 +1,231 @@
#!/bin/sh
#
# Example of a stateful OCF Resource Agent
# Copyright 2006-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
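# Promotion scores set via $CRM_MASTER use the "reboot" lifetime, i.e. they
# are stored as transient attributes and cleared when the node restarts.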
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Stateful" version="1.1">
<version>1.0</version>
<longdesc lang="en">
This is an example resource agent that implements two states
</longdesc>
<shortdesc lang="en">Example stateful resource agent</shortdesc>
<parameters>
<parameter name="state" unique="1">
<longdesc lang="en">
Location to store the resource state in
</longdesc>
<shortdesc lang="en">State file</shortdesc>
<content type="string" default="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state" />
</parameter>
<parameter name="envfile" unique="1">
<longdesc lang="en">
If this is set, the environment will be dumped to this file for every call.
</longdesc>
<shortdesc lang="en">Environment dump file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="notify_delay" unique="0">
<longdesc lang="en">
The notify action will sleep for this many seconds before returning,
to simulate a long-running notify.
</longdesc>
<shortdesc lang="en">Notify delay in seconds</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" depth="0" timeout="20" interval="10" role="Master"/>
<action name="monitor" depth="0" timeout="20" interval="10" role="Slave"/>
<action name="notify" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
exit $OCF_SUCCESS
}
#######################################################################
stateful_usage() {
cat <<END
usage: $0 {start|stop|promote|demote|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
exit $1
}
stateful_update() {
echo $1 > ${OCF_RESKEY_state}
}
stateful_check_state() {
target=$1
if [ -f ${OCF_RESKEY_state} ]; then
state=`cat ${OCF_RESKEY_state}`
if [ "x$target" = "x$state" ]; then
return 0
fi
else
if [ "x$target" = "x" ]; then
return 0
fi
fi
return 1
}
dump_env() {
if [ "${OCF_RESKEY_envfile}" != "" ]; then
echo "### ${__OCF_ACTION} @ $(date) ###
$(env | sort)
###" >> "${OCF_RESKEY_envfile}"
fi
}
stateful_start() {
stateful_check_state master
if [ $? = 0 ]; then
# CRM Error - Should never happen
return $OCF_RUNNING_MASTER
fi
stateful_update slave
$CRM_MASTER -v ${slave_score}
return 0
}
stateful_demote() {
stateful_check_state
if [ $? = 0 ]; then
# CRM Error - Should never happen
return $OCF_NOT_RUNNING
fi
stateful_update slave
$CRM_MASTER -v ${slave_score}
return 0
}
stateful_promote() {
stateful_check_state
if [ $? = 0 ]; then
return $OCF_NOT_RUNNING
fi
stateful_update master
$CRM_MASTER -v ${master_score}
return 0
}
stateful_stop() {
$CRM_MASTER -D
stateful_check_state master
if [ $? = 0 ]; then
# CRM Error - Should never happen
return $OCF_RUNNING_MASTER
fi
if [ -f ${OCF_RESKEY_state} ]; then
rm ${OCF_RESKEY_state}
fi
return 0
}
stateful_monitor() {
stateful_check_state "master"
if [ $? = 0 ]; then
if [ $OCF_RESKEY_CRM_meta_interval = 0 ]; then
# Restore the master setting during probes
$CRM_MASTER -v ${master_score}
fi
return $OCF_RUNNING_MASTER
fi
stateful_check_state "slave"
if [ $? = 0 ]; then
if [ $OCF_RESKEY_CRM_meta_interval = 0 ]; then
# Restore the master setting during probes
$CRM_MASTER -v ${slave_score}
fi
return $OCF_SUCCESS
fi
if [ -f ${OCF_RESKEY_state} ]; then
echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents"
cat ${OCF_RESKEY_state}
return $OCF_ERR_GENERIC
fi
return 7
}
stateful_notify() {
if [ "${OCF_RESKEY_notify_delay}" != "0" ]; then
sleep "${OCF_RESKEY_notify_delay}"
fi
return $OCF_SUCCESS
}
stateful_validate() {
exit $OCF_SUCCESS
}
: ${slave_score=5}
: ${master_score=10}
: ${OCF_RESKEY_CRM_meta_interval=0}
-: ${OCF_RESKEY_CRM_notify_delay=0}
+: ${OCF_RESKEY_notify_delay=0}
: ${OCF_RESKEY_CRM_meta_globally_unique:="false"}
if [ "x$OCF_RESKEY_state" = "x" ]; then
if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
state="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state"
# Strip off the trailing clone marker
OCF_RESKEY_state=`echo $state | sed s/:[0-9][0-9]*\.state/.state/`
else
OCF_RESKEY_state="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state"
fi
fi
dump_env
case $__OCF_ACTION in
meta-data) meta_data;;
start) stateful_start;;
promote) stateful_promote;;
demote) stateful_demote;;
notify) stateful_notify ;;
stop) stateful_stop;;
monitor) stateful_monitor;;
validate-all) stateful_validate;;
usage|help) stateful_usage $OCF_SUCCESS;;
*) stateful_usage $OCF_ERR_UNIMPLEMENTED;;
esac
exit $?
diff --git a/extra/resources/SysInfo.in b/extra/resources/SysInfo.in
index 24c259cc1b..8cba0e5cbe 100755
--- a/extra/resources/SysInfo.in
+++ b/extra/resources/SysInfo.in
@@ -1,386 +1,382 @@
#!@BASH_PATH@
#
#
# SysInfo OCF Resource Agent
# It records (in the CIB) various attributes of a node
#
-# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée
+# Copyright 2004-2018 SUSE LINUX AG, Lars Marowsky-Brée
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SysInfo" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This is a SysInfo Resource Agent.
It records (in the CIB) various attributes of a node
Sample Linux output:
arch: i686
os: Linux-2.4.26-gentoo-r14
free_swap: 1999
cpu_info: Intel(R) Celeron(R) CPU 2.40GHz
cpu_speed: 4771.02
cpu_cores: 1
cpu_load: 0.00
ram_total: 513
ram_free: 117
root_free: 2.4
#health_disk: red
Sample Darwin output:
arch: i386
os: Darwin-8.6.2
cpu_info: Intel Core Duo
cpu_speed: 2.16
cpu_cores: 2
cpu_load: 0.18
ram_total: 2016
ram_free: 787
root_free: 13
#health_disk: green
Units:
free_swap: MB
ram_*: MB
cpu_speed (Linux): bogomips
cpu_speed (Darwin): GHz
*_free: GB (or user-defined: disk_unit)
</longdesc>
<shortdesc lang="en">SysInfo resource agent</shortdesc>
<parameters>
<parameter name="pidfile" unique="1">
<longdesc lang="en">PID file</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="$OCF_RESKEY_pidfile" />
</parameter>
<parameter name="delay" unique="0">
<longdesc lang="en">Interval to allow values to stabilize</longdesc>
<shortdesc lang="en">Dampening Delay</shortdesc>
<content type="string" default="0s" />
</parameter>
<parameter name="disks" unique="0">
<longdesc lang="en">
Filesystems or Paths to be queried for free disk space as a SPACE
separated list - e.g "/dev/sda1 /tmp".
Results will be written to an attribute with leading slashes
removed, and other slashes replaced with underscore, and the word
'free' appended - e.g for /dev/sda1 it would be 'dev_sda1_free'.
Note: The root filesystem '/' is always queried to an attribute
named 'root_free'
</longdesc>
<shortdesc lang="en">List of Filesytems/Paths to query for free disk space</shortdesc>
<content type="string" />
</parameter>
<parameter name="disk_unit" unique="0">
<longdesc lang="en">
Unit to report disk free space in.
Can be one of: B, K, M, G, T, P (case-insensitive)
</longdesc>
<shortdesc lang="en">Unit to report disk free space in</shortdesc>
<content type="string" default="G"/>
</parameter>
<parameter name="min_disk_free" unique="0">
<longdesc lang="en">
The amount of free space required in monitored disks. If any
of the monitored disks has less than this amount of free space,
the node attribute "#health_disk" will change to "red",
and all resources will move away from the node. Set the node-health-strategy
property appropriately for this to take effect.
If the unit is not specified, it defaults to disk_unit.
</longdesc>
<shortdesc lang="en">minimum disk free space required</shortdesc>
<content type="string" default=""/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" timeout="20s" interval="60s"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
UpdateStat() {
name=$1; shift
value="$*"
printf "%s:\t%s\n" "$name" "$value"
if [ "$__OCF_ACTION" = "start" ] ; then
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -B "$value"
else
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value"
fi
}
SysInfoStats() {
UpdateStat arch "`uname -m`"
UpdateStat os "`uname -s`-`uname -r`"
case `uname -s` in
"Darwin")
mem=`top -l 1 | grep Mem: | awk '{print $10}'`
mem_used=`top -l 1 | grep Mem: | awk '{print $8}'`
mem=`SysInfo_mem_units $mem`
mem_used=`SysInfo_mem_units $mem_used`
mem_total=`expr $mem_used + $mem`
cpu_type=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Type/ {print $2; exit}'`
cpu_speed=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Speed/ {print $2; exit}'`
cpu_cores=`system_profiler SPHardwareDataType | awk -F': ' '/^Number Of/ {print $2; exit}'`
cpu_load=`uptime | awk '{ print $10 }'`
;;
"FreeBSD")
cpu_type=`sysctl -in hw.model`
cpu_speed=`sysctl -in dev.cpu.0.freq`
cpu_cores=`sysctl -in hw.ncpu`
cpu_load=`sysctl -in vm.loadavg | awk '{ print $4 }'`
free_pages=`sysctl -in vm.stats.vm.v_free_count`
page_count=`sysctl -in vm.stats.vm.v_page_count`
page_size=`sysctl -in vm.stats.vm.v_page_size`
mem=`expr $free_pages \* $page_size / 1024 / 1024`M
mem_total=`expr $page_count \* $page_size / 1024 / 1024`M
;;
"Linux")
if [ -f /proc/cpuinfo ]; then
cpu_type=`awk -F': ' '/model name/ {print $2; exit}' /proc/cpuinfo`
cpu_speed=`awk -F': ' '/bogomips/ {print $2; exit}' /proc/cpuinfo`
cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l`
fi
cpu_load=`uptime | awk '{ print $10 }'`
if [ -f /proc/meminfo ]; then
# meminfo results are in kB
mem=`grep "SwapFree" /proc/meminfo | awk '{print $2"k"}'`
if [ ! -z $mem ]; then
- UpdateStat free_swap `SysInfo_mem_units $mem`
+ UpdateStat free_swap "$(SysInfo_mem_units "$mem")"
fi
mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'`
mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'`
else
mem=`top -n 1 | grep Mem: | awk '{print $7}'`
fi
;;
*)
esac
if [ x != x"$cpu_type" ]; then
UpdateStat cpu_info "$cpu_type"
fi
if [ x != x"$cpu_speed" ]; then
UpdateStat cpu_speed "$cpu_speed"
fi
if [ x != x"$cpu_cores" ]; then
UpdateStat cpu_cores "$cpu_cores"
fi
if [ x != x"$cpu_load" ]; then
UpdateStat cpu_load "$cpu_load"
fi
if [ ! -z "$mem" ]; then
# Massage the memory values
- UpdateStat ram_total `SysInfo_mem_units $mem_total`
- UpdateStat ram_free `SysInfo_mem_units $mem`
+ UpdateStat ram_total "$(SysInfo_mem_units "$mem_total")"
+ UpdateStat ram_free "$(SysInfo_mem_units "$mem")"
fi
# Portability notes:
# o tail: explicit "-n" not available in Solaris; instead simplify
# 'tail -n <c>' to the equivalent 'tail -<c>'.
for disk in "/" ${OCF_RESKEY_disks}; do
unset disk_free disk_label
disk_free=`df -h ${disk} | tail -1 | awk '{print $4}'`
if [ x != x"$disk_free" ]; then
disk_label=`echo $disk | sed -e 's#^/$#root#;s#^/*##;s#/#_#g'`
disk_free=`SysInfo_hdd_units $disk_free`
UpdateStat ${disk_label}_free $disk_free
if [ -n "$MIN_FREE" ]; then
if [ $disk_free -le $MIN_FREE ]; then
UpdateStat "#health_disk" "red"
else
UpdateStat "#health_disk" "green"
fi
fi
fi
done
}
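# e.g. a hypothetical configuration with disks="/dev/sda1 /var/log" would
# produce the attributes root_free, dev_sda1_free, and var_log_free, plus
# "#health_disk" whenever min_disk_free is set.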
SysInfo_megabytes() {
# Size in megabytes
echo $1 | awk '{ n = $0;
sub(/[0-9]+(.[0-9]+)?/, "");
split(n, a, $0);
n=a[1];
if ($0 == "G" || $0 == "") { n *= 1024 };
if (/^kB?/) { n /= 1024 };
printf "%d\n", n }' # Intentionally round to an integer
}
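# Worked examples (hypothetical inputs, for illustration):
#   "513012k" -> /^kB?/ matches: 513012 / 1024 -> 500 (truncated by %d)
#   "2.5G"    -> "G" suffix:     2.5 * 1024    -> 2560
#   "512M"    -> "M" needs no conversion       -> 512
#   "4"       -> a bare number counts as gigabytes -> 4096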
SysInfo_mem_units() {
mem=$1
if [ -z $1 ]; then
return
fi
mem=$(SysInfo_megabytes "$1")
# Round to the next multiple of 50
r=$(($mem % 50))
if [ $r != 0 ]; then
mem=$(($mem + 50 - $r))
fi
echo $mem
}
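# Worked example (hypothetical input): SysInfo_mem_units "117M" gives
# 117 MB; 117 % 50 = 17, and 117 + 50 - 17 = 150, so "150" is echoed.
# Reported RAM values therefore always land on a 50MB boundary.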
SysInfo_hdd_units() {
# Defaults to size in gigabytes
case $OCF_RESKEY_disk_unit in
[Pp]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024 / 1024));;
[Tt]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024));;
[Gg]) echo $(($(SysInfo_megabytes "$1") / 1024));;
- [Mm]) echo $(SysInfo_megabytes "$1");;
+ [Mm]) echo "$(SysInfo_megabytes "$1")" ;;
[Kk]) echo $(($(SysInfo_megabytes "$1") * 1024));;
[Bb]) echo $(($(SysInfo_megabytes "$1") * 1024 * 1024));;
*)
ocf_log err "Invalid value for disk_unit: $OCF_RESKEY_disk_unit"
echo $(($(SysInfo_megabytes "$1") / 1024));;
esac
}
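# Worked example (hypothetical input): with OCF_RESKEY_disk_unit=G,
# SysInfo_hdd_units "2.4G" -> SysInfo_megabytes yields 2457, and
# 2457 / 1024 -> 2 (shell integer arithmetic truncates).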
SysInfo_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
SysInfo_start() {
echo $OCF_RESKEY_clone > $OCF_RESKEY_pidfile
SysInfoStats
exit $OCF_SUCCESS
}
SysInfo_stop() {
rm $OCF_RESKEY_pidfile
exit $OCF_SUCCESS
}
SysInfo_monitor() {
if [ -f $OCF_RESKEY_pidfile ]; then
clone=`cat $OCF_RESKEY_pidfile`
fi
if [ x$clone = x ]; then
rm $OCF_RESKEY_pidfile
exit $OCF_NOT_RUNNING
elif [ $clone = $OCF_RESKEY_clone ]; then
SysInfoStats
exit $OCF_SUCCESS
- elif [ x$OCF_RESKEY_CRM_meta_globally_unique = xtrue
- -o x$OCF_RESKEY_CRM_meta_globally_unique = xTrue
- -o x$OCF_RESKEY_CRM_meta_globally_unique = xyes
- -o x$OCF_RESKEY_CRM_meta_globally_unique = xYes
- ]; then
+ elif ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
SysInfoStats
exit $OCF_SUCCESS
fi
exit $OCF_NOT_RUNNING
}
SysInfo_validate() {
return $OCF_SUCCESS
}
if [ $# -ne 1 ]; then
SysInfo_usage
exit $OCF_ERR_ARGS
fi
: ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/SysInfo-${OCF_RESOURCE_INSTANCE}"}
: ${OCF_RESKEY_disk_unit:="G"}
: ${OCF_RESKEY_clone:="0"}
if [ x != x${OCF_RESKEY_delay} ]; then
OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}"
else
OCF_RESKEY_delay="-d 0"
fi
MIN_FREE=""
if [ -n "$OCF_RESKEY_min_disk_free" ]; then
ocf_is_decimal "$OCF_RESKEY_min_disk_free" &&
OCF_RESKEY_min_disk_free="$OCF_RESKEY_min_disk_free$OCF_RESKEY_disk_unit"
MIN_FREE=`SysInfo_hdd_units $OCF_RESKEY_min_disk_free`
fi
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) SysInfo_start
;;
stop) SysInfo_stop
;;
monitor) SysInfo_monitor
;;
validate-all) SysInfo_validate
;;
usage|help) SysInfo_usage
exit $OCF_SUCCESS
;;
*) SysInfo_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
diff --git a/extra/resources/SystemHealth b/extra/resources/SystemHealth
index 3e76fc3221..4f5701843a 100755
--- a/extra/resources/SystemHealth
+++ b/extra/resources/SystemHealth
@@ -1,252 +1,252 @@
#!/bin/sh
#
# SystemHealth OCF RA.
#
-# Copyright (c) 2009 International Business Machines (IBM), Mark Hamzy
+# Copyright 2009-2018 International Business Machines (IBM), Mark Hamzy
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SystemHealth" version="0.1">
<version>1.0</version>
<longdesc lang="en">
This is a SystemHealth Resource Agent. It is used to monitor
the health of a system via IPMI.
</longdesc>
<shortdesc lang="en">SystemHealth resource agent</shortdesc>
<parameters>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" />
<action name="reload" timeout="20" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
}
#######################################################################
SystemHealth_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
SystemHealth_check_tools() {
which servicelog_notify > /dev/null 2>&1
RC=$?
if [ $RC != 0 ]; then
ocf_log err "servicelog_notify not found!"
return $OCF_ERR_INSTALLED
fi
which ipmiservicelogd > /dev/null 2>&1
RC=$?
if [ $RC != 0 ]; then
ocf_log err "ipmiservicelogd not found!"
return $OCF_ERR_INSTALLED
fi
test -x $OCF_RESKEY_program
RC=$?
if [ $RC != 0 ]; then
ocf_log err "$OCF_RESKEY_program not found!"
return $OCF_ERR_INSTALLED
fi
}
SystemHealth_start() {
SystemHealth_monitor
RC=$?
if [ $RC = $OCF_ERR_GENERIC ]; then
return $OCF_ERR_GENERIC
elif [ $RC = $OCF_SUCCESS ]; then
ocf_log warn "starting an already started SystemHealth"
return $OCF_SUCCESS
fi
service ipmi start > /dev/null 2>&1
RC=$?
if [ $RC != 0 ]; then
ocf_log err "Could not start service IPMI!"
return $OCF_ERR_GENERIC
fi
ipmiservicelogd smi 0 > /dev/null 2>&1 &
RC=$?
if [ $RC != 0 ]; then
ocf_log err "Could not start ipmiservicelogd!"
return $OCF_ERR_GENERIC
fi
servicelog_notify --add --type=EVENT --command="$OCF_RESKEY_program" --method=num_arg --match='type=4' > /dev/null 2>&1
RC=$?
if [ $RC != 0 ]; then
ocf_log err "servicelog_notify register handler failed!"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
SystemHealth_stop() {
SystemHealth_monitor
RC=$?
if [ $RC = $OCF_ERR_GENERIC ]; then
return $OCF_ERR_GENERIC
elif [ $RC = $OCF_SUCCESS ]; then
killall ipmiservicelogd
RC1=$?
if [ $RC1 != 0 ]; then
ocf_log err "Could not stop ipmiservicelogd!"
fi
servicelog_notify --remove --command="$OCF_RESKEY_program" > /dev/null 2>&1
RC2=$?
if [ $RC2 != 0 ]; then
ocf_log err "servicelog_notify remove handler failed!"
fi
- if [ $RC1 = 0 -a $RC2 = 0 ]; then
+ if [ $RC1 -eq 0 ] && [ $RC2 -eq 0 ]; then
return $OCF_SUCCESS
else
return $OCF_ERR_GENERIC
fi
elif [ $RC = $OCF_NOT_RUNNING ]; then
ocf_log warn "stopping an already stopped SystemHealth"
return $OCF_SUCCESS
else
ocf_log err "SystemHealth_stop: should not be here!"
return $OCF_ERR_GENERIC
fi
}
SystemHealth_monitor() {
# Monitor _MUST!_ differentiate correctly between running
# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
# That is THREE states, not just yes/no.
if [ ! -f /var/run/ipmiservicelogd.pid0 ]; then
ocf_log debug "ipmiservicelogd is not running!"
return $OCF_NOT_RUNNING
fi
- ps -p `cat /var/run/ipmiservicelogd.pid0` > /dev/null 2>&1
+ ps -p "$(cat /var/run/ipmiservicelogd.pid0)" >/dev/null 2>&1
RC=$?
if [ $RC != 0 ]; then
ocf_log debug "ipmiservicelogd's pid `cat /var/run/ipmiservicelogd.pid0` is not running!"
rm /var/run/ipmiservicelogd.pid0
return $OCF_ERR_GENERIC
fi
servicelog_notify --list --command="$OCF_RESKEY_program" > /dev/null 2>&1
RC=$?
if [ $RC = 0 ]; then
return $OCF_SUCCESS
else
return $OCF_NOT_RUNNING
fi
}
SystemHealth_validate() {
SystemHealth_check_tools
RC=$?
if [ $RC != 0 ]; then
return $RC
fi
return $OCF_SUCCESS
}
: ${OCF_RESKEY_program=/usr/sbin/notifyServicelogEvent}
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
usage|help) SystemHealth_usage
exit $OCF_SUCCESS
;;
esac
SystemHealth_check_tools
RC=$?
if [ $RC != 0 ]; then
case $__OCF_ACTION in
stop) exit $OCF_SUCCESS;;
*) exit $RC;;
esac
fi
case $__OCF_ACTION in
start) SystemHealth_start;;
stop) SystemHealth_stop;;
monitor) SystemHealth_monitor;;
reload) ocf_log info "Reloading..."
SystemHealth_start
;;
validate-all) ;;
*) SystemHealth_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/extra/resources/attribute b/extra/resources/attribute
index 7444ec607e..80679d1d96 100755
--- a/extra/resources/attribute
+++ b/extra/resources/attribute
@@ -1,233 +1,233 @@
#!/bin/sh
#
# ocf:pacemaker:attribute resource agent
#
-# Copyright (C) 2016 Andrew Beekhof <andrew@beekhof.net>
+# Copyright 2016-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under GNU General Public License version 2 or
# later (GPLv2+) WITHOUT ANY WARRANTY.
#
USAGE="Usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set."
# Load OCF helper functions
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
# Ensure certain variables are set and not empty
: ${HA_VARRUN:="/var/run"}
: ${OCF_RESKEY_CRM_meta_globally_unique:="false"}
: ${OCF_RESOURCE_INSTANCE:="undef"}
DEFAULT_STATE_FILE="${HA_VARRUN%%/}/opa-${OCF_RESOURCE_INSTANCE}.state"
if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
# Strip off any trailing clone marker (note + is not portable in sed)
DEFAULT_STATE_FILE=$(echo "$DEFAULT_STATE_FILE" | sed s/:[0-9][0-9]*\.state/.state/)
fi
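# e.g. a hypothetical clone instance "myattr:2" would otherwise get
# ".../opa-myattr:2.state"; stripping the marker makes all anonymous clone
# instances share ".../opa-myattr.state".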
DEFAULT_ATTR_NAME="opa-${OCF_RESOURCE_INSTANCE}"
DEFAULT_ACTIVE_VALUE="1"
DEFAULT_INACTIVE_VALUE="0"
: ${OCF_RESKEY_state:="$DEFAULT_STATE_FILE"}
: ${OCF_RESKEY_name:="$DEFAULT_ATTR_NAME"}
# Values may be empty string
if [ -z ${OCF_RESKEY_active_value+x} ]; then
OCF_RESKEY_active_value="$DEFAULT_ACTIVE_VALUE"
fi
if [ -z ${OCF_RESKEY_inactive_value+x} ]; then
OCF_RESKEY_inactive_value="$DEFAULT_INACTIVE_VALUE"
fi
usage() {
USAGE_RC=$1
cat <<END
$USAGE
END
return $USAGE_RC
}
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="attribute" version="1.0">
<version>1.0</version>
<shortdesc lang="en">Manages a node attribute</shortdesc>
<longdesc lang="en">
This resource agent controls a node attribute for the node it's running on.
It sets the attribute one way when started, and another way when stopped,
according to the configuration parameters.
</longdesc>
<parameters>
<parameter name="state" unique="1">
<longdesc lang="en">
Full path of a temporary file to store the resource state in
</longdesc>
<shortdesc lang="en">State file</shortdesc>
<content type="string" default="${DEFAULT_STATE_FILE}" />
</parameter>
<parameter name="name" unique="1">
<longdesc lang="en">
Name of node attribute to manage
</longdesc>
<shortdesc lang="en">Attribute name</shortdesc>
<content type="string" default="${DEFAULT_ATTR_NAME}" />
</parameter>
<parameter name="active_value" unique="0">
<longdesc lang="en">
Value to use for node attribute when resource becomes active (empty string is
discouraged, because monitor cannot distinguish it from a query error)
</longdesc>
<shortdesc lang="en">Attribute value when active</shortdesc>
<content type="string" default="$DEFAULT_ACTIVE_VALUE" />
</parameter>
<parameter name="inactive_value" unique="0">
<longdesc lang="en">
Value to use for node attribute when resource becomes inactive
</longdesc>
<shortdesc lang="en">Attribute value when inactive</shortdesc>
<content type="string" default="$DEFAULT_INACTIVE_VALUE" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" depth="0"/>
<action name="reload" timeout="20" />
<action name="migrate_to" timeout="20" />
<action name="migrate_from" timeout="20" />
<action name="validate-all" timeout="20" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
return $OCF_SUCCESS
}
validate() {
if [ "$OCF_RESKEY_active_value" = "$OCF_RESKEY_inactive_value" ]; then
ocf_exit_reason "active value '%s' must be different from inactive value '%s'" \
"$OCF_RESKEY_active_value" "$OCF_RESKEY_inactive_value"
return $OCF_ERR_CONFIGURED
fi
VALIDATE_DIR=$(dirname "${OCF_RESKEY_state}")
if [ ! -d "$VALIDATE_DIR" ]; then
ocf_exit_reason "state file '$OCF_RESKEY_state' does not have a valid directory"
return $OCF_ERR_PERM
fi
- if [ ! -w "$VALIDATE_DIR" -o ! -x "$VALIDATE_DIR" ]; then
+ if [ ! -w "$VALIDATE_DIR" ] || [ ! -x "$VALIDATE_DIR" ]; then
ocf_exit_reason "insufficient privileges on directory of state file '$OCF_RESKEY_state'"
return $OCF_ERR_PERM
fi
return $OCF_SUCCESS
}
get_attribute() {
GET_LINE=$(attrd_updater -n "$OCF_RESKEY_name" -Q 2>/dev/null)
if [ $? -ne 0 ]; then
echo ""
else
echo "$GET_LINE" | sed -e "s/.* value=\"\(.*\)\"$/\1/"
fi
}
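# On success, attrd_updater -Q prints a line of the form (values here are
# hypothetical):
#   name="opa-myattr" host="node1" value="1"
# which the sed above reduces to just: 1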
set_attribute() {
attrd_updater -n "$OCF_RESKEY_name" -U "$1" 2>/dev/null
# TODO if above call is async, loop until get_attribute returns expected value
}
check_attribute() {
CHECK_VALUE=$(get_attribute)
CHECK_REASON=""
if [ ! -f "$OCF_RESKEY_state" ]; then
- if [ "$CHECK_VALUE" != "" -a "$CHECK_VALUE" != "$OCF_RESKEY_inactive_value" ]; then
+ if [ "$CHECK_VALUE" != "" ] && [ "$CHECK_VALUE" != "$OCF_RESKEY_inactive_value" ]; then
CHECK_REASON="Node attribute $OCF_RESKEY_name='$CHECK_VALUE' differs from expected value '$OCF_RESKEY_inactive_value'"
return $OCF_ERR_GENERIC
fi
return $OCF_NOT_RUNNING
fi
if [ "$CHECK_VALUE" != "$OCF_RESKEY_active_value" ]; then
CHECK_REASON="Node attribute $OCF_RESKEY_name='$CHECK_VALUE' differs from expected value '$OCF_RESKEY_active_value'"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
monitor() {
check_attribute
MONITOR_RC=$?
if [ $MONITOR_RC -eq $OCF_ERR_GENERIC ]; then
ocf_exit_reason "$CHECK_REASON"
fi
return $MONITOR_RC
}
start() {
check_attribute
if [ $? -eq $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
touch "${OCF_RESKEY_state}" 2>/dev/null
if [ $? -ne 0 ]; then
ocf_exit_reason "Unable to manage state file $OCF_RESKEY_state"
return $OCF_ERR_GENERIC
fi
set_attribute "${OCF_RESKEY_active_value}"
if [ $? -ne 0 ]; then
rm -f "${OCF_RESKEY_state}"
ocf_exit_reason "Unable to set node attribute $OCF_RESKEY_name='$OCF_RESKEY_active_value'"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
stop() {
check_attribute
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi
rm -f ${OCF_RESKEY_state}
set_attribute "${OCF_RESKEY_inactive_value}"
if [ $? -ne 0 ]; then
ocf_exit_reason "Unable to set node attribute $OCF_RESKEY_name='$OCF_RESKEY_inactive_value'"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
case $__OCF_ACTION in
meta-data) meta_data ;;
start) start ;;
stop) stop ;;
monitor) monitor ;;
# We don't do anything special for live migration, but we support it so that
# other resources that live migrate can depend on this one.
migrate_to) stop ;;
migrate_from) start ;;
reload) start ;;
validate-all) validate ;;
usage|help) usage $OCF_SUCCESS ;;
*) usage $OCF_ERR_UNIMPLEMENTED ;;
esac
exit $?
# vim: expandtab:tabstop=4:softtabstop=4:shiftwidth=4:textwidth=80
diff --git a/extra/resources/controld b/extra/resources/controld
index b4bd026c64..7c44845bfc 100755
--- a/extra/resources/controld
+++ b/extra/resources/controld
@@ -1,306 +1,298 @@
#!/bin/sh
#
# OCF resource agent for managing the DLM controld process
#
-# Copyright (c) 2009 Novell, Inc
+# Copyright 2009-2018 Novell, Inc
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
#######################################################################
if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then
ocf_log info "Using heartbeat controld agent"
$OCF_ROOT/resource.d/heartbeat/controld $1
exit $?
fi
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="controld" version="1.1">
<version>1.0</version>
<longdesc lang="en">
This Resource Agent can control the dlm_controld services needed by cluster-aware file systems.
It assumes that dlm_controld is in your default PATH.
In most cases, it should be run as an anonymous clone.
</longdesc>
<shortdesc lang="en">DLM Agent for cluster file systems</shortdesc>
<parameters>
<parameter name="args" unique="1">
<longdesc lang="en">
Any additional options to start the dlm_controld service with
</longdesc>
<shortdesc lang="en">DLM Options</shortdesc>
<content type="string" default="-s 0" />
</parameter>
<parameter name="daemon" unique="1">
<longdesc lang="en">
The daemon to start - supports gfs_controld(.pcmk) and dlm_controld(.pcmk)
</longdesc>
<shortdesc lang="en">The daemon to start</shortdesc>
<content type="string" default="dlm_controld.pcmk" />
</parameter>
<parameter name="allow_stonith_disabled">
<longdesc lang="en">
Allow DLM start-up even if STONITH/fencing is disabled in the cluster.
Setting this option to true will cause cluster malfunction and hangs on
fail-over for DLM clients that require fencing (such as GFS2, OCFS2, and
cLVM2).
This option is for advanced use only.
</longdesc>
<shortdesc lang="en">Allow start-up even without STONITH/fencing</shortdesc>
<content type="string" default="false" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" timeout="20" interval="10" depth="0" start-delay="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
CONFIGFS_DIR="/sys/kernel/config"
DLM_CONFIGFS_DIR="${CONFIGFS_DIR}/dlm"
DLM_SYSFS_DIR="/sys/kernel/dlm"
controld_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
check_uncontrolled_locks()
{
- local tmp
- tmp=$(ls $DLM_SYSFS_DIR 2>&1)
+ CUL_TMP=$(ls $DLM_SYSFS_DIR 2>&1)
if [ $? -eq 0 ]; then
- if [ -n "$tmp" ]; then
+ if [ -n "$CUL_TMP" ]; then
ocf_log err "Uncontrolled lockspace exists, system must reboot. Executing suicide fencing"
- stonith_admin --reboot=$(crm_node -n) --tag controld
+ stonith_admin --reboot="$(crm_node -n)" --tag controld
exit $OCF_ERR_GENERIC
fi
fi
}
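# i.e. if lockspace directories linger under /sys/kernel/dlm while the
# daemon is down, those locks can no longer be managed, and the only safe
# recovery is to fence ourselves.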
controld_start() {
controld_monitor; rc=$?
case $rc in
$OCF_SUCCESS) return $OCF_SUCCESS;;
$OCF_NOT_RUNNING) ;;
*) return $OCF_ERR_GENERIC;;
esac
# Ensure configfs is mounted
if [ ! -e "$CONFIGFS_DIR" ]; then
modprobe configfs
if [ ! -e "$CONFIGFS_DIR" ]; then
ocf_log err "$CONFIGFS_DIR not available"
return $OCF_ERR_INSTALLED
fi
fi
mount -t configfs | grep " $CONFIGFS_DIR " >/dev/null 2>/dev/null
if [ $? -ne 0 ]; then
mount -t configfs none "$CONFIGFS_DIR"
fi
# Ensure DLM is available
if [ ! -e "$DLM_CONFIGFS_DIR" ]; then
modprobe dlm
if [ ! -e "$DLM_CONFIGFS_DIR" ]; then
ocf_log err "$DLM_CONFIGFS_DIR not available"
return $OCF_ERR_INSTALLED
fi
fi
if ! ocf_is_true "$OCF_RESKEY_allow_stonith_disabled" && \
! ocf_is_true "`crm_attribute --type=crm_config --name=stonith-enabled --query --quiet --default=true`"; then
ocf_log err "The cluster property stonith-enabled may not be deactivated to use the DLM"
return $OCF_ERR_CONFIGURED
fi
${OCF_RESKEY_daemon} $OCF_RESKEY_args
while true
do
sleep 1
controld_monitor; rc=$?
case $rc in
$OCF_SUCCESS)
- local addr_list
- addr_list="$(cat "${DLM_CONFIGFS_DIR}"/cluster/comms/*/addr_list 2>/dev/null)"
- if [ $? -eq 0 ] && [ -n "$addr_list" ]; then
+ CS_ADDR_LIST="$(cat "${DLM_CONFIGFS_DIR}"/cluster/comms/*/addr_list 2>/dev/null)"
+ if [ $? -eq 0 ] && [ -n "$CS_ADDR_LIST" ]; then
return $OCF_SUCCESS
fi
;;
$OCF_NOT_RUNNING)
return $OCF_NOT_RUNNING
;;
*)
return $OCF_ERR_GENERIC
;;
esac
ocf_log debug "Waiting for ${OCF_RESKEY_daemon} to be ready"
done
}
controld_stop() {
controld_monitor; rc=$?
if [ $rc = $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi
killall -TERM ${OCF_RESKEY_daemon}; rc=$?
if [ $rc != 0 ]; then
return $OCF_ERR_GENERIC
fi
rc=$OCF_SUCCESS
while [ $rc = $OCF_SUCCESS ]; do
controld_monitor; rc=$?
sleep 1
done
if [ $rc = $OCF_NOT_RUNNING ]; then
rc=$OCF_SUCCESS
fi
return $rc
}
controld_monitor() {
- local rc
- killall -0 ${OCF_RESKEY_daemon} >/dev/null 2>&1 ; rc=$?
+ killall -0 ${OCF_RESKEY_daemon} >/dev/null 2>&1 ; CM_RC=$?
- case $rc in
+ case $CM_RC in
0) smw=$(dlm_tool status -v | grep "stateful_merge_wait=" | cut -d= -f2)
if [ -n "$smw" ] && [ $smw -eq 1 ]; then
ocf_log err "DLM status is: stateful_merge_wait"
- rc=$OCF_ERR_GENERIC
+ CM_RC=$OCF_ERR_GENERIC
elif [ -z "$smw" ] && dlm_tool ls | grep -q "wait fencing" && \
! stonith_admin -H '*' -V | grep -q "wishes to"; then
ocf_log err "DLM status is: wait fencing"
- rc=$OCF_ERR_GENERIC
+ CM_RC=$OCF_ERR_GENERIC
else
- rc=$OCF_SUCCESS
+ CM_RC=$OCF_SUCCESS
fi
;;
- 1) rc=$OCF_NOT_RUNNING;;
- *) rc=$OCF_ERR_GENERIC;;
+ 1) CM_RC=$OCF_NOT_RUNNING;;
+ *) CM_RC=$OCF_ERR_GENERIC;;
esac
# If the DLM is not running successfully but DLM lockspace
# bits are left over, we must self-fence.
- if [ $rc -ne $OCF_SUCCESS ]; then
+ if [ $CM_RC -ne $OCF_SUCCESS ]; then
check_uncontrolled_locks
fi
- return $rc
+ return $CM_RC
}
controld_validate() {
check_binary killall
check_binary ${OCF_RESKEY_daemon}
case ${OCF_RESKEY_CRM_meta_globally_unique} in
yes|Yes|true|True|1)
ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute"
exit $OCF_ERR_CONFIGURED
;;
esac
[ -d /var/run/cluster ] || mkdir /var/run/cluster
return $OCF_SUCCESS
}
: ${OCF_RESKEY_sctp=false}
: ${OCF_RESKEY_CRM_meta_globally_unique:="false"}
-case "$HA_quorum_type" in
- pcmk) daemon_ext=".pcmk";;
- *) daemon_ext="";;
-esac
-
case "$OCF_RESOURCE_INSTANCE" in
*[gG][fF][sS]*)
: ${OCF_RESKEY_args=-g 0}
- : ${OCF_RESKEY_daemon=gfs_controld${daemon_ext}}
+ : ${OCF_RESKEY_daemon=gfs_controld}
;;
*[dD][lL][mM]*)
: ${OCF_RESKEY_args=-s 0}
- : ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}}
+ : ${OCF_RESKEY_daemon=dlm_controld}
;;
*)
: ${OCF_RESKEY_args=-s 0}
- : ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}}
+ : ${OCF_RESKEY_daemon=dlm_controld}
esac
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) controld_validate; controld_start;;
stop) controld_stop;;
monitor) controld_validate; controld_monitor;;
validate-all) controld_validate;;
usage|help) controld_usage
exit $OCF_SUCCESS
;;
*) controld_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
rc=$?
exit $rc
diff --git a/extra/resources/ifspeed.in b/extra/resources/ifspeed.in
index d5eee6c5f8..c60e55b59c 100755
--- a/extra/resources/ifspeed.in
+++ b/extra/resources/ifspeed.in
@@ -1,541 +1,538 @@
#!@BASH_PATH@
#
# OCF resource agent which monitors state of network interface and records it
# as a node attribute in the CIB based on the sum of speeds of its active (up,
# link detected, not blocked) underlying interfaces.
#
-# Copyright (c) 2011 Vladislav Bogdanov <bubble@hoster-ok.com>
+# Copyright 2011-2018 Vladislav Bogdanov <bubble@hoster-ok.com>
# Partially based on 'ping' RA by Andrew Beekhof
#
# Change on 2017 by Tomer Azran <tomerazran@gmail.com>:
# Add "ip" parameter to detect network interface name by ip address:
# http://lists.clusterlabs.org/pipermail/users/2017-August/006224.html
#
# OCF instance parameters:
# OCF_RESKEY_name: name of attribute to set in CIB
# OCF_RESKEY_ip ip address to check
# OCF_RESKEY_iface: network interface to monitor
# OCF_RESKEY_bridge_ports: if not null and OCF_RESKEY_iface is a bridge, list of
# bridge ports to consider.
# Default is all ports which have designated_bridge=root_id
# OCF_RESKEY_weight_base: Relative weight of 1Gbps. This can be used to tune
# value of resulting CIB attribute.
#
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
# If these aren't available, we can still show help,
# which is all that is needed to build the man pages.
[ -r "${OCF_FUNCTIONS}" ] && . "${OCF_FUNCTIONS}"
[ -r "${OCF_FUNCTIONS_DIR}/findif.sh" ] && . "${OCF_FUNCTIONS_DIR}/findif.sh"
: ${OCF_SUCCESS=0}
: ${__OCF_ACTION=$1}
FINDIF=findif
# Defaults
OCF_RESKEY_name_default="ifspeed"
OCF_RESKEY_bridge_ports_default="detect"
OCF_RESKEY_weight_base_default=1000
OCF_RESKEY_dampen_default=5
: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}}
: ${OCF_RESKEY_bridge_ports=${OCF_RESKEY_bridge_ports_default}}
: ${OCF_RESKEY_weight_base=${OCF_RESKEY_weight_base_default}}
: ${OCF_RESKEY_dampen=${OCF_RESKEY_dampen_default}}
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ifspeed" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Every time the monitor action is run, this resource agent records (in the CIB)
the (relative) speed of the network interface it monitors.
This RA can monitor physical interfaces, bonds, bridges, vlans and (hopefully)
any combination of them.
Examples:
*) Bridge on top of one 10Gbps interface (eth2) and 802.3ad bonding (bond0) built
on two 1Gbps interfaces (eth0 and eth1).
*) Active-backup bonding built on top of one physical interface and one vlan on
another interface.
For STP-enabled bridges, this RA tries to guess the network topology and by
default looks only at ports which are connected to the upstream switch. This
can be overridden by the 'bridge_ports' parameter. Active interfaces in this
case are those in the "forwarding" state.
For balancing bonds, this RA sums the speeds of the underlying "up" slave
interfaces (and applies a coefficient of 0.8 to the result).
For non-balancing bonds ('active-backup' and probably 'broadcast'), only the
speed of the currently active slave is used.
</longdesc>
<shortdesc lang="en">Network interface speed monitor</shortdesc>
<parameters>
<parameter name="name" unique="1">
<longdesc lang="en">
The name of the attribute to set. This is the name to be used in the constraints.
</longdesc>
<shortdesc lang="en">Attribute name</shortdesc>
<content type="string" default="${OCF_RESKEY_name_default}"/>
</parameter>
<parameter name="iface" unique="0" required="1">
<longdesc lang="en">
Network interface to monitor.
</longdesc>
<shortdesc lang="en">Network interface</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="ip" unique="0" required="0">
<longdesc lang="en">
Try to detect the interface name by finding the interface that holds the IP
address. The address may be IPv4 (dotted quad notation) or IPv6 (colon
hexadecimal notation).
Example IPv4: "192.168.1.1".
Example IPv6: "2001:db8:DC28:0:0:FC57:D4C8:1FFF".
</longdesc>
<shortdesc lang="en">IPv4 or IPv6 address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="bridge_ports" unique="0">
<longdesc lang="en">
If not null and OCF_RESKEY_iface is a bridge, list of bridge ports to consider.
Default is all ports which have designated_bridge=root_id.
</longdesc>
<shortdesc lang="en">Bridge ports</shortdesc>
<content type="string" default="${OCF_RESKEY_bridge_ports_default}"/>
</parameter>
<parameter name="weight_base" unique="0">
<longdesc lang="en">
Relative weight of 1Gbps in interface speed.
Can be used to tune how big the attribute value will be.
</longdesc>
<shortdesc lang="en">Weight of 1Gbps</shortdesc>
<content type="integer" default="${OCF_RESKEY_weight_base_default}"/>
</parameter>
<parameter name="dampen" unique="0">
<longdesc lang="en">
The time to wait (dampening) for further changes to occur.
</longdesc>
<shortdesc lang="en">Dampening interval</shortdesc>
<content type="integer" default="${OCF_RESKEY_dampen_default}"/>
</parameter>
<parameter name="debug" unique="0">
<longdesc lang="en">
Log what has been done more verbosely.
</longdesc>
<shortdesc lang="en">Verbose logging</shortdesc>
<content type="string" default="false"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="30" />
<action name="stop" timeout="30" />
<action name="monitor" depth="0" timeout="30" interval="10"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
start() {
monitor
if [ $? -eq $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
ha_pseudo_resource ${ha_pseudo_resource_name} start
update
return $?
}
stop() {
ha_pseudo_resource ${ha_pseudo_resource_name} stop
attrd_updater -D -n ${OCF_RESKEY_name} -d ${OCF_RESKEY_dampen} ${attrd_options}
return $OCF_SUCCESS
}
monitor() {
local ret
ha_pseudo_resource ${ha_pseudo_resource_name} monitor
ret=$?
if [ ${ret} -eq $OCF_SUCCESS ] ; then
update
fi
return ${ret}
}
# This function tries to guess nic interface by IP
get_nic_name_by_ip(){
# $FINDIF takes its parameters from the environment.
# Its output is as follows:
# [NIC_NAME] netmask [NETMASK] broadcast [BROADCAST]
NICINFO=$( ${FINDIF} )
rc=$?
if [ $rc -eq 0 ];then
# Get NIC_NAME part of findif function output.
echo "${NICINFO%% *}"
else
echo ""
fi
}
validate() {
# Check the interface parameter
if [ -z "${OCF_RESKEY_iface}" ]; then
if [ -z "${OCF_RESKEY_ip}" ]; then
ocf_log err "Empty iface and ip parameters. Please specify either an interface name or valid ip address."
exit $OCF_ERR_CONFIGURED
else
ipcheck_ipv4 "${OCF_RESKEY_ip}"
if [ $? -eq 1 ] ; then
ipcheck_ipv6 "${OCF_RESKEY_ip}"
if [ $? -eq 1 ] ; then
ocf_exit_reason "ip parameter [${OCF_RESKEY_ip}] is not a valid ip address."
exit $OCF_ERR_CONFIGURED
fi
fi
fi
fi
return $OCF_SUCCESS
}
iface_get_speed() {
local iface=$1
local operstate
local carrier
- local bridge_iface_speed
- local bond_iface_speed
- local vlan_iface_speed
local speed
if [ ! -e "/sys/class/net/${iface}" ] ; then
echo "0"
elif iface_is_bridge ${iface} ; then # bridges do not have operstate
read carrier < "/sys/class/net/${iface}/carrier"
if [ "${carrier}" != "1" ] ; then
echo "0"
else
bridge_get_speed ${iface}
fi
else
read operstate < "/sys/class/net/${iface}/operstate"
read carrier < "/sys/class/net/${iface}/carrier"
if [ "${operstate}" != "up" ] || [ "${carrier}" != "1" ] ; then
echo "0"
elif iface_is_bond ${iface} ; then
bond_get_speed ${iface}
elif iface_is_vlan ${iface} ; then
- iface_get_speed $( vlan_get_phy ${iface} )
+ iface_get_speed "$(vlan_get_phy "${iface}")"
elif iface_is_hfi1 "${iface}" ; then
hfi1_get_speed "${iface}"
else
read speed < "/sys/class/net/${iface}/speed"
echo ${speed}
fi
fi
}
iface_is_vlan() {
local iface=$1
[ -e "/proc/net/vlan/${iface}" ] && return 0 || return 1
}
iface_is_bridge() {
local iface=$1
[ -e "/sys/class/net/${iface}/bridge" ] && return 0 || return 1
}
iface_is_bond() {
local iface=$1
[ -e "/sys/class/net/${iface}/bonding" ] && return 0 || return 1
}
iface_is_hfi1() {
local iface=$1
driver=$(readlink /sys/class/net/${iface}/device/driver)
[[ $(basename ${driver}) =~ "hfi1" ]] && return 0 || return 1
}
vlan_get_phy() {
local iface=$1
sed -ne "s/^${iface} .*| *//p" < /proc/net/vlan/config
}
bridge_is_stp_enabled() {
local iface=$1
local stp
read stp < "/sys/class/net/${iface}/bridge/stp_state"
[ "${stp}" = "1" ] && return 0 || return 1
}
bridge_get_root_ports() {
local bridge=$1
local root_id
local root_ports=""
local bridge_id
read root_id < "/sys/class/net/${bridge}/bridge/root_id"
for port in /sys/class/net/${bridge}/brif/* ; do
read bridge_id < "${port}/designated_bridge"
if [ "${bridge_id}" = "${root_id}" ] ; then
root_ports="${root_ports} ${port##*/}"
fi
done
root_ports=${root_ports# }
if [ -n "$2" ] ; then # Record value in specified var. This expects we were called not in a sub-shell.
- eval $2=\${root_ports}
+ eval "$2=\${root_ports}"
else # Expect sub-shell
echo ${root_ports}
fi
}
# From /include/linux/if_bridge.h:
#define BR_STATE_DISABLED 0
#define BR_STATE_LISTENING 1
#define BR_STATE_LEARNING 2
#define BR_STATE_FORWARDING 3
#define BR_STATE_BLOCKING 4
bridge_get_active_ports() {
local bridge=$1
shift 1
local ports="$*"
local active_ports=""
local port_state
local stp_state
local warn=0
bridge_is_stp_enabled ${bridge}
stp_state=$?
if [ -z "${ports}" ] || [ "${ports}" = "detect" ] ; then
bridge_get_root_ports ${bridge} ports
fi
for port in $ports ; do
if [ ! -e "/sys/class/net/${bridge}/brif/${port}" ] ; then
ocf_log warning "Port ${port} doesn't belong to bridge ${bridge}"
continue
fi
read port_state < "/sys/class/net/${bridge}/brif/${port}/state"
if [ "${port_state}" = "3" ] ; then
if [ -n "${active_ports}" ] && ${stp_state} ; then
warn=1
fi
active_ports="${active_ports} ${port}"
fi
done
if [ ${warn} -eq 1 ] ; then
ocf_log warning "More then one upstream port in bridge '${bridge}' is in forwarding state while STP is enabled: ${active_ports}"
fi
echo "${active_ports# }"
}
bridge_get_speed() {
local iface=$1
- local bridge_port_speed
local aggregate_speed=0
if ! iface_is_bridge ${iface} ; then
echo 0
return
fi
- local ports=$( bridge_get_active_ports ${iface} ${OCF_RESKEY_bridge_ports} )
- for port in ${ports} ; do
+ BGS_PORTS=$( bridge_get_active_ports "${iface}" "${OCF_RESKEY_bridge_ports}" )
+ for port in ${BGS_PORTS} ; do
: $(( aggregate_speed += $( iface_get_speed ${port} ) ))
done
if [ -n "$2" ] ; then # Record value in specified var. This expects we were called not in a sub-shell.
- eval $2=\${aggregate_speed}
+ eval "$2=\${aggregate_speed}"
else # Expect sub-shell
echo ${aggregate_speed}
fi
}
hfi1_get_speed() {
local iface=$1
local hfi1_speed
local hfi1_value
local hfi1_desc
- # Currently (9/14/2017 Intel Omni Path v10.5.0.0.155) Intel doesn't have Dual/Multiple ports Host Channel Adapters
- # and it's save to use such method to get a speed.
- # Example of output:
+ # Currently (9/14/2017 Intel Omni Path v10.5.0.0.155), Intel doesn't have
+ # dual- or multiple-port Host Channel Adapters, and it's safe to use this
+ # method to get the speed. Example output:
# [root@es-host0 ~]# cat /sys/class/net/ib0/device/infiniband/*/ports/*/rate
# 100 Gb/sec (4X EDR)
read hfi1_speed hfi1_value hfi1_desc < /sys/class/net/${iface}/device/infiniband/*/ports/*/rate
+ ocf_is_true ${OCF_RESKEY_debug} && ocf_log debug "Detected speed $hfi1_speed $hfi1_value $hfi1_desc"
# hfi1_value always in Gb/sec, so we need to convert hfi1_speed in Mb/sec
echo $(( hfi1_speed * 1000 ))
}
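# Worked example (using the sample output above): "100 Gb/sec (4X EDR)"
# reads in as hfi1_speed=100, so 100 * 1000 = 100000 Mb/sec is echoed.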
bond_get_slaves() {
local iface=$1
local slaves
read slaves < "/sys/class/net/${iface}/bonding/slaves"
if [ -n "$2" ] ; then # Record value in specified var. This expects we were called not in a sub-shell.
- eval $2=\${slaves}
+ eval "$2=\${slaves}"
else # Expect sub-shell
echo ${slaves}
fi
}
bond_get_active_iface() {
local iface=$1
local active
read active < "/sys/class/net/${iface}/bonding/active_slave"
if [ -n "$2" ] ; then # Record value in specified var. This expects we were called not in a sub-shell.
- eval $2=\${active}
+ eval "$2=\${active}"
else # Expect sub-shell
echo ${active}
fi
}
bond_is_balancing() {
local iface=$1
read mode mode_index < "/sys/class/net/${iface}/bonding/mode"
+ ocf_is_true ${OCF_RESKEY_debug} && ocf_log debug "Detected balancing $mode $mode_index"
case ${mode} in
"balance-rr"|"balance-xor"|"802.3ad"|"balance-tlb"|"balance-alb")
return 0
;;
*)
return 1
;;
esac
}
bond_get_speed() {
local iface=$1
local aggregate_speed=0
- local bond_slave_speed
local active_iface
local bond_slaves
if ! iface_is_bond ${iface} ; then
echo 0
return
fi
bond_get_slaves ${iface} bond_slaves
if bond_is_balancing ${iface} ; then
for slave in ${bond_slaves} ; do
: $(( aggregate_speed += $( iface_get_speed ${slave} ) ))
done
# A balancing bond cannot actually achieve the full speed*n of its
# slaves, so apply a 0.8 coefficient
: $(( aggregate_speed = aggregate_speed * 8 / 10 ))
else
bond_get_active_iface ${iface} active_iface
aggregate_speed=$( iface_get_speed $active_iface )
fi
if [ -n "$2" ] ; then # Record value in specified var. This expects we were called not in a sub-shell.
- eval $2=\${aggregate_speed}
+ eval "$2=\${aggregate_speed}"
else # Expect sub-shell
echo ${aggregate_speed}
fi
}
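# Worked example (hypothetical setup): an 802.3ad bond over two 1000Mb/s
# slaves sums to 2000 and is reported as 2000 * 8 / 10 = 1600, while an
# active-backup bond over the same slaves reports its active slave's 1000.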
update() {
local speed;
local nic=${OCF_RESKEY_iface};
if [ -z "${OCF_RESKEY_iface}" ]; then
nic=$( get_nic_name_by_ip )
if [ -z "${nic}" ];then
ocf_log err "Could not retrieve network interface name from ip address (${OCF_RESKEY_ip})"
exit $OCF_ERR_GENERIC
fi
fi
speed=$( iface_get_speed ${nic} )
: $(( score = speed * ${OCF_RESKEY_weight_base} / 1000 ))
if [ "$__OCF_ACTION" = "start" ] ; then
attrd_updater -n ${OCF_RESKEY_name} -B ${score} -d ${OCF_RESKEY_dampen} ${attrd_options}
else
attrd_updater -n ${OCF_RESKEY_name} -v ${score} -d ${OCF_RESKEY_dampen} ${attrd_options}
fi
rc=$?
case ${rc} in
0)
ocf_is_true ${OCF_RESKEY_debug} && ocf_log debug "Updated ${OCF_RESKEY_name} = ${score}"
;;
*)
ocf_log warn "Could not update ${OCF_RESKEY_name} = ${score}: rc=${rc}"
;;
esac
return ${rc}
}
case $__OCF_ACTION in
meta-data)
meta_data
exit $OCF_SUCCESS
;;
usage|help)
usage
exit $OCF_SUCCESS
;;
esac
-if [ `uname` != "Linux" ] ; then
+if [ "$(uname)" != "Linux" ] ; then
ocf_log err "This RA works only on linux."
exit $OCF_ERR_INSTALLED
fi
: ${ha_pseudo_resource_name:="ifspeed-${OCF_RESOURCE_INSTANCE}"}
attrd_options='-q'
if ocf_is_true ${OCF_RESKEY_debug} ; then
attrd_options=''
fi
validate || exit $?
case $__OCF_ACTION in
start)
start
;;
stop)
stop
;;
monitor)
monitor
;;
validate-all)
;;
*)
usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
diff --git a/extra/resources/ping b/extra/resources/ping
index 2d92d73a3c..7ae71d2837 100755
--- a/extra/resources/ping
+++ b/extra/resources/ping
@@ -1,434 +1,414 @@
#!/bin/sh
#
+# Ping OCF RA that utilizes the system ping
#
-# Ping OCF RA that utilizes the system ping
+# Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
#
-# Copyright (c) 2009 Andrew Beekhof
-# All Rights Reserved.
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it would be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# Further, this software is distributed without any warranty that it is
-# free of the rightful claim of any third person regarding infringement
-# or the like. Any license provided herein, whether implied or
-# otherwise, applies only to this software file. Patent licenses, if
-# any, provided herein do not apply to combinations of this program with
-# other software, or any other product whatsoever.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write the Free Software Foundation,
-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
-#
-
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ping" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Every time the monitor action is run, this resource agent records (in the CIB) the current number of nodes the host can connect to using the system fping (preferred) or ping tool.
</longdesc>
<shortdesc lang="en">node connectivity</shortdesc>
<parameters>
<parameter name="pidfile" unique="0">
<longdesc lang="en">PID file</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="${HA_VARRUN%%/}/ping-${OCF_RESOURCE_INSTANCE}" />
</parameter>
<parameter name="dampen" unique="0">
<longdesc lang="en">
The time to wait (dampening) for further changes to occur
</longdesc>
<shortdesc lang="en">Dampening interval</shortdesc>
<content type="integer" default="5s"/>
</parameter>
<parameter name="name" unique="0">
<longdesc lang="en">
The name of the attributes to set. This is the name to be used in the constraints.
</longdesc>
<shortdesc lang="en">Attribute name</shortdesc>
<content type="string" default="pingd"/>
</parameter>
<parameter name="multiplier" unique="0">
<longdesc lang="en">
The number by which to multiply the number of connected ping nodes
</longdesc>
<shortdesc lang="en">Value multiplier</shortdesc>
<content type="integer" default="1"/>
</parameter>
<parameter name="host_list" unique="0" required="1">
<longdesc lang="en">
A space separated list of ping nodes to count.
</longdesc>
<shortdesc lang="en">Host list</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="attempts" unique="0">
<longdesc lang="en">
Number of ping attempts, per host, before declaring it dead
</longdesc>
<shortdesc lang="en">no. of ping attempts</shortdesc>
<content type="integer" default="3"/>
</parameter>
<parameter name="timeout" unique="0">
<longdesc lang="en">
How long, in seconds, to wait before declaring a ping lost
</longdesc>
<shortdesc lang="en">ping timeout in seconds</shortdesc>
<content type="integer" default="2"/>
</parameter>
<parameter name="options" unique="0">
<longdesc lang="en">
A catch-all for any other options that need to be passed to ping.
</longdesc>
<shortdesc lang="en">Extra Options</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="failure_score" unique="0">
<longdesc lang="en">
The resource is failed if the score is less than failure_score.
By default, it never fails.
</longdesc>
<shortdesc lang="en">failure_score</shortdesc>
<content type="integer" default=""/>
</parameter>
<parameter name="use_fping" unique="0">
<longdesc lang="en">
Use fping rather than ping, if found. If set to 0, fping
will not be used even if present.
</longdesc>
<shortdesc lang="en">Use fping if available</shortdesc>
<content type="boolean" default="1"/>
</parameter>
<parameter name="debug" unique="0">
<longdesc lang="en">
Enables verbose attrd_updater logging on every call.
</longdesc>
<shortdesc lang="en">Verbose logging</shortdesc>
<content type="string" default="false"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="20" />
<action name="monitor" depth="0" timeout="60" interval="10"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
ping_conditional_log() {
level=$1; shift
if [ ${OCF_RESKEY_debug} = "true" ]; then
ocf_log $level "$*"
fi
}
ping_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
ping_start() {
ping_monitor
if [ $? = $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
touch ${OCF_RESKEY_pidfile}
ping_update
}
ping_stop() {
rm -f ${OCF_RESKEY_pidfile}
attrd_updater -D -n $OCF_RESKEY_name -d $OCF_RESKEY_dampen $attrd_options
return $OCF_SUCCESS
}
ping_monitor() {
if [ -f ${OCF_RESKEY_pidfile} ]; then
ping_update
if [ $? -eq 0 ]; then
return $OCF_SUCCESS
fi
return $OCF_ERR_GENERIC
fi
return $OCF_NOT_RUNNING
}
ping_validate() {
# Is the state directory writable?
state_dir=`dirname "$OCF_RESKEY_pidfile"`
touch "$state_dir/$$"
if [ $? != 0 ]; then
ocf_log err "Invalid location for 'state': $state_dir is not writable"
return $OCF_ERR_ARGS
fi
rm "$state_dir/$$"
# Pidfile better be an absolute path
case $OCF_RESKEY_pidfile in
/*) ;;
*) ocf_log warn "You should use an absolute path for pidfile not: $OCF_RESKEY_pidfile" ;;
esac
# Check the host list
if [ "x" = "x$OCF_RESKEY_host_list" ]; then
ocf_log err "Empty host_list. Please specify some nodes to ping"
exit $OCF_ERR_CONFIGURED
fi
# For fping allow only same IP versions or hostnames
if use_fping; then
hosts_family
if [ $? -eq 99 ]; then
ocf_log err "host_list can contain only host with same IP versions for fping"
exit $OCF_ERR_CONFIGURED
fi
fi
check_binary ping
return $OCF_SUCCESS
}
fping_check() {
p_exe=fping
hosts_family
case $? in
6) p_exe=fping6 ;;
99) ocf_log err "Ambiguous IP versions in host_list: '$OCF_RESKEY_host_list'"; exit $OCF_ERR_CONFIGURED;;
esac
active=0
- n=$OCF_RESKEY_attempts
timeout=`expr $OCF_RESKEY_timeout \* 1000 / $OCF_RESKEY_attempts`
cmd="$p_exe -r $OCF_RESKEY_attempts -t $timeout -B 1.0 $OCF_RESKEY_options $OCF_RESKEY_host_list"
output=`$cmd 2>&1`; rc=$?
active=`echo "$output" | grep "is alive" | wc -l`
case $rc in
0)
;;
1)
for h in `echo "$output" | grep "is unreachable" | awk '{print $1}'`; do
ping_conditional_log warn "$h is inactive"
done
;;
*)
ocf_log err "Unexpected result for '$cmd' $rc: `echo "$output" | tr '\n' ';'`"
;;
esac
return $active
}
ping_check() {
active=0
for host in $OCF_RESKEY_host_list; do
p_exe=ping
case `uname` in
Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";;
Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";;
FreeBSD) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";;
*) ocf_log err "Unknown host type: `uname`"; exit $OCF_ERR_INSTALLED;;
esac
case $host in
*:*) p_exe=ping6
esac
p_out=`$p_exe $p_args $OCF_RESKEY_options $host 2>&1`; rc=$?
case $rc in
0) active=`expr $active + 1`;;
1) ping_conditional_log warn "$host is inactive: $p_out";;
*) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";;
esac
done
return $active
}
ping_update() {
if use_fping; then
fping_check
active=$?
else
ping_check
active=$?
fi
score=`expr $active \* $OCF_RESKEY_multiplier`
if [ "$__OCF_ACTION" = "start" ] ; then
attrd_updater -n $OCF_RESKEY_name -B $score -d $OCF_RESKEY_dampen $attrd_options
else
attrd_updater -n $OCF_RESKEY_name -v $score -d $OCF_RESKEY_dampen $attrd_options
fi
rc=$?
case $rc in
0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;;
*) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";;
esac
if [ $rc -ne 0 ]; then
return $rc
fi
- if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then
+ if [ -n "$OCF_RESKEY_failure_score" ] && [ "$score" -lt "$OCF_RESKEY_failure_score" ]; then
ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)"
return 1
fi
return 0
}
use_fping() {
ocf_is_true "$OCF_RESKEY_use_fping" && have_binary fping;
}
# return values:
# 4 IPv4
# 6 IPv6
# 0 indefinite (i.e. hostname)
host_family() {
case $1 in
*[0-9].*[0-9].*[0-9].*[0-9]) return 4 ;;
*:*) return 6 ;;
*) return 0 ;;
esac
}
# return values same as host_family plus
# 99 ambiguous families
hosts_family() {
# For fping allow only same IP versions or hostnames
family=0
for host in $OCF_RESKEY_host_list; do
host_family $host
f=$?
- if [ $family -ne 0 -a $f -ne 0 -a $f -ne $family ] ; then
+ if [ $family -ne 0 ] && [ $f -ne 0 ] && [ $f -ne $family ] ; then
family=99
break
fi
[ $f -ne 0 ] && family=$f
done
return $family
}
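# Worked examples (hypothetical host lists):
#   "192.168.1.1 192.168.1.2" -> 4
#   "2001:db8::1"             -> 6
#   "192.168.1.1 2001:db8::1" -> 99 (mixed families, rejected for fping)
#   "host1 192.168.1.1"       -> 4 (hostnames are indefinite, so no conflict)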
: ${OCF_RESKEY_name:="pingd"}
: ${OCF_RESKEY_dampen:="5s"}
: ${OCF_RESKEY_attempts:="3"}
: ${OCF_RESKEY_multiplier:="1"}
: ${OCF_RESKEY_debug:="false"}
: ${OCF_RESKEY_failure_score:="0"}
: ${OCF_RESKEY_use_fping:="1"}
: ${OCF_RESKEY_CRM_meta_timeout:="20000"}
: ${OCF_RESKEY_CRM_meta_globally_unique:="false"}
integer=`echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*'`
case ${OCF_RESKEY_timeout} in
*[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=`expr $integer / 1000`;;
*[0-9]m|*[0-9]min) OCF_RESKEY_timeout=`expr $integer \* 60`;;
*[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=`expr $integer \* 60 \* 60`;;
*) OCF_RESKEY_timeout=$integer;;
esac
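# Worked examples (hypothetical values): "3000ms" -> 3000 / 1000 = 3 seconds,
# "2m" -> 2 * 60 = 120 seconds; a bare number of seconds passes through as-is.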
if [ -z ${OCF_RESKEY_timeout} ]; then
if [ x"$OCF_RESKEY_host_list" != x ]; then
host_count=`echo $OCF_RESKEY_host_list | awk '{print NF}'`
OCF_RESKEY_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts`
OCF_RESKEY_timeout=`expr $OCF_RESKEY_timeout / 1100` # Convert to seconds and finish 10% early
else
OCF_RESKEY_timeout=5
fi
fi
if [ ${OCF_RESKEY_timeout} -lt 1 ]; then
OCF_RESKEY_timeout=5
elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then
# ping actually complains if this value is too high; 5 minutes is plenty
OCF_RESKEY_timeout=300
fi
if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
: ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESKEY_name}"}
else
: ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESOURCE_INSTANCE}"}
fi
# Check the debug option
case "${OCF_RESKEY_debug}" in
true|True|TRUE|1) OCF_RESKEY_debug=true;;
false|False|FALSE|0) OCF_RESKEY_debug=false;;
*)
ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}"
OCF_RESKEY_debug=false
;;
esac
attrd_options='-q'
if [ ${OCF_RESKEY_debug} = "true" ]; then
attrd_options=''
fi
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) ping_start;;
stop) ping_stop;;
monitor) ping_monitor;;
validate-all) ping_validate;;
usage|help) ping_usage
exit $OCF_SUCCESS
;;
*) ping_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
diff --git a/tools/crm_failcount.in b/tools/crm_failcount.in
index c95028a7a8..c3050c75aa 100755
--- a/tools/crm_failcount.in
+++ b/tools/crm_failcount.in
@@ -1,287 +1,292 @@
#!@BASH_PATH@
+#
+# Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
USAGE_TEXT="Usage: crm_failcount <command> [<options>]
Common options:
--help Display this text, then exit
--version Display version information, then exit
-V, --verbose Specify multiple times to increase debug output
-q, --quiet Print only the value (if querying)
Commands:
-G, --query Query the current value of the resource's fail count
-D, --delete Delete resource's recorded failures
Additional Options:
-r, --resource=value Name of the resource to use (required)
-n, --operation=value Name of operation to use (instead of all operations)
-I, --interval=value If operation is specified, its interval
-N, --node=value Use failcount on named node (instead of local node)"
HELP_TEXT="crm_failcount - Query or delete resource fail counts
$USAGE_TEXT"
# These constants must track crm_exit_t values
CRM_EX_OK=0
-CRM_EX_ERROR=1
CRM_EX_USAGE=64
CRM_EX_NOSUCH=105
exit_usage() {
if [ $# -gt 0 ]; then
- echo "error: $@" >&2
+ echo "error:" "$@" >&2
fi
echo
echo "$USAGE_TEXT"
exit $CRM_EX_USAGE
}
warn() {
- echo "warning: $@" >&2
+ echo "warning:" "$@" >&2
}
interval_re() {
echo "^[[:blank:]]*([0-9]+)[[:blank:]]*(${1})[[:blank:]]*$"
}
# This function should follow crm_get_interval() as closely as possible
parse_interval() {
INT_S="$1"
INT_8601RE="^P(([0-9]+)Y)?(([0-9]+)M)?(([0-9]+)D)?T?(([0-9]+)H)?(([0-9]+)M)?(([0-9]+)S)?$"
if [[ $INT_S =~ $(interval_re "") ]]; then
echo $(( ${BASH_REMATCH[1]} * 1000 ))
elif [[ $INT_S =~ $(interval_re "s|sec") ]]; then
echo $(( ${BASH_REMATCH[1]} * 1000 ))
elif [[ $INT_S =~ $(interval_re "ms|msec") ]]; then
echo "${BASH_REMATCH[1]}"
elif [[ $INT_S =~ $(interval_re "m|min") ]]; then
echo $(( ${BASH_REMATCH[1]} * 60000 ))
elif [[ $INT_S =~ $(interval_re "h|hr") ]]; then
echo $(( ${BASH_REMATCH[1]} * 3600000 ))
elif [[ $INT_S =~ $(interval_re "us|usec") ]]; then
echo $(( ${BASH_REMATCH[1]} / 1000 ))
elif [[ $INT_S =~ ^P([0-9]+)W$ ]]; then
echo $(( ${BASH_REMATCH[1]} * 604800000 ))
elif [[ $INT_S =~ $INT_8601RE ]]; then
echo $(( ( ${BASH_REMATCH[2]:-0} * 31536000000 ) \
+ ( ${BASH_REMATCH[4]:-0} * 2592000000 ) \
+ ( ${BASH_REMATCH[6]:-0} * 86400000 ) \
+ ( ${BASH_REMATCH[8]:-0} * 3600000 ) \
+ ( ${BASH_REMATCH[10]:-0} * 60000 ) \
+ ( ${BASH_REMATCH[12]:-0} * 1000 ) ))
else
warn "Unrecognized interval, using 0"
echo "0"
fi
}
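# Worked examples (hypothetical inputs): "30" and "30s" both yield 30000ms,
# "P1Y" yields 31536000000ms, and "PT1M30S" yields 60000 + 30000 = 90000ms.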
query_single_attr() {
QSR_TARGET="$1"
QSR_ATTR="$2"
crm_attribute $VERBOSE --quiet --query -t status -d 0 \
-N "$QSR_TARGET" -n "$QSR_ATTR"
}
query_attr_sum() {
QAS_TARGET="$1"
QAS_PREFIX="$2"
# Build xpath to match all transient node attributes with prefix
QAS_XPATH="/cib/status/node_state[@uname='${QAS_TARGET}']"
QAS_XPATH="${QAS_XPATH}/transient_attributes/instance_attributes"
QAS_XPATH="${QAS_XPATH}/nvpair[starts-with(@name,'$QAS_PREFIX')]"
# Query attributes that match xpath
# @TODO We ignore stderr because we don't want "no results" to look
# like an error, but that also makes $VERBOSE pointless.
QAS_ALL=$(cibadmin --query --sync-call --local \
--xpath="$QAS_XPATH" 2>/dev/null)
QAS_EX=$?
# "No results" is not an error
if [ $QAS_EX -ne $CRM_EX_OK ] && [ $QAS_EX -ne $CRM_EX_NOSUCH ]; then
echo "error: could not query CIB for fail counts" >&2
exit $QAS_EX
fi
# Extract the attribute values (one per line) from the output
QAS_VALUE=$(echo "$QAS_ALL" | sed -n -e \
's/.*<nvpair.*value="\([0-9][0-9]*\|INFINITY\)".*>.*/\1/p')
# Sum the values
QAS_SUM=0
for i in 0 $QAS_VALUE; do
if [ "$i" = "INFINITY" ]; then
QAS_SUM="INFINITY"
break
else
QAS_SUM=$(($QAS_SUM + $i))
fi
done
if [ "$QAS_SUM" = "INFINITY" ]; then
echo $QAS_SUM
elif [ "$QAS_SUM" -ge 1000000 ]; then
echo "INFINITY"
else
echo $QAS_SUM
fi
}
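# Worked example (hypothetical attributes): fail-count-rsc1#start_0=2 plus
# fail-count-rsc1#monitor_10000=3 sums to 5; any INFINITY value, or any
# finite sum of 1000000 or more, is reported as INFINITY.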
query_failcount() {
QF_TARGET="$1"
QF_RESOURCE="$2"
QF_OPERATION="$3"
QF_INTERVAL="$4"
QF_ATTR_RSC="fail-count-${QF_RESOURCE}"
if [ -n "$QF_OPERATION" ]; then
QF_ATTR_DISPLAY="${QF_ATTR_RSC}#${QF_OPERATION}_${QF_INTERVAL}"
QF_COUNT=$(query_single_attr "$QF_TARGET" "$QF_ATTR_DISPLAY")
else
QF_ATTR_DISPLAY="$QF_ATTR_RSC"
QF_COUNT=$(query_attr_sum "$QF_TARGET" "${QF_ATTR_RSC}#")
fi
# @COMPAT attributes set < 1.1.17:
# If we didn't find any per-operation failcount,
# check whether there is a legacy per-resource failcount.
if [ "$QF_COUNT" = "0" ]; then
QF_COUNT=$(query_single_attr "$QF_TARGET" "$QF_ATTR_RSC")
if [ "$QF_COUNT" != "0" ]; then
QF_ATTR_DISPLAY="$QF_ATTR_RSC"
fi
fi
# Echo result (comparable to crm_attribute, for backward compatibility)
if [ -n "$QUIET" ]; then
echo $QF_COUNT
else
echo "scope=status name=$QF_ATTR_DISPLAY value=$QF_COUNT"
fi
}
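# Example (hypothetical names): query_failcount node1 myrsc monitor 10000
# prints "scope=status name=fail-count-myrsc#monitor_10000 value=N", or just
# the value when --quiet was given.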
clear_failcount() {
CF_TARGET="$1"
CF_RESOURCE="$2"
CF_OPERATION="$3"
CF_INTERVAL="$4"
if [ -n "$CF_OPERATION" ]; then
CF_OPERATION="-n $CF_OPERATION -I ${CF_INTERVAL}ms"
fi
crm_resource $QUIET $VERBOSE --cleanup \
-N "$CF_TARGET" -r "$CF_RESOURCE" $CF_OPERATION
}
QUIET=""
VERBOSE=""
command=""
resource=""
operation=""
interval="0"
target=$(crm_node -n 2>/dev/null)
SHORTOPTS="qDGQVN:U:v:i:l:r:n:I:"
LONGOPTS_COMMON="help,version,verbose,quiet"
LONGOPTS_COMMANDS="query,delete"
LONGOPTS_OTHER="resource:,node:,operation:,interval:"
LONGOPTS_COMPAT="delete-attr,get-value,resource-id:,uname:,lifetime:,attr-value:,attr-id:"
LONGOPTS="$LONGOPTS_COMMON,$LONGOPTS_COMMANDS,$LONGOPTS_OTHER,$LONGOPTS_COMPAT"
TEMP=$(@GETOPT_PATH@ -o $SHORTOPTS --long $LONGOPTS -n crm_failcount -- "$@")
if [ $? -ne 0 ]; then
exit_usage
fi
eval set -- "$TEMP" # Quotes around $TEMP are essential
while true ; do
case "$1" in
--help)
echo "$HELP_TEXT"
exit $CRM_EX_OK
;;
--version)
crm_attribute --version
exit $?
;;
-q|-Q|--quiet)
QUIET="--quiet"
shift
;;
-V|--verbose)
VERBOSE="$VERBOSE $1"
shift
;;
-G|--query|--get-value)
command="--query"
shift
;;
-D|--delete|--delete-attr)
command="--delete"
shift
;;
-r|--resource|--resource-id)
resource="$2"
shift 2
;;
-n|--operation)
operation="$2"
shift 2
;;
-I|--interval)
interval="$2"
shift 2
;;
-N|--node|-U|--uname)
target="$2"
shift 2
;;
-v|--attr-value)
if [ "$2" = "0" ]; then
command="--delete"
else
warn "ignoring deprecated option '$1' with nonzero value"
fi
shift 2
;;
-i|--attr-id|-l|--lifetime)
warn "ignoring deprecated option '$1'"
shift 2
;;
--)
shift
break
;;
*)
exit_usage "unknown option '$1'"
;;
esac
done
[ -n "$command" ] || exit_usage "must specify a command"
[ -n "$resource" ] || exit_usage "resource name required"
[ -n "$target" ] || exit_usage "node name required"
interval=$(parse_interval "$interval")
if [ "$command" = "--query" ]; then
query_failcount "$target" "$resource" "$operation" "$interval"
else
clear_failcount "$target" "$resource" "$operation" "$interval"
fi
diff --git a/tools/crm_master.in b/tools/crm_master.in
index 5177c4f26c..896239c1ee 100755
--- a/tools/crm_master.in
+++ b/tools/crm_master.in
@@ -1,103 +1,109 @@
#!@BASH_PATH@
+#
+# Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
USAGE_TEXT="Usage: crm_master <command> [<options>]
Common options:
--help Display this text, then exit
--version Display version information, then exit
-V, --verbose Specify multiple times to increase debug output
-q, --quiet Print only the value (if querying)
Commands:
-G, --query Query the current value of the promotion score
-v, --update=VALUE Update the value of the promotion score
-D, --delete Delete the promotion score
Additional Options:
-N, --node=NODE Use promotion score on named node (instead of local node)
-l, --lifetime=VALUE Until when should the setting take effect
(valid values: reboot, forever)
-i, --id=VALUE (Advanced) XML ID used to identify promotion score attribute"
HELP_TEXT="crm_master - Query, update, or delete a resource's promotion score
This program should normally be invoked only from inside an OCF resource agent.
$USAGE_TEXT"
exit_usage() {
if [ $# -gt 0 ]; then
- echo "error: $@" >&2
+ echo "error:" "$@" >&2
fi
echo
echo "$USAGE_TEXT"
exit 1
}
SHORTOPTS_DEPRECATED="U:Q"
LONGOPTS_DEPRECATED="uname:,get-value,delete-attr,attr-value:,attr-id:"
SHORTOPTS="VqGv:DN:l:i:r:"
LONGOPTS="help,version,verbose,quiet,query,update:,delete,node:,lifetime:,id:,resource:"
TEMP=$(@GETOPT_PATH@ -o ${SHORTOPTS}${SHORTOPTS_DEPRECATED} \
--long ${LONGOPTS},${LONGOPTS_DEPRECATED} \
-n crm_master -- "$@")
if [ $? -ne 0 ]; then
exit_usage
fi
eval set -- "$TEMP" # Quotes around $TEMP are essential
# Explicitly set the (usual default) lifetime, so the attribute gets set as a
# node attribute and not a cluster property.
options="--lifetime forever"
while true ; do
case "$1" in
--help)
echo "$HELP_TEXT"
exit 0
;;
--version)
crm_attribute --version
exit 0
;;
--verbose|-V|--quiet|-q|--query|-G|--delete|-D)
options="$options $1"
shift
;;
--update|-v|--node|-N|--lifetime|-l|--id|-i)
options="$options $1 $2"
shift
shift
;;
-r|--resource)
OCF_RESOURCE_INSTANCE=$2;
shift
shift
;;
--get-value|--delete-attr|-Q) # deprecated
options="$options $1"
shift
;;
--uname|-U|--attr-value|--attr-id) # deprecated
options="$options $1 $2"
shift
shift
;;
--)
shift
break
;;
*)
exit_usage "unknown option '$1'"
;;
esac
done
if [ -z "$OCF_RESOURCE_INSTANCE" ]; then
echo "This program should normally only be invoked from inside an OCF resource agent."
echo "To set a promotion score from the command line, please specify resource with -r."
exit 1
fi
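# The promotion score is stored as the node attribute "master-<resource>".
# For example (hypothetical resource name), "crm_master -r myclone -G"
# expands below to: crm_attribute -n master-myclone --lifetime forever --query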
crm_attribute -n master-$OCF_RESOURCE_INSTANCE $options
diff --git a/tools/crm_report.in b/tools/crm_report.in
index ffd8137ee6..541893f822 100644
--- a/tools/crm_report.in
+++ b/tools/crm_report.in
@@ -1,465 +1,471 @@
#!/bin/sh
#
# Copyright 2010-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
TEMP=`@GETOPT_PATH@ \
-o hv?xl:f:t:n:T:L:p:c:dSCu:D:MVse: \
--long help,cts:,cts-log:,dest:,node:,nodes:,from:,to:,sos-mode,logfile:,as-directory,single-node,cluster:,user:,max-depth:,version,features,rsh: \
-n 'crm_report' -- "$@"`
# The quotes around $TEMP are essential
eval set -- "$TEMP"
progname=$(basename "$0")
rsh="ssh -T"
-times=""
tests=""
nodes=""
compress=1
cluster="any"
ssh_user="root"
search_logs=1
report_data=`dirname $0`
maxdepth=5
extra_logs=""
sanitize_patterns="passw.*"
log_patterns="CRIT: ERROR:"
usage() {
cat<<EOF
$progname - Create archive of everything needed when reporting cluster problems
Usage: $progname [options] [DEST]
Required option:
-f, --from TIME time prior to problems beginning
(as "YYYY-M-D H:M:S" including the quotes)
Options:
-V increase verbosity (may be specified multiple times)
-v, --version display software version
--features display software features
-t, --to TIME time at which all problems were resolved
(as "YYYY-M-D H:M:S" including the quotes; default "now")
-T, --cts TEST CTS test or set of tests to extract
--cts-log CTS master logfile
-n, --nodes NODES node names for this cluster (only needed if cluster is
not active on this host; accepts -n "a b" or -n a -n b)
-M do not search for cluster logs
-l, --logfile FILE log file to collect (in addition to detected logs if -M
is not specified; may be specified multiple times)
-p PATT additional regular expression to match variables to be
masked in output (default: "passw.*")
-L PATT additional regular expression to match in log files for
analysis (default: $log_patterns)
-S, --single-node don't attempt to collect data from other nodes
-c, --cluster TYPE force the cluster type instead of detecting
(currently only corosync is supported)
-C, --corosync force the cluster type to be corosync
-u, --user USER username to use when collecting data from other nodes
(default root)
-D, --depth search depth to use when attempting to locate files
-e, --rsh command to use to run commands on other nodes
(default ssh -T)
--sos-mode use defaults suitable for being called by sosreport tool
(behavior subject to change and not useful to end users)
DEST, --dest DEST custom destination directory or file name
$progname works best when run from a cluster node on a running cluster,
but can be run from a stopped cluster node or a Pacemaker Remote node.
If neither --nodes nor --single-node is given, $progname will guess the
node list, but may have trouble detecting Pacemaker Remote nodes.
Unless --single-node is given, the node names (whether specified by --nodes
or detected automatically) must be resolvable and reachable via the command
specified by -e/--rsh using the user specified by -u/--user.
Examples:
$progname -f "2011-12-14 13:05:00" unexplained-apache-failure
$progname -f 2011-12-14 -t 2011-12-15 something-that-took-multiple-days
$progname -f 13:05:00 -t 13:12:00 brief-outage
EOF
}
case "$1" in
-v|--version) echo "$progname @VERSION@-@BUILD_VERSION@"; exit 0;;
--features) echo "@VERSION@-@BUILD_VERSION@: @PCMK_FEATURES@"; exit 0;;
--|-h|--help) usage; exit 0;;
esac
# Prefer helpers in the same directory if they exist, to simplify development
if [ ! -f $report_data/report.common ]; then
report_data=@datadir@/@PACKAGE@
else
echo "Using local helpers"
fi
. $report_data/report.common
while true; do
case "$1" in
-x) set -x; shift;;
-V) verbose=`expr $verbose + 1`; shift;;
-T|--cts) tests="$tests $2"; shift; shift;;
--cts-log) ctslog="$2"; shift; shift;;
-f|--from) start_time=`get_time "$2"`; shift; shift;;
-t|--to) end_time=`get_time "$2"`; shift; shift;;
-n|--node|--nodes) nodes="$nodes $2"; shift; shift;;
-S|--single-node) nodes="$host"; shift;;
-E|-l|--logfile) extra_logs="$extra_logs $2"; shift; shift;;
-p) sanitize_patterns="$sanitize_patterns $2"; shift; shift;;
-L) log_patterns="$log_patterns `echo $2 | sed 's/ /\\\W/g'`"; shift; shift;;
-d|--as-directory) compress=0; shift;;
-C|--corosync) cluster="corosync"; shift;;
-c|--cluster) cluster="$2"; shift; shift;;
-e|--rsh) rsh="$2"; shift; shift;;
-u|--user) ssh_user="$2"; shift; shift;;
-D|--max-depth) maxdepth="$2"; shift; shift;;
-M) search_logs=0; shift;;
--sos-mode) search_logs=0; nodes="$host"; shift;;
--dest) DESTDIR=$2; shift; shift;;
--) if [ -n "$2" ]; then DESTDIR="$2"; fi; break;;
-h|--help) usage; exit 0;;
# Options for compatibility with hb_report
-s) shift;;
*) echo "Unknown argument: $1"; usage; exit 1;;
esac
done
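# collect_data LABEL START END [MASTERLOG]
# Create the scratch directory, write a per-node .env describing what to
# collect, run the report.collector helper locally or over $rsh on each node,
# then finish with analysis/events summaries (and a tarball unless -d given).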
collect_data() {
label="$1"
start=`expr $2 - 10`
end=`expr $3 + 10`
masterlog=$4
if [ "x$DESTDIR" != x ]; then
echo $DESTDIR | grep -e "^/" -qs
if [ $? = 0 ]; then
l_base=$DESTDIR
else
l_base="`pwd`/$DESTDIR"
fi
debug "Using custom scratch dir: $l_base"
r_base=`basename $l_base`
else
l_base=$HOME/$label
r_base=$label
fi
if [ -e $l_base ]; then
fatal "Output directory $l_base already exists, specify an alternate name with --dest"
fi
mkdir -p $l_base
if [ "x$masterlog" != "x" ]; then
dumplogset "$masterlog" $start $end > "$l_base/$HALOG_F"
fi
for node in $nodes; do
cat <<EOF >$l_base/.env
LABEL="$label"
REPORT_HOME="$r_base"
REPORT_MASTER="$host"
REPORT_TARGET="$node"
LOG_START=$start
LOG_END=$end
REMOVE=1
SANITIZE="$sanitize_patterns"
CLUSTER=$cluster
LOG_PATTERNS="$log_patterns"
EXTRA_LOGS="$extra_logs"
SEARCH_LOGS=$search_logs
verbose=$verbose
maxdepth=$maxdepth
EOF
if [ $host = $node ]; then
cat <<EOF >>$l_base/.env
REPORT_HOME="$l_base"
EOF
cat $l_base/.env $report_data/report.common $report_data/report.collector > $l_base/collector
bash $l_base/collector
else
cat $l_base/.env $report_data/report.common $report_data/report.collector \
| $rsh -l $ssh_user $node -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" | (cd $l_base && tar mxf -)
fi
done
analyze $l_base > $l_base/$ANALYSIS_F
if [ -f $l_base/$HALOG_F ]; then
node_events $l_base/$HALOG_F > $l_base/$EVENTS_F
fi
for node in $nodes; do
cat $l_base/$node/$ANALYSIS_F >> $l_base/$ANALYSIS_F
if [ -s $l_base/$node/$EVENTS_F ]; then
cat $l_base/$node/$EVENTS_F >> $l_base/$EVENTS_F
elif [ -s $l_base/$HALOG_F ]; then
awk "\$4==\"$nodes\"" $l_base/$EVENTS_F >> $l_base/$n/$EVENTS_F
fi
done
log " "
if [ $compress = 1 ]; then
fname=`shrink $l_base`
rm -rf $l_base
log "Collected results are available in $fname"
log " "
log "Please create a bug entry at"
log " http://bugs.clusterlabs.org/enter_bug.cgi?product=Pacemaker"
log "Include a description of your problem and attach this tarball"
log " "
log "Thank you for taking time to create this report."
else
log "Collected results are available in $l_base"
fi
log " "
}
#
# check if files have same content in the cluster
#
cibdiff() {
- d1=`dirname $1`
- d2=`dirname $2`
- if [ -f $d1/RUNNING -a -f $d2/RUNNING ] ||
- [ -f $d1/STOPPED -a -f $d2/STOPPED ]; then
+ d1=$(dirname $1)
+ d2=$(dirname $2)
+
+ if [ -f "$d1/RUNNING" ] && [ ! -f "$d2/RUNNING" ]; then
+ DIFF_OK=0
+ elif [ -f "$d1/STOPPED" ] && [ ! -f "$d2/STOPPED" ]; then
+ DIFF_OK=0
+ else
+ DIFF_OK=1
+ fi
+
+ if [ $DIFF_OK -eq 1 ]; then
if which crm_diff > /dev/null 2>&1; then
crm_diff -c -n $1 -o $2
else
info "crm_diff(8) not found, cannot diff CIBs"
fi
else
echo "can't compare cibs from running and stopped systems"
fi
}
diffcheck() {
[ -f "$1" ] || {
echo "$1 does not exist"
return 1
}
[ -f "$2" ] || {
echo "$2 does not exist"
return 1
}
- case `basename $1` in
- $CIB_F) cibdiff $1 $2;;
- $B_CONF) diff -u $1 $2;; # confdiff?
- *) diff -u $1 $2;;
-esac
+ case $(basename "$1") in
+ $CIB_F) cibdiff $1 $2 ;;
+ *) diff -u $1 $2 ;;
+ esac
}
#
# remove duplicates if files are same, make links instead
#
consolidate() {
- for n in $NODES; do
+ for n in $nodes; do
if [ -f $1/$2 ]; then
rm $1/$n/$2
else
mv $1/$n/$2 $1
fi
ln -s ../$2 $1/$n
done
}
analyze_one() {
rc=0
node0=""
- for n in $NODES; do
+ for n in $nodes; do
if [ "$node0" ]; then
diffcheck $1/$node0/$2 $1/$n/$2
rc=$(($rc+$?))
else
node0=$n
fi
done
return $rc
}
analyze() {
- flist="$MEMBERSHIP_F $CIB_F $CRM_MON_F $B_CONF $SYSINFO_F"
+ flist="$MEMBERSHIP_F $CIB_F $CRM_MON_F $SYSINFO_F"
for f in $flist; do
printf "Diff $f... "
ls $1/*/$f >/dev/null 2>&1 || {
echo "no $1/*/$f :/"
continue
}
if analyze_one $1 $f; then
echo "OK"
[ "$f" != $CIB_F ] && consolidate $1 $f
else
echo ""
fi
done
}
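# do_cts: extract data for the CTS tests named in $tests; each entry is a
# single test number or a range, e.g. "-T 4" or "-T 4-7" (comma-separated
# sets are also accepted).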
do_cts() {
test_sets=`echo $tests | tr ',' ' '`
for test_set in $test_sets; do
start_time=0
start_test=`echo $test_set | tr '-' ' ' | awk '{print $1}'`
end_time=0
end_test=`echo $test_set | tr '-' ' ' | awk '{print $2}'`
if [ x$end_test = x ]; then
msg="Extracting test $start_test"
label="CTS-$start_test-`date +"%b-%d-%Y"`"
end_test=`expr $start_test + 1`
else
msg="Extracting tests $start_test to $end_test"
label="CTS-$start_test-$end_test-`date +"%b-%d-%Y"`"
end_test=`expr $end_test + 1`
fi
if [ $start_test = 0 ]; then
start_pat="BEGINNING [0-9].* TESTS"
else
start_pat="Running test.*\[ *$start_test\]"
fi
if [ x$ctslog = x ]; then
ctslog=`findmsg 1 "$start_pat"`
if [ x$ctslog = x ]; then
fatal "No CTS control file detected"
else
log "Using CTS control file: $ctslog"
fi
fi
line=`grep -n "$start_pat" $ctslog | tail -1 | sed 's/:.*//'`
if [ ! -z "$line" ]; then
start_time=`linetime $ctslog $line`
fi
line=`grep -n "Running test.*\[ *$end_test\]" $ctslog | tail -1 | sed 's/:.*//'`
if [ ! -z "$line" ]; then
end_time=`linetime $ctslog $line`
fi
if [ -z "$nodes" ]; then
nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '`
log "Calculated node list: $nodes"
fi
if [ $end_time -lt $start_time ]; then
debug "Test didn't complete, grabbing everything up to now"
end_time=`date +%s`
fi
if [ $start_time != 0 ]; then
log "$msg (`time2str $start_time` to `time2str $end_time`)"
collect_data $label $start_time $end_time $ctslog
else
fatal "$msg failed: not found"
fi
done
}
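# node_names_from_xml: read CIB XML on stdin and print the value of every
# uname="..." attribute, space-separated.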
node_names_from_xml() {
awk '
/uname/ {
for( i=1; i<=NF; i++ )
if( $i~/^uname=/ ) {
sub("uname=.","",$i);
sub("\".*","",$i);
print $i;
next;
}
}
' | tr '\n' ' '
}
getnodes() {
cluster="$1"
# 1. Live (cluster nodes or Pacemaker Remote nodes)
# TODO: This will not detect Pacemaker Remote nodes unless they
# have ever had a permanent node attribute set, because it only
# searches the nodes section. It should also search the config
# for resources that create Pacemaker Remote nodes.
cib_nodes=$(cibadmin -Ql -o nodes 2>/dev/null)
if [ $? -eq 0 ]; then
debug "Querying CIB for nodes"
echo "$cib_nodes" | node_names_from_xml
return
fi
# 2. Saved
if [ -f "@CRM_CONFIG_DIR@/cib.xml" ]; then
debug "Querying on-disk CIB for nodes"
grep "node " "@CRM_CONFIG_DIR@/cib.xml" | node_names_from_xml
return
fi
# 3. logs
# TODO: Look for something like crm_update_peer
}
if [ "x$tests" != "x" ]; then
do_cts
elif [ "x$start_time" != "x" ]; then
masterlog=""
if [ -z "$sanitize_patterns" ]; then
log "WARNING: The tarball produced by this program may contain"
log " sensitive information such as passwords."
log ""
log "We will attempt to remove such information if you use the"
log "-p option. For example: -p \"pass.*\" -p \"user.*\""
log ""
log "However, doing this may reduce the ability for the recipients"
log "to diagnose issues and generally provide assistance."
log ""
log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE"
log ""
fi
# If user didn't specify a cluster stack, make a best guess if possible.
if [ -z "$cluster" ] || [ "$cluster" = "any" ]; then
cluster=$(get_cluster_type)
fi
# If user didn't specify node(s), make a best guess if possible.
if [ -z "$nodes" ]; then
nodes=`getnodes $cluster`
if [ -n "$nodes" ]; then
log "Calculated node list: $nodes"
else
fatal "Cannot determine nodes; specify --nodes or --single-node"
fi
fi
if echo $nodes | grep -qs $host; then
debug "We are a cluster node"
else
debug "We are a log master"
masterlog=`findmsg 1 "pacemaker-controld\\|CTS"`
fi
if [ -z "$end_time" ]; then
end_time=`perl -e 'print time()'`
fi
label="pcmk-`date +"%a-%d-%b-%Y"`"
log "Collecting data from $nodes (`time2str $start_time` to `time2str $end_time`)"
collect_data $label $start_time $end_time $masterlog
else
fatal "Not sure what to do, no tests or time ranges to extract"
fi
# vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80:
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index 7f79926e03..42de6a0ea3 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -1,1977 +1,1980 @@
/*
* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_resource.h>
int resource_verbose = 0;
bool do_force = FALSE;
int crmd_replies_needed = 1; /* The welcome message */
const char *attr_set_type = XML_TAG_ATTR_SETS;
static int
do_find_resource(const char *rsc, resource_t * the_rsc, pe_working_set_t * data_set)
{
int found = 0;
GListPtr lpc = NULL;
for (lpc = the_rsc->running_on; lpc != NULL; lpc = lpc->next) {
node_t *node = (node_t *) lpc->data;
if (BE_QUIET) {
fprintf(stdout, "%s\n", node->details->uname);
} else {
const char *state = "";
if (!pe_rsc_is_clone(the_rsc) && the_rsc->fns->state(the_rsc, TRUE) == RSC_ROLE_MASTER) {
state = "Master";
}
fprintf(stdout, "resource %s is running on: %s %s\n", rsc, node->details->uname, state);
}
found++;
}
if (BE_QUIET == FALSE && found == 0) {
fprintf(stderr, "resource %s is NOT running\n", rsc);
}
return found;
}
int
cli_resource_search(resource_t *rsc, const char *requested_name,
pe_working_set_t *data_set)
{
int found = 0;
resource_t *parent = uber_parent(rsc);
if (pe_rsc_is_clone(rsc)) {
for (GListPtr iter = rsc->children; iter != NULL; iter = iter->next) {
found += do_find_resource(requested_name, iter->data, data_set);
}
/* The anonymous clone children's common ID is supplied */
} else if (pe_rsc_is_clone(parent)
&& is_not_set(rsc->flags, pe_rsc_unique)
&& rsc->clone_name
&& safe_str_eq(requested_name, rsc->clone_name)
&& safe_str_neq(requested_name, rsc->id)) {
for (GListPtr iter = parent->children; iter; iter = iter->next) {
found += do_find_resource(requested_name, iter->data, data_set);
}
} else {
found += do_find_resource(requested_name, rsc, data_set);
}
return found;
}
#define XPATH_MAX 1024
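/*!
 * \internal
 * \brief Find a resource attribute's value and/or XML ID via a CIB query
 *
 * Builds an XPath roughly of the form (a sketch; optional arguments are
 * simply omitted from the expression):
 * //resources//*[@id="RSC"]/SET_TYPE[@id="SET"]//nvpair[@id="X" and @name="Y"]
 * and copies the requested element value of the single match into *value;
 * if more than one nvpair matches, all candidates are printed and -EINVAL
 * is returned.
 */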
static int
find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const char *set_type,
const char *set_name, const char *attr_id, const char *attr_name, char **value)
{
int offset = 0;
int rc = pcmk_ok;
xmlNode *xml_search = NULL;
char *xpath_string = NULL;
if(value) {
*value = NULL;
}
if(the_cib == NULL) {
return -ENOTCONN;
}
xpath_string = calloc(1, XPATH_MAX);
offset +=
snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", get_object_path("resources"));
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//*[@id=\"%s\"]", rsc);
if (set_type) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s", set_type);
if (set_name) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "[@id=\"%s\"]", set_name);
}
}
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//nvpair[");
if (attr_id) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@id=\"%s\"", attr_id);
}
if (attr_name) {
if (attr_id) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, " and ");
}
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@name=\"%s\"", attr_name);
}
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "]");
CRM_LOG_ASSERT(offset > 0);
rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search,
cib_sync_call | cib_scope_local | cib_xpath);
if (rc != pcmk_ok) {
goto bail;
}
crm_log_xml_debug(xml_search, "Match");
if (xml_has_children(xml_search)) {
xmlNode *child = NULL;
rc = -EINVAL;
printf("Multiple attributes match name=%s\n", attr_name);
for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) {
printf(" Value: %s \t(id=%s)\n",
crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child));
}
} else if(value) {
const char *tmp = crm_element_value(xml_search, attr);
if (tmp) {
*value = strdup(tmp);
}
}
bail:
free(xpath_string);
free_xml(xml_search);
return rc;
}
static resource_t *
find_matching_attr_resource(resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_id,
const char * attr_name, cib_t * cib, const char * cmd)
{
int rc = pcmk_ok;
char *lookup_id = NULL;
char *local_attr_id = NULL;
if(do_force == TRUE) {
return rsc;
} else if(rsc->parent) {
switch(rsc->parent->variant) {
case pe_group:
if (BE_QUIET == FALSE) {
printf("Performing %s of '%s' for '%s' will not apply to its peers in '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id);
}
break;
case pe_clone:
rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id);
free(local_attr_id);
if(rc != pcmk_ok) {
rsc = rsc->parent;
if (BE_QUIET == FALSE) {
printf("Performing %s of '%s' on '%s', the parent of '%s'\n", cmd, attr_name, rsc->id, rsc_id);
}
}
break;
default:
break;
}
} else if (rsc->parent && BE_QUIET == FALSE) {
printf("Forcing %s of '%s' for '%s' instead of '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id);
} else if(rsc->parent == NULL && rsc->children) {
resource_t *child = rsc->children->data;
if(child->variant == pe_native) {
lookup_id = clone_strip(child->id); /* Could be a cloned group! */
rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id);
if(rc == pcmk_ok) {
rsc = child;
if (BE_QUIET == FALSE) {
printf("A value for '%s' already exists in child '%s', performing %s on that instead of '%s'\n", attr_name, lookup_id, cmd, rsc_id);
}
}
free(local_attr_id);
free(lookup_id);
}
}
return rsc;
}
int
cli_resource_update_attribute(resource_t *rsc, const char *requested_name,
const char *attr_set, const char *attr_id,
const char *attr_name, const char *attr_value,
bool recursive, cib_t *cib,
pe_working_set_t *data_set)
{
int rc = pcmk_ok;
static bool need_init = TRUE;
char *lookup_id = NULL;
char *local_attr_id = NULL;
char *local_attr_set = NULL;
xmlNode *xml_top = NULL;
xmlNode *xml_obj = NULL;
if(attr_id == NULL
&& do_force == FALSE
&& pcmk_ok != find_resource_attr(
cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL)) {
printf("\n");
}
if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) {
if (do_force == FALSE) {
rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id,
XML_TAG_META_SETS, attr_set, attr_id,
attr_name, &local_attr_id);
if (rc == pcmk_ok && BE_QUIET == FALSE) {
printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n",
uber_parent(rsc)->id, attr_name, local_attr_id);
printf(" Delete '%s' first or use --force to override\n", local_attr_id);
}
free(local_attr_id);
if (rc == pcmk_ok) {
return -ENOTUNIQ;
}
}
} else {
rsc = find_matching_attr_resource(rsc, requested_name, attr_set,
attr_id, attr_name, cib, "update");
}
lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */
rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name,
&local_attr_id);
if (rc == pcmk_ok) {
crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id);
attr_id = local_attr_id;
} else if (rc != -ENXIO) {
free(lookup_id);
free(local_attr_id);
return rc;
} else {
const char *tag = crm_element_name(rsc->xml);
if (attr_set == NULL) {
local_attr_set = crm_concat(lookup_id, attr_set_type, '-');
attr_set = local_attr_set;
}
if (attr_id == NULL) {
local_attr_id = crm_concat(attr_set, attr_name, '-');
attr_id = local_attr_id;
}
xml_top = create_xml_node(NULL, tag);
crm_xml_add(xml_top, XML_ATTR_ID, lookup_id);
xml_obj = create_xml_node(xml_top, attr_set_type);
crm_xml_add(xml_obj, XML_ATTR_ID, attr_set);
}
xml_obj = crm_create_nvpair_xml(xml_obj, attr_id, attr_name, attr_value);
if (xml_top == NULL) {
xml_top = xml_obj;
}
crm_log_xml_debug(xml_top, "Update");
rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options);
if (rc == pcmk_ok && BE_QUIET == FALSE) {
printf("Set '%s' option: id=%s%s%s%s%s=%s\n", lookup_id, local_attr_id,
attr_set ? " set=" : "", attr_set ? attr_set : "",
attr_name ? " name=" : "", attr_name ? attr_name : "", attr_value);
}
free_xml(xml_top);
free(lookup_id);
free(local_attr_id);
free(local_attr_set);
if(recursive && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) {
GListPtr lpc = NULL;
if(need_init) {
xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
need_init = FALSE;
unpack_constraints(cib_constraints, data_set);
for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) {
resource_t *r = (resource_t *) lpc->data;
clear_bit(r->flags, pe_rsc_allocating);
}
}
crm_debug("Looking for dependencies %p", rsc->rsc_cons_lhs);
set_bit(rsc->flags, pe_rsc_allocating);
for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) {
rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data;
resource_t *peer = cons->rsc_lh;
crm_debug("Checking %s %d", cons->id, cons->score);
if (cons->score > 0 && is_not_set(peer->flags, pe_rsc_allocating)) {
/* Don't get into colocation loops */
crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, peer->id);
cli_resource_update_attribute(peer, peer->id, NULL, NULL,
attr_name, attr_value, recursive,
cib, data_set);
}
}
}
return rc;
}
int
cli_resource_delete_attribute(resource_t *rsc, const char *requested_name,
const char *attr_set, const char *attr_id,
const char *attr_name, cib_t *cib,
pe_working_set_t *data_set)
{
xmlNode *xml_obj = NULL;
int rc = pcmk_ok;
char *lookup_id = NULL;
char *local_attr_id = NULL;
if(attr_id == NULL
&& do_force == FALSE
&& find_resource_attr(
cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL) != pcmk_ok) {
printf("\n");
}
if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) {
rsc = find_matching_attr_resource(rsc, requested_name, attr_set,
attr_id, attr_name, cib, "delete");
}
lookup_id = clone_strip(rsc->id);
rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name,
&local_attr_id);
if (rc == -ENXIO) {
free(lookup_id);
return pcmk_ok;
} else if (rc != pcmk_ok) {
free(lookup_id);
return rc;
}
if (attr_id == NULL) {
attr_id = local_attr_id;
}
xml_obj = crm_create_nvpair_xml(NULL, attr_id, attr_name, NULL);
crm_log_xml_debug(xml_obj, "Delete");
CRM_ASSERT(cib);
rc = cib->cmds->remove(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options);
if (rc == pcmk_ok && BE_QUIET == FALSE) {
printf("Deleted '%s' option: id=%s%s%s%s%s\n", lookup_id, local_attr_id,
attr_set ? " set=" : "", attr_set ? attr_set : "",
attr_name ? " name=" : "", attr_name ? attr_name : "");
}
free(lookup_id);
free_xml(xml_obj);
free(local_attr_id);
return rc;
}
static int
send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op,
const char *host_uname, const char *rsc_id,
bool only_failed, pe_working_set_t * data_set)
{
char *our_pid = NULL;
char *key = NULL;
int rc = -ECOMM;
xmlNode *cmd = NULL;
xmlNode *xml_rsc = NULL;
const char *value = NULL;
const char *router_node = host_uname;
xmlNode *params = NULL;
xmlNode *msg_data = NULL;
resource_t *rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL) {
CMD_ERR("Resource %s not found", rsc_id);
return -ENXIO;
} else if (rsc->variant != pe_native) {
CMD_ERR("We can only process primitive resources, not %s", rsc_id);
return -EINVAL;
} else if (host_uname == NULL) {
CMD_ERR("Please supply a node name with --node");
return -EINVAL;
} else {
node_t *node = pe_find_node(data_set->nodes, host_uname);
if (node && is_remote_node(node)) {
node = pe__current_node(node->details->remote_rsc);
if (node == NULL) {
CMD_ERR("No cluster connection to Pacemaker Remote node %s detected",
host_uname);
return -ENXIO;
}
router_node = node->details->uname;
}
}
key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx");
msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP);
crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key);
free(key);
crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname);
if (safe_str_neq(router_node, host_uname)) {
crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node);
}
xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE);
if (rsc->clone_name) {
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name);
crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->id);
} else {
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id);
}
value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_ATTR_TYPE);
if (value == NULL) {
CMD_ERR("%s has no type! Aborting...", rsc_id);
return -ENXIO;
}
value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_CLASS);
if (value == NULL) {
CMD_ERR("%s has no class! Aborting...", rsc_id);
return -ENXIO;
}
crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER);
params = create_xml_node(msg_data, XML_TAG_ATTRS);
crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
crm_xml_add(params, key, "60000"); /* 1 minute */
free(key);
our_pid = crm_getpid_s();
cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid);
/* crm_log_xml_warn(cmd, "send_lrm_rsc_op"); */
free_xml(msg_data);
if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) {
rc = 0;
} else {
crm_debug("Could not send %s op to the controller", op);
rc = -ENOTCONN;
}
free_xml(cmd);
return rc;
}
/*!
* \internal
* \brief Get resource name as used in failure-related node attributes
*
* \param[in] rsc Resource to check
*
* \return Newly allocated string containing resource's fail name
* \note The caller is responsible for freeing the result.
*/
static inline char *
rsc_fail_name(resource_t *rsc)
{
const char *name = (rsc->clone_name? rsc->clone_name : rsc->id);
return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name);
}
static int
clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname,
const char *rsc_id, pe_working_set_t *data_set)
{
int rc = pcmk_ok;
/* Erase the resource's entire LRM history in the CIB, even if we're only
* clearing a single operation's fail count. If we erased only entries for a
* single operation, we might wind up with a wrong idea of the current
* resource state, and we might not re-probe the resource.
*/
rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc_id,
TRUE, data_set);
if (rc != pcmk_ok) {
return rc;
}
crmd_replies_needed++;
crm_trace("Processing %d mainloop inputs", crmd_replies_needed);
while (g_main_context_iteration(NULL, FALSE)) {
crm_trace("Processed mainloop input, %d still remaining",
crmd_replies_needed);
}
if (crmd_replies_needed < 0) {
crmd_replies_needed = 0;
}
return rc;
}
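/*!
 * \internal
 * \brief Erase failed-operation history for matching resources on a node
 *
 * Scan the failed-operations list for entries matching the given resource,
 * operation, and interval (a NULL resource or operation matches all), and
 * wipe the full resource history for each match via clear_rsc_history().
 */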
static int
clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name,
const char *rsc_id, const char *operation,
const char *interval_spec, pe_working_set_t *data_set)
{
int rc = pcmk_ok;
const char *failed_value = NULL;
const char *failed_id = NULL;
const char *interval_ms_s = NULL;
GHashTable *rscs = NULL;
GHashTableIter iter;
/* Create a hash table to use as a set of resources to clean. This lets us
* clean each resource only once (per node) regardless of how many failed
* operations it has.
*/
rscs = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);
// Normalize interval to milliseconds for comparison to history entry
if (operation) {
interval_ms_s = crm_strdup_printf("%u",
crm_parse_interval_spec(interval_spec));
}
for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
xml_op = __xml_next(xml_op)) {
failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
if (failed_id == NULL) {
// Malformed history entry, should never happen
continue;
}
// No resource specified means all resources match
if (rsc_id) {
resource_t *fail_rsc = pe_find_resource_with_flags(data_set->resources,
failed_id,
pe_find_renamed|pe_find_anon);
if (!fail_rsc || safe_str_neq(rsc_id, fail_rsc->id)) {
continue;
}
}
// Host name should always have been provided by this point
failed_value = crm_element_value(xml_op, XML_ATTR_UNAME);
if (safe_str_neq(node_name, failed_value)) {
continue;
}
// No operation specified means all operations match
if (operation) {
failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
if (safe_str_neq(operation, failed_value)) {
continue;
}
// Interval (if operation was specified) defaults to 0 (not all)
failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
if (safe_str_neq(interval_ms_s, failed_value)) {
continue;
}
}
g_hash_table_add(rscs, (gpointer) failed_id);
}
g_hash_table_iter_init(&iter, rscs);
while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) {
crm_debug("Erasing failures of %s on %s", failed_id, node_name);
rc = clear_rsc_history(crmd_channel, node_name, failed_id, data_set);
if (rc != pcmk_ok) {
return rc;
}
}
g_hash_table_destroy(rscs);
return rc;
}
static int
clear_rsc_fail_attrs(resource_t *rsc, const char *operation,
const char *interval_spec, node_t *node)
{
int rc = pcmk_ok;
int attr_options = attrd_opt_none;
char *rsc_name = rsc_fail_name(rsc);
if (is_remote_node(node)) {
attr_options |= attrd_opt_remote;
}
rc = attrd_clear_delegate(NULL, node->details->uname, rsc_name, operation,
interval_spec, NULL, attr_options);
free(rsc_name);
return rc;
}
int
cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname,
resource_t *rsc, const char *operation,
const char *interval_spec, bool just_failures,
pe_working_set_t *data_set)
{
int rc = pcmk_ok;
node_t *node = NULL;
if (rsc == NULL) {
return -ENXIO;
} else if (rsc->children) {
GListPtr lpc = NULL;
for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) {
resource_t *child = (resource_t *) lpc->data;
rc = cli_resource_delete(crmd_channel, host_uname, child, operation,
interval_spec, just_failures, data_set);
if (rc != pcmk_ok) {
return rc;
}
}
return pcmk_ok;
} else if (host_uname == NULL) {
GListPtr lpc = NULL;
GListPtr nodes = g_hash_table_get_values(rsc->known_on);
if(nodes == NULL && do_force) {
nodes = node_list_dup(data_set->nodes, FALSE, FALSE);
} else if(nodes == NULL && rsc->exclusive_discover) {
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void**)&node)) {
if(node->weight >= 0) {
nodes = g_list_prepend(nodes, node);
}
}
} else if(nodes == NULL) {
nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
for (lpc = nodes; lpc != NULL; lpc = lpc->next) {
node = (node_t *) lpc->data;
if (node->details->online) {
rc = cli_resource_delete(crmd_channel, node->details->uname,
rsc, operation, interval_spec,
just_failures, data_set);
}
if (rc != pcmk_ok) {
g_list_free(nodes);
return rc;
}
}
g_list_free(nodes);
return pcmk_ok;
}
node = pe_find_node(data_set->nodes, host_uname);
if (node == NULL) {
printf("Unable to clean up %s because node %s not found\n",
rsc->id, host_uname);
return -ENODEV;
}
if (!node->details->rsc_discovery_enabled) {
printf("Unable to clean up %s because resource discovery disabled on %s\n",
rsc->id, host_uname);
return -EOPNOTSUPP;
}
if (crmd_channel == NULL) {
printf("Dry run: skipping clean-up of %s on %s due to CIB_file\n",
rsc->id, host_uname);
return pcmk_ok;
}
rc = clear_rsc_fail_attrs(rsc, operation, interval_spec, node);
if (rc != pcmk_ok) {
printf("Unable to clean up %s failures on %s: %s\n",
rsc->id, host_uname, pcmk_strerror(rc));
return rc;
}
if (just_failures) {
rc = clear_rsc_failures(crmd_channel, host_uname, rsc->id, operation,
interval_spec, data_set);
} else {
rc = clear_rsc_history(crmd_channel, host_uname, rsc->id, data_set);
}
if (rc != pcmk_ok) {
printf("Cleaned %s failures on %s, but unable to clean history: %s\n",
rsc->id, host_uname, pcmk_strerror(rc));
} else {
printf("Cleaned up %s on %s\n", rsc->id, host_uname);
}
return rc;
}
int
cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name,
const char *operation, const char *interval_spec,
pe_working_set_t *data_set)
{
int rc = pcmk_ok;
int attr_options = attrd_opt_none;
const char *display_name = node_name? node_name : "all nodes";
if (crmd_channel == NULL) {
printf("Dry run: skipping clean-up of %s due to CIB_file\n",
display_name);
return pcmk_ok;
}
crmd_replies_needed = 0;
if (node_name) {
node_t *node = pe_find_node(data_set->nodes, node_name);
if (node == NULL) {
CMD_ERR("Unknown node: %s", node_name);
return -ENXIO;
}
if (is_remote_node(node)) {
attr_options |= attrd_opt_remote;
}
}
rc = attrd_clear_delegate(NULL, node_name, NULL, operation, interval_spec,
NULL, attr_options);
if (rc != pcmk_ok) {
printf("Unable to clean up all failures on %s: %s\n",
display_name, pcmk_strerror(rc));
return rc;
}
if (node_name) {
rc = clear_rsc_failures(crmd_channel, node_name, NULL,
operation, interval_spec, data_set);
if (rc != pcmk_ok) {
printf("Cleaned all resource failures on %s, but unable to clean history: %s\n",
node_name, pcmk_strerror(rc));
return rc;
}
} else {
for (GList *iter = data_set->nodes; iter; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL,
operation, interval_spec, data_set);
if (rc != pcmk_ok) {
printf("Cleaned all resource failures on all nodes, but unable to clean history: %s\n",
pcmk_strerror(rc));
return rc;
}
}
}
printf("Cleaned up all resources on %s\n", display_name);
return pcmk_ok;
}
void
cli_resource_check(cib_t * cib_conn, resource_t *rsc)
{
int need_nl = 0;
char *role_s = NULL;
char *managed = NULL;
resource_t *parent = uber_parent(rsc);
find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id,
NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed);
find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id,
NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s);
if(role_s) {
enum rsc_role_e role = text2role(role_s);
+
+ free(role_s);
if(role == RSC_ROLE_UNKNOWN) {
// Treated as if unset
} else if(role == RSC_ROLE_STOPPED) {
printf("\n * The configuration specifies that '%s' should remain stopped\n", parent->id);
need_nl++;
} else if (is_set(parent->flags, pe_rsc_promotable)
&& (role == RSC_ROLE_SLAVE)) {
printf("\n * The configuration specifies that '%s' should not be promoted\n", parent->id);
need_nl++;
}
}
if(managed && crm_is_true(managed) == FALSE) {
printf("%s * The configuration prevents the cluster from stopping or starting '%s' (unmanaged)\n", need_nl == 0?"\n":"", parent->id);
need_nl++;
}
+ free(managed);
if(need_nl) {
printf("\n");
}
}
int
cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname,
const char *rsc_id, pe_working_set_t * data_set)
{
crm_warn("Failing: %s", rsc_id);
return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set);
}
static GHashTable *
generate_resource_params(resource_t * rsc, pe_working_set_t * data_set)
{
GHashTable *params = NULL;
GHashTable *meta = NULL;
GHashTable *combined = NULL;
GHashTableIter iter;
if (!rsc) {
crm_err("Resource does not exist in config");
return NULL;
}
params = crm_str_table_new();
meta = crm_str_table_new();
combined = crm_str_table_new();
get_rsc_attributes(params, rsc, NULL /* TODO: Pass in local node */ , data_set);
get_meta_attributes(meta, rsc, NULL /* TODO: Pass in local node */ , data_set);
if (params) {
char *key = NULL;
char *value = NULL;
g_hash_table_iter_init(&iter, params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
g_hash_table_insert(combined, strdup(key), strdup(value));
}
g_hash_table_destroy(params);
}
if (meta) {
char *key = NULL;
char *value = NULL;
g_hash_table_iter_init(&iter, meta);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
char *crm_name = crm_meta_name(key);
g_hash_table_insert(combined, crm_name, strdup(value));
}
g_hash_table_destroy(meta);
}
return combined;
}
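/* Check whether a resource is active on a given host (matched by node name
 * or node ID); with host == NULL, check whether it is active anywhere at all.
 */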
static bool resource_is_running_on(resource_t *rsc, const char *host)
{
bool found = TRUE;
GListPtr hIter = NULL;
GListPtr hosts = NULL;
if(rsc == NULL) {
return FALSE;
}
rsc->fns->location(rsc, &hosts, TRUE);
for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) {
pe_node_t *node = (pe_node_t *) hIter->data;
if(strcmp(host, node->details->uname) == 0) {
crm_trace("Resource %s is running on %s\n", rsc->id, host);
goto done;
} else if(strcmp(host, node->details->id) == 0) {
crm_trace("Resource %s is running on %s\n", rsc->id, host);
goto done;
}
}
if(host != NULL) {
crm_trace("Resource %s is not running on: %s\n", rsc->id, host);
found = FALSE;
} else if(host == NULL && hosts == NULL) {
crm_trace("Resource %s is not running\n", rsc->id);
found = FALSE;
}
done:
g_list_free(hosts);
return found;
}
/*!
* \internal
* \brief Create a list of all resources active on host from a given list
*
* \param[in] host Name of host to check whether resources are active
* \param[in] rsc_list List of resources to check
*
* \return New list of resources from list that are active on host
*/
static GList *
get_active_resources(const char *host, GList *rsc_list)
{
GList *rIter = NULL;
GList *active = NULL;
for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) {
resource_t *rsc = (resource_t *) rIter->data;
/* Expand groups to their members, because if we're restarting a member
* other than the first, we can't otherwise tell which resources are
* stopping and starting.
*/
if (rsc->variant == pe_group) {
active = g_list_concat(active,
get_active_resources(host, rsc->children));
} else if (resource_is_running_on(rsc, host)) {
active = g_list_append(active, strdup(rsc->id));
}
}
return active;
}
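/* Return a shallow copy of 'from' minus any strings that also appear (by
 * strcmp) in 'items'; the caller frees the returned list but not its data.
 */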
static GList*
subtract_lists(GList *from, GList *items)
{
GList *item = NULL;
GList *result = g_list_copy(from);
for (item = items; item != NULL; item = item->next) {
GList *candidate = NULL;
for (candidate = from; candidate != NULL; candidate = candidate->next) {
crm_info("Comparing %s with %s", (const char *) candidate->data,
(const char *) item->data);
if(strcmp(candidate->data, item->data) == 0) {
result = g_list_remove(result, candidate->data);
break;
}
}
}
return result;
}
static void dump_list(GList *items, const char *tag)
{
int lpc = 0;
GList *item = NULL;
for (item = items; item != NULL; item = item->next) {
crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data);
lpc++;
}
}
static void display_list(GList *items, const char *tag)
{
GList *item = NULL;
for (item = items; item != NULL; item = item->next) {
fprintf(stdout, "%s%s\n", tag, (const char *)item->data);
}
}
/*!
* \internal
* \brief Upgrade XML to latest schema version and use it as working set input
*
* This also updates the working set timestamp to the current time.
*
* \param[in] data_set Working set instance to update
* \param[in] xml XML to use as input
*
* \return pcmk_ok on success, -ENOKEY if unable to upgrade XML
* \note On success, caller is responsible for freeing memory allocated for
* data_set->now.
* \todo This follows the example of other callers of cli_config_update()
* and returns -ENOKEY ("Required key not available") if that fails,
* but perhaps -pcmk_err_schema_validation would be better in that case.
*/
int
update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml)
{
if (cli_config_update(xml, NULL, FALSE) == FALSE) {
return -ENOKEY;
}
data_set->input = *xml;
data_set->now = crm_time_new(NULL);
return pcmk_ok;
}
/*!
* \internal
* \brief Update a working set's XML input based on a CIB query
*
* \param[in] data_set Data set instance to initialize
* \param[in] cib Connection to the CIB manager
*
* \return pcmk_ok on success, -errno on failure
* \note On success, caller is responsible for freeing memory allocated for
* data_set->input and data_set->now.
*/
static int
update_working_set_from_cib(pe_working_set_t * data_set, cib_t *cib)
{
xmlNode *cib_xml_copy = NULL;
int rc;
rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call);
if (rc != pcmk_ok) {
fprintf(stderr, "Could not obtain the current CIB: %s (%d)\n", pcmk_strerror(rc), rc);
return rc;
}
rc = update_working_set_xml(data_set, &cib_xml_copy);
if (rc != pcmk_ok) {
fprintf(stderr, "Could not upgrade the current CIB XML\n");
free_xml(cib_xml_copy);
return rc;
}
return pcmk_ok;
}
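/* Refresh the working set from the CIB; if 'simulate' is true, replay the
 * pending transition in a temporary shadow CIB first, so the data set
 * reflects the expected post-transition state rather than the current one.
 */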
static int
update_dataset(cib_t *cib, pe_working_set_t * data_set, bool simulate)
{
char *pid = NULL;
char *shadow_file = NULL;
cib_t *shadow_cib = NULL;
int rc;
cleanup_alloc_calculations(data_set);
rc = update_working_set_from_cib(data_set, cib);
if (rc != pcmk_ok) {
return rc;
}
if(simulate) {
pid = crm_getpid_s();
shadow_cib = cib_shadow_new(pid);
shadow_file = get_shadow_file(pid);
if (shadow_cib == NULL) {
fprintf(stderr, "Could not create shadow cib: '%s'\n", pid);
rc = -ENXIO;
goto cleanup;
}
rc = write_xml_file(data_set->input, shadow_file, FALSE);
if (rc < 0) {
fprintf(stderr, "Could not populate shadow cib: %s (%d)\n", pcmk_strerror(rc), rc);
goto cleanup;
}
rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command);
if(rc != pcmk_ok) {
fprintf(stderr, "Could not connect to shadow cib: %s (%d)\n", pcmk_strerror(rc), rc);
goto cleanup;
}
do_calculations(data_set, data_set->input, NULL);
run_simulation(data_set, shadow_cib, NULL, TRUE);
rc = update_dataset(shadow_cib, data_set, FALSE);
} else {
cluster_status(data_set);
}
cleanup:
/* Do not free data_set->input here, we need rsc->xml to be valid later on */
cib_delete(shadow_cib);
free(pid);
if(shadow_file) {
unlink(shadow_file);
free(shadow_file);
}
return rc;
}
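/* Return the largest configured stop-operation timeout (in milliseconds)
 * found anywhere in the given resource's tree of children.
 */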
static int
max_delay_for_resource(pe_working_set_t * data_set, resource_t *rsc)
{
int delay = 0;
int max_delay = 0;
if(rsc && rsc->children) {
GList *iter = NULL;
for(iter = rsc->children; iter; iter = iter->next) {
resource_t *child = (resource_t *)iter->data;
delay = max_delay_for_resource(data_set, child);
if(delay > max_delay) {
double seconds = delay / 1000.0;
crm_trace("Calculated new delay of %.1fs due to %s", seconds, child->id);
max_delay = delay;
}
}
} else if(rsc) {
char *key = crm_strdup_printf("%s_%s_0", rsc->id, RSC_STOP);
action_t *stop = custom_action(rsc, key, RSC_STOP, NULL, TRUE, FALSE, data_set);
const char *value = g_hash_table_lookup(stop->meta, XML_ATTR_TIMEOUT);
max_delay = crm_int_helper(value, NULL);
pe_free_action(stop);
}
return max_delay;
}
static int
max_delay_in(pe_working_set_t * data_set, GList *resources)
{
int max_delay = 0;
GList *item = NULL;
for (item = resources; item != NULL; item = item->next) {
int delay = 0;
resource_t *rsc = pe_find_resource(data_set->resources, (const char *)item->data);
delay = max_delay_for_resource(data_set, rsc);
if(delay > max_delay) {
double seconds = delay / 1000.0;
crm_trace("Calculated new delay of %.1fs due to %s", seconds, rsc->id);
max_delay = delay;
}
}
return 5 + (max_delay / 1000);
}
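/* True while the delta list is non-empty or the restarted resource is not
 * yet running (on the given host, if one was specified).
 */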
#define waiting_for_starts(d, r, h) ((g_list_length(d) > 0) || \
(resource_is_running_on((r), (h)) == FALSE))
/*!
* \internal
* \brief Restart a resource (on a particular host if requested).
*
* \param[in] rsc The resource to restart
* \param[in] host The host to restart the resource on (or NULL for all)
* \param[in] timeout_ms Consider failed if actions do not complete in this time
* (specified in milliseconds, but a two-second
* granularity is actually used; if 0, a timeout will be
* calculated based on the resource timeout)
* \param[in] cib Connection to the CIB manager
*
* \return pcmk_ok on success, -errno on failure (exits on certain failures)
*/
int
cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib)
{
int rc = 0;
int lpc = 0;
int before = 0;
int step_timeout_s = 0;
int sleep_interval = 2;
int timeout = timeout_ms / 1000;
bool stop_via_ban = FALSE;
char *rsc_id = NULL;
char *orig_target_role = NULL;
GList *list_delta = NULL;
GList *target_active = NULL;
GList *current_active = NULL;
GList *restart_target_active = NULL;
pe_working_set_t data_set;
if(resource_is_running_on(rsc, host) == FALSE) {
const char *id = rsc->clone_name?rsc->clone_name:rsc->id;
if(host) {
printf("%s is not running on %s and so cannot be restarted\n", id, host);
} else {
printf("%s is not running anywhere and so cannot be restarted\n", id);
}
return -ENXIO;
}
/* We might set the target-role meta-attribute */
attr_set_type = XML_TAG_META_SETS;
rsc_id = strdup(rsc->id);
if ((pe_rsc_is_clone(rsc) || pe_bundle_replicas(rsc)) && host) {
stop_via_ban = TRUE;
}
/*
grab full cib
determine originally active resources
disable or ban
poll cib and watch for affected resources to get stopped
without --timeout, calculate the stop timeout for each step and wait for that
if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down
if everything stopped, re-enable or un-ban
poll cib and watch for affected resources to get started
without --timeout, calculate the start timeout for each step and wait for that
if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up
report success
Optimizations:
- use constraints to determine ordered list of affected resources
- Allow a --no-deps option (aka. --force-restart)
*/
set_working_set_defaults(&data_set);
rc = update_dataset(cib, &data_set, FALSE);
if(rc != pcmk_ok) {
fprintf(stdout, "Could not get new resource list: %s (%d)\n", pcmk_strerror(rc), rc);
free(rsc_id);
return rc;
}
restart_target_active = get_active_resources(host, data_set.resources);
current_active = get_active_resources(host, data_set.resources);
dump_list(current_active, "Origin");
if (stop_via_ban) {
/* Stop the clone or bundle instance by banning it from the host */
BE_QUIET = TRUE;
rc = cli_resource_ban(rsc_id, host, NULL, cib);
} else {
/* Stop the resource by setting target-role to Stopped.
* Remember any existing target-role so we can restore it later
* (though it only makes any difference if it's Slave).
*/
char *lookup_id = clone_strip(rsc->id);
find_resource_attr(cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL,
NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role);
free(lookup_id);
rc = cli_resource_update_attribute(rsc, rsc_id, NULL, NULL,
XML_RSC_ATTR_TARGET_ROLE,
RSC_STOPPED, FALSE, cib, &data_set);
}
if(rc != pcmk_ok) {
fprintf(stderr, "Could not set target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc);
if (current_active) {
g_list_free_full(current_active, free);
}
if (restart_target_active) {
g_list_free_full(restart_target_active, free);
}
free(rsc_id);
return crm_exit(crm_errno2exit(rc));
}
rc = update_dataset(cib, &data_set, TRUE);
if(rc != pcmk_ok) {
fprintf(stderr, "Could not determine which resources would be stopped\n");
goto failure;
}
target_active = get_active_resources(host, data_set.resources);
dump_list(target_active, "Target");
list_delta = subtract_lists(current_active, target_active);
fprintf(stdout, "Waiting for %d resources to stop:\n", g_list_length(list_delta));
display_list(list_delta, " * ");
step_timeout_s = timeout / sleep_interval;
while(g_list_length(list_delta) > 0) {
before = g_list_length(list_delta);
if(timeout_ms == 0) {
step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval;
}
/* We probably don't need the entire step timeout */
for(lpc = 0; lpc < step_timeout_s && g_list_length(list_delta) > 0; lpc++) {
sleep(sleep_interval);
if(timeout) {
timeout -= sleep_interval;
crm_trace("%ds remaining", timeout);
}
rc = update_dataset(cib, &data_set, FALSE);
if(rc != pcmk_ok) {
fprintf(stderr, "Could not determine which resources were stopped\n");
goto failure;
}
if (current_active) {
g_list_free_full(current_active, free);
}
current_active = get_active_resources(host, data_set.resources);
g_list_free(list_delta);
list_delta = subtract_lists(current_active, target_active);
dump_list(current_active, "Current");
dump_list(list_delta, "Delta");
}
crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before);
if(before == g_list_length(list_delta)) {
/* aborted during stop phase, print the contents of list_delta */
fprintf(stderr, "Could not complete shutdown of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta));
display_list(list_delta, " * ");
rc = -ETIME;
goto failure;
}
}
if (stop_via_ban) {
rc = cli_resource_clear(rsc_id, host, NULL, cib);
} else if (orig_target_role) {
rc = cli_resource_update_attribute(rsc, rsc_id, NULL, NULL,
XML_RSC_ATTR_TARGET_ROLE,
orig_target_role, FALSE, cib,
&data_set);
free(orig_target_role);
orig_target_role = NULL;
} else {
rc = cli_resource_delete_attribute(rsc, rsc_id, NULL, NULL,
XML_RSC_ATTR_TARGET_ROLE, cib,
&data_set);
}
if(rc != pcmk_ok) {
fprintf(stderr, "Could not unset target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc);
free(rsc_id);
return crm_exit(crm_errno2exit(rc));
}
if (target_active) {
g_list_free_full(target_active, free);
}
target_active = restart_target_active;
if (list_delta) {
g_list_free(list_delta);
}
list_delta = subtract_lists(target_active, current_active);
fprintf(stdout, "Waiting for %d resources to start again:\n", g_list_length(list_delta));
display_list(list_delta, " * ");
step_timeout_s = timeout / sleep_interval;
while (waiting_for_starts(list_delta, rsc, host)) {
before = g_list_length(list_delta);
if(timeout_ms == 0) {
step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval;
}
/* We probably don't need the entire step timeout */
for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) {
sleep(sleep_interval);
if(timeout) {
timeout -= sleep_interval;
crm_trace("%ds remaining", timeout);
}
rc = update_dataset(cib, &data_set, FALSE);
if(rc != pcmk_ok) {
fprintf(stderr, "Could not determine which resources were started\n");
goto failure;
}
if (current_active) {
g_list_free_full(current_active, free);
}
/* It's OK if dependent resources moved to a different node,
* so we check active resources on all nodes.
*/
current_active = get_active_resources(NULL, data_set.resources);
g_list_free(list_delta);
list_delta = subtract_lists(target_active, current_active);
dump_list(current_active, "Current");
dump_list(list_delta, "Delta");
}
if(before == g_list_length(list_delta)) {
/* aborted during start phase, print the contents of list_delta */
fprintf(stdout, "Could not complete restart of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta));
display_list(list_delta, " * ");
rc = -ETIME;
goto failure;
}
}
rc = pcmk_ok;
goto done;
failure:
if (stop_via_ban) {
cli_resource_clear(rsc_id, host, NULL, cib);
} else if (orig_target_role) {
cli_resource_update_attribute(rsc, rsc_id, NULL, NULL,
XML_RSC_ATTR_TARGET_ROLE,
orig_target_role, FALSE, cib, &data_set);
free(orig_target_role);
} else {
cli_resource_delete_attribute(rsc, rsc_id, NULL, NULL,
XML_RSC_ATTR_TARGET_ROLE, cib, &data_set);
}
done:
if (list_delta) {
g_list_free(list_delta);
}
if (current_active) {
g_list_free_full(current_active, free);
}
if (target_active && (target_active != restart_target_active)) {
g_list_free_full(target_active, free);
}
if (restart_target_active) {
g_list_free_full(restart_target_active, free);
}
cleanup_alloc_calculations(&data_set);
free(rsc_id);
return rc;
}
static inline int action_is_pending(action_t *action)
{
if(is_set(action->flags, pe_action_optional)) {
return FALSE;
} else if(is_set(action->flags, pe_action_runnable) == FALSE) {
return FALSE;
} else if(is_set(action->flags, pe_action_pseudo)) {
return FALSE;
} else if(safe_str_eq("notify", action->task)) {
return FALSE;
}
return TRUE;
}
/*!
* \internal
* \brief Return TRUE if any actions in a list are pending
*
* \param[in] actions List of actions to check
*
* \return TRUE if any actions in the list are pending, FALSE otherwise
*/
static bool
actions_are_pending(GListPtr actions)
{
GListPtr action;
for (action = actions; action != NULL; action = action->next) {
action_t *a = (action_t *)action->data;
if (action_is_pending(a)) {
crm_notice("Waiting for %s (flags=0x%.8x)", a->uuid, a->flags);
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Print pending actions to stderr
*
* \param[in] actions List of actions to check
*
* \return void
*/
static void
print_pending_actions(GListPtr actions)
{
GListPtr action;
fprintf(stderr, "Pending actions:\n");
for (action = actions; action != NULL; action = action->next) {
action_t *a = (action_t *) action->data;
if (action_is_pending(a)) {
fprintf(stderr, "\tAction %d: %s", a->id, a->uuid);
if (a->node) {
fprintf(stderr, "\ton %s", a->node->details->uname);
}
fprintf(stderr, "\n");
}
}
}
/* For --wait, timeout (in seconds) to use if caller doesn't specify one */
#define WAIT_DEFAULT_TIMEOUT_S (60 * 60)
/* For --wait, how long to sleep between cluster state checks */
#define WAIT_SLEEP_S (2)
/*!
* \internal
* \brief Wait until all pending cluster actions are complete
*
* This waits until either the CIB's transition graph is idle or a timeout is
* reached.
*
* \param[in] timeout_ms Consider failed if actions do not complete in this time
* (specified in milliseconds, but one-second granularity
* is actually used; if 0, a default will be used)
* \param[in] cib Connection to the CIB manager
*
* \return pcmk_ok on success, -errno on failure
*/
int
wait_till_stable(int timeout_ms, cib_t * cib)
{
pe_working_set_t data_set;
int rc = -1;
int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S;
time_t expire_time = time(NULL) + timeout_s;
time_t time_diff;
bool printed_version_warning = BE_QUIET; // i.e. don't print if quiet
set_working_set_defaults(&data_set);
do {
/* Abort if timeout is reached */
time_diff = expire_time - time(NULL);
if (time_diff > 0) {
crm_info("Waiting up to %ld seconds for cluster actions to complete", time_diff);
} else {
print_pending_actions(data_set.actions);
cleanup_alloc_calculations(&data_set);
return -ETIME;
}
if (rc == pcmk_ok) { /* this avoids sleep on first loop iteration */
sleep(WAIT_SLEEP_S);
}
/* Get latest transition graph */
cleanup_alloc_calculations(&data_set);
rc = update_working_set_from_cib(&data_set, cib);
if (rc != pcmk_ok) {
cleanup_alloc_calculations(&data_set);
return rc;
}
do_calculations(&data_set, data_set.input, NULL);
if (!printed_version_warning) {
/* If the DC has a different version than the local node, the two
* could come to different conclusions about what actions need to be
* done. Warn the user in this case.
*
* @TODO A possible long-term solution would be to reimplement the
* wait as a new controller operation that would be forwarded to the
* DC. However, that would have potential problems of its own.
*/
const char *dc_version = g_hash_table_lookup(data_set.config_hash,
"dc-version");
if (safe_str_neq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION)) {
printf("warning: --wait command may not work properly in mixed-version cluster\n");
printed_version_warning = TRUE;
}
}
} while (actions_are_pending(data_set.actions));
return pcmk_ok;
}
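/* A minimal caller sketch (hypothetical timeout value): wait up to 30
 * seconds for the cluster to settle, treating -ETIME as "actions were
 * still pending when the timeout expired":
 *
 *     rc = wait_till_stable(30000, cib);
 *     if (rc == -ETIME) {
 *         // pending actions have already been printed to stderr
 *     }
 */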
int
cli_resource_execute(resource_t *rsc, const char *requested_name,
const char *rsc_action, GHashTable *override_hash,
int timeout_ms, cib_t * cib, pe_working_set_t *data_set)
{
int rc = pcmk_ok;
svc_action_t *op = NULL;
const char *rid = NULL;
const char *rtype = NULL;
const char *rprov = NULL;
const char *rclass = NULL;
const char *action = NULL;
GHashTable *params = NULL;
if (safe_str_eq(rsc_action, "validate")) {
action = "validate-all";
} else if (safe_str_eq(rsc_action, "force-check")) {
action = "monitor";
} else if (safe_str_eq(rsc_action, "force-stop")) {
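/* Skip the "force-" prefix: rsc_action+6 turns "force-stop" into "stop" */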
action = rsc_action+6;
} else if (safe_str_eq(rsc_action, "force-start")
|| safe_str_eq(rsc_action, "force-demote")
|| safe_str_eq(rsc_action, "force-promote")) {
action = rsc_action+6;
if(pe_rsc_is_clone(rsc)) {
rc = cli_resource_search(rsc, requested_name, data_set);
if(rc > 0 && do_force == FALSE) {
CMD_ERR("It is not safe to %s %s here: the cluster claims it is already active",
action, rsc->id);
CMD_ERR("Try setting target-role=stopped first or specifying --force");
crm_exit(CRM_EX_UNSAFE);
}
}
}
if(pe_rsc_is_clone(rsc)) {
/* Grab the first child resource in the hope it's not a group */
rsc = rsc->children->data;
}
if(rsc->variant == pe_group) {
CMD_ERR("Sorry, --%s doesn't support group resources", rsc_action);
crm_exit(CRM_EX_UNIMPLEMENT_FEATURE);
}
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) {
CMD_ERR("Sorry, --%s doesn't support %s resources yet", rsc_action, rclass);
crm_exit(CRM_EX_UNIMPLEMENT_FEATURE);
}
params = generate_resource_params(rsc, data_set);
/* Add the CRM_meta_timeout environment variable needed by some resource agents */
if (timeout_ms == 0) {
timeout_ms = pe_get_configured_timeout(rsc, action, data_set);
}
g_hash_table_insert(params, strdup("CRM_meta_timeout"),
crm_strdup_printf("%d", timeout_ms));
/* add crm_feature_set env needed by some resource agents */
g_hash_table_insert(params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
rid = pe_rsc_is_anon_clone(rsc->parent)? requested_name : rsc->id;
op = resources_action_create(rid, rclass, rprov, rtype, action, 0,
timeout_ms, params, 0);
if (op == NULL) {
/* Re-run with stderr enabled so we can display a sane error message */
crm_enable_stderr(TRUE);
op = resources_action_create(rid, rclass, rprov, rtype, action, 0,
timeout_ms, params, 0);
/* We know op will be NULL, but this makes static analysis happy */
services_action_free(op);
return crm_exit(CRM_EX_DATAERR);
}
setenv("HA_debug", resource_verbose > 0 ? "1" : "0", 1);
if(resource_verbose > 1) {
setenv("OCF_TRACE_RA", "1", 1);
}
if (override_hash) {
GHashTableIter iter;
char *name = NULL;
char *value = NULL;
g_hash_table_iter_init(&iter, override_hash);
while (g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & value)) {
printf("Overriding the cluster configuration for '%s' with '%s' = '%s'\n",
rsc->id, name, value);
g_hash_table_replace(op->params, strdup(name), strdup(value));
}
}
if (services_action_sync(op)) {
int more, lpc, last;
char *local_copy = NULL;
if (op->status == PCMK_LRM_OP_DONE) {
printf("Operation %s for %s (%s:%s:%s) returned: '%s' (%d)\n",
action, rsc->id, rclass, rprov ? rprov : "", rtype,
services_ocf_exitcode_str(op->rc), op->rc);
} else {
printf("Operation %s for %s (%s:%s:%s) failed: '%s' (%d)\n",
action, rsc->id, rclass, rprov ? rprov : "", rtype,
services_lrm_status_str(op->status), op->status);
}
/* hide output for validate-all if not in verbose */
if (resource_verbose == 0 && safe_str_eq(action, "validate-all"))
goto done;
if (op->stdout_data) {
local_copy = strdup(op->stdout_data);
more = strlen(local_copy);
last = 0;
for (lpc = 0; lpc < more; lpc++) {
if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) {
local_copy[lpc] = 0;
printf(" > stdout: %s\n", local_copy + last);
last = lpc + 1;
}
}
free(local_copy);
}
if (op->stderr_data) {
local_copy = strdup(op->stderr_data);
more = strlen(local_copy);
last = 0;
for (lpc = 0; lpc < more; lpc++) {
if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) {
local_copy[lpc] = 0;
printf(" > stderr: %s\n", local_copy + last);
last = lpc + 1;
}
}
free(local_copy);
}
}
done:
rc = op->rc;
services_action_free(op);
return rc;
}
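/* Example (hypothetical resource name): a command line such as
 *
 *     crm_resource --resource myrsc --force-start
 *
 * reaches this function, which runs the agent's start action directly on
 * the local node (bypassing the cluster) and prints the agent's output as
 * the "> stdout:" / "> stderr:" lines above.
 */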
int
cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name,
cib_t *cib, pe_working_set_t *data_set)
{
int rc = pcmk_ok;
unsigned int count = 0;
node_t *current = NULL;
node_t *dest = pe_find_node(data_set->nodes, host_name);
bool cur_is_dest = FALSE;
if (dest == NULL) {
return -pcmk_err_node_unknown;
}
if (scope_master && is_not_set(rsc->flags, pe_rsc_promotable)) {
resource_t *p = uber_parent(rsc);
if (is_set(p->flags, pe_rsc_promotable)) {
CMD_ERR("Using parent '%s' for --move command instead of '%s'.", p->id, rsc_id);
rsc_id = p->id;
rsc = p;
} else {
CMD_ERR("Ignoring '--master' option: %s is not a promotable resource",
rsc_id);
scope_master = FALSE;
}
}
current = pe__find_active_requires(rsc, &count);
if (is_set(rsc->flags, pe_rsc_promotable)) {
GListPtr iter = NULL;
unsigned int master_count = 0;
pe_node_t *master_node = NULL;
for(iter = rsc->children; iter; iter = iter->next) {
resource_t *child = (resource_t *)iter->data;
enum rsc_role_e child_role = child->fns->state(child, TRUE);
if(child_role == RSC_ROLE_MASTER) {
rsc = child;
master_node = pe__current_node(child);
master_count++;
}
}
if (scope_master || master_count) {
count = master_count;
current = master_node;
}
}
if (count > 1) {
if (pe_rsc_is_clone(rsc)) {
current = NULL;
} else {
return -pcmk_err_multiple;
}
}
if (current && (current->details == dest->details)) {
cur_is_dest = TRUE;
if (do_force) {
crm_info("%s is already %s on %s, reinforcing placement with location constraint.",
rsc_id, scope_master?"promoted":"active", dest->details->uname);
} else {
return -pcmk_err_already;
}
}
/* Clear any previous constraints for 'dest' */
cli_resource_clear(rsc_id, dest->details->uname, data_set->nodes, cib);
/* Record an explicit preference for 'dest' */
rc = cli_resource_prefer(rsc_id, dest->details->uname, cib);
crm_trace("%s%s now prefers node %s%s",
rsc->id, scope_master?" (master)":"", dest->details->uname, do_force?"(forced)":"");
/* Only ban the previous location if the current location differs from the
 * destination. It is possible to use -M to enforce a location without
 * regard to where the resource is currently running. */
if(do_force && (cur_is_dest == FALSE)) {
/* Ban the original location if possible */
if(current) {
(void)cli_resource_ban(rsc_id, current->details->uname, NULL, cib);
} else if(count > 1) {
CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move to %s",
rsc_id, scope_master?"promoted":"active", count, dest->details->uname);
CMD_ERR("You can prevent '%s' from being %s at a specific location with:"
" --ban %s--host <name>", rsc_id, scope_master?"promoted":"active", scope_master?"--master ":"");
} else {
crm_trace("Not banning %s from its current location: not active", rsc_id);
}
}
return rc;
}
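/* Note: the move above is implemented purely with location constraints:
 * any previous constraint for the destination is cleared, an explicit
 * preference for it is recorded, and (with --force, when the current node
 * is known and differs from the destination) the current node is banned
 * so the resource must leave it.
 */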
static void
cli_resource_why_without_rsc_and_host(cib_t *cib_conn, GListPtr resources)
{
GListPtr lpc = NULL;
GListPtr hosts = NULL;
for (lpc = resources; lpc != NULL; lpc = lpc->next) {
resource_t *rsc = (resource_t *) lpc->data;
rsc->fns->location(rsc, &hosts, TRUE);
if (hosts == NULL) {
printf("Resource %s is not running\n", rsc->id);
} else {
printf("Resource %s is running\n", rsc->id);
}
cli_resource_check(cib_conn, rsc);
g_list_free(hosts);
hosts = NULL;
}
}
static void
cli_resource_why_with_rsc_and_host(cib_t *cib_conn, GListPtr resources,
resource_t *rsc, const char *host_uname)
{
if (resource_is_running_on(rsc, host_uname)) {
printf("Resource %s is running on host %s\n", rsc->id, host_uname);
} else {
printf("Resource %s is not running on host %s\n", rsc->id, host_uname);
}
cli_resource_check(cib_conn, rsc);
}
static void
cli_resource_why_without_rsc_with_host(cib_t *cib_conn, GListPtr resources, node_t *node)
{
const char* host_uname = node->details->uname;
GListPtr allResources = node->details->allocated_rsc;
GListPtr activeResources = node->details->running_rsc;
GListPtr inactiveResources = subtract_lists(allResources, activeResources);
GListPtr lpc = NULL;
for (lpc = activeResources; lpc != NULL; lpc = lpc->next) {
resource_t *rsc = (resource_t *) lpc->data;
printf("Resource %s is running on host %s\n", rsc->id, host_uname);
cli_resource_check(cib_conn, rsc);
}
for (lpc = inactiveResources; lpc != NULL; lpc = lpc->next) {
resource_t *rsc = (resource_t *) lpc->data;
printf("Resource %s is assigned to host %s but not running\n",
rsc->id, host_uname);
cli_resource_check(cib_conn, rsc);
}
g_list_free(allResources);
g_list_free(activeResources);
g_list_free(inactiveResources);
}
static void
cli_resource_why_with_rsc_without_host(cib_t *cib_conn, GListPtr resources,
resource_t *rsc)
{
GListPtr hosts = NULL;
rsc->fns->location(rsc, &hosts, TRUE);
printf("Resource %s is %srunning\n", rsc->id, (hosts? "" : "not "));
cli_resource_check(cib_conn, rsc);
g_list_free(hosts);
}
void cli_resource_why(cib_t *cib_conn, GListPtr resources, resource_t *rsc,
node_t *node)
{
const char *host_uname = (node == NULL)? NULL : node->details->uname;
if ((rsc == NULL) && (host_uname == NULL)) {
cli_resource_why_without_rsc_and_host(cib_conn, resources);
} else if ((rsc != NULL) && (host_uname != NULL)) {
cli_resource_why_with_rsc_and_host(cib_conn, resources, rsc,
host_uname);
} else if ((rsc == NULL) && (host_uname != NULL)) {
cli_resource_why_without_rsc_with_host(cib_conn, resources, node);
} else if ((rsc != NULL) && (host_uname == NULL)) {
cli_resource_why_with_rsc_without_host(cib_conn, resources, rsc);
}
}
diff --git a/tools/crm_standby.in b/tools/crm_standby.in
index 220c1e7f94..1c6dea4655 100755
--- a/tools/crm_standby.in
+++ b/tools/crm_standby.in
@@ -1,150 +1,156 @@
#!@BASH_PATH@
+#
+# Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
USAGE_TEXT="Usage: crm_standby <command> [options]
Common options:
--help Display this text, then exit
--version Display version information, then exit
-V, --verbose Specify multiple times to increase debug output
-q, --quiet Print only the standby status (if querying)
Commands:
-G, --query Query the current value of standby mode (on/off)
-v, --update=VALUE Update the value of standby mode (on/off)
-D, --delete Let standby mode use default value
Additional Options:
-N, --node=NODE Operate on the named node instead of the current one
-l, --lifetime=VALUE Until when should the setting take effect
(valid values: reboot, forever)
-i, --id=VALUE (Advanced) XML ID used to identify standby attribute"
HELP_TEXT="crm_standby - Query, enable, or disable standby mode for a node
Nodes in standby mode may not host cluster resources.
$USAGE_TEXT
"
exit_usage() {
if [ $# -gt 0 ]; then
- echo "error: $@" >&2
+ echo "error:" "$@" >&2
fi
echo
echo "$USAGE_TEXT"
exit 1
}
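# Example invocations (hypothetical node name):
#   crm_standby -N node1 -v on   # put node1 into standby mode
#   crm_standby -N node1 -G      # query node1's standby status
#   crm_standby -N node1 -D      # revert standby mode to its default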
op=""
options=""
lifetime=0
target=""
SHORTOPTS_DEPRECATED="U:Q"
LONGOPTS_DEPRECATED="uname:,get-value,delete-attr,attr-value:,attr-id:"
SHORTOPTS="VqGv:DN:l:i:"
LONGOPTS="help,version,verbose,quiet,query,update:,delete,node:,lifetime:,id:"
TEMP=$(@GETOPT_PATH@ -o ${SHORTOPTS}${SHORTOPTS_DEPRECATED} \
--long ${LONGOPTS},${LONGOPTS_DEPRECATED} \
-n crm_standby -- "$@")
if [ $? -ne 0 ]; then
exit_usage
fi
eval set -- "$TEMP" # Quotes around $TEMP are essential
while true ; do
case "$1" in
--help)
echo "$HELP_TEXT"
exit 0
;;
--version)
crm_attribute --version
exit 0
;;
-q|--quiet|-V|--verbose|-Q)
options="$options $1"
shift
;;
-N|--node|-U|--uname)
target="$2"
shift
shift
;;
-G|--query|--get-value)
options="$options --query"
op=g
shift
;;
-v|--update|--attr-value)
options="$options --update $2"
op=u
shift
shift
;;
-D|--delete|--delete-attr)
options="$options --delete"
op=d
shift
;;
-l|--lifetime)
options="$options --lifetime $2"
lifetime=1
shift
shift
;;
-i|--id|--attr-id)
options="$options --id $2"
shift
shift
;;
--)
shift
break
;;
*)
exit_usage "unknown option '$1'"
;;
esac
done
# It's important to call cluster commands only after arguments are processed,
# so --version and --help work without problems even if those commands don't.
if [ "$target" = "" ]; then
target=$(crm_node -n)
fi
options="-N $target -n standby $options"
if [ x$op = x ]; then
options="$options -G"; op=g
fi
# If the user didn't explicitly specify a lifetime ...
if [ $lifetime -eq 0 ]; then
case $op in
g)
# For query, report the forever entry if one exists, otherwise
# report the reboot entry if one exists, otherwise report off.
crm_attribute $options -l forever >/dev/null 2>&1
if [ $? -eq 0 ]; then
options="$options -l forever"
else
options="$options -l reboot -d off"
fi
;;
u)
# For update, default to updating the forever entry.
options="$options -l forever"
;;
d)
# For delete, default to deleting both forever and reboot entries.
crm_attribute $options -l forever
crm_attribute $options -l reboot
exit 0
;;
esac
fi
crm_attribute $options
diff --git a/tools/report.collector.in b/tools/report.collector.in
index 2540fc7769..25e3c6a7b2 100644
--- a/tools/report.collector.in
+++ b/tools/report.collector.in
@@ -1,823 +1,820 @@
#
# Originally based on hb_report
# Copyright 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
#
# Later changes copyright 2010-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
if
echo $REPORT_HOME | grep -qs '^/'
then
debug "Using full path to working directory: $REPORT_HOME"
else
REPORT_HOME="$HOME/$REPORT_HOME"
debug "Canonicalizing working directory path: $REPORT_HOME"
fi
detect_host
#
# find files newer than a and older than b
#
isnumber() {
echo "$*" | grep -qs '^[0-9][0-9]*$'
}
touchfile() {
t=`mktemp` &&
perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' &&
echo $t
}
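# touchfile creates a temporary file whose mtime is set to the given epoch
# time, so find_files can hand it to find(1) as a "-newer" reference point.
# Example (hypothetical timestamp): stamp=`touchfile 1514764800`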
find_files_clean() {
[ -z "$from_stamp" ] || rm -f "$from_stamp"
[ -z "$to_stamp" ] || rm -f "$to_stamp"
from_stamp=""
to_stamp=""
}
find_files() {
dirs=
from_time=$2
to_time=$3
for d in $1; do
if [ -d $d ]; then
dirs="$dirs $d"
fi
done
if [ x"$dirs" = x ]; then
return
fi
isnumber "$from_time" && [ "$from_time" -gt 0 ] || {
warning "sorry, can't find files in [ $1 ] based on time without a valid start time"
return
}
trap find_files_clean 0
if ! from_stamp=`touchfile $from_time`; then
warning "sorry, can't create temporary file for find_files"
return
fi
findexp="-newer $from_stamp"
if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then
if ! to_stamp=`touchfile $to_time`; then
warning "sorry, can't create temporary file for find_files"
find_files_clean
return
fi
findexp="$findexp ! -newer $to_stamp"
fi
find $dirs -type f $findexp
find_files_clean
trap "" 0
}
#
# check permissions of files/dirs
#
pl_checkperms() {
perl -e '
# check permissions and ownership
# uid and gid are numeric
# everything must match exactly
# no error checking! (file should exist, etc)
($filename, $perms, $in_uid, $in_gid) = @ARGV;
($mode,$uid,$gid) = (stat($filename))[2,4,5];
$p=sprintf("%04o", $mode & 07777);
$p ne $perms and exit(1);
$uid ne $in_uid and exit(1);
$gid ne $in_gid and exit(1);
' $*
}
num_id() {
getent $1 $2 | awk -F: '{print $3}'
}
chk_id() {
[ "$2" ] && return 0
echo "$1: id not found"
return 1
}
check_perms() {
while read type f p uid gid; do
if [ ! -e "$f" ]; then
echo "$f doesn't exist"
continue
elif [ ! -$type "$f" ]; then
echo "$f has wrong type"
continue
fi
n_uid=`num_id passwd $uid`
chk_id "$uid" "$n_uid" || continue
n_gid=`num_id group $gid`
chk_id "$gid" "$n_gid" || continue
pl_checkperms $f $p $n_uid $n_gid || {
echo "wrong permissions or ownership for $f:"
ls -ld $f
}
done
}
#
# coredumps
#
findbinary() {
random_binary=`which cat 2>/dev/null` # suppose we are lucky
binary=`gdb $random_binary $1 < /dev/null 2>/dev/null |
grep 'Core was generated' | awk '{print $5}' |
sed "s/^.//;s/[.':]*$//"`
if [ x = x"$binary" ]; then
debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)"
binary=$(file $1 | awk '/from/{
for( i=1; i<=NF; i++ )
if( $i == "from" ) {
print $(i+1)
break
}
}')
binary=`echo $binary | tr -d "'"`
binary=$(echo $binary | tr -d '`')
if [ "$binary" ]; then
binary=`which $binary 2>/dev/null`
fi
fi
if [ x = x"$binary" ]; then
warning "Could not find the program path for core $1"
return
fi
fullpath=`which $binary 2>/dev/null`
if [ x = x"$fullpath" ]; then
if [ -x $CRM_DAEMON_DIR/$binary ]; then
echo $CRM_DAEMON_DIR/$binary
debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1"
else
warning "Could not find the program path for core $1"
fi
else
echo $fullpath
debug "Found the program at $fullpath for core $1"
fi
}
getbt() {
which gdb > /dev/null 2>&1 || {
warning "Please install gdb to get backtraces"
return
}
for corefile; do
absbinpath=`findbinary $corefile`
[ x = x"$absbinpath" ] && continue
echo "====================== start backtrace ======================"
ls -l $corefile
# Summary first...
gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt"} -ex quit \
$absbinpath $corefile 2>/dev/null
echo "====================== start detail ======================"
# Now the unreadable details...
gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \
$absbinpath $corefile 2>/dev/null
echo "======================= end backtrace ======================="
done
}
dump_status_and_config() {
crm_mon -1 2>&1 | grep -v '^Last upd' > $target/$CRM_MON_F
cibadmin -Ql 2>/dev/null > $target/${CIB_F}.live
}
getconfig() {
cluster=$1; shift;
target=$1; shift;
for cf in $*; do
if [ -e "$cf" ]; then
cp -a "$cf" $target/
fi
done
if is_running pacemaker-controld; then
dump_status_and_config
- case $cluster in
- corosync) crm_node -p --corosync > $target/$MEMBERSHIP_F 2>&1;;
- *) crm_node -p > $target/$MEMBERSHIP_F 2>&1;;
- esac
+ crm_node -p > "$target/$MEMBERSHIP_F" 2>&1
echo "$host" > $target/RUNNING
elif is_running pacemaker-remoted; then
dump_status_and_config
echo "$host" > $target/RUNNING
# Pre-2.0.0 daemon name in case we're collecting on a mixed-version cluster
elif is_running pacemaker_remoted; then
dump_status_and_config
echo "$host" > $target/RUNNING
else
echo "$host" > $target/STOPPED
fi
}
get_readable_cib() {
target="$1"; shift;
if [ -f "$target/$CIB_F" ]; then
crm_verify -V -x "$target/$CIB_F" >"$target/$CRM_VERIFY_F" 2>&1
if which crm >/dev/null 2>&1 ; then
CIB_file="$target/$CIB_F" crm configure show >"$target/$CIB_TXT_F" 2>&1
elif which pcs >/dev/null 2>&1 ; then
pcs config -f "$target/$CIB_F" >"$target/$CIB_TXT_F" 2>&1
fi
fi
}
#
# remove values of sensitive attributes
#
# this is not proper xml parsing, but it will work under the
# circumstances
sanitize_xml_attrs() {
sed $(
for patt in $SANITIZE; do
echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/"
done
)
}
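# For example, a pattern such as "passw.*" in $SANITIZE generates the sed
# expression:
#   -e /name="passw.*"/s/value="[^"]*"/value="****"/
# which masks the value of any matching nvpair while leaving its name intact.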
sanitize_hacf() {
awk '
$1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; }
{print}
'
}
sanitize_one_clean() {
[ -z "$tmp" ] || rm -f "$tmp"
tmp=""
[ -z "$ref" ] || rm -f "$ref"
ref=""
}
sanitize() {
file=$1
compress=""
if [ -z "$SANITIZE" ]; then
return
fi
echo $file | grep -qs 'gz$' && compress=gzip
echo $file | grep -qs 'bz2$' && compress=bzip2
if [ "$compress" ]; then
decompress="$compress -dc"
else
compress=cat
decompress=cat
fi
trap sanitize_one_clean 0
tmp=`mktemp`
ref=`mktemp`
if [ -z "$tmp" -o -z "$ref" ]; then
sanitize_one_clean
fatal "cannot create temporary files"
fi
touch -r $file $ref # save the mtime
if [ "`basename $file`" = ha.cf ]; then
sanitize_hacf
else
$decompress | sanitize_xml_attrs | $compress
fi < $file > $tmp
mv $tmp $file
# note: cleaning $tmp up is still needed even after it's renamed
# because its temp directory is still there.
touch -r $ref $file
sanitize_one_clean
trap "" 0
}
#
# get some system info
#
distro() {
if
which lsb_release >/dev/null 2>&1
then
lsb_release -d | sed -e 's/^Description:\s*//'
debug "Using lsb_release for distribution info"
return
fi
relf=`ls /etc/debian_version 2>/dev/null` ||
relf=`ls /etc/slackware-version 2>/dev/null` ||
relf=`ls -d /etc/*-release 2>/dev/null` && {
for f in $relf; do
test -f $f && {
echo "`ls $f` `cat $f`"
debug "Found `echo $relf | tr '\n' ' '` distribution release file(s)"
return
}
done
}
warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information"
}
pkg_ver() {
if which dpkg >/dev/null 2>&1 ; then
pkg_mgr="deb"
elif which rpm >/dev/null 2>&1 ; then
pkg_mgr="rpm"
elif which pkg_info >/dev/null 2>&1 ; then
pkg_mgr="pkg_info"
elif which pkginfo >/dev/null 2>&1 ; then
pkg_mgr="pkginfo"
else
warning "Unknown package manager"
return
fi
debug "The package manager is: $pkg_mgr"
echo "The package manager is: $pkg_mgr"
echo "Installed packages:"
case $pkg_mgr in
deb)
dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W | sort
echo
for pkg in $*; do
if dpkg-query -W $pkg 2>/dev/null ; then
debug "Verifying installation of: $pkg"
echo "Verifying installation of: $pkg"
debsums -s $pkg 2>/dev/null
fi
done
;;
rpm)
rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' | sort
echo
for pkg in $*; do
if rpm -q $pkg >/dev/null 2>&1 ; then
debug "Verifying installation of: $pkg"
echo "Verifying installation of: $pkg"
rpm --verify $pkg 2>&1
fi
done
;;
pkg_info)
pkg_info
;;
pkginfo)
pkginfo | awk '{print $3}' # format?
;;
esac
}
getbacktraces() {
debug "Looking for backtraces: $*"
flist=$(
for f in `find_files "$CRM_CORE_DIRS" $1 $2`; do
bf=`basename $f`
test `expr match $bf core` -gt 0 &&
echo $f
done)
if [ "$flist" ]; then
for core in $flist; do
log "Found core file: `ls -al $core`"
done
# Make a copy of them in case we need more data later
# Luckily they compress well
mkdir cores >/dev/null 2>&1
cp -a $flist cores/
shrink cores
rm -rf cores
# Now get as much as we can from them automagically
for f in $flist; do
getbt $f
done
fi
}
getpeinputs() {
if [ -n "$PE_STATE_DIR" ]; then
flist=$(
find_files "$PE_STATE_DIR" "$1" "$2" | sed "s,`dirname $PE_STATE_DIR`/,,g"
)
if [ "$flist" ]; then
(cd $(dirname "$PE_STATE_DIR") && tar cf - $flist) | (cd "$3" && tar xf -)
debug "found `echo $flist | wc -w` scheduler input files in $PE_STATE_DIR"
fi
fi
}
getblackboxes() {
flist=$(
find_files $BLACKBOX_DIR $1 $2
)
for bb in $flist; do
bb_short=`basename $bb`
qb-blackbox $bb > $3/${bb_short}.blackbox 2>&1
info "Extracting contents of blackbox: $bb_short"
done
}
#
# some basic system info and stats
#
sys_info() {
cluster=$1; shift
echo "Platform: `uname`"
echo "Kernel release: `uname -r`"
echo "Architecture: `uname -m`"
if [ `uname` = Linux ]; then
echo "Distribution: `distro`"
fi
echo
cibadmin --version 2>&1 | head -1
cibadmin -! 2>&1
case $cluster in
corosync)
/usr/sbin/corosync -v 2>&1 | head -1
;;
esac
# Cluster glue version hash (if available)
stonith -V 2>/dev/null
# Resource agents version hash
echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`"
echo
pkg_ver $*
}
sys_stats() {
set -x
uname -n
uptime
ps axf
ps auxw
top -b -n 1
ifconfig -a
ip addr list
netstat -i
arp -an
test -d /proc && {
cat /proc/cpuinfo
}
lsscsi
lspci
mount
df
set +x
}
dlm_dump() {
if which dlm_tool >/dev/null 2>&1 ; then
if is_running dlm_controld; then
echo "--- Lockspace overview:"
dlm_tool ls -n
echo "--- Lockspace history:"
dlm_tool dump
echo "--- Lockspace status:"
dlm_tool status
dlm_tool status -v
echo "--- Lockspace config:"
dlm_tool dump_config
dlm_tool log_plock
dlm_tool ls | grep name |
while read X N ; do
echo "--- Lockspace $N:"
dlm_tool lockdump "$N"
dlm_tool lockdebug -svw "$N"
done
fi
fi
}
drbd_info() {
test -f /proc/drbd && {
echo "--- /proc/drbd:"
cat /proc/drbd 2>&1
echo
}
if which drbd-overview >/dev/null 2>&1; then
echo "--- drbd-overview:"
drbd-overview 2>&1
echo
fi
if which drbdsetup >/dev/null 2>&1; then
echo "--- drbdsetup status:"
drbdsetup status --verbose --statistics 2>&1
echo
echo "--- drbdsetup events2:"
drbdsetup events2 --timestamps --statistics --now 2>&1
echo
fi
if which drbdadm >/dev/null 2>&1; then
echo "--- drbdadm show-gi:"
for res in $(drbdsetup status | grep -e ^\\S | awk '{ print $1 }'); do
echo "$res:"
drbdadm show-gi $res 2>&1
echo
done
fi
}
iscfvarset() {
test "`getcfvar $1 $2`"
}
iscfvartrue() {
getcfvar $1 $2 $3 | egrep -qsi "^(true|y|yes|on|1)"
}
get_logfiles() {
cf_type=$1
cf_file="$2"
facility_var="logfacility"
case $cf_type in
corosync)
if [ -f "$cf_file" ]; then
debug "Reading $cf_type log settings from $cf_file"
if iscfvartrue $cf_type to_syslog "$cf_file"; then
facility_var=syslog_facility
fi
if iscfvartrue $cf_type to_logfile "$cf_file"; then
logfile=$(getcfvar $cf_type logfile "$cf_file")
fi
fi
;;
esac
if [ -z "$logfile" ]; then
# @TODO Use PCMK_logfile if set
logfile="@CRM_LOG_DIR@/pacemaker.log"
debug "Log settings not found for cluster type $cf_type, assuming $logfile"
fi
if [ -f "$logfile" ]; then
echo $logfile
fi
if [ "x$facility" = x ]; then
facility=`getcfvar $cf_type $facility_var $cf_file`
[ "" = "$facility" ] && facility="daemon"
fi
# Always include system logs (if we can find them)
msg="Mark:pcmk:`perl -e 'print time()'`"
logger -p $facility.info $msg >/dev/null 2>&1
sleep 2 # Give syslog time to catch up in case it's busy
findmsg 1 "$msg"
# Look for detail logs:
# - initial pacemakerd logs and tracing might go to a different file
pattern="Starting Pacemaker"
# - make sure we get something from the scheduler
pattern="$pattern\\|Calculated Transition"
# - cib and pacemaker-execd updates
# (helpful on non-DC nodes and when cluster has been up for a long time)
pattern="$pattern\\|cib_perform_op\\|process_lrm_event"
# - pacemaker_remote might use a different file
pattern="$pattern\\|pacemaker[-_]remoted:"
findmsg 3 "$pattern"
}
essential_files() {
cat<<EOF
d $PE_STATE_DIR 0750 hacluster haclient
d $CRM_CONFIG_DIR 0750 hacluster haclient
d $CRM_STATE_DIR 0750 hacluster haclient
EOF
}
# Trim leading and ending whitespace (using only POSIX expressions)
trim() {
TRIM_S="$1"
TRIM_S="${TRIM_S#"${TRIM_S%%[![:space:]]*}"}"
TRIM_S="${TRIM_S%"${TRIM_S##*[![:space:]]}"}"
echo -n "$TRIM_S"
}
collect_logs() {
CL_START="$1"
shift
CL_END="$1"
shift
CL_LOGFILES="$@"
which journalctl > /dev/null 2>&1
if [ $? -eq 0 ]; then
cl_have_journald=1
else
cl_have_journald=0
fi
cl_lognames="$CL_LOGFILES"
if [ $cl_have_journald -eq 1 ]; then
cl_lognames="$cl_lognames journalctl"
fi
cl_lognames=$(trim "$cl_lognames")
if [ -z "$cl_lognames" ]; then
return
fi
# YYYY-MM-DD HH:MM:SS
cl_start_ymd=$(date -d @${CL_START} +"%F %T")
cl_end_ymd=$(date -d @${CL_END} +"%F %T")
debug "Gathering logs from $cl_start_ymd to $cl_end_ymd:"
debug " $cl_lognames"
# Remove our temporary file if we get interrupted here
trap '[ -z "$cl_pattfile" ] || rm -f "$cl_pattfile"' 0
# Create a temporary file with patterns to grep for
cl_pattfile=$(mktemp) || fatal "cannot create temporary files"
for cl_pattern in $LOG_PATTERNS; do
echo "$cl_pattern"
done > $cl_pattfile
echo "Log pattern matches from $REPORT_TARGET:" > $ANALYSIS_F
if [ -n "$CL_LOGFILES" ]; then
for cl_logfile in $CL_LOGFILES; do
cl_extract="$(basename $cl_logfile).extract.txt"
if [ ! -f "$cl_logfile" ]; then
# Not a file
continue
elif [ -f "$cl_extract" ]; then
# We already have it
continue
fi
dumplogset "$cl_logfile" $LOG_START $LOG_END > "$cl_extract"
sanitize "$cl_extract"
grep -f "$cl_pattfile" "$cl_extract" >> $ANALYSIS_F
done
fi
# Collect systemd logs if present
if [ $cl_have_journald -eq 1 ]; then
journalctl --since "$cl_start_ymd" --until "$cl_end_ymd" > journal.log
grep -f "$cl_pattfile" journal.log >> $ANALYSIS_F
fi
rm -f $cl_pattfile
trap "" 0
}
debug "Initializing $REPORT_TARGET subdir"
if [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then
if [ -e $REPORT_HOME/$REPORT_TARGET ]; then
warning "Directory $REPORT_HOME/$REPORT_TARGET already exists, using /tmp/$$/$REPORT_TARGET instead"
REPORT_HOME=/tmp/$$
fi
fi
mkdir -p $REPORT_HOME/$REPORT_TARGET
cd $REPORT_HOME/$REPORT_TARGET
case $CLUSTER in
any) cluster=`get_cluster_type`;;
*) cluster=$CLUSTER;;
esac
cluster_cf=`find_cluster_cf $cluster`
# If cluster stack is still "any", this might be a Pacemaker Remote node,
# so don't complain in that case.
if [ -z "$cluster_cf" ] && [ $cluster != "any" ]; then
warning "Could not determine the location of your cluster configuration"
fi
if [ "$SEARCH_LOGS" = "1" ]; then
logfiles=$(get_logfiles "$cluster" "$cluster_cf" | sort -u)
fi
logfiles="$(trim "$logfiles $EXTRA_LOGS")"
if [ -z "$logfiles" ]; then
which journalctl > /dev/null 2>&1
if [ $? -eq 0 ]; then
info "Systemd journal will be the only log collected"
else
info "No logs will be collected"
fi
info "No log files found or specified with --logfile /some/path"
fi
debug "Config: $cluster ($cluster_cf) $logfiles"
sys_info $cluster $PACKAGES > $SYSINFO_F
essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1
getconfig $cluster "$REPORT_HOME/$REPORT_TARGET" "$cluster_cf" "$CRM_CONFIG_DIR/$CIB_F" "/etc/drbd.conf" "/etc/drbd.d" "/etc/booth"
getpeinputs $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET
getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$REPORT_TARGET/$BT_F
getblackboxes $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET
case $cluster in
corosync)
if is_running corosync; then
corosync-blackbox >corosync-blackbox-live.txt 2>&1
# corosync-fplay > corosync-blackbox.txt
tool=`pickfirst corosync-objctl corosync-cmapctl`
case $tool in
*objctl) $tool -a > corosync.dump 2>/dev/null;;
*cmapctl) $tool > corosync.dump 2>/dev/null;;
esac
corosync-quorumtool -s -i > corosync.quorum 2>&1
fi
;;
esac
dc=`crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}'`
if [ "$REPORT_TARGET" = "$dc" ]; then
echo "$REPORT_TARGET" > DC
fi
dlm_dump > $DLM_DUMP_F 2>&1
sys_stats > $SYSSTATS_F 2>&1
drbd_info > $DRBD_INFO_F 2>&1
debug "Sanitizing files: $SANITIZE"
#
# replace sensitive info with '****'
#
cf=""
if [ ! -z "$cluster_cf" ]; then
cf=`basename $cluster_cf`
fi
for f in "$cf" "$CIB_F" "$CIB_F.live" pengine/*; do
if [ -f "$f" ]; then
sanitize "$f"
fi
done
# For convenience, generate human-readable version of CIB and any XML errors
# in it (AFTER sanitizing, so we don't need to sanitize this output)
get_readable_cib "$REPORT_HOME/$REPORT_TARGET"
collect_logs "$LOG_START" "$LOG_END" $logfiles
# Purge files containing no information
for f in `ls -1`; do
if [ -d "$f" ]; then
continue
elif [ ! -s "$f" ]; then
case $f in
*core*) log "Detected empty core file: $f";;
*) debug "Removing empty file: `ls -al $f`"
rm -f $f
;;
esac
fi
done
# Parse for events
for l in $logfiles; do
b="$(basename $l).extract.txt"
node_events "$b" > $EVENTS_F
# Link the first logfile to a standard name if it doesn't yet exist
if [ -e "$b" -a ! -e "$HALOG_F" ]; then
ln -s "$b" "$HALOG_F"
fi
done
if [ -e $REPORT_HOME/.env ]; then
debug "Localhost: $REPORT_MASTER $REPORT_TARGET"
elif [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then
debug "Streaming report back to $REPORT_MASTER"
(cd $REPORT_HOME && tar cf - $REPORT_TARGET)
if [ "$REMOVE" = "1" ]; then
cd
rm -rf $REPORT_HOME
fi
fi
# vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80:
diff --git a/tools/report.common.in b/tools/report.common.in
index 9c4113fc30..39e59360af 100644
--- a/tools/report.common.in
+++ b/tools/report.common.in
@@ -1,866 +1,866 @@
#
# Originally based on hb_report
# Copyright 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
#
# Later changes copyright 2010-2018 Andrew Beekhof <andrew@beekhof.net>
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
host=`uname -n`
shorthost=`echo $host | sed s:\\\\..*::`
if [ -z "$verbose" ]; then
verbose=0
fi
# Target Files
EVENTS_F=events.txt
ANALYSIS_F=analysis.txt
HALOG_F=cluster-log.txt
BT_F=backtraces.txt
SYSINFO_F=sysinfo.txt
SYSSTATS_F=sysstats.txt
DLM_DUMP_F=dlm_dump.txt
CRM_MON_F=crm_mon.txt
MEMBERSHIP_F=members.txt
CRM_VERIFY_F=crm_verify.txt
PERMISSIONS_F=permissions.txt
CIB_F=cib.xml
CIB_TXT_F=cib.txt
DRBD_INFO_F=drbd_info.txt
EVENT_PATTERNS="
state do_state_transition
membership pcmk_peer_update.*(lost|memb):
quorum (crmd|pacemaker-controld).*crm_update_quorum
pause Process.pause.detected
resources (lrmd|pacemaker-execd).*rsc:(start|stop)
stonith te_fence_node|fenced.*(requests|(Succeeded|Failed).to.|result=)
start_stop shutdown.decision|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete
"
# superset of all packages of interest on all distros
# (the package manager will be used to validate the installation
# of any of these packages that are installed)
PACKAGES="pacemaker pacemaker-libs pacemaker-cluster-libs libpacemaker3
pacemaker-remote pacemaker-pygui pacemaker-pymgmt pymgmt-client
corosync corosynclib libcorosync4
resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord
ocfs2-tools ocfs2-tools-o2cb ocfs2console
ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace
drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace
drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen
lvm2 lvm2-clvm cmirrord
libdlm libdlm2 libdlm3
hawk ruby lighttpd
kernel-default kernel-pae kernel-xen
glibc
"
# Potential locations of system log files
SYSLOGS="
/var/log/*
/var/logs/*
/var/syslog/*
/var/adm/*
/var/log/ha/*
/var/log/cluster/*
"
# Whether pacemaker-remoted was found (0 = yes, 1 = no, -1 = haven't looked yet)
REMOTED_STATUS=-1
#
# keep the user posted
#
record() {
if [ x != x"$REPORT_HOME" -a -d "${REPORT_HOME}/$shorthost" ]; then
rec="${REPORT_HOME}/$shorthost/report.out"
elif [ x != x"${l_base}" -a -d "${l_base}" ]; then
rec="${l_base}/report.summary"
else
rec="/dev/null"
fi
printf "%-10s $*\n" "$shorthost:" >> "${rec}" 2>&1
}
log() {
printf "%-10s $*\n" "$shorthost:" 1>&2
record "$*"
}
debug() {
if [ $verbose -gt 0 ]; then
log "Debug: $*"
else
record "Debug: $*"
fi
}
info() {
log "$*"
}
warning() {
log "WARN: $*"
}
fatal() {
log "ERROR: $*"
exit 1
}
# Check whether a process whose name contains the given substring exists.
# Only look at processes owned by user 0 (by UID), "@CRM_DAEMON_USER@",
# or the effective user running this script, and/or by group 0 (by GID),
# "@CRM_DAEMON_GROUP@", or one of the groups the effective user belongs to
# (there is no reason to probe any other processes).
is_running() {
- ps -G "0 $(getent group '@CRM_DAEMON_GROUP@' 2>/dev/null | cut -d: -f3) \
- $(id -G)" \
- -u "0 @CRM_DAEMON_USER@ $(id -u)" \
+ ps -G "0 $(getent group '@CRM_DAEMON_GROUP@' 2>/dev/null | cut -d: -f3) $(id -G)" \
+ -u "0 @CRM_DAEMON_USER@ $(id -u)" -f \
| grep -Eqs $(echo "$1" | sed -e 's/^\(.\)/[\1]/')
}
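# Note: the sed above wraps the pattern's first character in brackets
# (e.g. "corosync" becomes "[c]orosync") so that grep never matches its
# own command line in the ps output.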
has_remoted() {
if [ $REMOTED_STATUS -eq -1 ]; then
REMOTED_STATUS=1
if which pacemaker-remoted >/dev/null 2>&1; then
REMOTED_STATUS=0
# Check for pre-2.0.0 daemon name in case we have mixed-version cluster
elif which pacemaker_remoted >/dev/null 2>&1; then
REMOTED_STATUS=0
elif [ -x "@sbindir@/pacemaker-remoted" ]; then
REMOTED_STATUS=0
elif [ -x "@sbindir@/pacemaker_remoted" ]; then
REMOTED_STATUS=0
else
# @TODO: the binary might be elsewhere,
# but a global search is too expensive
for d in /{usr,opt}/{local/,}{s,}bin; do
if [ -x "${d}/pacemaker-remoted" ]; then
REMOTED_STATUS=0
elif [ -x "${d}/pacemaker_remoted" ]; then
REMOTED_STATUS=0
fi
done
fi
fi
return $REMOTED_STATUS
}
# found_dir <description> <dirname>
found_dir() {
echo "$2"
info "Pacemaker $1 found in: $2"
}
detect_daemon_dir() {
info "Searching for where Pacemaker daemons live... this may take a while"
for d in \
{/usr,/usr/local,/opt/local,@exec_prefix@}/{libexec,lib64,lib}/pacemaker
do
# pacemaker and pacemaker-cts packages can install to daemon directory,
# so check for a file from each
if [ -e $d/pacemaker-schedulerd ] || [ -e $d/cts-exec-helper ]; then
found_dir "daemons" "$d"
return
fi
done
# Pacemaker Remote nodes don't need to install daemons
if has_remoted; then
info "Pacemaker daemons not found (this appears to be a Pacemaker Remote node)"
return
fi
for f in $(find / -maxdepth $maxdepth -type f -name pacemaker-schedulerd -o -name cts-exec-helper); do
d=$(dirname "$f")
found_dir "daemons" "$d"
return
done
fatal "Pacemaker daemons not found (nonstandard installation?)"
}
detect_cib_dir() {
- if [ "-f ${local_state_dir}/lib/pacemaker/cib/cib.xml" ]; then
+ d="${local_state_dir}/lib/pacemaker/cib"
+ if [ -f "$d/cib.xml" ]; then
found_dir "config files" "$d"
return
fi
# Pacemaker Remote nodes don't need a CIB
if has_remoted; then
info "Pacemaker config not found (this appears to be a Pacemaker Remote node)"
return
fi
info "Searching for where Pacemaker keeps config information... this may take a while"
# TODO: What about false positives where someone copied the CIB?
for f in $(find / -maxdepth $maxdepth -type f -name cib.xml); do
d=$(dirname $f)
found_dir "config files" "$d"
return
done
warning "Pacemaker config not found (nonstandard installation?)"
}
detect_state_dir() {
if [ -n "$CRM_CONFIG_DIR" ]; then
# Assume new layout
# $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores)
dirname "$CRM_CONFIG_DIR"
# Pacemaker Remote nodes might not have a CRM_CONFIG_DIR
elif [ -d "$local_state_dir/lib/pacemaker" ]; then
echo $local_state_dir/lib/pacemaker
fi
}
detect_pe_dir() {
config_root="$1"
d="$config_root/pengine"
if [ -d "$d" ]; then
found_dir "scheduler inputs" "$d"
return
fi
if has_remoted; then
info "Pacemaker scheduler inputs not found (this appears to be a Pacemaker Remote node)"
return
fi
info "Searching for where Pacemaker keeps scheduler inputs... this may take a while"
for d in $(find / -maxdepth $maxdepth -type d -name pengine); do
found_dir "scheduler inputs" "$d"
return
done
fatal "Pacemaker scheduler inputs not found (nonstandard installation?)"
}
detect_host() {
local_state_dir=@localstatedir@
if [ -d $local_state_dir/run ]; then
CRM_STATE_DIR=$local_state_dir/run/crm
else
info "Searching for where Pacemaker keeps runtime data... this may take a while"
for d in `find / -maxdepth $maxdepth -type d -name run`; do
local_state_dir=`dirname $d`
CRM_STATE_DIR=$d/crm
break
done
info "Found: $CRM_STATE_DIR"
fi
debug "Machine runtime directory: $local_state_dir"
debug "Pacemaker runtime data located in: $CRM_STATE_DIR"
CRM_DAEMON_DIR=$(detect_daemon_dir)
CRM_CONFIG_DIR=$(detect_cib_dir)
config_root=$(detect_state_dir)
# Older versions had none
BLACKBOX_DIR=$config_root/blackbox
debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR"
PE_STATE_DIR=$(detect_pe_dir "$config_root")
CRM_CORE_DIRS=""
for d in $config_root/cores $local_state_dir/lib/corosync; do
if [ -d $d ]; then
CRM_CORE_DIRS="$CRM_CORE_DIRS $d"
fi
done
debug "Core files located under: $CRM_CORE_DIRS"
}
time2str() {
perl -e "use POSIX; print strftime('%x %X',localtime($1));"
}
get_time() {
perl -e "\$time=\"$*\";" -e '
$unix_tm = 0;
eval "use Date::Parse";
if (index($time, ":") < 0) {
} elsif (!$@) {
$unix_tm = str2time($time);
} else {
eval "use Date::Manip";
if (!$@) {
$unix_tm = UnixDate(ParseDateString($time), "%s");
}
}
if ($unix_tm != "") {
print int($unix_tm);
} else {
print "";
}
'
}
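# Example (hypothetical input): get_time "2018-01-01 12:00" prints the
# corresponding epoch seconds; strings that cannot be parsed (or that
# contain no ":") print nothing.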
get_time_() {
warning "Unknown time format used by: $*"
}
get_time_syslog() {
awk '{print $1,$2,$3}'
}
get_time_legacy() {
awk '{print $2}' | sed 's/_/ /'
}
get_time_iso8601() {
awk '{print $1}'
}
get_time_format_for_string() {
l="$*"
t=$(get_time `echo $l | get_time_syslog`)
if [ "x$t" != x ]; then
echo syslog
return
fi
t=$(get_time `echo $l | get_time_iso8601`)
if [ "x$t" != x ]; then
echo iso8601
return
fi
t=$(get_time `echo $l | get_time_legacy`)
if [ "x$t" != x ]; then
echo legacy
return
fi
}
get_time_format() {
t=0 l="" func=""
trycnt=10
while [ $trycnt -gt 0 ] && read l; do
func=$(get_time_format_for_string $l)
if [ "x$func" != x ]; then
break
fi
trycnt=$(($trycnt-1))
done
#debug "Logfile uses the $func time format"
echo $func
}
get_first_time() {
l=""
format=$1
while read l; do
t=$(echo $l | get_time_$format)
ts=$(get_time $t)
if [ "x$ts" != x ]; then
echo "$ts"
return
fi
done
}
get_last_time() {
l=""
best=`date +%s` # Now
format=$1
while read l; do
t=$(echo $l | get_time_$format)
ts=$(get_time $t)
if [ "x$ts" != x ]; then
best=$ts
fi
done
echo $best
}
linetime() {
l=`tail -n +$2 $1 | grep -a ":[0-5][0-9]:" | head -n 1`
format=`get_time_format_for_string $l`
t=`echo $l | get_time_$format`
get_time "$t"
}
#
# findmsg <max> <pattern>
#
# Print the names of up to <max> system logs that contain <pattern>,
# ordered by most recently modified.
#
findmsg() {
max=$1
pattern="$2"
found=0
# List all potential system logs ordered by most recently modified.
candidates=$(ls -1td $SYSLOGS 2>/dev/null)
if [ -z "$candidates" ]; then
debug "No system logs found to search for pattern '$pattern'"
return
fi
# Portable way to handle files with spaces in their names.
SAVE_IFS=$IFS
IFS="
"
# Check each log file for matches.
logfiles=""
for f in $candidates; do
local cat=""
# We only care about readable files with something in them.
if [ ! -f "$f" ] || [ ! -r "$f" ] || [ ! -s "$f" ] ; then
continue
fi
cat=$(find_decompressor "$f")
# We want to avoid grepping through potentially huge binary logs such
# as lastlog. However, control characters sometimes find their way into
# text logs, so we use a heuristic of more than 256 nonprintable
# characters in the file's first kilobyte.
if [ $($cat "$f" 2>/dev/null | head -c 1024 | tr -d '[:print:][:space:]' | wc -c) -gt 256 ]
then
continue
fi
# Our patterns are ASCII, so we can use LC_ALL="C" to speed up grep
$cat "$f" 2>/dev/null | LC_ALL="C" grep -q -e "$pattern"
if [ $? -eq 0 ]; then
# Add this file to the list of hits
# (using newline as separator to handle spaces in names).
if [ -z "$logfiles" ]; then
logfiles="$f"
else
logfiles="$logfiles
$f"
fi
# If we have enough hits, print them and return.
found=$(($found+1))
if [ $found -ge $max ]; then
debug "Pattern '$pattern' found in: [ $logfiles ]"
IFS=$SAVE_IFS
echo "$logfiles"
return
fi
fi
done 2>/dev/null
IFS=$SAVE_IFS
debug "Pattern '$pattern' not found in any system logs"
}
node_events() {
if [ -e $1 ]; then
Epatt=`echo "$EVENT_PATTERNS" |
while read title p; do [ -n "$p" ] && echo -n "|$p"; done |
sed 's/.//'
`
grep -E "$Epatt" $1
fi
}
pickfirst() {
for x; do
which $x >/dev/null 2>&1 && {
echo $x
return 0
}
done
return 1
}
shrink() {
olddir=$PWD
dir=`dirname $1`
base=`basename $1`
target=$1.tar
tar_options="cf"
variant=`pickfirst bzip2 gzip xz false`
case $variant in
bz*)
tar_options="jcf"
target="$target.bz2"
;;
gz*)
tar_options="zcf"
target="$target.gz"
;;
xz*)
tar_options="Jcf"
target="$target.xz"
;;
*)
warning "Could not find a compression program, the resulting tarball may be huge"
;;
esac
if [ -e $target ]; then
fatal "Destination $target already exists, specify an alternate name with --dest"
fi
cd $dir >/dev/null 2>&1
tar $tar_options $target $base >/dev/null 2>&1
cd $olddir >/dev/null 2>&1
echo $target
}
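# findln_by_time binary-searches $logf for the number of a line whose
# timestamp (extracted with linetime) matches epoch time $tm as closely
# as possible, so huge logs never need a linear scan.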
findln_by_time() {
local logf=$1
local tm=$2
local first=1
# Some logs can be massive (over 1,500,000,000 lines have been seen in the wild)
# Even just 'wc -l' on these files can take 10+ minutes
local fileSize=`ls -lh "$logf" | awk '{ print $5 }' | grep -ie G`
if [ x$fileSize != x ]; then
warning "$logf is ${fileSize} in size and could take many hours to process. Skipping."
return
fi
local last=`wc -l < $logf`
while [ $first -le $last ]; do
mid=$((($last+$first)/2))
trycnt=10
while [ $trycnt -gt 0 ]; do
tmid=`linetime $logf $mid`
[ "$tmid" ] && break
warning "cannot extract time: $logf:$mid; will try the next one"
trycnt=$(($trycnt-1))
# shift the whole first-last segment
first=$(($first-1))
last=$(($last-1))
mid=$((($last+$first)/2))
done
if [ -z "$tmid" ]; then
warning "giving up on log..."
return
fi
if [ $tmid -gt $tm ]; then
last=$(($mid-1))
elif [ $tmid -lt $tm ]; then
first=$(($mid+1))
else
break
fi
done
echo $mid
}
dumplog() {
local logf=$1
local from_line=$2
local to_line=$3
[ "$from_line" ] ||
return
tail -n +$from_line $logf |
if [ "$to_line" ]; then
head -$(($to_line-$from_line+1))
else
cat
fi
}
#
# find log/set of logs which are interesting for us
#
#
# find log slices
#
find_decompressor() {
case $1 in
*bz2) echo "bzip2 -dc" ;;
*gz) echo "gzip -dc" ;;
*xz) echo "xz -dc" ;;
*) echo "cat" ;;
esac
}
#
# check if the log contains a piece of our segment
#
is_our_log() {
local logf=$1
local from_time=$2
local to_time=$3
local cat=`find_decompressor $logf`
local format=`$cat $logf | get_time_format`
local first_time=`$cat $logf | head -10 | get_first_time $format`
local last_time=`$cat $logf | tail -10 | get_last_time $format`
if [ x = "x$first_time" -o x = "x$last_time" ]; then
warning "Skipping bad logfile '$1': Could not determine log dates"
return 0 # skip (empty log?)
fi
if [ $from_time -gt $last_time ]; then
# we shouldn't get here anyway if the logs are in order
return 2 # we're past good logs; exit
fi
if [ $from_time -ge $first_time ]; then
return 3 # this is the last good log
fi
# have to go further back
if [ x = "x$to_time" -o $to_time -ge $first_time ]; then
return 1 # include this log
else
return 0 # don't include this log
fi
}
#
# go through archived logs (timewise backwards) and see if there
# are lines belonging to us
# (we rely on untouched log files, i.e. that modify time
# hasn't been changed)
#
arch_logs() {
local logf=$1
local from_time=$2
local to_time=$3
# look for files such as: ha-log-20090308 or
# ha-log-20090308.gz (.bz2) or ha-log.0, etc
ls -t $logf $logf*[0-9z] 2>/dev/null |
while read next_log; do
is_our_log $next_log $from_time $to_time
case $? in
0) ;; # noop, continue
1) echo $next_log # include log and continue
debug "Found log $next_log"
;;
2) break;; # don't go through older logs!
3) echo $next_log # include log and continue
debug "Found log $next_log"
break
;; # don't go through older logs!
esac
done
}
#
# print part of the log
#
drop_tmp_file() {
[ -z "$tmp" ] || rm -f "$tmp"
}
print_logseg() {
local logf=$1
local from_time=$2
local to_time=$3
# uncompress to a temp file (if necessary)
local cat=`find_decompressor $logf`
if [ "$cat" != "cat" ]; then
tmp=`mktemp`
$cat $logf > $tmp
trap drop_tmp_file 0
sourcef=$tmp
else
sourcef=$logf
tmp=""
fi
if [ "$from_time" = 0 ]; then
FROM_LINE=1
else
FROM_LINE=`findln_by_time $sourcef $from_time`
fi
if [ -z "$FROM_LINE" ]; then
warning "couldn't find line for time $from_time; corrupt log file?"
return
fi
TO_LINE=""
if [ "$to_time" != 0 ]; then
TO_LINE=`findln_by_time $sourcef $to_time`
if [ -z "$TO_LINE" ]; then
warning "couldn't find line for time $to_time; corrupt log file?"
return
fi
if [ $FROM_LINE -lt $TO_LINE ]; then
dumplog $sourcef $FROM_LINE $TO_LINE
log "Including segment [$FROM_LINE-$TO_LINE] from $logf"
else
debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf"
fi
else
dumplog $sourcef $FROM_LINE $TO_LINE
log "Including all logs after line $FROM_LINE from $logf"
fi
drop_tmp_file
trap "" 0
}
#
# find log/set of logs which are interesting for us
#
dumplogset() {
local logf=$1
local from_time=$2
local to_time=$3
local logf_set=`arch_logs $logf $from_time $to_time`
if [ x = "x$logf_set" ]; then
return
fi
local num_logs=`echo "$logf_set" | wc -l`
local oldest=`echo $logf_set | awk '{print $NF}'`
local newest=`echo $logf_set | awk '{print $1}'`
local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'`
# the first logfile: from $from_time to $to_time (or end)
# logfiles in the middle: all
# the last logfile: from beginning to $to_time (or end)
case $num_logs in
1) print_logseg $newest $from_time $to_time;;
*)
print_logseg $oldest $from_time 0
for f in $mid_logfiles; do
`find_decompressor $f` $f
debug "including complete $f logfile"
done
print_logseg $newest 0 $to_time
;;
esac
}
# cut out a stanza
getstanza() {
awk -v name="$1" '
!in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start
if ($1 == name)
in_stanza = 1
}
in_stanza { print }
in_stanza && NF==1 && $1 == "}" { exit }
'
}
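# Example: for a corosync.conf containing
#   logging {
#       to_syslog: yes
#   }
# `getstanza logging` prints just that block.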
# supply stanza in $1 and variable name in $2
# (stanza is optional)
getcfvar() {
cf_type=$1; shift;
cf_var=$1; shift;
cf_file=$*
[ -f "$cf_file" ] || return
case $cf_type in
corosync)
sed 's/#.*//' < $cf_file |
if [ $# -eq 2 ]; then
getstanza "$cf_var"
shift 1
else
cat
fi |
awk -v varname="$cf_var" '
NF==2 && match($1,varname":$")==1 { print $2; exit; }
'
;;
esac
}
pickfirst() {
for x; do
which $x >/dev/null 2>&1 && {
echo $x
return 0
}
done
return 1
}
#
# figure out the cluster type, depending on the process list
# and existence of configuration files
#
get_cluster_type() {
if is_running corosync; then
tool=`pickfirst corosync-objctl corosync-cmapctl`
case $tool in
*objctl) quorum=`$tool -a | grep quorum.provider | sed 's/.*=\s*//'`;;
*cmapctl) quorum=`$tool | grep quorum.provider | sed 's/.*=\s*//'`;;
esac
stack="corosync"
# Now we're guessing...
# TODO: Technically these could be anywhere :-/
elif [ -f /etc/corosync/corosync.conf ]; then
stack="corosync"
else
# We still don't know. This might be a Pacemaker Remote node,
# or the configuration might be in a nonstandard location.
stack="any"
fi
debug "Detected the '$stack' cluster stack"
echo $stack
}
find_cluster_cf() {
case $1 in
corosync)
best_size=0
best_file=""
# TODO: Technically these could be anywhere :-/
for cf in /etc/corosync/corosync.conf; do
if [ -f $cf ]; then
size=`wc -l $cf | awk '{print $1}'`
if [ $size -gt $best_size ]; then
best_size=$size
best_file=$cf
fi
fi
done
if [ -z "$best_file" ]; then
debug "Looking for corosync configuration file. This may take a while..."
for f in `find / -maxdepth $maxdepth -type f -name corosync.conf`; do
best_file=$f
break
done
fi
debug "Located corosync config file: $best_file"
echo "$best_file"
;;
any)
# Cluster type is undetermined. Don't complain, because this
# might be a Pacemaker Remote node.
;;
*)
warning "Unknown cluster type: $1"
;;
esac
}
#
# check for the major prereq for a) parameter parsing and b)
# parsing logs
#
t=`get_time "12:00"`
if [ "$t" = "" ]; then
fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)"
fi
# vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80:
