diff --git a/ChangeLog b/ChangeLog
index e9f671b24d..2fa761586c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,2748 +1,2748 @@
 * Tue Jun 04 2019 Ken Gaillot <kgaillot@redhat.com> Pacemaker-2.0.2
 - Changesets: 288
- 225 files changed, 28494 insertions(+), 24465 deletions(-)
+- Diff:       225 files changed, 28494 insertions(+), 24465 deletions(-)
 
 - Features added since Pacemaker-2.0.1
   + tools: crm_resource --validate can get resource parameters from command line
   + tools: crm_resource --clear prints out any cleared constraints
   + tools: new crm_rule tool for checking rule expiration (experimental)
   + tools: stonith_admin supports XML output for machine parsing (experimental)
   + resources: new HealthIOWait resource agent for node health tracking
 
 - Changes since Pacemaker-2.0.1
   + Important security fixes for CVE-2018-16878, CVE-2018-16877, CVE-2019-3885
   + build: crm_report bug report URL is now configurable at build time
   + build: private libpengine/libtransitioner libraries combined as libpacemaker
   + controller: avoid memory leak when duplicate monitor is scheduled
   + scheduler: respect order constraints when resources are being probed
   + scheduler: one group stop shouldn't make another required
   + libcrmcommon: handle out-of-range integers in configuration better
   + libcrmcommon: export logfile environment variable if using default
   + libcrmcommon: avoid segmentation fault when beginning formatted text list
   + libcrmservice: fix use-after-free memory error in alert handling
   + libstonithd: handle more than 64KB output from fence agents
 
 
 * Mon Mar 4 2019 Ken Gaillot <kgaillot@redhat.com> Pacemaker-2.0.1
 - Changesets: 592
- 173 files changed, 9268 insertions(+), 5344 deletions(-)
+- Diff:       173 files changed, 9268 insertions(+), 5344 deletions(-)
 
 - Features added since Pacemaker-2.0.0
   + Pacemaker bundles support podman for container management
   + fencing: SBD may be used in a cluster that has guest nodes or bundles
   + fencing: fencing history is synchronized among all nodes
   + fencing: stonith_admin has option to clear fence history
   + tools: crm_mon can show fencing action failures and history
   + tools: crm_resource --clear supports new --expired option
   + Pacemaker Remote: new options to restrict TLS Diffie-Hellman prime length
 
 - Changes since Pacemaker-2.0.0
   + scheduler: clone notifications could be scheduled for a stopped
     Pacemaker Remote node and block all further cluster actions
     (regression since 2.0.0)
   + libcrmcommon: correct behavior for completing interrupted live migrations
     (regression since 2.0.0)
   + tools: crm_resource -C could fail to clean up all failures in one run
     (regression since 2.0.0)
   + Pacemaker Remote: avoid unnecessary downtime when moving resource to
     Pacemaker Remote node that fails to come up (regression since 1.1.18)
   + tools: restore stonith_admin ability to confirm unseen nodes are down
     (regression since 1.1.12)
   + build: minor logging fixes to allow compatibility with GCC 9 -Werror
   + build: spec file now puts XML schemas in new pacemaker-schemas package
   + build: spec file now provides virtual pcmk-cluster-manager package
   + pacemaker-attrd: wait a short time before re-attempting failed writes
   + pacemaker-attrd: ignore attribute delays when writing after node (re-)join
   + pacemaker-attrd: start new election immediately if writer is lost
   + pacemaker-attrd: clear election dampening when the writer leaves
   + pacemaker-attrd: detect alert configuration changes when CIB is replaced
   + CIB: inform originator of CIB upgrade failure
   + controller: support resource agents that require node name even for meta-data
   + controller: don't record pending clone notifications in CIB
   + controller: DC detects completion of another node's shutdown more accurately
   + controller: shut down DC if unable to update node attributes
   + controller: handle corosync peer/join notifications for new node in any order
   + controller: clear election dampening when DC is lost
   + executor: cancel recurring monitors if fence device registration is lost
   + fencing: check for fence device update when resource defaults change
   + fencing: avoid pacemaker-fenced crash possible with stonith_admin misuse
   + fencing: limit fencing history to 500 entries
   + fencing: stonith_admin now complains if no action option is specified
   + pacemakerd: do not modify kernel.sysrq on Linux
   + scheduler: regression test compatibility with glib 2.59.0
   + scheduler: avoid unnecessary recovery of cleaned guest nodes and bundles
   + scheduler: ensure failures causing fencing not expired until fencing done
   + scheduler: start unique clone instances in numerical order
   + scheduler: convert unique clones to anonymous clones when not supported
   + scheduler: associate pending tasks with correct clone instance
   + scheduler: ensure bundle clone notifications are directed to correct host
   + scheduler: avoid improper bundle monitor rescheduling or fail count clearing
   + scheduler: honor asymmetric orderings even when restarting
   + scheduler: don't order non-DC shutdowns before DC fencing
   + ACLs: assume unprivileged ACL user if can't get user info
   + Pacemaker Remote: get Diffie-Hellman prime bit length from GnuTLS API
   + libcrmservice: cancel DBus call when cancelling systemd/upstart actions
   + libcrmservice: order systemd resources relative to pacemaker_remote
   + libpe_status: add public API constructor/destructor for pe_working_set_t
   + tools: fix crm_resource --clear when lifetime was used with ban/move
   + tools: fix crm_resource --move when lifetime was used with previous move
   + tools: make crm_mon CIB connection errors non-fatal if previously successful
   + tools: improve crm_mon messages when generating HTML output
   + tools: crm_mon cluster connection failure is now "critical" in nagios mode
   + tools: crm_mon listing of standby nodes shows if they have active resources
   + tools: crm_diff now ignores attribute ordering when comparing in CIB mode
   + tools: improve crm_report detection of logs, CIB directory, and processes
   + tools: crm_verify returns reliable exit codes
   + tools: crm_simulate resource history uses same name as live cluster would
 
 
 * Fri Jul 6 2018 Ken Gaillot <kgaillot@redhat.com> Pacemaker-2.0.0
 - Changesets: 885
 - Diff:       549 files changed, 89865 insertions(+), 95100 deletions(-)
 
 - Deprecated features removed since Pacemaker-1.1.18
   + All of these have newer forms, and the cluster will automatically convert
     most older syntax usage in saved configurations to newer syntax as needed
   + Drop support for heartbeat and corosync 1 (whether using CMAN or plugin)
   + Drop support for rolling upgrades from Pacemaker versions older than 1.1.11
   + Drop support for built-in SMTP and SNMP in crm_mon
   + Drop support for legacy option aliases including default-action-timeout,
     default-resource-stickiness, resource-failure-stickiness,
     default-resource-failure-stickiness, is-managed-default,
     and all names using underbar instead of dash
   + Drop support for "requires" operation meta-attribute
   + Drop support for the pcmk_*_cmd, pcmk_arg_map, and pcmk_poweroff_action
     fence resource parameters
   + Drop support for deprecated command-line options to crmadmin,
     crm_attribute, crm_resource, crm_verify, crm_mon, and stonith_admin
   + Drop support for operation meta-attributes in instance_attributes
   + Drop support for PCMK_legacy and LRMD_MAX_CHILDREN environment variables
   + Drop support for undocumented resource isolation feature
   + Drop support for processing very old saved CIB files (including
     pre-0.6.0 start failure entries, pre-0.6.5 operation history entries,
     pre-0.7 transition keys, pre-1.1.4 migration history entries,
     pre-1.0 XML configuration schemas, pre-1.1.6 ticket state entries, and
     pre-1.1.7 failed recurring operation history entries)
 
 - Features added since Pacemaker-1.1.18
   + The pacemaker daemons have been renamed to make logs more intuitive
     and easier to search
   + The default location of the Pacemaker detail log is now
     /var/log/pacemaker/pacemaker.log (instead of being directly in /var/log),
     and Pacemaker will no longer use Corosync's logging preferences;
     configure script options are available to change default log locations
   + The detail log's message format has been improved
   + The master XML tag is deprecated in favor of using a standard clone tag
     with a new "promotable" meta-attribute set to true, and the "master-max"
     and "master-node-max" master meta-attributes are deprecated in favor of
     new "promoted-max" and "promoted-node-max" clone meta-attributes;
     documentation now refers to these as promotable clones rather than
     master/slave, stateful, or multistate clones, and refers to
     promotion scores instead of master scores
   + Administration-related documentation has been moved from the
     "Pacemaker Explained" document to a new "Pacemaker Administration" document
   + record-pending now defaults to TRUE (pending actions are shown in status)
   + All Python code in Pacemaker now supports both Python 2.7 and Python 3
   + The command-line tools now return consistent, well-defined exit codes;
     crm_error has an --exit option to list these
   + Pacemaker's systemd unit files now remove systemd's spawned process limit
   + mount, path, and timer systemd unit types are now supported as resources
   + A negative stonith-watchdog-timeout now tells the cluster to automatically
     calculate the value based on SBD_WATCHDOG_TIMEOUT (which was the behavior
     of 0 before 1.1.15; 0 retains its post-1.1.15 behavior of disabling use of
     the watchdog as a fencing device)
   + The undocumented restart-type and role_after_failure
     resource meta-attributes are now deprecated
   + Regression testing code has been consolidated and overhauled
     (the most obvious change is new command names)
   + build: create /etc/pacemaker directory when installing
   + build: improved portability to BSD-based platforms
   + tools: crm_resource --cleanup now cleans only failed operation history;
     crm_resource --reprobe retains the previous behavior of cleaning all
     operation history
   + tools: add stonith_admin --validate option to check device configuration
   + tools: crm_node is now in the pacemaker-cli package (instead of pacemaker)
   + alerts: add epoch and usec alert variables for improved SNMP alerts
   + controller: deprecate "crmd-*" cluster options in favor of new names
   + scheduler: deprecate stonith-action value "poweroff" (use "off" instead)
   + scheduler: deprecate require-all in rsc_order
   + libcrmcluster: prefer corosync name over ring0_addr
   + xml: allow local "kind" in resource_set within rsc_order
 
 - Changes since Pacemaker-1.1.18
   + Restore systemd unit dependency on DBus (regression in 1.1.17)
   + CIB: handle mixed-case node names when modifying attributes (regression in 1.1.17)
   + scheduler: avoid crash when logging ignored failure timeout (regression in 1.1.17)
   + attrd: ensure node name is broadcast at start-up (regression in 1.1.18)
   + scheduler: unfence before probing or starting fence devices (regression in 1.1.18)
   + tools: treat INFINITY correctly in crm_failcount (regression in 1.1.17)
   + tools: show master scores with crm_simulate -sL (regression in 1.1.18)
   + tools: crm_master did not work without explicit --lifetime (regression in 1.1.18)
   + Numerous changes to public C API of libraries
   + Choose current node correctly when a resource is multiply active
   + controller,executor,tools: avoid minor memory leaks
   + CIB: don't use empty CIB if real CIB has bad permissions
   + controller: avoid double free after ACL rejection of resource deletion
   + controller: don't record pending clone notifications in CIB
   + controller: always write faked failures to CIB whenever possible
   + controller: quorum gain without a node join should cause new transition
   + executor: handle systemd actions correctly when used with "service:"
   + executor: find absolute LSB paths when used with "service:"
   + scheduler: handle "requires" of "quorum" or "nothing" properly
   + scheduler: ensure orphaned recurring monitors have interval set
   + scheduler: handle pending migrations correctly when record-pending is true
   + scheduler: don't time out failures that cause fencing until fencing completes
   + scheduler: handle globally-unique bundle children correctly
   + scheduler: use correct default timeout for monitors
   + scheduler: "symmetrical" defaults to "false" for serialize orders
   + scheduler: avoid potential use-of-NULL when unpacking ordering constraint
   + scheduler: properly cancel recurring monitors
   + scheduler: do not schedule notifications for unrunnable actions
   + scheduler: ensure stops occur after stopped remote connections come back up
   + scheduler: consider only allowed nodes when ordering start after all recovery
   + scheduler: avoid graph loop from ordering bundle child stops/demotes after container fencing
   + scheduler: remote connection resources are safe to require only quorum
   + scheduler: correctly observe colocation with bundles in Master role
   + scheduler: restart resource after failed demote when appropriate
   + Pacemaker Remote: always use most recent remote proxy
   + tools: crm_node now gets correct node name and ID on Pacemaker Remote nodes
   + tools: correctly check crm_resource --move for master role
   + tools: cibsecret --help/--version doesn't require cluster to be running
   + tools: ignore attribute placement when crm_diff compares in cib mode
   + tools: prevent notify actions from causing crm_resource --wait to hang
   + resources: drop broken configdir parameter from ocf:pacemaker:controld
 
 - For further details, see:
   https://wiki.clusterlabs.org/wiki/Pacemaker_2.0_Changes
 
 
 * Tue Nov 14 2017 Ken Gaillot <kgaillot@redhat.com> Pacemaker-1.1.18
 - Update source tarball to revision: a9fbd15
 - Changesets: 644
 - Diff:       167 files changed, 9753 insertions(+), 5596 deletions(-)
 
 - Features added since Pacemaker-1.1.17
   + warnings are now logged when using legacy syntax to be removed in 2.0
   + agents: ifspeed agent is now installed when building
   + agents: ifspeed agent can optionally detect interface name from IP address
   + alerts: support alert filters
   + alerts: experimental support for alerts for node attribute changes
   + crmd,pengine: support unfencing of remote nodes
   + pengine: bundles now support all constraint types
   + pengine: bundles now support rkt containers
   + pengine: bundles support new container-attribute-target parameter
   + pengine,tools: logs and crm_mon show why resources changed state
   + stonith-ng: support new fencing resource parameter pcmk_delay_base
   + tools: new crm_resource option --why explains why resources are stopped
 
 - Changes since Pacemaker-1.1.17
   + many documentation improvements
   + agents: ifspeed properly calculates speed of hfi1 interfaces
   + agents: ClusterMon now interprets "update" less than 1000 as seconds
   + attrd: don't lose attributes set between attrd start-up and cluster join
   + attrd: fix multiple minor memory leaks
   + crmd: correctly record that unfencing is complete
   + crmd: error more quickly if remote start fails due to missing key
   + lrmd: remote resource operations return immediate error if key setup fails
   + lrmd: allow pre-1.1.15 cluster nodes to connect to current Pacemaker Remote
   + pengine: guest nodes are now probed like other nodes
   + pengine: probe remote nodes for guest node resources
   + pengine: do not probe guest/bundle connections until guest/bundle is active
   + pengine: allow resources to stop prior to probes completing
   + pengine: bundles wait only for other containers on same node to be probed
   + pengine: have bundles log to stderr so 'docker logs'/'journalctl -M' works
   + pengine: only pass requests for promote/demote flags onto the bundle's child
   + pengine: do not map ports into Docker container when net=host is specified
   + pengine: allow resources inside bundles to receive clone notifications
   + pengine: default to non-interleaved bundle ordering for safety
   + pengine: ensure bundle nodes and child resources are correctly cleaned up
   + pengine: prevent graph loops when fencing the host underneath a bundle
   + pengine: fix multiple memory issues (use-after-free, use-of-NULL) with bundles
   + pengine: resources in bundles respect failcounts
   + pengine: ensure nested container connections run on the same host
   + pengine: ensure unrecoverable remote nodes are fenced even with no resources
   + pengine: handle resource migrating behind a migrating remote connection
   + pengine: don't prefer to keep unique instances on same node
   + pengine: exclude exclusive resources and nodes from symmetric default score
   + pengine: if ignoring failure, also ignore migration-threshold
   + pengine: restore the ability to send the transition graph via the disk if it gets too big
   + pengine: validate no-quorum-policy=suicide correctly
   + pengine: avoid crash when alerts section has comments
   + pengine: detect permanent master scores at start-up
   + pengine: do not re-add a node's default score for each location constraint
   + pengine: make sure calculated resource scores are consistent on different architectures
   + pengine: retrigger unfencing for changed device parameters only when necessary
   + pengine: don't schedule reload and restart in same transition (CLBZ#5309, regression introduced in 1.1.15)
   + stonith-ng: make fencing-device reappear properly after reenabling
   + stonith-ng: include pcmk_on_action in meta-data so 'on' can be overridden
   + tools: allow crm_report to work with no log files specified
   + tools: fix use-after-free in crm_diff introduced in 1.1.17
   + tools: allow crm_resource to operate on anonymous clones in unknown states
   + tools: crm_resource --cleanup on appropriate nodes if we don't know state of resource
   + tools: prevent disconnection from crmd during crm_resource --cleanup
   + tools: improve messages for crm_resource --force-* options
   + tools: crm_mon: avoid infinite process spawning if -E script can't be run
   + tools: crm_mon: don't show previous exit-reason for failed action with none
   + libcrmservice: list systemd unit files, not only active units (CLBZ#5299)
   + libcrmservice: parse long description correctly for LSB meta-data
 
 
 * Thu Jul 06 2017 Ken Gaillot <kgaillot@redhat.com> Pacemaker-1.1.17
 - Update source tarball to revision: 301bc44
 - Changesets: 539
 - Diff:       177 files changed, 11525 insertions(+), 5036 deletions(-)
 
 - Features added since Pacemaker-1.1.16
   + New "bundle" resource type for Docker container use cases (experimental)
   + New "PCMK_node_start_state" environment variable to start node in standby
   + New "value-source" rule expression attribute in location constraints to
     compare a node attribute against a resource parameter
   + New "stonith-max-attempts" cluster option to specify how many times
     fencing can fail for a target before the cluster will no longer
     immediately re-attempt it (previously hard-coded at 10) 
   + New "cluster-ipc-limit" cluster option to avoid IPC client eviction in
     large clusters
   + Failures are now tracked per operation type, as well as per node and
     resource (the "fail-count" and "last-failure" node attribute names now end
     in "#OPERATION_INTERVAL")
   + attrd: Pacemaker Remote node attributes and regular expressions are now
     supported on legacy cluster stacks (heartbeat, CMAN, and corosync plugin)
   + tools: New "crm_resource --validate" option
   + tools: New "stonith_admin --list-targets" option
   + tools: New "crm_attribute --pattern" option to match a regular expression
   + tools: "crm_resource --cleanup" and "crm_failcount" can now take
     --operation and --interval options to operate on a single operation type
 
 - Changes since Pacemaker-1.1.16
   + Fix multiple memory issues (leaks, use-after-free) in libraries
   + pengine: unmanaging a guest node resource puts guest in maintenance mode
   + cib: broadcasts of cib changes should always pass ACL checks
   + crmd,libcrmcommon: update throttling when CPUs are hot-plugged
   + crmd: abort transition whenever we lose quorum
   + crmd: avoid attribute write-out on join when atomic attrd is used
   + crmd: check for too many stonith failures only when aborting for that reason
   + crmd: correctly clear failure counts only for a specified node
   + crmd: don't fence old DC if it's shutting down as soon-to-be DC joins
   + crmd: forget stonith failures when forgetting node
   + crmd: all nodes should track stonith failure counts in case they become DC
   + crmd: update cache status for guest node whose host is fenced
   + dbus: prevent lrmd from hanging on dbus calls
   + fencing: detect newly added constraints for stonith devices
   + pengine: order remote actions after connection recovery
     (regression introduced in 1.1.15)
   + pengine: quicker recovery from failed demote
   + libcib: determine remote nodes correctly from node status entries
   + libcrmcommon: avoid evicting IPC client if messages spike briefly
   + libcrmcommon: better XML comment handling prevents infinite election loop
   + libcrmcommon: set month correctly in date/time string sent to alert agents
   + libfencing,fencing: intelligently remap "action" wrongly specified in config
   + libservices: ensure completed ops aren't on blocked ops list
   + libservices: properly detect and cancel in-flight systemd/upstart ops
   + libservices: properly watch writable DBus handles
   + libservices: systemd service that is reloading doesn't cause monitor failure
   + pacemaker_remoted: allow graceful shutdown while unmanaged
   + pengine,libpe_status: don't clear same fail-count twice
   + pengine: consider guest node unclean if its host is unclean
   + pengine: do not re-add a node's default score for each location constraint
   + pengine: avoid restarting services when recovering remote connection
   + pengine: better guest node recovery when host fails
   + pengine: guest node fencing doesn't require stonith enabled
   + pengine: allow probes of guest node connection resources
   + pengine: properly handle allow-migrate explicitly set for remote connection
   + pengine: fence failed remote nodes even if no resources can run on them
   + tools: resource agents will now get the correct node name on
     Pacemaker Remote nodes when using crm_node and crm_attribute
   + tools: avoid grep crashes in crm_report when looking for system logs
   + tools: crm_resource -C now clears last-failure as well as fail-count
   + tools: implement crm_failcount command-line options correctly
   + tools: properly ignore version with crm_diff --no-version
 
 
 * Wed Nov 30 2016 Ken Gaillot <kgaillot@redhat.com> Pacemaker-1.1.16
 - Update source tarball to revision: 76876b3
 - Changesets: 382
 - Diff:       145 files changed, 7200 insertions(+), 5621 deletions(-)
 
 - Features added since Pacemaker-1.1.15
   + Location constraints may use rsc-pattern, with submatches expanded
   + node-health-base available with node-health-strategy=progressive
   + new Pacemaker Development document for working on pacemaker code base
   + new PCMK_panic_action variable allows crash instead of reboot on panic
   + resources: add resource agent for managing a node attribute
   + systemd: include socket units when listing all systemd agents
 
 - Changes since Pacemaker-1.1.15
   + Important security fix for CVE-2016-7035
   + Logging is now synchronous when blackboxes are enabled
   + All python code except CTS is now compatible with python 2.6+ and 3.2+
   + build: take advantage of compiler features for security and performance
   + build: update SuSE spec modifications for recent spec changes
   + build: avoid watchdog reboot when upgrading pacemaker_remote with sbd
   + build: numerous other improvements in environment detection, etc.
   + cib: fix infinite loop when no schema validates
   + crmd: cl#5185 - record pending operations in CIB before they are performed
   + crmd: don't abort transitions for CIB comment changes
   + crmd: resend shutdown request if DC loses original request
   + documentation: install improved README in doc instead of now-removed AUTHORS
   + documentation: clarify licensing and provide copy of all licenses
   + documentation: document various features and upgrades better
   + fence_legacy: use "list" action when searching cluster-glue agents
   + libcib: don't stop sending alerts after releasing DC role
   + libcrmcommon: properly handle XML comments when comparing v2 patchset diffs
   + libcrmcommon: report errors consistently when waiting for data on connection
   + libpengine: avoid potential use-of-NULL
   + libservices: use DBusError API properly
   + pacemaker_remote: init script stop should always return 0
   + pacemaker_remote: allow remote clients to timeout/reconnect
   + pacemaker_remote: correctly calculate remaining timeout when receiving messages
   + pengine: avoid transition loop for start-then-stop + unfencing
   + pengine: correctly update dependent actions of un-runnable clones
   + pengine: do not fence a node in maintenance mode if it shuts down cleanly
   + pengine: set OCF_RESKEY_CRM_meta_notify_active_* for multistate resources
   + resources: ping - avoid temporary files for fping check, support FreeBSD
   + resources: SysInfo - better support for FreeBSD
   + resources: variable name typo in docker-wrapper
   + systemd: order pacemaker after time-sync target
   + tools: correct attrd_updater help and error messages when using CMAN
   + tools: crm_standby --version/--help should work without cluster running
   + tools: make crm_report sanitize CIB before generating readable version
   + tools: display pending resource state by default when available
   + tools: avoid matching other process with same PID in ClusterMon
 
 
 * Tue Jun 21 2016 Ken Gaillot <kgaillot@redhat.com> Pacemaker-1.1.15
 - Update source tarball to revision: 32fa6a5
 - Changesets: 533
 - Diff:       219 files changed, 6659 insertions(+), 3989 deletions(-)
 
 - Features added since Pacemaker-1.1.14
   + Event-driven alerts allow scripts to be called after significant events
   + build: Some files moved from pacemaker package to pacemaker-cli for cleaner pacemaker-remote dependencies
   + build: ./configure --with-configdir argument for /etc/sysconfig, /etc/default, etc.
   + fencing: Simplify watchdog integration
   + fencing: Support concurrent fencing actions via new pcmk_action_limit option
   + remote: pacemaker_remote may be stopped without disabling resource first
   + remote: Report integration status of Pacemaker Remote nodes in CIB node_state
   + tools: crm_mon now reports why resources are not starting
   + tools: crm_report now obscures passwords in logfiles
   + tools: attrd_updater --update-both/--update-delay options allow changing dampening value
   + tools: allow stonith_admin -H '*' to show history for all nodes
 
 - Changes since Pacemaker-1.1.14
   + Fix multiple memory issues (leaks, use-after-free) in daemons, libraries and tools
   + Make various log messages more user-friendly
   + Improve FreeBSD and Hurd support
   + attrd: Prevent possible segfault on exit
   + cib: Fix regression to restore support for compressed CIB larger than 1MB
   + common: fix regression in 1.1.14 that made have-watchdog always true
   + controld: handle DLM "wait fencing" state better
   + crmd: Fix regression so that fenced unseen nodes do not remain unclean
   + crmd: Take start-delay into account when calculation action timeouts
   + crmd: Avoid timeout on older peers when cancelling a resource operation
   + fencing: Allow fencing by node ID (e.g. by DLM) even if node left cluster
   + lrmd: Fix potential issues when cluster is stopped via systemd shutdown
   + pacemakerd: Properly respawn stonithd if it fails
   + pengine: Fix regression with multiple monitor levels that could ignore failure
   + pengine: Correctly set OCF_RESKEY_CRM_meta_timeout when start-delay is configured
   + pengine: Properly order actions for master/slave resources in anti-colocations
   + pengine: Respect asymmetrical ordering when trying to move resources
   + pengine: Properly order stop actions on guest node relative to host stonith
   + pengine: Correctly block actions dependent on unrunnable clones
   + remote: Allow remote nodes to have node attributes even with legacy attrd
   + remote: Recover from remote node fencing more quickly
   + remote: Place resources on newly rejoined remote nodes more quickly
   + resources: ping agent can now use fping6 for IPv6 hosts
   + resources: SysInfo now resets #health_disk to green when there's sufficient free disk
   + tools: crm_report is now more efficient and handles Pacemaker Remote nodes better
   + tools: Prevent crm_resource segfault when --resource is not supplied with --restart
   + tools: crm_shadow --display option now works
   + tools: crm_resource --restart handles groups, target-roles and moving resources better
 
 
 * Thu Jan 14 2016 Ken Gaillot <kgaillot@redhat.com> Pacemaker-1.1.14
 - Update source tarball to revision: f0b585a
 - Changesets: 724
 - Diff:        179 files changed, 13142 insertions(+), 7695 deletions(-)
 
 - Features added since Pacemaker-1.1.13
   + crm_resource: Indicate common reasons why a resource may not start after a cleanup
   + crm_resource: New --force-promote and --force-demote options for debugging
   + fencing: Support targeting fencing topologies by node name pattern or node attribute
   + fencing: Remap sequential topology reboots to all-off-then-all-on
   + pengine: Allow resources to start and stop as soon as their state is known on all nodes
   + pengine: Include a list of all and available nodes with clone notifications
   + pengine: Addition of the clone resource clone-min metadata option
   + pengine: Support of multiple-active=block for resource groups
   + remote: Resources that create guest nodes can be included in a group resource
   + remote: reconnect_interval option for remote nodes to delay reconnect after fence
 
 - Changes since Pacemaker-1.1.13
   + improve support for building on FreeBSD and Debian
   + fix multiple memory issues (leaks, use-after-free, double free, use-of-NULL) in components and tools
   + cib: Do not terminate due to badly behaving clients
   + cman: handle corosync-invented node names of the form Node{id} for peers not in its node list
   + controld: replace bashism
   + crm_node: Display node state with -l and quorum status with -q, if available
   + crmd: resources would sometimes be restarted when only non-unique parameters changed
   + crmd: fence remote node after connection failure only once
   + crmd: handle resources named the same as cluster nodes
   + crmd: Pre-emptively fail in-flight actions when lrmd connections fail
   + crmd: Record actions in the CIB as failed if we cannot execute them
   + crm_report: Enable password sanitizing by default
   + crm_report: Allow log file discovery to be disabled
   + crm_resource: Allow the resource configuration to be modified for --force-{check,start,..} calls
   + crm_resource: Compensate for -C and -p being called with the child resource for clones
   + crm_resource: Correctly clean up all children for anonymous cloned groups
   + crm_resource: Correctly clean up failcounts for inactive anonymous clones
   + crm_resource: Correctly observe --force when deleting and updating attributes
   + crm_shadow: Fix "crm_shadow --diff"
   + crm_simulate: Prevent segfault on arches with 64bit time_t
   + fencing: ensure "required"/"automatic" only apply to "on" actions
   + fencing: Return a provider for the internal fencing agent "#watchdog" instead of logging an error
   + fencing: ignore stderr output of fence agents (often used for debug messages)
   + fencing: fix issue where deleting a fence device attribute can delete the device
   + libcib: potential user input overflow
   + libcluster: overhaul peer cache management
   + log: make syslog less noisy
   + log: fix various misspellings in log messages
   + lrmd: cancel currently pending STONITH op if stonithd connection is lost
   + lrmd: Finalize all pending and recurring operations when cleaning up a resource
   + pengine: Bug cl#5247 - Imply resources running on a container are stopped when the container is stopped
   + pengine: cl#5235 - Prevent graph loops that can be introduced by "load_stopped -> migrate_to" ordering
   + pengine: Correctly bypass fencing for resources that do not require it
   + pengine: do not timeout remote node recurring monitor op failure until after fencing
   + pengine: Ensure recurring monitor operations are cancelled when clone instances are de-allocated
   + pengine: fixes segfault in pengine when fencing remote node
   + pengine: properly handle blocked clone actions
   + pengine: ensure failed actions that occurred in node shutdown are displayed
   + remote: Correctly display the usage of the ocf:pacemaker:remote resource agent
   + remote: do not fail operations because of a migration
   + remote: enable reloads for select remote connection options
   + resources: allow for top output with or without percent sign in HealthCPU
   + resources: Prevent an error message on stopping "Dummy" resource
   + systemd: Prevent segfault when logging failed operations
   + systemd: Reconnect to System DBus if the connection is closed
   + systemd: set systemd resources' timeout values higher than systemd's own default
   + tools: Do not send command lines to syslog
   + tools: update SNMP MIB
   + upstart: Ensure pending structs are correctly unreferenced
 
 
 * Wed Jun 24 2015 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.13
 - Update source tarball to revision: 2a1847e
 - Changesets: 750
 - Diff:       156 files changed, 11323 insertions(+), 3725 deletions(-)
 
 - Features added since Pacemaker-1.1.12
   + Allow fail-counts to be removed en masse when the new attrd is in operation
   + attrd supports private attributes (not written to CIB)
   + crmd: Ensure a watchdog device is in use if stonith-watchdog-timeout is configured
   + crmd: If configured, trigger the watchdog immediately if we lose quorum and no-quorum-policy=suicide
   + crm_diff: Support generating a difference without version details if --no-version/-u is supplied
   + crm_resource: Implement an intelligent restart capability
   + Fencing: Advertise the watchdog device for fencing operations
   + Fencing: Allow the cluster to recover resources if the watchdog is in use
   + fencing: cl#5134 - Support random fencing delay to avoid double fencing
   + mcp: Allow orphan children to initiate node panic via SIGQUIT
   + mcp: Turn on sbd integration if pacemakerd finds it running
   + mcp: Two new error codes that result in machine reset or power off
   + Officially support the resource-discovery attribute for location constraints
   + PE: Allow natural ordering of colocation sets
   + PE: Support non-actionable degraded mode for OCF
   + pengine: cl#5207 - Display "UNCLEAN" for resources running on unclean offline nodes
   + remote: pcmk remote client tool for use with container wrapper script
   + Support machine panics for some kinds of errors (via sbd if available)
   + tools: add crm_resource --wait option
   + tools: attrd_updater supports --query and --all options
   + tools: attrd_updater: Allow attributes to be set for other nodes
 
 - Changes since Pacemaker-1.1.12
   + pengine: exclusive discovery implies rsc is only allowed on exclusive subset of nodes
   + acl: Correctly implement the 'reference' acl directive
   + acl: Do not delay evaluation of added nodes in some situations
   + attrd: b22b1fe did uuid test too early
   + attrd: Clean out the node cache when requested by the admin
   + attrd: fixes double free in attrd legacy
   + attrd: properly write attributes for peers once uuid is discovered
   + attrd: refresh should force an immediate write-out of all attributes
   + attrd: Simplify how node deletions happen
   + Bug rhbz#1067544 - Tools: Correctly handle --ban, --move and --locate for master/slave groups
   + Bug rhbz#1181824 - Ensure the DC can be reliably fenced
   + cib: Ability to upgrade cib validation schema in legacy mode
   + cib: Always generate digests for cib diffs in legacy mode
   + cib: assignment where comparison intended
   + cib: Avoid nodeid conflicts we don't care about
   + cib: Correctly add "update-origin", "update-client" and "update-user" attributes for cib
   + cib: Correctly set up signal handlers
   + cib: Correctly track node state
   + cib: Do not update on disk backups if we're just querying them
   + cib: Enable cib legacy mode for plugin-based clusters
   + cib: Ensure file-based backends treat '-o section' consistently with the native backend
   + cib: Ensure upgrade operations from a non-DC get an acknowledgement
   + cib: No need to enforce cib digests for v2 diffs in legacy mode
   + cib: Revert d153b86 to instantly get cib synchronized in legacy mode
   + cib: tls sock cleanup for remote cib connections
   + cli: Ensure subsequent unknown long options are correctly detected
   + cluster: Invoke crm_remove_conflicting_peer() only when the new node's uname is being assigned in the node cache
   + common: Increment current and age for lib common as a result of APIs being added
   + corosync: Bug cl#5232 - Somewhat gracefully handle nodes with invalid UUIDs
   + corosync: Avoid unnecessary repeated CMAP API calls
   + crmd/pengine: handle on-fail=ignore properly
   + crmd: Add "on_node" attribute for *_last_failure_0 lrm resource operations
   + crmd: All peers need to track node shutdown requests
   + crmd: Cached copies of transient attributes cease to be valid once a node leaves the membership
   + crmd: Correctly add the local option that validates against schema for pengine to calculate
   + crmd: Disable debug logging that results in significant overhead
   + crmd: do not remove connection resources during re-probe
   + crmd: don't update fail count twice for same failure
   + crmd: Ensure remote connection resources timeout properly during 'migrate_from' action
   + crmd: Ensure throttle_mode() does something on Linux
   + crmd: Fixes crash when remote connection migration fails
   + crmd: gracefully handle remote node disconnects during op execution
   + crmd: Handle remote connection failures while executing ops on remote connection
   + crmd: include remote nodes when forcing cluster wide resource reprobe
   + crmd: never stop recurring monitor ops for pcmk remote during incomplete migration
   + crmd: Prevent the old version of DC from being fenced when it shuts down for rolling-upgrade
   + crmd: Prevent use-of-NULL during reprobe
   + crmd: properly update job limit for baremetal remote-nodes
   + crmd: Remote-node throttle jobs count towards cluster-node hosting connection rsc
   + crmd: Reset stonith failcount to recover transitioner when the node rejoins
   + crmd: resolves memory leak in crmd.
   + crmd: respect start-failure-is-fatal even for artificially injected events
   + crmd: Wait for all pending operations to complete before poking the policy engine
   + crmd: When container's host is fenced, cancel in-flight operations
   + crm_attribute: Correctly update config options when -o crm_config is specified
   + crm_failcount: Better error reporting when no resource is specified
   + crm_mon: add exit reason to resource failure output
   + crm_mon: Fill CRM_notify_node in traps with node's uname rather than node's id if possible
   + crm_mon: Repair notification delivery when the v2 patch format is in use
   + crm_node: Correctly remove nodes from the CIB by nodeid
   + crm_report: More patterns for finding logs on non-DC nodes
   + crm_resource: Allow resource restart operations to be node specific
   + crm_resource: avoid deletion of lrm cache on node with resource discovery disabled.
   + crm_resource: Calculate how long to wait for a restart based on the resource timeouts
   + crm_resource: Clean up memory in --restart error paths
   + crm_resource: Display the locations of all anonymous clone children when supplying the children's common ID
   + crm_resource: Ensure --restart sets/clears meta attributes
   + crm_resource: Ensure fail-counts are purged when we redetect the state of all resources
   + crm_resource: Implement --timeout for resource restart operations
   + crm_resource: Include group members when calculating the next timeout
   + crm_resource: Memory leak in error paths
   + crm_resource: Prevent use-after-free
   + crm_resource: Repair regression test outputs
   + crm_resource: Use-after-free when restarting a resource
   + dbus: ref count leaks
   + dbus: Ensure both the read and write queues get dispatched
   + dbus: Fail gracefully if malloc fails
   + dbus: handle dispatch queue when multiple replies need to be processed
   + dbus: Notice when dbus connections get disabled
   + dbus: Remove double-free introduced while trying to make coverity shut up
   + ensure if B is colocated with A, B can never run without A
   + fence_legacy: Avoid passing 'port' to cluster-glue agents
   + fencing: Allow nodes to be purged from the member cache
   + fencing: Correctly make args for fencing agents
   + fencing: Correctly wait for self-fencing to occur when the watchdog is in use
   + fencing: Ensure the hostlist parameter is set for watchdog agents
   + fencing: Force 'stonith-ng' as the system name
   + fencing: Gracefully handle invalid metadata from agents
   + fencing: If configured, wait stonith-watchdog-timer seconds for self-fencing to complete
   + fencing: Reject actions for devices that haven't been explicitly registered yet
   + ipc: properly allocate server enforced buffer size on client
   + ipc: use server enforced buffer during ipc client send
   + lrmd, services: interpret LSB status codes properly
   + lrmd: add back support for class heartbeat agents
   + lrmd: cancel pending async connection during disconnect
   + lrmd: enable ipc proxy for docker-wrapper privileged mode
   + lrmd: fix rescheduling of systemd monitor op during start
   + lrmd: Handle systemd reporting 'done' before a resource is actually stopped
   + lrmd: Hint to child processes that using sd_notify is not required
   + lrmd: Log with the correct personality
   + lrmd: Prevent glib assert triggered by timers being removed from mainloop more than once
   + lrmd: report original timeout when systemd operation completes
   + lrmd: store failed operation exit reason in cib
   + mainloop: resolves race condition mainloop poll involving modification of ipc connections
   + make targeted reprobe for remote node work, crm_resource -C -N <remote node>
   + mcp: Allow a configurable delay when debugging shutdown issues
   + mcp: Avoid requiring 'export' for SYS-V sysconfig options
   + Membership: Detect and resolve nodes that change their ID
   + pacemakerd: resolves memory leak of xml structure in pacemakerd
   + pengine: ability to launch resources in isolated containers
   + pengine: add #kind=remote for baremetal remote-nodes
   + pengine: allow baremetal remote-nodes to recover without requiring fencing when cluster-node fails
   + pengine: allow remote-nodes to be placed in maintenance mode
   + pengine: Avoid trailing whitespaces when printing resource state
   + pengine: cl#5130 - Choose nodes capable of running all the colocated utilization resources
   + pengine: cl#5130 - Only check the capacities of the nodes that are allowed to run the resource
   + pengine: Correctly compare feature set to determine how to unpack meta attributes
   + pengine: disable migrations for resources with isolation containers
   + pengine: disable reloading of resources within isolated container wrappers
   + pengine: Do not aggregate children in a pending state into the started/stopped/etc lists
   + pengine: Do not record duplicate copies of the failed actions
   + pengine: Do not reschedule monitors that are no longer needed while resource definitions have changed
   + pengine: Fence baremetal remote when recurring monitor op fails
   + pengine: Fix colocation with unmanaged resources
   + pengine: Fix the behaviors of multi-state resources with asymmetrical ordering
   + pengine: fixes pengine crash with orphaned remote node connection resource
   + pengine: fixes segfault caused by malformed log warning
   + pengine: handle cloned isolated resources in a sane way
   + pengine: handle isolated resource scenario, cloned group of isolated resources
   + pengine: Handle ordering between stateful and migratable resources
   + pengine: imply stop in container node resources when host node is fenced
   + pengine: only fence baremetal remote when connection fails or can not be recovered
   + pengine: only kill process group on timeout when on-fail does not equal block.
   + pengine: per-node control over resource discovery
   + pengine: prefer migration target for remote node connections
   + pengine: prevent disabling rsc discovery per node in certain situations
   + pengine: Prevent use-after-free in sort_rsc_process_order()
   + pengine: properly handle ordering during remote connection partial migration
   + pengine: properly recover remote-nodes when cluster-node proxy goes offline
   + pengine: remove unnecessary whitespace from notify environment variables
   + pengine: require-all feature for ordered clones
   + pengine: Resolve memory leaks
   + pengine: resource discovery mode for location constraints
   + pengine: restart master instances on instance attribute changes
   + pengine: Turn off legacy unpacking of resource options into the meta hashtable
   + pengine: Watchdog integration is sufficient for fencing
   + Perform systemd reloads asynchronously
   + ping: Correctly advertise multiplier default
   + Prefer to inherit the watchdog timeout from SBD
   + properly record stop args after reload
   + provide fake meta data for ra class heartbeat
   + remote: report timestamps for remote connection resource operations
   + remote: Treat recv msg timeout as a disconnect
   + service: Prevent potential use-of-NULL in metadata lookups
   + solaris: Allow compilation when dirent.d_type is not available
   + solaris: Correctly replace the linux swab functions
   + solaris: Disable throttling since /proc doesn't exist
   + stonith-ng: Correctly observe the watchdog completion timeout
   + stonith-ng: Correctly track node state
   + stonith-ng: Reset mainloop source IDs after removing them
   + systemd: Correctly handle long running stop actions
   + systemd: Ensure failed monitor operations always return
   + systemd: Ensure we don't call dbus_message_unref() with NULL
   + systemd: fix crash caused when canceling in-flight operation
   + systemd: Kindly ask dbus NOT to kill the process if the dbus connection fails
   + systemd: Perform actions asynchronously
   + systemd: Perform monitor operations without blocking
   + systemd: Tell systemd not to take DBus down from underneath us
   + systemd: Trick systemd into not stopping our services before us during shutdown
   + tools: Improve crm_mon output with certain option combinations
   + upstart: Monitor actions always return 'ok' or 'not running'
   + upstart: Perform more parts of monitor operations without blocking
   + xml: add 'require-all' to xml schema for constraints
   + xml: cl#5231 - Unset the deleted attributes in the resulting diffs
   + xml: Clone the latest constraint schema in preparation for changes
   + xml: Correctly create v1 patchsets when deleting attributes
   + xml: Do not change the ordering of properties when applying v1 cib diffs
   + xml: Do not dump deleted attributes
   + xml: Do not prune leaves from v1 cib diffs that are being created with digests
   + xml: Ensure ACLs are reapplied before calculating what a replace operation changed
   + xml: Fix upgrade-1.3.xsl to correctly transform ACL rules with "attribute"
   + xml: Prevent assert errors in crm_element_value() on applying a patch without version information
   + xml: Prevent potential use-of-NULL
 
 
 * Tue Jul 22 2014 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.12
 - Update source tarball to revision: 93a037d
 - Changesets: 795
 - Diff:       195 files changed, 13772 insertions(+), 6176 deletions(-)
 
 - Features added since Pacemaker-1.1.11
   + Changes to the ACL schema to support nodes and unix groups
   + cib: Check ACLs prior to making the update instead of parsing the diff afterwards
   + cib: Default ACL support to on
   + cib: Enable the more efficient xml patchset format
   + cib: Implement zero-copy status update
   + cib: Send all r/w operations via the cluster connection and have all nodes process them
   + crmd: Set "cluster-name" property to corosync's "cluster_name" by default for corosync-2
   + crm_mon: Display brief output if "-b/--brief" is supplied or 'b' is toggled
   + crm_report: Allow ssh alternatives to be used
   + crm_ticket: Support multiple modifications for a ticket in an atomic operation
   + extra: Add logrotate configuration file for /var/log/pacemaker.log
   + Fencing: Add the ability to call stonith_api_time() from stonith_admin
   + logging: daemons always get a log file, unless explicitly configured to 'none'
   + logging: allows the user to specify a log level that is output to syslog
   + PE: Automatically re-unfence a node if the fencing device definition changes
   + pengine: cl#5174 - Allow resource sets and templates for location constraints
   + pengine: Support cib object tags
   + pengine: Support cluster-specific instance attributes based on rules
   + pengine: Support id-ref in nvpair with optional "name"
   + pengine: Support per-resource maintenance mode
   + pengine: Support site-specific instance attributes based on rules
   + tools: Allow crm_shadow to create older configuration versions
   + tools: Display pending state in crm_mon/crm_resource/crm_simulate if --pending/-j is supplied (cl#5178)
   + xml: Add the ability to have lightweight schema revisions
   + xml: Enable resource sets in location constraints for 1.2 schema
   + xml: Support resources that require unfencing
 
 - Changes since Pacemaker-1.1.11
   + acl: Authenticate pacemaker-remote requests with the node name as the client
   + acl: Read access must be explicitly granted
   + attrd: Ensure attribute dampening is always observed
   + attrd: Remove offline nodes from node cache for "peer-remove" requests
   + Bug cl#5055 - Improved migration support.
   + Bug cl#5184 - Ensure pending probes that ultimately fail are correctly updated
   + Bug cl#5196 - pengine: Check values after expanding templates
   + Bug cl#5212 - Do not promote instances when quorum is lost and no-quorum-policy=freeze
   + Bug cl#5213 - Ensure role colocation with -INFINITY is enforced
   + Bug cl#5213 - Limit the scope of the previous commit to the masters role
   + Bug cl#5219 - pengine: Allow unrelated resources with a common colocation target to remain promoted
   + Bug cl#5222 - cib: Repair rolling update capability
   + Bug cl#5222 - Enable legacy mode whenever a broadcast update is detected
   + Bug rhbz#1036631 - Stop members of cloned groups when dependencies are stopped
   + Bug rhbz#1054307 - cname pattern match should be more restrictive in init script
   + Bug rhbz#1057697 - Use native DBus library for systemd/upstart support to avoid problematic use of threads
   + Bug rhbz#1097457 - Limit the scope of the previous fix and include a helpful comment
   + Bug rhbz#1097457 - Prevent invalid transition when resources are ordered to start after the container they're started in
   + cib: allow setting permanent remote-node attributes
   + cib: Auto-detect which patchset format to use
   + cib: Determine the best value of validate-with if one is not supplied
   + cib: Do not disable cib disk writes if on-disk cib is corrupt
   + cib: Ensure 'cibadmin -R/--replace' commands get replies
   + cib: Erasing the cib is an admin action, bump the admin_epoch instead
   + cib: Fix remote cib based on TLS
   + cib: Ignore patch failures if we already have their contents
   + cib: Validate that everyone still sees the same configuration once all updates have completed
   + cibadmin: Allow privileged clients to perform tasks as unprivileged users
   + cibadmin: Remove dangerous commands that exposed unnecessary implementation internal details
   + cluster: Fix segfault on removing a node
   + cluster: Prevent search of unames from attempting to create node entries for unknown nodes
   + cluster: Remove unknown offline nodes with conflicting unames from node cache
   + controld: Do not consider the dlm up until the address list is present
   + controld: handling startup fencing within the controld agent, not the dlm
   + controld: Return OCF_ERR_INSTALLED instead of OCF_NOT_INSTALLED
   + crmd: Ack pending operations that were cancelled due to rsc deletion
   + crmd: Actions can only be executed if their prerequisites completed successfully
   + crmd: avoid double free caused by nested hash table removal
   + crmd: Avoid spamming the cib by triggering a transition only once per non-status change
   + crmd: Correctly react to successful unfencing operations
   + crmd: Correctly recognise operation cancellations we initiated
   + crmd: Do not erase the status section for unfenced nodes
   + crmd: Do not overwrite existing node state when fencing completes
   + crmd: Do not start timers for already completed operations
   + crmd: Ensure crm_config options are re-read on updates
   + crmd: Fenced nodes that return prior to an election do not need to have their status section reset
   + crmd: make lrm_state hash table not case sensitive
   + crmd: make node_state erase correctly
   + crmd: Only write fence_averride if open() returns a positive file descriptor
   + crmd: Prevent manual fencing confirmations from attempting to create node entries for unknown nodes
   + crmd: Prevent SIGPIPE when notifying CMAN about fencing operations
   + crmd: Remove state of unknown nodes with conflicting unames from CIB
   + crmd: Remove unknown nodes with conflicting unames from CIB
   + crmd: Report unsuccessful unfencing operations
   + crm_diff: Allow the generation of xml patchsets without digests
   + crm_mon: Allow the file created by --as-html to be world readable
   + crm_mon: Ensure resource attributes have been unpacked before displaying connectivity data
   + crm_node: Only remove the named resource from the cib
   + crm_report: Gracefully handle ridiculously large logfiles
   + crm_report: Only gather dlm data if dlm_controld is running
   + crm_resource: Gracefully handle -EACCES when querying the cib
   + crm_verify: Perform a full set of calculations whenever the status section is present
   + fencing: Advertise support for reboot/on/off in the metadata for legacy agents
   + fencing: Automatically switch from 'list' to 'status' to 'static-list' if those actions are not advertised in the metadata
   + fencing: Cache metadata lookups to avoid repeated blocking during device registration
   + fencing: Correctly record which peer performed the fencing operation
   + fencing: default to 'off' when agent does not advertise 'reboot' in metadata
   + fencing: Do not unregister/register all stonith devices on every resource agent change
   + fencing: Execute all required fencing devices regardless of what topology level they are at
   + fencing: Fence using all required devices
   + fencing: Pass the correct options when looking up the history by node name
   + fencing: Update stonith device list only if stonith is enabled
   + get_cluster_type: failing concurrent tool invocations on heartbeat
   + ignore SIGPIPE when gnutls is in use
   + iso8601: Different logic is needed when logging and calculating durations
   + iso8601: Fix memory leak in duration calculation
   + Logging: Bootstrap daemon logging before processing arguments but configure it afterwards
   + lrmd: Cancel recurring operations before stop action is executed
   + lrmd: Expose logging variables expected by OCF agents
   + lrmd: Handle systemd reporting 'done' before a resource is actually stopped/started
   + lrmd: Merge duplicate recurring monitor operations
   + lrmd: Prevent OCF agents from logging to random files due to "value" of setenv() being NULL
   + lrmd: Provide stderr output from agents if available, otherwise fall back to stdout
   + mainloop: Better handle the killing of processes in the act of exiting
   + mainloop: Canceling in-flight operations should not fail if child process has already exited.
   + mainloop: Fixes use after free in process monitor code
   + mcp: Tell systemd not to respawn us if we exit with rc=100
   + membership: Avoid duplicate peer entries in the peer cache
   + pengine: Allow container nodes to migrate with connection resource
   + pengine: avoid assert by searching for stop action on correct node during LogActions
   + pengine: Block restart of resources if any dependent resource in a group is unmanaged
   + pengine: cl#5186 - Avoid running rsc on two nodes when node is fenced during migration
   + pengine: cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node
   + pengine: cl#5200 - Before migrating utilization-using resources to a node, take off the load that will no longer run there if it's not introducing transition loop
   + pengine: Correctly handle origin offsets in the future
   + pengine: Correctly observe requires=nothing
   + pengine: Default sequential to TRUE for resource sets for consistency with colocation sets
   + pengine: Delay unfencing until after we know the state of all resources that require unfencing
   + pengine: Do not initiate fencing for unclean nodes when fencing is disabled
   + pengine: Ensure instance numbers are preserved for cloned templates
   + pengine: Ensure unfencing only happens once, even if the transition is interrupted
   + pengine: Fencing devices default to only requiring quorum in order to start
   + pengine: fixes invalid transition caused by clones with more than 10 instances
   + pengine: Force record pending for migrate_to actions
   + pengine: handles edge case where container order constraints are not honored during migration
   + pengine: Ignore failure-timeout only if the failed operation has on-fail="block"
   + pengine: Mark unrunnable stop actions as "blocked" and show the correct current locations
   + pengine: Memory leaks
   + pengine: properly handle fencing of container remote-nodes when the container is orphaned
   + pengine: properly place resource within a container when container is a remote-node.
   + pengine: Unfencing is based on device probes, there is no need to unfence when normal resources are found active
   + pengine: Use "#cluster-name" in rules for setting cluster-specific instance attributes
   + pengine: Use "#site-name" in rules for setting site-specific instance attributes
   + remote: Allow baremetal remote-node connection resources to migrate
   + remote: clear remote-node status correctly
   + remote: Enable migration support for baremetal connection resources by default
   + remote: Handle request/response ipc proxy correctly
   + services: Correctly reset the nice value for lrmd's children
   + services: Do not allow duplicate recurring op entries
   + services: Do not block synced service executions
   + services: Fixes segfault associated with cancelling in-flight recurring operations.
   + services: Remove cancelled recurring ops from internal lists as early as possible
   + services: Remove file descriptors from mainloop as soon as we have drained them
   + services: Reset the scheduling policy and priority for lrmd's children without relying on SCHED_RESET_ON_FORK
   + services_action_cancel: Interpret return code from mainloop_child_kill() correctly
   + stonith_admin: Ensure pointers passed to sscanf() are properly initialized
   + stonith_api_time_helper now returns when the most recent fencing operation completed
   + systemd: Prevent use-of-NULL when determining if an agent exists
   + systemd: Try to handle dbus actions that complete prior to configuring a callback
   + Tools: Non-daemons shouldn't abort just because xml parsing failed
   + Upstart: Allow compilation with glib versions older than 2.28
   + Upstart: Do not attempt upstart jobs if we cannot connect to dbus
   + Fix a problem where the newest cib might not be acquired when data was old
   + xml: Check all available schemas when doing upgrades
   + xml: Correctly determine the lowest allowed schema version
   + xml: Correctly enforce ACLs after a replace operation
   + xml: Correctly infer attribute changes after a replace operation
   + xml: Create the correct diff when only part of a document is changed
   + xml: Detect attribute ordering changes
   + xml: Detect content that is added and removed in the same update
   + xml: Do not prune meaningful leaves from v1 patchsets
   + xml: Empty patchsets are considered to have applied cleanly
   + xml: Ensure patches always have version details set
   + xml: Find the minimal set of changes when part of a document is replaced
   + xml: If validate-with is missing, we find the most recent schema that accepts it and go from there
   + xml: Introduce a 'move' primitive for v2 patch sets
   + xml: Preserve the attribute order in the patch for subsequent digest validation
   + xml: Resolve memory leak when logging xml blobs
   + xml: Update xml validation to allow '<node type=remote />'
 
 
 * Thu Feb 13 2014 David Vossel <davidvossel@gmail.com> Pacemaker-1.1.11
 - Update source tarball to revision: 33f9d09
 - Changesets: 462
 - Diff:       147 files changed, 6810 insertions(+), 4057 deletions(-)
 
 - Features added since Pacemaker-1.1.10
 
   + attrd: A truly atomic version of attrd for use where CPG is used for cluster communication
   + cib: Allow values to be added/updated and removed in a single update
   + cib: Support XML comments in diffs
   + Core: Allow blackbox logging to be disabled with SIGUSR2
   + crmd: Do not block on proxied calls from pacemaker_remoted
   + crmd: Enable cluster-wide throttling when the cib heavily exceeds its target load
   + crmd: Make the per-node action limit directly configurable in the CIB
   + crmd: Slow down recovery on nodes with IO load
   + crmd: Track CPU usage on cluster nodes and slow down recovery on nodes with high CPU/IO load
   + crm_mon: add --hide-headers option to hide all headers
   + crm_node: Display partition output in sorted order
   + crm_report: Collect logs directly from journald if available
   + Fencing: On timeout, clean up the agent's entire process group
   + Fencing: Support agents that need the host to be unfenced at startup
   + ipc: Raise the default buffer size to 128k
   + PE: Add a special attribute for distinguishing between real nodes and containers in constraint rules
   + PE: Allow location constraints to take a regex pattern to match against resource IDs
   + pengine: Distinguish between the agent being missing and something the agent needs being missing
   + remote: Properly version the remote connection protocol
 
 - Changes since Pacemaker-1.1.10
 
   + Bug rhbz#1011618 - Consistently use 'Slave' as the role for unpromoted master/slave resources
   + Bug rhbz#1057697 - Use native DBus library for systemd and upstart support to avoid problematic use of threads
   + attrd: Any variable called 'cluster' makes the daemon crash before reaching main()
   + attrd: Avoid infinite write loop for unknown peers
   + attrd: Drop all attributes for peers that left the cluster
   + attrd: Give remote-nodes ability to set attributes with attrd
   + attrd: Prevent inflation of attribute dampen intervals
   + attrd: Support SI units for attribute dampening
   + Bug cl#5171 - pengine: Don't prevent clones from running due to dependent resources
   + Bug cl#5179 - Corosync: Attempt to retrieve a peer's node name if it is not already known
   + Bug cl#5181 - corosync: Ensure node IDs are written to the CIB as unsigned integers
   + Bug rhbz#902407 - crm_resource: Handle --ban for master/slave resources as advertised
   + cib: Correctly check for archived configuration files
   + cib: Correctly log short-form xml diffs
   + cib: Fix remote cib based on TLS
   + cibadmin: Report errors during sign-off
   + cli: Do not enable blackbox for cli tools
   + cluster: Fix segfault on removing a node
   + cman: Do not start pacemaker if cman startup fails
   + cman: Start clvmd and friends from the init script if enabled
   + Command-line tools should stop after an assertion failure
   + controld: Use the correct variant of dlm_controld for corosync-2 clusters
   + cpg: Correctly set the group name length
   + cpg: Ensure the CPG group is always null-terminated
   + cpg: Only process one message at a time to allow other priority jobs to be performed
   + crmd: Correctly observe the configured batch-limit
   + crmd: Correctly update expected state when the previous DC shuts down
   + crmd: Correctly update the history cache when recurring ops change their return code
   + crmd: Don't add node_state to cib, if we have not seen or fenced this node yet
   + crmd: don't segfault on shutdown when using heartbeat
   + crmd: Prevent recurring monitors being cancelled due to notify operations
   + crmd: Reliably detect and act on reprobe operations from the policy engine
   + crmd: When a peer expectedly shuts down, record the new join and expected states into the cib
   + crmd: When the DC gracefully shuts down, record the new expected state into the cib
   + crm_attribute: Do not swallow hostname lookup failures
   + crm_mon: Do not display duplicates of failed actions
   + crm_mon: Reduce flickering in interactive mode
   + crm_resource: Observe --master modifier for --move
   + crm_resource: Provide a meaningful error if --master is used for primitives and groups
   + fencing: Allow fencing for node after topology entries are deleted
   + fencing: Apply correct score to the resource of group
   + fencing: Ignore changes to non-fencing resources
   + fencing: Observe pcmk_host_list during automatic unfencing
   + fencing: Put all fencing agent processes into their own process group
   + fencing: Wait until all possible replies are received before continuing with unverified devices
   + ipc: Compress msgs based on client's actual max send size
   + ipc: Have the ipc server enforce a minimum buffer size all clients must use.
   + iso8601: Prevent dates from jumping backwards a day in some timezones
   + lrmd: Correctly calculate metadata for the 'service' class
   + lrmd: Correctly cancel monitor actions for lsb/systemd/service resources on cleaning up
   + mcp: Remove LSB hints that instruct chkconfig to start pacemaker at boot time
   + mcp: Some distros complain when LSB scripts do not include Default-Start/Stop directives
   + pengine: Allow fencing of baremetal remote nodes
   + pengine: cl#5186 - Avoid running rsc on two nodes when node is fenced during migration
   + pengine: Correctly account for the location preferences of things colocated with a group
   + pengine: Correctly handle demotion of grouped masters that are partially demoted
   + pengine: Disable container node probes due to constraint conflicts
   + pengine: Do not allow colocation with blocked clone instances
   + pengine: Do not re-allocate clone instances that are blocked in the Stopped state
   + pengine: Do not restart resources that depend on unmanaged resources
   + pengine: Force record pending for migrate_to actions
   + pengine: Location constraints with role=Started should prevent masters from running at all
   + pengine: Order demote/promote of resources on remote nodes to happen only once the connection is up
   + pengine: Properly handle orphaned multistate resources living on remote-nodes
   + pengine: Properly shutdown orphaned remote connection resources
   + pengine: Recover unexpectedly running container nodes.
   + remote: Add support for ipv6 into pacemaker_remote daemon
   + remote: Handle endian changes between client and server and improve forward compatibility
   + services: Fixes segfault associated with cancelling in-flight recurring operations.
   + services: Reset the scheduling policy and priority for lrmd's children without relying on SCHED_RESET_ON_FORK
 
 * Fri Jul 26 2013 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.10
 - Update source tarball to revision: ab2e209
 - Changesets: 602
 - Diff:       143 files changed, 8162 insertions(+), 5159 deletions(-)
 
 - Features added since Pacemaker-1.1.9
   + Core: Convert all exit codes to positive errno values
   + crm_error: Add the ability to list and print error symbols
   + crm_resource: Allow individual resources to be reprobed
   + crm_resource: Allow options to be set recursively
   + crm_resource: Implement --ban for moving resources away from nodes and --clear (replaces --unmove)
   + crm_resource: Support OCF tracing when using --force-(check|start|stop)
   + PE: Allow active nodes in our current membership to be fenced without quorum
   + PE: Suppress meaningless IDs when displaying anonymous clone status
   + Turn off auto-respawning of systemd services when the cluster starts them
   + Bug cl#5128 - pengine: Support maintenance mode for a single node
 
 - Changes since Pacemaker-1.1.9
   + crmd: cib: stonithd: Memory leaks resolved and improved use of glib reference counting
   + attrd: Fixes deleted attributes during dc election
   + Bug cf#5153 - Correctly display clone failcounts in crm_mon
   + Bug cl#5133 - pengine: Correctly observe on-fail=block for failed demote operation
   + Bug cl#5148 - legacy: Correctly remove a node that used to have a different nodeid
   + Bug cl#5151 - Ensure node names are consistently compared without case
   + Bug cl#5152 - crmd: Correctly clean up fenced nodes during membership changes
   + Bug cl#5154 - Do not expire failures when on-fail=block is present
   + Bug cl#5155 - pengine: Block the stop of resources if any depending resource is unmanaged
   + Bug cl#5157 - Allow migration in the absence of some colocation constraints
   + Bug cl#5161 - crmd: Prevent memory leak in operation cache
   + Bug cl#5164 - crmd: Fixes crash when using pacemaker-remote
   + Bug cl#5164 - pengine: Fixes segfault when calculating transition with remote-nodes.
   + Bug cl#5167 - crm_mon: Only print "stopped" node list for incomplete clone sets
   + Bug cl#5168 - Prevent clones from being bounced around the cluster due to location constraints
   + Bug cl#5170 - Correctly support on-fail=block for clones
   + cib: Correctly read back archived configurations if the primary is corrupted
   + cib: The result is not valid when diffs fail to apply cleanly for CLI tools
   + cib: Restore the ability to embed comments in the configuration
   + cluster: Detect and warn about node names with capitals
   + cman: Do not pretend we know the state of nodes we've never seen
   + cman: Do not unconditionally start cman if it is already running
   + cman: Support non-blocking CPG calls
   + Core: Ensure the blackbox is saved on abnormal program termination
   + corosync: Detect the loss of members for which we only know the nodeid
   + corosync: Do not pretend we know the state of nodes we've never seen
   + corosync: Ensure removed peers are erased from all caches
   + corosync: Nodes that can persist in sending CPG messages must be alive after all
   + crmd: Do not get stuck in S_POLICY_ENGINE if a node we couldn't fence returns
   + crmd: Do not update fail-count and last-failure for old failures
   + crmd: Ensure all membership operations can complete while trying to cancel a transition
   + crmd: Ensure operations for cleaned up resources don't block recovery
   + crmd: Ensure we return to a stable state if there have been too many fencing failures
   + crmd: Initiate node shutdown if another node claims to have successfully fenced us
   + crmd: Prevent messages for remote crmd clients from being relayed to wrong daemons
   + crmd: Properly handle recurring monitor operations for remote-node agent
   + crmd: Store last-run and last-rc-change for all operations
   + crm_mon: Ensure stale pid files are updated when a new process is started
   + crm_report: Correctly collect logs when 'uname -n' reports fully qualified names
   + fencing: Fail the operation once all peers have been exhausted
   + fencing: Restore the ability to manually confirm that fencing completed
   + ipc: Allow unprivileged clients to clean up after server failures
   + ipc: Restore the ability for members of the haclient group to connect to the cluster
   + legacy: Support "crm_node --remove" with a node name for corosync plugin (bnc#805278)
   + lrmd: Default to the upstream location for resource agent scratch directory
   + lrmd: Pass errors from lsb metadata generation back to the caller
   + pengine: Correctly handle resources that recover before we operate on them
   + pengine: Delete the old resource state on every node whenever the resource type is changed
   + pengine: Detect constraints with inappropriate actions (ie. promote for a clone)
   + pengine: Ensure per-node resource parameters are used during probes
   + pengine: If fencing is unavailable or disabled, block further recovery for resources that fail to stop
   + pengine: Implement the rest of get_timet_now() and rename to get_effective_time
   + pengine: Re-initiate _active_ recurring monitors that previously failed but have timed out
   + remote: Workaround for inconsistent tls handshake behavior between gnutls versions
   + systemd: Ensure we get shut down correctly by systemd
   + systemd: Reload systemd after adding/removing override files for cluster services
   + xml: Check for and replace non-printing characters with their octal equivalent while exporting xml text
   + xml: Prevent lockups by setting a more reliable buffer allocation strategy
 
 * Fri Mar 08 2013 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.9
 - Update source tarball to revision: 7e42d77
 - Statistics:
   Changesets: 731
   Diff:       1301 files changed, 92909 insertions(+), 57455 deletions(-)
 
 - Features added in Pacemaker-1.1.9
   + corosync: Allow cman and corosync 2.0 nodes to use a name other than uname()
   + corosync: Use queues to avoid blocking when sending CPG messages
   + ipc: Compress messages that exceed the configured IPC message limit
   + ipc: Use queues to prevent slow clients from blocking the server
   + ipc: Use shared memory by default
   + lrmd: Support nagios remote monitoring
   + lrmd: Pacemaker Remote Daemon for extending pacemaker functionality outside corosync cluster.
   + pengine: Check for master/slave resources that are not OCF agents
   + pengine: Support a 'requires' resource meta-attribute for controlling whether it needs quorum, fencing or nothing
   + pengine: Support for resource container
   + pengine: Support resources that require unfencing before start
 
 - Changes since Pacemaker-1.1.8
   + attrd: Correctly handle deletion of non-existent attributes
   + Bug cl#5135 - Improved detection of the active cluster type
   + Bug rhbz#913093 - Use crm_node instead of uname
   + cib: Avoid use-after-free by correctly supporting cib_no_children for non-xpath queries
   + cib: Correctly process XML diffs involving element removal
   + cib: Performance improvements for non-DC nodes
   + cib: Prevent error message by correctly handling peer replies
   + cib: Prevent ordering changes when applying xml diffs
   + cib: Remove text nodes from cib replace operations
   + cluster: Detect node name collisions in corosync
   + cluster: Preserve corosync membership state when matching node name/id entries
   + cman: Force fenced to terminate on shutdown
   + cman: Ignore qdisk 'nodes'
   + core: Drop per-user core directories
   + corosync: Avoid errors when closing failed connections
   + corosync: Ensure peer state is preserved when matching names to nodeids
   + corosync: Clean up CMAP connections after querying node name
   + corosync: Correctly detect corosync 2.0 clusters even if we don't have permission to access it
   + crmd: Bug cl#5144 - Do not update the expected status of failed nodes
   + crmd: Correctly determine if cluster disconnection was abnormal
   + crmd: Correctly relay messages for remote clients (bnc#805626, bnc#804704)
   + crmd: Correctly stall the FSA when waiting for additional inputs
   + crmd: Detect and recover when we are evicted from CPG
   + crmd: Differentiate between a node that is up and coming up in peer_update_callback()
   + crmd: Have cib operation timeouts scale with node count
   + crmd: Improved continue/wait logic in do_dc_join_finalize()
   + crmd: Prevent election storms caused by getrusage() values being too close
   + crmd: Prevent timeouts when performing pacemaker level membership negotiation
   + crmd: Prevent use-after-free of fsa_message_queue during exit
   + crmd: Store all current actions when stalling the FSA
   + crm_mon: Do not try to render a blank cib and indicate the previous output is now stale
   + crm_mon: Fixes crm_mon crash when using snmp traps.
   + crm_mon: Look for the correct error codes when applying configuration updates
   + crm_report: Ensure policy engine logs are found
   + crm_report: Fix node list detection
   + crm_resource: Have crm_resource generate a valid transition key when sending resource commands to the crmd
   + date/time: Bug cl#5118 - Correctly convert seconds-since-epoch to the current time
   + fencing: Attempt to provide more information than just 'generic error' for failed actions
   + fencing: Correctly record completed but previously unknown fencing operations
   + fencing: Correctly terminate when all device options have been exhausted
   + fencing: cov#739453 - String not null terminated
   + fencing: Do not merge new fencing requests with stale ones from dead nodes
   + fencing: Do not start fencing until entire device topology is found or query results timeout.
   + fencing: Do not wait for the query timeout if all replies have arrived
   + fencing: Fix passing of parameters from CMAN containing '='
   + fencing: Fix non-comparison when sorting devices by priority
   + fencing: On failure, only try a topology device once from the remote level.
   + fencing: Only try peers for non-topology based operations once
   + fencing: Retry stonith device for duration of action's timeout period.
   + heartbeat: Remove incorrect assert during cluster connect
   + ipc: Bug cl#5110 - Prevent 100% CPU usage when looking for synchronous replies
   + ipc: Use 50k as the default compression threshold
   + legacy: Prevent assertion failure on routing ais messages (bnc#805626)
   + legacy: Re-enable logging from the pacemaker plugin
   + legacy: Relax the 'active' check for plugin based clusters to avoid false negatives
   + legacy: Skip peer process check if the process list is empty in crm_is_corosync_peer_active()
   + mcp: Only define HA_DEBUGLOG to avoid agent calls to ocf_log printing everything twice
   + mcp: Re-attach to existing pacemaker components when mcp fails
   + pengine: Any location constraint for the slave role applies to all roles
   + pengine: Avoid leaking memory when cleaning up failcounts and using containers
   + pengine: Bug cl#5101 - Ensure stop order is preserved for partially active groups
   + pengine: Bug cl#5140 - Allow set members to be stopped when the subsequent set has require-all=false
   + pengine: Bug cl#5143 - Prevent shuffling of anonymous master/slave instances
   + pengine: Bug rhbz#880249 - Ensure orphan masters are demoted before being stopped
   + pengine: Bug rhbz#880249 - Teach the PE how to recover masters into primitives
   + pengine: cl#5025 - Automatically clear failcount for start/monitor failures after resource parameters change
   + pengine: cl#5099 - Probe operation uses the timeout value from the minimum interval monitor by default (#bnc776386)
   + pengine: cl#5111 - When clone/master child rsc has on-fail=stop, ensure all children stop on failure.
   + pengine: cl#5142 - Do not delete orphaned children of an anonymous clone
   + pengine: Correctly unpack active anonymous clones
   + pengine: Ensure previous migrations are closed out before attempting another one
   + pengine: Introducing the whitebox container resources feature
   + pengine: Prevent double-free for cloned primitive from template
   + pengine: Process rsc_ticket dependencies earlier for correctly allocating resources (bnc#802307)
   + pengine: Remove special cases for fencing resources
   + pengine: rhbz#902459 - Remove rsc node status for orphan resources
   + systemd: Gracefully handle unexpected DBus return types
   + Replace the use of the insecure mktemp(3) with mkstemp(3)
 
 * Thu Sep 20 2012 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.8
 
 - Update source tarball to revision: 1a5341f
 - Statistics:
   Changesets: 1019
   Diff:       2107 files changed, 117258 insertions(+), 73606 deletions(-)
 
 - All APIs have been cleaned up and reduced to essentials
 - Pacemaker now includes a replacement lrmd that supports systemd and upstart agents
 - Config and state files (cib.xml, PE inputs and core files) have moved to new locations
 - The crm shell has become a separate project and no longer included with Pacemaker
 - All daemons/tools now have a unified set of error codes based on errno.h (see crm_error)
 
 - Changes since Pacemaker-1.1.7
   + Core: Bug cl#5032 - Rewrite the iso8601 date handling code
   + Core: Correctly extract the version details from a diff
   + Core: Log blackbox contents, if enabled, when an error occurs
   + Core: Only LOG_NOTICE and higher are sent to syslog
   + Core: Replace use of IPC from clplumbing with IPC from libqb
   + Core: SIGUSR1 now enables blackbox logging, SIGTRAP to write out
   + Core: Support a blackbox for additional logging detail after crashes/errors
   + Promote support for advanced fencing logic to the stable schema
   + Promote support for node starting scores to the stable schema
   + Promote support for service and systemd to the stable schema
 
   + attrd: Differentiate between updating all our attributes and everybody updating all theirs too
   + attrd: Have single-shot clients wait for an ack before disconnecting
   + cib: cl#5026 - Synced cib updates should not return until the cpg broadcast is complete.
   + corosync: Detect when the first corosync has not yet formed and handle it gracefully
   + corosync: Obtain a full list of configured nodes, including their names, when we connect to the quorum API
   + corosync: Obtain a node name from DNS if one was not already known
   + corosync: Populate the cib nodelist from corosync if available
   + corosync: Use the CFG API and DNS to determine node names if not configured in corosync.conf
   + crmd: Block after 10 failed fencing attempts for a node
   + crmd: cl#5051 - Fixes file leak in PE ipc connection initialization.
   + crmd: cl#5053 - Fixes fail-count not being updated properly.
   + crmd: cl#5057 - Restart sub-systems correctly (bnc#755671)
   + crmd: cl#5068 - Fixes crm_node -R option so it works with corosync 2.0
   + crmd: Correctly re-establish failed attrd connections
   + crmd: Detect when the quorum API isn't configured for corosync 2.0
   + crmd: Do not overwrite any configured node type (eg. quorum node)
   + crmd: Enable use of new lrmd daemon and client library in crmd.
   + crmd: Overhaul the way node state is recorded and updated in the CIB
   + fencing: Bug rhbz#853537 - Prevent use-of-NULL when the cib libraries are not available
   + fencing: cl#5073 - Add 'off' as a valid value for stonith-action option.
   + fencing: cl#5092 - Always timeout stonith operations if timeout period expires.
   + fencing: cl#5093 - Stonith per device timeout option
   + fencing: Clean up if we detect a failed connection
   + fencing: Delegate complex self fencing requests - we won't be around to see it to completion
   + fencing: Ensure all peers are notified of complex fencing op completion
   + fencing: Fix passing of fence_legacy parameters containing '='
   + fencing: Gracefully handle metadata requests for unknown agents
   + fencing: Return cached dynamic target list for busy devices.
   + fencing: rhbz#801355 - Abort transition on DC when external fencing operation is detected
   + fencing: rhbz#801355 - Merge fence requests for identical operations already in progress.
   + fencing: rhbz#801355 - Report fencing operations external of pacemaker to cib
   + fencing: Specify the action to perform using action= instead of the older option=
   + fencing: Stop building fake metadata for broken agents
   + fencing: Tolerate agents that report empty metadata in the admin tool
   + mcp: Correctly retry the connection to corosync on failure
   + mcp: Do not shut down IPC until the last client exits
   + mcp: Prevent use-after-free when running against corosync 1.x
   + pengine: Bug cl#5059 - Use the correct action's status when calculating required actions for interleaved clones
   + pengine: Bypass online/offline checking resource detection for ping/quorum nodes
   + pengine: cl#5044 - migrate_to no longer requires load_stopped for avoiding possible transition loop
   + pengine: cl#5069 - Honor 'on-fail=ignore' even when operation is disabled.
   + pengine: cl#5070 - Allow influence of promotion score when multistate rsc is left hand of colocation
   + pengine: cl#5072 - Fixes monitor op stopping after rsc promotion.
   + pengine: cl#5072 - Fixes pengine regression test failures
   + pengine: Correctly set the status for nodes not intended to run Pacemaker
   + pengine: Do not append instance numbers to anonymous clones
   + pengine: Fix failcount expiration
   + pengine: Fix memory leaks found by valgrind
   + pengine: Fix use-after-free and use-of-NULL errors detected by coverity
   + pengine: Fixes use of colocation scores other than +/- INFINITY
   + pengine: Improve detection of rejoining nodes
   + pengine: Prevent use-of-NULL when tracing is enabled
   + pengine: Stonith resources are allowed to start even if their probes haven't completed on partially active nodes
   + services: New class called 'service' which expands to the correct (LSB/systemd/upstart) standard
   + services: Support Asynchronous systemd/upstart actions
   + Tools: crm_shadow - Bug cl#5062 - Correctly set argv[0] when forking a shell process
   + Tools: crm_report: Always include system logs (if we can find them)
 
 * Wed Mar 28 2012 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.7
 - Update source tarball to revision: bc7ff2c
 - Statistics:
   Changesets: 513
   Diff:       1171 files changed, 90472 insertions(+), 19368 deletions(-)
 
 - Changes since Pacemaker-1.1.6.1
   + ais: Prepare for corosync versions using IPC from libqb
   + cib: Correctly shutdown in the presence of peers without relying on timers
   + cib: Don't halt disk writes if the previous digest is missing
   + cib: Determine when there are no peers to respond to our shutdown request and exit
   + cib: Ensure no additional messages are processed after we begin terminating
   + Cluster: Hook up the callbacks to the corosync quorum notifications
   + Core: basename() may modify its input, do not pass in a constant
   + Core: Bug cl#5016 - Prevent failures in recurring ops from being lost
   + Core: Bug rhbz#800054 - Correctly retrieve heartbeat uuids
   + Core: Correctly determine when an XML file should be decompressed
   + Core: Correctly track the length of a string without reading from uninitialized memory (valgrind)
   + Core: Ensure signals are handled eventually in the absence of timer sources or IPC messages
   + Core: Prevent use-of-NULL in crm_update_peer()
   + Core: Strip text nodes from on disk xml files
   + Core: Support libqb for logging
   + corosync: Consistently set the correct uuid with get_node_uuid()
   + Corosync: Correctly disconnect from corosync variants
   + Corosync: Correctly extract the node id from membership updates
   + corosync: Correctly infer lost members from the quorum API
   + Corosync: Default to using the nodeid as the node's uuid (instead of uname)
   + corosync: Ensure we catch nodes that leave the membership, even if the ringid doesn't change
   + corosync: Hook up CPG membership
   + corosync: Relax a development assert and gracefully handle the error condition
   + corosync: Remove deprecated member of the CFG API
   + corosync: Treat CS_ERR_QUEUE_FULL the same as CS_ERR_TRY_AGAIN
   + corosync: Unset the process list when nodes disappear on us
   + crmd: Also purge fencing results when we enter S_NOT_DC
   + crmd: Bug cl#5015 - Remove the failed operation as well as the resulting fail-count and last-failure attributes
   + crmd: Correctly determine when a node can suicide with fencing
   + crmd: Election - perform the age comparison only once
   + crmd: Fast-track shutdown if we couldn't request it via attrd
   + crmd: Leave it up to the PE to decide which ops can/cannot be reload
   + crmd: Prevent use-after-free when calling delete_resource due to CRM_OP_REPROBE
   + crmd: Supply format arguments in the correct order
   + fencing: Add missing format parameter
   + fencing: Add the fencing topology section to the 1.1 configuration schema
   + fencing: fence_legacy - Drop spurious host argument from status query
   + fencing: fence_legacy - Ensure port is available as an environment variable when calling monitor
   + fencing: fence_pcmk - don't block if nothing is specified on stdin
   + fencing: Fix log format error
   + fencing: Fix segfault caused by passing garbage to dlsym()
   + fencing: Fix use-of-NULL in process_remote_stonith_query()
   + fencing: Fix use-of-NULL when listing installed devices
   + fencing: Implement support for advanced fencing topologies: eg. kdump || (network && disk) || power
   + fencing: More gracefully handle failed 'list' operations for devices that only support a single connection
   + fencing: Prevent duplicate free when listing devices
   + fencing: Prevent uninitialized pointers being passed to free
   + fencing: Prevent use-after-free, we may need the query result for subsequent operations
   + fencing: Provide enough data to construct an entry in the node's fencing history
   + fencing: Standardize on /one/ method for clients to request members be fenced
   + fencing: Suppress errors when listing all registered devices
   + mcp: corosync_cfg_state_track was removed from the corosync API, luckily we didn't use it for anything
   + mcp: Do not specify a WorkingDirectory in the systemd unit file - startup fails if it's not available
   + mcp: Set the HA_quorum_type env variable consistently with our corosync plugin
   + mcp: Shut down if one of our child processes can/should not be respawned
   + pengine: Bug cl#5000 - Ensure ordering is preserved when depending on partial sets
   + pengine: Bug cl#5028 - Unmanaged services should block shutdown unless in maintenance mode
   + pengine: Bug cl#5038 - Prevent restart of anonymous clones when clone-max decreases
   + pengine: Bug cl#5007 - Fixes use of colocation constraints with multi-state resources
   + pengine: Bug cl#5014 - Prevent asymmetrical order constraints from causing resource stops
   + pengine: Bug cl#5000 - Implements ability to create rsc_order constraint sets such that A can start after B or C has started.
   + pengine: Correctly migrate a resource that has just migrated
   + pengine: Correct return from error path
   + pengine: Detect reloads of previously migrated resources
   + pengine: Ensure post-migration stop actions occur before node shutdown
   + pengine: Log as loudly as possible when we cannot shut down a cluster node
   + pengine: Reload of a resource no longer causes a restart of dependent resources
   + pengine: Support limiting the number of concurrent live migrations
   + pengine: Support referencing templates in constraints
   + pengine: Support of referencing resource templates in resource sets
   + pengine: Support to make tickets standby for relinquishing tickets gracefully
   + stonith: A "start" operation of a stonith resource does a "monitor" on the device beyond registering it
   + stonith: Bug rhbz#745526 - Ensure stonith_admin actually gets called by fence_pcmk
   + Stonith: Ensure all nodes receive and deliver notifications of the manual override
   + stonith: Fix the stonith timeout issue (cl#5009, bnc#727498)
   + Stonith: Implement a manual override for when nodes are known to be safely off
   + Tools: Bug cl#5003 - Prevent use-after-free in crm_simulate
   + Tools: crm_mon - Support to display tickets (based on Yuusuke Iida's work)
   + Tools: crm_simulate - Support to grant/revoke/standby/activate tickets from the new ticket state section
   + Tools: Implement crm_node functionality for native corosync
   + Fix a number of potential problems reported by coverity
 
 * Wed Aug 31 2011 Andrew Beekhof <andrew@beekhof.net> 1.1.6
 - Update source tarball to revision: 676e5f25aa46 tip
 - Statistics:
   Changesets: 376
   Diff:       1761 files changed, 36259 insertions(+), 140578 deletions(-)
 
 - Changes since Pacemaker-1.1.5
   + ais: check for retryable errors when dispatching AIS messages
   + ais: Correctly disconnect from Corosync and Cman based clusters
   + ais: Followup to previous patch - Ensure we drain the corosync queue of messages when Glib tells us there is input
   + ais: Handle IPC error before checking for NULL data (bnc#702907)
   + cib: Check the validation version before adding the originator details of a CIB change
   + cib: Remove disconnected remote connections from mainloop
   + cman: Correctly override existing fenced operations
   + cman: Dequeue all the cman emitted events and not only the first one leaving the others in the event's queue.
   + cman: Don't call fenced_join and fenced_leave when notifying cman of a fencing event.
   + cman: We need to run the crmd as root for CMAN so that we can ACK fencing operations
   + Core: Cancelled and pending operations do not count as failed
   + Core: Ensure there is sufficient space for EOS when building short-form option strings
   + Core: Fix variable expansion in pkg-config files
   + Core: Partial revert of accidental commit in previous patch
   + Core: Use dlopen to load heartbeat libraries on-demand
   + crmd: Bug lf#2509 - Watch for config option changes from the CIB even if we're not the DC
   + crmd: Bug lf#2528 - Introduce a slight delay when creating a transition to allow attrd time to perform its updates
   + crmd: Bug lf#2559 - Fail actions that were scheduled for a failed/fenced node
   + crmd: Bug lf#2584 - Allow nodes to fence themselves if they're the last one standing
   + crmd: Bug lf#2632 - Correctly handle nodes that return faster than stonith
   + crmd: Cancel timers for actions that were pending on dead nodes
   + crmd: Catch fence operations that claim to succeed but did not really
   + crmd: Do not wait for actions that were pending on dead nodes
   + crmd: Ensure we do not attempt to perform action on failed nodes
   + crmd: Prevent use-of-NULL by g_hash_table_iter_next()
   + crmd: Recurring actions shouldn't cause the last non-recurring action to be forgotten
   + crmd: Store only the last and last failed operation in the CIB
   + mcp: dirname() modifies the input path - pass in a copy of the logfile path
   + mcp: Enable stack detection logic instead of forcing 'corosync'
   + mcp: Fix spelling mistake in systemd service script that prevents shutdown
   + mcp: Shut down if corosync becomes unavailable
   + mcp: systemd control file is now functional
   + pengine: Before migrating an utilization-using resource to a node, take off the load which will no longer run there (lf#2599, bnc#695440)
   + pengine: Before migrating an utilization-using resource to a node, take off the load which will no longer run there (regression tests) (lf#2599, bnc#695440)
   + pengine: Bug lf#2574 - Prevent shuffling by choosing the correct clone instance to stop
   + pengine: Bug lf#2575 - Use uname for migration variables, id is a UUID on heartbeat
   + pengine: Bug lf#2581 - Avoid group restart when clone (re)starts on an unrelated node
   + pengine: Bug lf#2613, lf#2619 - Group migration after failures and non-default utilization policies
   + pengine: Bug suse#707150 - Prevent services being active if dependencies on clones are not satisfied
   + pengine: Correctly recognise which recurring operations are currently active
   + pengine: Demote from Master does not clear previous errors
   + pengine: Ensure restarts due to definition changes cause the start action to be re-issued not probes
   + pengine: Ensure role is preserved for unmanaged resources
   + pengine: Ensure unmanaged resources have the correct role set so the correct monitor operation is chosen
   + pengine: Fix memory leak for re-allocated resources reported by valgrind
   + pengine: Implement cluster ticket and deadman
   + pengine: Implement resource template
   + pengine: Correctly determine the state of multi-state resources with a partial operation history
   + pengine: Only allocate master/slave resources once
   + pengine: Partial revert of 'Minor code cleanup CS: cf6bca32376c On: 2011-08-15'
   + pengine: Resolve memory leak reported by valgrind
   + pengine: Restore the ability to save inputs to disk
   + Shell: implement -w,--wait option to wait for the transition to finish
   + Shell: repair template list command
   + Shell: set of commands to examine logs, reports, etc
   + Stonith: Consolidate pcmk_host_map into run_stonith_agent so that it is applied consistently
   + Stonith: Deprecate pcmk_arg_map for the saner pcmk_host_argument
   + Stonith: Fix use-of-NULL by g_hash_table_lookup
   + Stonith: Improved pcmk_host_map parsing
   + Stonith: Prevent use-of-NULL by g_hash_table_lookup
   + Stonith: Prevent use-of-NULL when no Linux-HA stonith agents are present
   + stonith: Add missing entries to stonith_error2string()
   + Stonith: Correctly finish sending agent options if the initial write is interrupted
   + stonith: Correctly handle synchronous calls
   + stonith: Coverity - Correctly construct result list for the query API call
   + stonith: Coverity - Remove badly constructed memory allocation from the query API call
   + stonith: Ensure completed operations are recorded as such in the history
   + Stonith: Ensure device parameters are passed to the daemon during registration
   + stonith: Fix use-of-NULL in stonith_api_device_list()
   + stonith: stonith_admin - Prevent use of uninitialized pointer by --history command
   + Tools: Bug lf#2528 - Make progress when attrd_updater is called repeatedly within the dampen interval but with the same value
   + Tools: crm_report - Correctly extract data from the local node
   + Tools: crm_report - Remove newlines when detecting the node list
   + Tools: crm_report - Repair the ability to extract data from the local machine
   + Tools: crm_report - Report on all detected backtraces
 
 * Fri Feb 11 2011 Andrew Beekhof <andrew@beekhof.net> 1.1.5
 - Update source tarball to revision: baad6636a053
 - Statistics:
   Changesets: 184
   Diff:       605 files changed, 46103 insertions(+), 26417 deletions(-)
 
 - Changes since Pacemaker-1.1.4
   + Add the ability to delegate sub-sections of the cluster to non-root users via ACLs
 	  Needs to be enabled at compile time, not enabled by default.
   + ais: Bug lf#2550 - Report failed processes immediately
   + Core: Prevent recently introduced use-after-free in replace_xml_child()
   + Core: Reinstate the logic that skips past non-XML_ELEMENT_NODE children
   + Core: Remove extra calls to xmlCleanupParser resulting in use-after-free
   + Core: Repair reference to child-of-child after removal of xml_child_iter_filter from get_message_xml()
   + crmd: Bug lf#2545 - Ensure notify variables are accurate for stop operations
   + crmd: Cancel recurring operations while we're still connected to the lrmd
   + crmd: Reschedule the PE_START action if it's not already running when we try to use it
   + crmd: Update failcount for failed promote and demote operations
   + pengine: Bug lf#2445 - Avoid relying on stickiness for stable clone placement
   + pengine: Bug lf#2445 - Do not override configured clone stickiness values
   + pengine: Bug lf#2493 - Don't imply colocation requirements when applying ordering constraints with clones
   + pengine: Bug lf#2495 - Prevent segfault by validating the contents of ordering sets
   + pengine: Bug lf#2508 - Correctly reconstruct the status of anonymous cloned groups
   + pengine: Bug lf#2518 - Avoid spamming the logs with errors for orphan resources
   + pengine: Bug lf#2544 - Prevent unstable clone placement by factoring in the current node's score before all others
   + pengine: Bug lf#2554 - target-role alone is not sufficient to promote resources
   + pengine: Correct target_rc for probes of inactive resources (fix regression introduced by cs:ac3f03006e95)
   + pengine: Ensure that fencing has completed for stop actions on stonith-dependent resources (lf#2551)
   + pengine: Only update the node's promotion score if the resource is active there
   + pengine: Only use the promotion score from the current clone instance
   + pengine: Prevent use-of-NULL resulting from variable shadowing spotted by Coverity
   + pengine: Prevent use-of-NULL when there is status for an undefined node
   + pengine: Prevent use-after-free resulting from unintended recursion when choosing a node to promote master/slave resources
   + Shell: don't create empty optional sections (bnc#665131)
   + Stonith: Teach stonith_admin to automagically obtain the current node attributes for the target from the CIB
   + tools: Bug lf#2527 - Prevent use-of-NULL in crm_simulate
   + Tools: Prevent crm_resource commands from being lost due to the use of cib_scope_local
 
 * Wed Oct 20 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.4
 - Update source tarball to revision: 75406c3eb2c1 tip
 - Statistics:
   Changesets: 169
   Diff:       772 files changed, 56172 insertions(+), 39309 deletions(-)
 
 - Changes since Pacemaker-1.1.3
   + Italian translation of Clusters from Scratch
   + Significant performance enhancements to the Policy Engine and CIB
   + cib: Bug lf#2506 - Don't remove clients when notifications fail, they might just be too big
   + cib: Drop invalid/failed connections from the client hashtable
   + cib: Ensure all diffs sent to peers have sufficient ordering information
   + cib: Ensure non-change diffs can preserve the ordering on the other side
   + cib: Fix the feature set check
   + cib: Include version information on our synthesised diffs when nothing changed
   + cib: Optimize the way we detect group/set ordering changes - 15% speedup
   + cib: Prevent false detection of config updates with the new diff format
   + cib: Reduce unnecessary copying when comparing xml objects
   + cib: Repair the processing of updates sent from peer nodes
   + cib: Revert part of a recent commit that purged still valid connections
   + cib: The feature set version check is only valid if the current value is non-NULL
   + Core: Actually removing diff markers is necessary
   + Core: Bug lf#2506 - Drop the compression limit because Heartbeat's IPC code sucks
   + Core: Cache Relax-NG schemas - profiling indicates many cycles are wasted needlessly re-parsing them
   + Core: Correctly compare against crm_log_level in the logging macros
   + Core: Correctly extract the version details from a diff
   + Core: Correctly hook up the RNG schema cache
   + Core: Correctly use lazy_xml_sort() for v2 digests
   + Core: Don't compress large payload elements unless we're approaching message limits
   + Core: Don't insert empty ID tags when applying diffs
   + Core: Enable the improved v2 digests
   + Core: Ensure ordering is preserved when applying diffs
   + Core: Fix the CRM_CHECK macro
   + Core: Modify the v2 digest algorithm so that some fields are sorted
   + Core: Prevent use-after-free when creating a CIB update for a timed out action
   + Core: Prevent use-of-NULL when cleaning up RelaxNG data structures
   + Core: Provide significant performance improvements by implementing versioned diffs and digests
   + crmd: All pending operations should be recorded, even recurring ones with high start delays
   + crmd: Don't abort transitions when probes are completed on a node
   + crmd: Don't hide stop events that time out - allowing faster recovery in the presence of overloaded hosts
   + crmd: Ensure the CIB is always writable on the DC by removing a timing hole
   + crmd: Include the correct transition details for timed out operations
   + crmd: Prevent use of NULL by making copies of the operation's hash table
   + crmd: There's no need to check the cib version from the 'added' part of diff updates
   + crmd: Use the supplied timeout for stop actions
   + mcp: Ensure valgrind is able to log its output somewhere
   + mcp: Use 99/01 for the start/stop sequence to avoid problems with services (such as libvirtd) started by init - Patch from Vladislav Bogdanov
   + pengine: Ensure fencing of the DC precedes the STONITH_DONE operation
   + pengine: Fix memory leak introduced as part of the conversion to GHashTables
   + pengine: Fix memory leak when processing completed migration actions
   + pengine: Fix typo leading to use-of-NULL in the new ordering code
   + pengine: Free memory in recently introduced helper function
   + pengine: lf#2478 - Implement improved handling and recovery of atomic resource migrations
   + pengine: Obtain massive speedup by prepending to the list of ordering constraints (which can grow quite large)
   + pengine: Optimize the logic for deciding which non-grouped anonymous clone instances to probe for
   + pengine: Prevent clones from being stopped because resources colocated with them cannot be active
   + pengine: Try to ensure atomic migration ops occur within a single transition
   + pengine: Use hashtables instead of linked lists for performance sensitive datastructures
   + pengine: Use the original digest algorithm for parameter lists
   + stonith: cleanup children on timeout in fence_legacy
   + Stonith: Fix two memory leaks
   + Tools: crm_shadow - Avoid replacing the entire configuration (including status)
 
 * Tue Sep 21 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.3
 - Update source tarball to revision: e3bb31c56244 tip
 - Statistics:
   Changesets: 352
   Diff:       481 files changed, 14130 insertions(+), 11156 deletions(-)
 
 - Changes since Pacemaker-1.1.2.1
   + ais: Bug lf#2401 - Improved processing when the peer crmd processes join/leave
   + ais: Correct the logic for connecting to plugin based clusters
   + ais: Do not supply a process list in mcp-mode
   + ais: Drop support for whitetank in the 1.1 release series
   + ais: Get an initial dump of the node membership when connecting to quorum-based clusters
   + ais: Guard against saturated cpg connections
   + ais: Handle CS_ERR_TRY_AGAIN in more cases
   + ais: Move the code for finding uid before the fork so that the child does no logging
   + ais: Never allow quorum plugins to affect connection to the pacemaker plugin
   + ais: Sign everyone up for peer process updates, not just the crmd
   + ais: The cluster type needs to be set before initializing classic openais connections
   + cib: Also free query result for xpath operations that return more than one hit
   + cib: Attempt to resolve memory corruption when forking a child to write the cib to disk
   + cib: Correctly free memory when writing out the cib to disk
   + cib: Fix the application of unversioned diffs
   + cib: Remove old developmental error logging
   + cib: Restructure the 'valid peer' check for deciding which instructions to ignore
   + cman: Correctly process membership/quorum changes from the pcmk plugin. Allow other message types through untouched
   + cman: Filter directed messages not intended for us
   + cman: Grab the initial membership when we connect
   + cman: Keep the list of peer processes up-to-date
   + cman: Make sure our common hooks are called after a cman membership update
   + cman: Make sure we can compile without cman present
   + cman: Populate sender details for cpg messages
   + cman: Update the ringid for cman based clusters
   + Core: Correctly unpack HA_Messages containing multiple entries with the same name
   + Core: crm_count_member() should only track nodes that have the full stack up
   + Core: New developmental logging system inspired by the kernel and a PoC from Lars Ellenberg
   + crmd: All nodes should see status updates, not just the DC
   + crmd: Allow non-DC nodes to clear failcounts too
   + crmd: Base DC election on process relative uptime
   + crmd: Bug lf#2439 - cancel_op() can also return HA_RSCBUSY
   + crmd: Bug lf#2439 - Handle asynchronous notification of resource deletion events
   + crmd: Bug lf#2458 - Ensure stop actions always have the relevant resource attributes
   + crmd: Disable age as a criterion for cman based clusters, it's not reliable enough
   + crmd: Ensure we activate the DC timer if we detect an alternate DC
   + crmd: Factor the nanosecond component of process uptime in elections
   + crmd: Fix assertion failure when performing async resource failures
   + crmd: Fix handling of async resource deletion results
   + crmd: Include the action for crm graph operations
   + crmd: Make sure the membership cache is accurate after a successful fencing operation
   + crmd: Make sure we always poke the FSA after a transition to clear any TE_HALT actions
   + crmd: Offer crm-level membership once the peer starts the crmd process
   + crmd: Only need to request quorum update for plugin based clusters
   + crmd: Prevent assertion failure for stop actions resulting from cs: 3c0bc17c6daf
   + crmd: Prevent everyone from losing DC elections by correctly initializing all relevant variables
   + crmd: Prevent segmentation fault
   + crmd: several fixes for async resource delete (thanks to beekhof)
   + crmd: Use the correct define/size for lrm resource IDs
   + Introduce two new cluster types 'cman' and 'corosync', replaces 'quorum_provider' concept
   + mcp: Add missing headers when built without heartbeat support
   + mcp: Correctly initialize the string containing the list of active daemons
   + mcp: Fix macro expansion in init script
   + mcp: Fix the expansion of the pid file in the init script
   + mcp: Handle CS_ERR_TRY_AGAIN when connecting to libcfg
   + mcp: Make sure we can compile the mcp without cman present
   + mcp: New master control process for (re)spawning pacemaker daemons
   + mcp: Read config early so we can re-initialize logging asap if daemonizing
   + mcp: Rename the mcp binary to pacemakerd and create a 'pacemaker' init script
   + mcp: Resend our process list after every CPG change
   + mcp: Tell chkconfig we need to shut down early on
   + pengine: Avoid creating invalid ordering constraints for probes that are not needed
   + pengine: Bug lf#1959 - Failed unmanaged resources should not prevent other services from shutting down
   + pengine: Bug lf#2422 - Ordering dependencies on partially active groups not observed properly
   + pengine: Bug lf#2424 - Use notify operation definition if it exists in the configuration
   + pengine: Bug lf#2433 - No services should be stopped until probes finish
   + pengine: Bug lf#2453 - Enforce clone ordering in the absence of colocation constraints
   + pengine: Bug lf#2476 - Repair on-fail=block for groups and primitive resources
   + pengine: Correctly detect when there is a real failcount that expired and needs to be cleared
   + pengine: Correctly handle pseudo action creation
   + pengine: Correctly order clone startup after group/clone start
   + pengine: Correct use-after-free introduced in the prior patch
   + pengine: Do not demote resources because something that requires it can not run
   + pengine: Fix colocation for interleaved clones
   + pengine: Fix colocation with partially active groups
   + pengine: Fix potential use-after-free defect from coverity
   + pengine: Fix previous merge
   + pengine: Fix use-after-free in order_actions() reported by valgrind
   + pengine: Make the current data set a global variable so it does not need to be passed around everywhere
   + pengine: Prevent endless loop when looking for operation definitions in the configuration
   + pengine: Prevent segfault by ensuring the arguments to do_calculations() are initialized
   + pengine: Rewrite the ordering constraint logic for simplicity, clarity and maintainability
   + pengine: Wait until stonith is available, do not fall back to shutdown for nodes requesting termination
   + Resolve coverity RESOURCE_LEAK defects
   + Shell: Complete the transition to using crm_attribute instead of crm_failcount and crm_standby
   + stonith: Advertise stonith-ng options in the metadata
   + stonith: Bug lf#2461 - Prevent segfault by not looking up operations if the hashtable has not been initialized yet
   + stonith: Bug lf#2473 - Add the timeout at the top level where the daemon is looking for it
   + Stonith: Bug lf#2473 - Ensure stonith operations complete within the timeout and are terminated if they run too long
   + stonith: Bug lf#2473 - Ensure timeouts are included for fencing operations
   + stonith: Bug lf#2473 - Gracefully handle remote operations that arrive late (after we have done notifications)
   + stonith: Correctly parse pcmk_host_list parameters that appear on a single line
   + stonith: Map poweron/poweroff back to on/off expected by the stonith tool from cluster-glue
   + stonith: pass the configuration to the stonith program via environment variables (bnc#620781)
   + Stonith: Use the timeout specified by the user
   + Support starting plugin-based Pacemaker clusters with the MCP as well
   + Tools: Bug lf#2456 - Fix assertion failure in crm_resource
   + tools: crm_node - Repair the ability to connect to openais based clusters
   + tools: crm_node - Use the correct short option for --cman
   + tools: crm_report - corosync.conf won't necessarily contain the text 'pacemaker' anymore
   + Tools: crm_simulate - Fix use-after-free when terminating
   + tools: crm_simulate - Resolve coverity USE_AFTER_FREE defect
   + Tools: Drop the 'pingd' daemon and resource agent in favor of ocf:pacemaker:ping
   + Tools: Fix recently introduced use-of-NULL
   + Tools: Fix use-after-free defects from coverity
 
 * Wed May 12 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.2
 - Update source tarball to revision: c25c972a25cc tip
 - Statistics:
   Changesets: 339
   Diff:       708 files changed, 37918 insertions(+), 10584 deletions(-)
 - Changes since Pacemaker-1.1.1
   + ais: Do not count votes from offline nodes and calculate current votes before sending quorum data
   + ais: Ensure the list of active processes sent to clients is always up-to-date
   + ais: Look for the correct conf variable for turning on file logging
   + ais: Need to find a better and thread-safe way to set core_uses_pid. Disable for now.
   + ais: Use the threadsafe version of getpwnam
   + Core: Bump the feature set due to the new failcount expiry feature
   + Core: fix memory leaks exposed by valgrind
   + Core: Bug lf#2414 - Prevent use-after-free reported by valgrind when doing xpath based deletions
   + crmd: Bug lf#2414 - Prevent use-after-free of the PE connection after it dies
   + crmd: Bug lf#2414 - Prevent use-after-free of the stonith-ng connection
   + crmd: Bug lf#2401 - Improved detection of partially active peers
   + crmd: Bug lf#2379 - Ensure the cluster terminates when the PE is not available
   + crmd: Do not allow the target_rc to be misused by resource agents
   + crmd: Do not ignore action timeouts based on FSA state
   + crmd: Ensure we don't get stuck in S_PENDING if we lose an election to someone that never talks to us again
   + crmd: Fix memory leaks exposed by valgrind
   + crmd: Remove race condition that could lead to multiple instances of a clone being active on a machine
   + crmd: Send erase_status_tag() calls to the local CIB when the DC is fenced, since there is no DC to accept them
   + crmd: Use global fencing notifications to prevent secondary fencing operations of the DC
   + pengine: Bug lf#2317 - Avoid needless restart of primitive depending on a clone
   + pengine: Bug lf#2361 - Ensure clones observe mandatory ordering constraints if the LHS is unrunnable
   + pengine: Bug lf#2383 - Combine failcounts for all instances of an anonymous clone on a host
   + pengine: Bug lf#2384 - Fix intra-set colocation and ordering
   + pengine: Bug lf#2403 - Enforce mandatory promotion (colocation) constraints
   + pengine: Bug lf#2412 - Correctly find clone instances by their prefix
   + pengine: Do not be so quick to pull the trigger on nodes that are coming up
   + pengine: Fix memory leaks exposed by valgrind
   + pengine: Rewrite native_merge_weights() to avoid use-after-free
   + Shell: Bug bnc#590035 - always reload status if working with the cluster
   + Shell: Bug bnc#592762 - Default to using the status section from the live CIB
   + Shell: Bug lf#2315 - edit multiple meta_attributes sets in resource management
   + Shell: Bug lf#2221 - enable comments
   + Shell: Bug bnc#580492 - implement new cibstatus interface and commands
   + Shell: Bug bnc#585471 - new cibstatus import command
   + Shell: check timeouts also against the default-action-timeout property
   + Shell: new configure filter command
   + Tools: crm_mon - fix memory leaks exposed by valgrind
 
 * Tue Feb 16 2010 Andrew Beekhof <andrew@beekhof.net> - 1.1.1
 - First public release of Pacemaker 1.1
 - Package reference documentation in a doc subpackage
 - Move cts into a subpackage so that it can be easily consumed by others
 - Update source tarball to revision: 17d9cd4ee29f
   + New stonith daemon that supports global notifications
   + Service placement influenced by the physical resources
   + A new tool for simulating failures and the cluster’s reaction to them
   + Ability to serialize an otherwise unrelated set of resource actions (e.g. Xen migrations)
 
 * Mon Jan 18 2010 Andrew Beekhof <andrew@beekhof.net> - 1.0.7
 - Update source tarball to revision: 2eed906f43e9 (stable-1.0) tip
 - Statistics:
       Changesets:      193
       Diff:            220 files changed, 15933 insertions(+), 8782 deletions(-)
 - Changes since 1.0.5-4
   + pengine: Bug 2213 - Ensure groups process location constraints so that clone-node-max works for cloned groups
   + pengine: Bug lf#2153 - non-clones should not restart when clones stop/start on other nodes
   + pengine: Bug lf#2209 - Clone ordering should be able to prevent startup of dependent clones
   + pengine: Bug lf#2216 - Correctly identify the state of anonymous clones when deciding when to probe
   + pengine: Bug lf#2225 - Operations that require fencing should wait for 'stonith_complete' not 'all_stopped'.
   + pengine: Bug lf#2225 - Prevent clone peers from stopping while another instance is (potentially) being fenced
   + pengine: Correctly anti-colocate with a group
   + pengine: Correctly unpack ordering constraints for resource sets to avoid graph loops
   + Tools: crm: load help from crm_cli.txt
   + Tools: crm: resource sets (bnc#550923)
   + Tools: crm: support for comments (LF 2221)
   + Tools: crm: support for description attribute in resources/operations (bnc#548690)
   + Tools: hb2openais: add EVMS2 CSM processing (and other changes) (bnc#548093)
   + Tools: hb2openais: do not allow empty rules, clones, or groups (LF 2215)
   + Tools: hb2openais: refuse to convert pure EVMS volumes
   + cib: Ensure the loop for login message terminates
   + cib: Finally fix reliability of receiving large messages over remote plaintext connections
   + cib: Fix remote notifications
   + cib: For remote connections, default to CRM_DAEMON_USER since that's the only one that the cib can validate the password for using PAM
   + cib: Remote plaintext - Retry sending parts of the message that did not fit the first time
   + crmd: Ensure batch-limit is correctly enforced
   + crmd: Ensure we have the latest status after a transition abort
   + (bnc#547579,547582): Tools: crm: status section editing support
   + shell: Add allow-migrate as allowed meta-attribute (bnc#539968)
   + Medium: Build: Do not automatically add -L/lib, it could cause 64-bit arches to break
   + Medium: pengine: Bug lf#2206 - rsc_order constraints always use score at the top level
   + Medium: pengine: Only complain about target-role=master for non m/s resources
   + Medium: pengine: Prevent non-multistate resources from being promoted through target-role
   + Medium: pengine: Provide a default action for resource-set ordering
   + Medium: pengine: Silently fix requires=fencing for stonith resources so that it can be set in op_defaults
   + Medium: Tools: Bug lf#2286 - Allow the shell to accept template parameters on the command line
   + Medium: Tools: Bug lf#2307 - Provide a way to determine the nodeid of past cluster members
   + Medium: Tools: crm: add update method to template apply (LF 2289)
   + Medium: Tools: crm: direct RA interface for ocf class resource agents (LF 2270)
   + Medium: Tools: crm: direct RA interface for stonith class resource agents (LF 2270)
   + Medium: Tools: crm: do not add score which does not exist
   + Medium: Tools: crm: do not consider warnings as errors (LF 2274)
   + Medium: Tools: crm: do not remove sets which contain id-ref attribute (LF 2304)
   + Medium: Tools: crm: drop empty attributes elements
   + Medium: Tools: crm: exclude locations when testing for pathological constraints (LF 2300)
   + Medium: Tools: crm: fix exit code on single shot commands
   + Medium: Tools: crm: fix node delete (LF 2305)
   + Medium: Tools: crm: implement -F (--force) option
   + Medium: Tools: crm: rename status to cibstatus (LF 2236)
   + Medium: Tools: crm: revisit configure commit
   + Medium: Tools: crm: stay in crm if user specified level only (LF 2286)
   + Medium: Tools: crm: verify changes on exit from the configure level
   + Medium: ais: Some clients such as gfs_controld want a cluster name, allow one to be specified in corosync.conf
   + Medium: cib: Clean up logic for receiving remote messages
   + Medium: cib: Create valid notification control messages
   + Medium: cib: Indicate where the remote connection came from
   + Medium: cib: Send password prompt to stderr so that stdout can be redirected
   + Medium: cts: Fix rsh handling when stdout is not required
   + Medium: doc: Fill in the section on removing a node from an AIS-based cluster
   + Medium: doc: Update the docs to reflect the 0.6/1.0 rolling upgrade problem
   + Medium: doc: Use Publican for docbook based documentation
   + Medium: fencing: stonithd: add metadata for stonithd instance attributes (and support in the shell)
   + Medium: fencing: stonithd: ignore case when comparing host names (LF 2292)
   + Medium: tools: Make crm_mon functional with remote connections
   + Medium: xml: Add stopped as a supported role for operations
   + Medium: xml: Bug bnc#552713 - Treat node unames as text fields not IDs
   + Medium: xml: Bug lf#2215 - Create an always-true expression for empty rules when upgrading from 0.6
 
 * Thu Oct 29 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-4
 - Include the fixes from CoroSync integration testing
 - Move the resource templates - they are not documentation
 - Ensure documentation is placed in a standard location
 - Exclude documentation that is included elsewhere in the package
 
 - Update the tarball from upstream to version ee19d8e83c2a
   + cib: Correctly clean up when both plaintext and tls remote ports are requested
   + pengine: Bug bnc#515172 - Provide better defaults for lt(e) and gt(e) comparisons
   + pengine: Bug lf#2197 - Allow master instances placement to be influenced by colocation constraints
   + pengine: Make sure promote/demote pseudo actions are created correctly
   + pengine: Prevent target-role from promoting more than master-max instances
   + ais: Bug lf#2199 - Prevent expected-quorum-votes from being populated with garbage
   + ais: Prevent deadlock - don't try to release IPC message if the connection failed
   + cib: For validation errors, send back the full CIB so the client can display the errors
   + cib: Prevent use-after-free for remote plaintext connections
   + crmd: Bug lf#2201 - Prevent use-of-NULL when running heartbeat
 
 * Wed Oct 13 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-3
 - Update the tarball from upstream to version 38cd629e5c3c
   + Core: Bug lf#2169 - Allow dtd/schema validation to be disabled
   + pengine: Bug lf#2106 - Not all anonymous clone children are restarted after configuration change
   + pengine: Bug lf#2170 - stop-all-resources option had no effect
   + pengine: Bug lf#2171 - Prevent groups from starting if they depend on a complex resource which can not
   + pengine: Disable resource management if stonith-enabled=true and no stonith resources are defined
   + pengine: do not include master score if it would prevent allocation
   + ais: Avoid excessive load by checking for dead children every 1s (instead of 100ms)
   + ais: Bug rh#525589 - Prevent shutdown deadlocks when running on CoroSync
   + ais: Gracefully handle changes to the AIS nodeid
   + crmd: Bug bnc#527530 - Wait for the transition to complete before leaving S_TRANSITION_ENGINE
   + crmd: Prevent use-after-free with LOG_DEBUG_3
   + Medium: xml: Mask the "symmetrical" attribute on rsc_colocation constraints (bnc#540672)
   + Medium (bnc#520707): Tools: crm: new templates ocfs2 and clvm
   + Medium: Build: Invert the disable ais/heartbeat logic so that --without (ais|heartbeat) is available to rpmbuild
   + Medium: pengine: Bug lf#2178 - Indicate unmanaged clones
   + Medium: pengine: Bug lf#2180 - Include node information for all failed ops
   + Medium: pengine: Bug lf#2189 - Incorrect error message when unpacking simple ordering constraint
   + Medium: pengine: Correctly log resources that would like to start but can not
   + Medium: pengine: Stop ptest from logging to syslog
   + Medium: ais: Include version details in plugin name
   + Medium: crmd: Requery the resource metadata after every start operation
 
 * Fri Aug 21 2009 Tomas Mraz <tmraz@redhat.com> - 1.0.5-2.1
 - rebuilt with new openssl
 
 * Wed Aug 19 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-2
 - Add versioned perl dependency as specified by
     https://fedoraproject.org/wiki/Packaging/Perl#Packages_that_link_to_libperl
 - No longer remove RPATH data, it prevents us finding libperl.so and no other
   libraries were being hardcoded
 - Compile in support for heartbeat
 - Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements
   depending on which stacks are supported
 
 * Mon Aug 17 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5
 - Add dependency on resource-agents
 - Use the version of the configure macro that supplies --prefix, --libdir, etc
 - Update the tarball from upstream to version 462f1569a437 (Pacemaker 1.0.5 final)
   + Tools: crm_resource - Advertise --move instead of --migrate
   + Medium: Extra: New node connectivity RA that uses system ping and attrd_updater
   + Medium: crmd: Note that dc-deadtime can be used to mask the brokenness of some switches
 
 * Tue Aug 11 2009 Ville Skyttä <ville.skytta@iki.fi> - 1.0.5-0.7.c9120a53a6ae.hg
 - Use bzipped upstream tarball.
 
 * Wed Jul  29 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.6.c9120a53a6ae.hg
 - Add back missing build auto* dependencies
 - Minor cleanups to the install directive
 
 * Tue Jul  28 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.5.c9120a53a6ae.hg
 - Add a leading zero to the revision when alphatag is used
 
 * Tue Jul  28 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.4.c9120a53a6ae.hg
 - Incorporate the feedback from the cluster-glue review
 - Realistically, the version is a 1.0.5 pre-release
 - Use the global directive instead of define for variables
 - Use the haclient/hacluster group/user instead of daemon
 - Use the _configure macro
 - Fix install dependencies
 
 * Fri Jul  24 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.4-3
 - Initial Fedora checkin
 - Include an AUTHORS and license file in each package
 - Change the library package name to pacemaker-libs to be more
   Fedora compliant
 - Remove execute permissions from xml related files
 - Reference the new cluster-glue devel package name
 - Update the tarball from upstream to version c9120a53a6ae
   + pengine: Only prevent migration if the clone dependency is stopping/starting on the target node
   + pengine: Bug 2160 - Don't shuffle clones due to colocation
   + pengine: New implementation of the resource migration (not stop/start) logic
   + Medium: Tools: crm_resource - Prevent use-of-NULL by requiring a resource name for the -A and -a options
   + Medium: pengine: Prevent use-of-NULL in find_first_action()
 
 * Tue Jul 14 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.4-2
 - Reference authors from the project AUTHORS file instead of listing in description
 - Change Source0 to reference the Mercurial repo
 - Cleaned up the summaries and descriptions
 - Incorporate the results of Fedora package self-review
 
 * Thu Jun 04 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.4
 - Update source tarball to revision: 1d87d3e0fc7f (stable-1.0)
 - Statistics:
     Changesets:      209
     Diff:            266 files changed, 12010 insertions(+), 8276 deletions(-)
 - Changes since Pacemaker-1.0.3
   + (bnc#488291): ais: do not rely on byte endianness on ptr cast
   + (bnc#507255): Tools: crm: delete rsc/op_defaults (these meta_attributes are killing me)
   + (bnc#507255): Tools: crm: import properly rsc/op_defaults
   + (LF 2114): Tools: crm: add support for operation instance attributes
   + ais: Bug lf#2126 - Messages replies cannot be routed to transient clients
   + ais: Fix compilation for the latest Corosync API (v1719)
   + attrd: Do not perform all updates as complete refreshes
   + cib: Fix huge memory leak affecting heartbeat-based clusters
   + Core: Allow xpath queries to match attributes
   + Core: Generate the help text directly from a tool options struct
   + Core: Handle differences in 0.6 messaging format
   + crmd: Bug lf#2120 - All transient node attribute updates need to go via attrd
   + crmd: Correctly calculate how long an FSA action took to avoid spamming the logs with errors
   + crmd: Fix another large memory leak affecting Heartbeat based clusters
   + lha: Restore compatibility with older versions
   + pengine: Bug bnc#495687 - Filesystem is not notified of successful STONITH under some conditions
   + pengine: Make running a cluster with STONITH enabled but no STONITH resources an error and provide details on resolutions
   + pengine: Prevent use-of-NULL when using resource ordering sets
   + pengine: Provide inter-notification ordering guarantees
   + pengine: Rewrite the notification code to be understandable and extendable
   + Tools: attrd - Prevent race condition resulting in the cluster forgetting the node wishes to shut down
   + Tools: crm: regression tests
   + Tools: crm_mon - Fix smtp notifications
   + Tools: crm_resource - Repair the ability to query meta attributes
   + Low Build: Bug lf#2105 - Debian package should contain pacemaker doc and crm templates
   + Medium (bnc#507255): Tools: crm: handle empty rsc/op_defaults properly
   + Medium (bnc#507255): Tools: crm: use the right obj_type when creating objects from xml nodes
   + Medium (LF 2107): Tools: crm: revisit exit codes in configure
   + Medium: cib: Do not bother validating updates that only affect the status section
   + Medium: Core: Include supported stacks in version information
   + Medium: crmd: Record in the CIB, the cluster infrastructure being used
   + Medium: cts: Do not combine crm_standby arguments - the wrapper can not process them
   + Medium: cts: Fix the CIBAudit class
   + Medium: Extra: Refresh showscores script from Dominik
   + Medium: pengine: Build a statically linked version of ptest
   + Medium: pengine: Correctly log the actions for resources that are being recovered
   + Medium: pengine: Correctly log the occurrence of promotion events
   + Medium: pengine: Implement node health based on a patch from Mark Hamzy
   + Medium: Tools: Add examples to help text outputs
   + Medium: Tools: crm: catch syntax errors for configure load
   + Medium: Tools: crm: implement erasing nodes in configure erase
   + Medium: Tools: crm: work with parents only when managing xml objects
   + Medium: Tools: crm_mon - Add option to run custom notification program on resource operations (Patch by Dominik Klein)
   + Medium: Tools: crm_resource - Allow --cleanup to function on complex resources and cluster-wide
   + Medium: Tools: haresource2cib.py - Patch from horms to fix conversion error
   + Medium: Tools: Include stack information in crm_mon output
   + Medium: Tools: Two new options (--stack,--constraints) to crm_resource for querying how a resource is configured
 
 * Wed Apr 08 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.3
 - Update source tarball to revision: b133b3f19797 (stable-1.0) tip
 - Statistics:
     Changesets:      383
     Diff:            329 files changed, 15471 insertions(+), 15119 deletions(-)
 - Changes since Pacemaker-1.0.2
   + Added tag SLE11-HAE-GMC for changeset 9196be9830c2
   + ais plugin: Fix quorum calculation (bnc#487003)
   + ais: Another memory leak fix in error path
   + ais: Bug bnc#482847, bnc#482905 - Force a clean exit of OpenAIS once Pacemaker has finished unloading
   + ais: Bug bnc#486858 - Fix update_member() to prevent spamming clients with membership events containing no changes
   + ais: Centralize all quorum calculations in the ais plugin and allow expected votes to be configured in the cib
   + ais: Correctly handle a return value of zero from openais_dispatch_recv()
   + ais: Disable logging to a file
   + ais: Fix memory leak in error path
   + ais: IPC messages are only in scope until a response is sent
   + All signal handlers used with CL_SIGNAL() need to be as minimal as possible
   + cib: Bug bnc#482885 - Simplify CIB disk-writes to prevent data loss.  Required a change to the backup filename format
   + cib: crmd: Revert part of 9782ab035003.  Complex shutdown routines need G_main_add_SignalHandler to avoid race conditions
   + crm: Avoid infinite loop during crm configure edit (bnc#480327)
   + crmd: Avoid a race condition by waiting for the attrd update to trigger a transition automatically
   + crmd: Bug bnc#480977 - Prevent extra, partial, shutdown when a node restarts too quickly
   + crmd: Bug bnc#480977 - Prevent extra, partial, shutdown when a node restarts too quickly (verified)
   + crmd: Bug bnc#489063 - Ensure the DC is always unset after we 'lose' an election
   + crmd: Bug BSC#479543 - Correctly find the migration source for timed out migrate_from actions
   + crmd: Call crm_peer_init() before we start the FSA - prevents a race condition when used with Heartbeat
   + crmd: Erasing the status section should not be forced to the local node
   + crmd: Fix memory leak in cib notification processing code
   + crmd: Fix memory leak in transition graph processing
   + crmd: Fix memory leaks found by valgrind
   + crmd: More memory leaks fixes found by valgrind
   + fencing: stonithd: is_heartbeat_cluster is a no-no if there is no heartbeat support
   + pengine: Bug bnc#466788 - Exclude nodes that can not run resources
   + pengine: Bug bnc#466788 - Make colocation based on node attributes work
   + pengine: Bug BNC#478687 - Do not crash when clone-max is 0
   + pengine: Bug bnc#488721 - Fix id-ref expansion for clones, the doc-root for clone children is not the cib root
   + pengine: Bug bnc#490418 - Correctly determine node state for nodes wishing to be terminated
   + pengine: Bug LF#2087 - Correctly parse the state of anonymous clones that have multiple instances on a given node
   + pengine: Bug lf#2089 - Meta attributes are not inherited by clone children
   + pengine: Bug lf#2091 - Correctly restart modified resources that were found active by a probe
   + pengine: Bug lf#2094 - Fix probe ordering for cloned groups
   + pengine: Bug LF:2075 - Fix large pingd memory leaks
   + pengine: Correctly attach orphaned clone children to their parent
   + pengine: Correctly handle terminate node attributes that are set to the output from time()
   + pengine: Ensure orphaned clone members are hooked up to the parent when clone-max=0
   + pengine: Fix memory leak in LogActions
   + pengine: Fix the determination of whether a group is active
   + pengine: Look up the correct promotion preference for anonymous masters
   + pengine: Simplify handling of start failures by changing the default migration-threshold to INFINITY
   + pengine: The ordered option for clones no longer causes extra start/stop operations
   + RA: Bug bnc#490641 - Shut down dlm_controld with -TERM instead of -KILL
   + RA: pingd: Set default ping interval to 1 instead of 0 seconds
   + Resources: pingd - Correctly tell the ping daemon to shut down
   + Tools: Bug bnc#483365 - Ensure the command from cluster_test includes a value for --log-facility
   + Tools: cli: fix and improve delete command
   + Tools: crm: add and implement templates
   + Tools: crm: add support for command aliases and some common commands (i.e. cd,exit)
   + Tools: crm: create top configuration nodes if they are missing
   + Tools: crm: fix parsing attributes for rules (broken by the previous changeset)
   + Tools: crm: new ra set of commands
   + Tools: crm: resource agents information management
   + Tools: crm: rsc/op_defaults
   + Tools: crm: support for no value attribute in nvpairs
   + Tools: crm: the new configure monitor command
   + Tools: crm: the new configure node command
   + Tools: crm_mon - Prevent use-of-NULL when summarizing an orphan
   + Tools: hb2openais: create clvmd clone for respawn evmsd in ha.cf
   + Tools: hb2openais: fix a serious recursion bug in xml node processing
   + Tools: hb2openais: fix ocfs2 processing
   + Tools: pingd - prevent double free of getaddrinfo() output in error path
   + Tools: The default re-ping interval for pingd should be 1s not 1ms
   + Medium (bnc#479049): Tools: crm: add validation of resource type for the configure primitive command
   + Medium (bnc#479050): Tools: crm: add help for RA parameters in tab completion
   + Medium (bnc#479050): Tools: crm: add tab completion for primitive params/meta/op
   + Medium (bnc#479050): Tools: crm: reimplement cluster properties completion
   + Medium (bnc#486968): Tools: crm: listnodes function requires no parameters (do not mix completion with other stuff)
   + Medium: ais: Remove the ugly hack for dampening AIS membership changes
   + Medium: cib: Fix memory leaks by using mainloop_add_signal
   + Medium: cib: Move more logging to the debug level (was info)
   + Medium: cib: Overhaul the processing of synchronous replies
   + Medium: Core: Add library functions for instructing the cluster to terminate nodes
   + Medium: crmd: Add new expected-quorum-votes option
   + Medium: crmd: Allow up to 5 retries when an attrd update fails
   + Medium: crmd: Automatically detect and use new values for crm_config options
   + Medium: crmd: Bug bnc#490426 - Escalated shutdowns stall when there are pending resource operations
   + Medium: crmd: Clean up and optimize the DC election algorithm
   + Medium: crmd: Fix memory leak in shutdown
   + Medium: crmd: Fix memory leaks spotted by Valgrind
   + Medium: crmd: Ignore join messages from hosts other than our DC
   + Medium: crmd: Limit the scope of resource updates to the status section
   + Medium: crmd: Prevent the crmd from being respawned if its told to shut down when it did not ask to be
   + Medium: crmd: Re-check the election status after membership events
   + Medium: crmd: Send resource updates via the local CIB during elections
   + Medium: pengine: Bug bnc#491441 - crm_mon does not display operations returning 'uninstalled' correctly
   + Medium: pengine: Bug lf#2101 - For location constraints, role=Slave is equivalent to role=Started
   + Medium: pengine: Clean up the API - removed ->children() and renamed ->find_child() to find_rsc()
   + Medium: pengine: Compress the display of healthy anonymous clones
   + Medium: pengine: Correctly log the actions for resources that are being recovered
   + Medium: pengine: Determine a promotion score for complex resources
   + Medium: pengine: Ensure clones always have a value for globally-unique
   + Medium: pengine: Prevent orphan clones from being allocated
   + Medium: RA: controld: Return proper exit code for stop op.
   + Medium: Tools: Bug bnc#482558 - Fix logging test in cluster_test
   + Medium: Tools: Bug bnc#482828 - Fix quoting in cluster_test logging setup
   + Medium: Tools: Bug bnc#482840 - Include directory path to CTSlab.py
   + Medium: Tools: crm: add more user input checks
   + Medium: Tools: crm: do not check resource status if we are working with a shadow
   + Medium: Tools: crm: fix id-refs and allow reference to top objects (i.e. primitive)
   + Medium: Tools: crm: ignore comments in the CIB
   + Medium: Tools: crm: multiple column output would not work with small lists
   + Medium: Tools: crm: refuse to delete running resources
   + Medium: Tools: crm: rudimentary if-else for templates
   + Medium: Tools: crm: Start/stop clones via target-role.
   + Medium: Tools: crm_mon - Compress the node status for healthy and offline nodes
   + Medium: Tools: crm_shadow - Return 0/cib_ok when --create-empty succeeds
   + Medium: Tools: crm_shadow - Support -e, the short form of --create-empty
   + Medium: Tools: Make attrd quieter
   + Medium: Tools: pingd - Avoid using various clplumbing functions as they seem to leak
   + Medium: Tools: Reduce pingd logging
 
 * Mon Feb 16 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.2
 - Update source tarball to revision: d232d19daeb9 (stable-1.0) tip
 - Statistics:
     Changesets:      441
     Diff:            639 files changed, 20871 insertions(+), 21594 deletions(-)
 - Changes since Pacemaker-1.0.1
   + (bnc#450815): Tools: crm cli: do not generate id for the operations tag
   + ais: Add support for the new AIS IPC layer
   + ais: Always set header.error to the correct default: SA_AIS_OK
   + ais: Bug BNC#456243 - Ensure the membership cache always contains an entry for the local node
   + ais: Bug BNC:456208 - Prevent deadlocks by not logging in the child process before exec()
   + ais: By default, disable support for the WIP openais IPC patch
   + ais: Detect and handle situations where ais and the crm disagree on the node name
   + ais: Ensure crm_peer_seq is updated after a membership update
   + ais: Make sure all IPC header fields are set to sane defaults
   + ais: Repair and streamline service load now that whitetank startup functions correctly
   + build: create and install doc files
   + cib: Allow clients without mainloop to connect to the cib
   + cib: CID:18 - Fix use-of-NULL in cib_perform_op
   + cib: CID:18 - Repair errors introduced in b5a18704477b - Fix use-of-NULL in cib_perform_op
   + cib: Ensure diffs contain the correct values of admin_epoch
   + cib: Fix four moderately sized memory leaks detected by Valgrind
   + Core: CID:10 - Prevent indexing into an array of schemas with a negative value
   + Core: CID:13 - Fix memory leak in log_data_element
   + Core: CID:15 - Fix memory leak in crm_get_peer
   + Core: CID:6 - Fix use-of-NULL in copy_ha_msg_input
   + Core: Fix crash in the membership code preventing node shutdown
   + Core: Fix more memory leaks found by valgrind
   + Core: Prevent unterminated strings after decompression
   + crmd: Bug BNC:467995 - Delay marking STONITH operations complete until STONITH tells us so
   + crmd: Bug LF:1962 - Do not NACK peers because they are not (yet) in our membership.  Just ignore them.
   + crmd: Bug LF:2010 - Ensure fencing cib updates create the node_state entry if needed to prevent re-fencing during cluster startup
   + crmd: Correctly handle reconnections to attrd
   + crmd: Ensure updates for lost migrate operations indicate which node it tried to migrate to
   + crmd: If there are no nodes to finalize, start an election.
   + crmd: If there are no nodes to welcome, start an election.
   + crmd: Prevent node attribute loss by detecting attrd disconnections immediately
   + crmd: Prevent node re-probe loops by ensuring mandatory actions always complete
   + pengine: Bug 2005 - Fix startup ordering of cloned stonith groups
   + pengine: Bug 2006 - Correctly reprobe cloned groups
   + pengine: Bug BNC:465484 - Fix the no-quorum-policy=suicide option
   + pengine: Bug LF:1996 - Correctly process disabled monitor operations
   + pengine: CID:19 - Fix use-of-NULL in determine_online_status
   + pengine: Clones now default to globally-unique=false
   + pengine: Correctly calculate the number of available nodes for the clone to use
   + pengine: Only shoot online nodes with no-quorum-policy=suicide
   + pengine: Prevent on-fail settings being ignored after a resource is successfully stopped
   + pengine: Prevent use-of-NULL for failed migrate actions in process_rsc_state()
   + pengine: Remove an optimization for the terminate node attribute that caused the cluster to block indefinitely
   + pengine: Repair the ability to colocate based on node attributes other than uname
   + pengine: Start the correct monitor operation for unmanaged masters
   + stonith: CID:3 - Fix another case of exceptionally poor error handling by the original stonith developers
   + stonith: CID:5 - Checking for NULL and then dereferencing it anyway is an interesting approach to error handling
   + stonithd: Sending IPC to the cluster is a privileged operation
   + stonithd: wrong checks for shmid (0 is a valid id)
   + Tools: attrd - Correctly determine when an attribute has stopped changing and should be committed to the CIB
   + Tools: Bug 2003 - pingd does not correctly detect failures when the interface is down
   + Tools: Bug 2003 - pingd does not correctly handle node-down events on multi-NIC systems
   + Tools: Bug 2021 - pingd does not detect sequence wrapping correctly, incorrectly reports nodes offline
   + Tools: Bug BNC:468066 - Do not use the result of uname() when its no longer in scope
   + Tools: Bug BNC:473265 - crm_resource -L dumps core
   + Tools: Bug LF:2001 - Transient node attributes should be set via attrd
   + Tools: Bug LF:2036 - crm_resource cannot set/get parameters for cloned resources
   + Tools: Bug LF:2046 - Node attribute updates are lost because attrd can take too long to start
   + Tools: Cause the correct clone instance to be failed with crm_resource -F
   + Tools: cluster_test - Allow the user to select a stack and fix CTS invocation
   + Tools: crm cli: allow rename only if the resource is stopped
   + Tools: crm cli: catch system errors on file operations
   + Tools: crm cli: completion for ids in configure
   + Tools: crm cli: drop '-rsc' from attributes for order constraint
   + Tools: crm cli: exit with an appropriate exit code
   + Tools: crm cli: fix wrong order of action and resource in order constraint
   + Tools: crm cli: fix wrong exit code
   + Tools: crm cli: improve handling of cib attributes
   + Tools: crm cli: new command: configure rename
   + Tools: crm cli: new command: configure upgrade
   + Tools: crm cli: new command: node delete
   + Tools: crm cli: prevent key errors on missing cib attributes
   + Tools: crm cli: print long help for help topics
   + Tools: crm cli: return on syntax error when parsing score
   + Tools: crm cli: rsc_location can be without nvpairs
   + Tools: crm cli: short node preference location constraint
   + Tools: crm cli: sometimes, on errors, level would change on single shot use
   + Tools: crm cli: syntax: drop a bunch of commas (remains of help tables conversion)
   + Tools: crm cli: verify user input for sanity
   + Tools: crm: find expressions within rules (do not always skip xml nodes due to used id)
   + Tools: crm_master should not define a set id now that attrd is used.  Defining one can break lookups
   + Tools: crm_mon Use the OID assigned to the project by IANA for SNMP traps
   + Medium (bnc#445622): Tools: crm cli: improve the node show command and drop node status
   + Medium (LF 2009): stonithd: improve timeouts for remote fencing
   + Medium: ais: Allow dead peers to be removed from membership calculations
   + Medium: ais: Pass node deletion events on to clients
   + Medium: ais: Sanitize ipc usage
   + Medium: ais: Supply the node uname in addition to the id
   + Medium: Build: Clean up configure to ensure NON_FATAL_CFLAGS is consistent with CFLAGS (ie. includes -g)
   + Medium: Build: Install cluster_test
   + Medium: Build: Use more restrictive CFLAGS and fix the resulting errors
   + Medium: cib: CID:20 - Fix potential use-after-free in cib_native_signon
   + Medium: Core: Bug BNC:474727 - Set a maximum time to wait for IPC messages
   + Medium: Core: CID:12 - Fix memory leak in decode_transition_magic error path
   + Medium: Core: CID:14 - Fix memory leak in calculate_xml_digest error path
   + Medium: Core: CID:16 - Fix memory leak in date_to_string error path
   + Medium: Core: Try to track down the cause of XML parsing errors
   + Medium: crmd: Bug BNC:472473 - Do not wait excessive amounts of time for lost actions
   + Medium: crmd: Bug BNC:472473 - Reduce the transition timeout to action_timeout+network_delay
   + Medium: crmd: Do not fast-track the processing of LRM refreshes when there are pending actions.
   + Medium: crmd: do_dc_join_filter_offer - Check the 'join' message is for the current instance before deciding to NACK peers
   + Medium: crmd: Find option values without having to do a config upgrade
   + Medium: crmd: Implement shutdown using a transient node attribute
   + Medium: crmd: Update the crmd options to use dashes instead of underscores
   + Medium: cts: Add 'cluster reattach' to the suite of automated regression tests
   + Medium: cts: cluster_test - Make some usability enhancements
   + Medium: CTS: cluster_test - suggest a valid port number
   + Medium: CTS: Fix python import order
   + Medium: cts: Implement an automated SplitBrain test
   + Medium: CTS: Remove references to deleted classes
   + Medium: Extra: Resources - Use HA_VARRUN instead of HA_RSCTMP for state files as Heartbeat removes HA_RSCTMP at startup
   + Medium: HB: Bug 1933 - Fake crmd_client_status_callback() calls because HB does not provide them for already running processes
   + Medium: pengine: CID:17 - Fix memory leak in find_actions_by_task error path
   + Medium: pengine: CID:7,8 - Prevent hypothetical use-of-NULL in LogActions
   + Medium: pengine: Defer logging the actions performed on a resource until we have processed ordering constraints
   + Medium: pengine: Remove the symmetrical attribute of colocation constraints
   + Medium: Resources: pingd - fix the meta defaults
   + Medium: Resources: Stateful - Add missing meta defaults
   + Medium: stonithd: exit if the pid file cannot be locked
   + Medium: Tools: Allow attrd clients to specify the ID the attribute should be created with
   + Medium: Tools: attrd - Allow attribute updates to be performed from a hosts peer
   + Medium: Tools: Bug LF:1994 - Clean up crm_verify return codes
   + Medium: Tools: Change the pingd defaults to ping hosts once every second (instead of 5 times every 10 seconds)
   + Medium: Tools: cibmin - Detect resource operations with a view to providing email/snmp/cim notification
   + Medium: Tools: crm cli: add back symmetrical for order constraints
   + Medium: Tools: crm cli: generate role in location when converting from xml
   + Medium: Tools: crm cli: handle shlex exceptions
   + Medium: Tools: crm cli: keep order of help topics
   + Medium: Tools: crm cli: refine completion for ids in configure
   + Medium: Tools: crm cli: replace inf with INFINITY
   + Medium: Tools: crm cli: streamline cib load and parsing
   + Medium: Tools: crm cli: supply provider only for ocf class primitives
   + Medium: Tools: crm_mon - Add support for sending mail notifications of resource events
   + Medium: Tools: crm_mon - Include the DC version in status summary
   + Medium: Tools: crm_mon - Sanitize startup and option processing
   + Medium: Tools: crm_mon - switch to event-driven updates and add support for sending snmp traps
   + Medium: Tools: crm_shadow - Replace the --locate option with the saner --edit
   + Medium: Tools: hb2openais: do not remove Evmsd resources, but replace them with clvmd
   + Medium: Tools: hb2openais: replace crmadmin with crm_mon
   + Medium: Tools: hb2openais: replace the lsb class with ocf for o2cb
   + Medium: Tools: hb2openais: reuse code
   + Medium: Tools: LF:2029 - Display an error if crm_resource is used to reset the operation history of non-primitive resources
   + Medium: Tools: Make pingd resilient to attrd failures
   + Medium: Tools: pingd - fix the command line switches
   + Medium: Tools: Rename ccm_tool to crm_node
 
 * Tue Nov 18 2008 Andrew Beekhof <abeekhof@suse.de> - 1.0.1
 - Update source tarball to revision: 6fc5ce8302ab (stable-1.0) tip
 - Statistics:
     Changesets:      170
     Diff:            816 files changed, 7633 insertions(+), 6286 deletions(-)
 - Changes since Pacemaker-1.0.0
   + ais: Allow the crmd to get callbacks whenever a node state changes
   + ais: Create an option for starting the mgmtd daemon automatically
   + ais: Ensure HA_RSCTMP exists for use by resource agents
   + ais: Hook up the openais.conf config logging options
   + ais: Zero out the PID of disconnecting clients
   + cib: Ensure global updates cause a disk write when appropriate
   + Core: Add an extra sanity check to getXpathResults() to prevent segfaults
   + Core: Do not redefine __FUNCTION__ unnecessarily
   + Core: Repair the ability to have comments in the configuration
   + crmd: Bug:1975 - crmd should wait indefinitely for stonith operations to complete
   + crmd: Ensure PE processing does not occur for all error cases in do_pe_invoke_callback
   + crmd: Requests to the CIB should cause any prior PE calculations to be ignored
   + heartbeat: Wait for membership 'up' events before removing stale node status data
   + pengine: Bug LF:1988 - Ensure recurring operations always have the correct target-rc set
   + pengine: Bug LF:1988 - For unmanaged resources we need to skip the usual can_run_resources() checks
   + pengine: Ensure the terminate node attribute is handled correctly
   + pengine: Fix optional colocation
   + pengine: Improve the detection of 'new' nodes joining the cluster
   + pengine: Prevent assert failures in master_color() by ensuring unmanaged masters are always reallocated to their current location
   + Tools: crm cli: parser: return False on syntax error and None for comments
   + Tools: crm cli: unify template and edit commands
   + Tools: crm_shadow - Show more line number information after validation failures
   + Tools: hb2openais: add option to upgrade the CIB to v3.0
   + Tools: hb2openais: add U option to getopts and update usage
   + Tools: hb2openais: backup improved and multiple fixes
   + Tools: hb2openais: fix class/provider reversal
   + Tools: hb2openais: fix testing
   + Tools: hb2openais: move the CIB update to the end
   + Tools: hb2openais: update logging and set logfile appropriately
   + Tools: LF:1969 - Attrd never sets any properties in the cib
   + Tools: Make attrd functional on OpenAIS
   + Medium: ais: Hook up the options for specifying the expected number of nodes and total quorum votes
   + Medium: ais: Look for pacemaker options inside the service block with 'name: pacemaker' instead of creating an additional configuration block
   + Medium: ais: Provide better feedback when nodes change nodeids (in openais.conf)
   + Medium: cib: Always store cib contents on disk with num_updates=0
   + Medium: cib: Ensure remote access ports are cleaned up on shutdown
   + Medium: crmd: Detect deleted resource operations automatically
   + Medium: crmd: Erase a nodes resource operations and transient attributes after a successful STONITH
   + Medium: crmd: Find a more appropriate place to update quorum and refresh attrd attributes
   + Medium: crmd: Fix the handling of unexpected PE exits to ensure the current CIB is stored
   + Medium: crmd: Fix the recording of pending operations in the CIB
   + Medium: crmd: Initiate an attrd refresh _after_ the status section has been fully repopulated
   + Medium: crmd: Only the DC should update quorum in an openais cluster
   + Medium: Ensure meta attributes are used consistently
   + Medium: pengine: Allow group and clone level resource attributes
   + Medium: pengine: Bug N:437719 - Ensure scores from colocated resources count when allocating groups
   + Medium: pengine: Prevent lsb scripts from being used in globally unique clones
   + Medium: pengine: Make a best-effort guess at a migration threshold for people with 0.6 configs
   + Medium: Resources: controld - ensure we are part of a clone with globally_unique=false
   + Medium: Tools: attrd - Automatically refresh all attributes after a CIB replace operation
   + Medium: Tools: Bug LF:1985 - crm_mon - Correctly process failed cib queries to allow reconnection after cluster restarts
   + Medium: Tools: Bug LF:1987 - crm_verify incorrectly warns of configuration upgrades for the most recent version
   + Medium: Tools: crm (bnc#441028): check for key error in attributes management
   + Medium: Tools: crm_mon - display the meaning of the operation rc code instead of the status
   + Medium: Tools: crm_mon - Fix the display of timing data
   + Medium: Tools: crm_verify - check that we are being asked to validate a complete config
   + Medium: xml: Relax the restriction on the contents of rsc_location.node
 
 * Thu Oct 16 2008 Andrew Beekhof <abeekhof@suse.de> - 1.0.0
 - Update source tarball to revision: 388654dfef8f tip
 - Statistics:
     Changesets:      261
     Diff:            3021 files changed, 244985 insertions(+), 111596 deletions(-)
 - Changes since f805e1b30103
   + add the crm cli program
   + ais: Move the service id definition to a common location and make sure it is always used
   + build: rename hb2openais.sh to .in and replace paths with vars
   + cib: Implement --create for crm_shadow
   + cib: Remove dead files
   + Core: Allow the expected number of quorum votes to be configurable
   + Core: cl_malloc and friends were removed from Heartbeat
   + Core: Only call xmlCleanupParser() if we parsed anything.  Doing so unconditionally seems to cause a segfault
   + hb2openais.sh: improve pingd handling; several bugs fixed
   + hb2openais: fix clone creation; replace EVMS strings
   + new hb2openais.sh conversion script
   + pengine: Bug LF:1950 - Ensure the current values for all notification variables are always set (even if empty)
   + pengine: Bug LF:1955 - Ensure unmanaged masters are unconditionally repromoted to ensure they are monitored correctly.
   + pengine: Bug LF:1955 - Fix another case of filtering causing unmanaged master failures
   + pengine: Bug LF:1955 - Unmanaged mode prevents master resources from being allocated correctly
   + pengine: Bug N:420538 - Anti-colocation caused a positive node preference
   + pengine: Correctly handle unmanaged resources to prevent them from being started elsewhere
   + pengine: crm_resource - Fix the --migrate command
   + pengine: Make stonith-enabled default to true and warn if no STONITH resources are found
   + pengine: Make sure orphaned clone children are created correctly
   + pengine: Monitors for unmanaged resources do not need to wait for start/promote/demote actions to complete
   + stonithd (LF 1951): fix remote stonith operations
   + stonithd: fix handling of timeouts
   + stonithd: fix logic for stonith resource priorities
   + stonithd: implement the fence-timeout instance attribute
   + stonithd: initialize value before reading fence-timeout
   + stonithd: set timeouts for fencing ops to the timeout of the start op
   + stonithd: stonith rsc priorities (new feature)
   + Tools: Add hb2openais - a tool for upgrading a Heartbeat cluster to use OpenAIS instead
   + Tools: crm_verify - clean up the upgrade logic to prevent crash on invalid configurations
   + Tools: Make pingd functional on Linux
   + Update version numbers for 1.0 candidates
   + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid
   + Medium: ais: Use the agreed service number
   + Medium: Build: Reliably detect heartbeat libraries during configure
   + Medium: Build: Supply prototypes for libreplace functions when needed
   + Medium: Build: Teach configure how to find corosync
   + Medium: Core: Provide better feedback if Pacemaker is started by a stack it does not support
   + Medium: crmd: Avoid calling GHashTable functions with NULL
   + Medium: crmd: Delay raising I_ERROR when the PE exits until we have had a chance to save the current CIB
   + Medium: crmd: Hook up the stonith-timeout option to stonithd
   + Medium: crmd: Prevent potential use-of-NULL in global_timer_callback
   + Medium: crmd: Rationalize the logging of graph aborts
   + Medium: pengine: Add a stonith_timeout option and remove new options that are better set in rsc_defaults
   + Medium: pengine: Allow external entities to ask for a node to be shot by creating a terminate=true transient node attribute
   + Medium: pengine: Bug LF:1950 - Notifications do not contain all documented resource state fields
   + Medium: pengine: Bug N:417585 - Do not restart group children whose individual score drops below zero
   + Medium: pengine: Detect clients that disconnect before receiving their reply
   + Medium: pengine: Implement a true maintenance mode
   + Medium: pengine: Implement on-fail=standby for NTT.  Derived from a patch by Satomi TANIGUCHI
   + Medium: pengine: Print the correct message when stonith is disabled
   + Medium: pengine: ptest - check the input is valid before proceeding
   + Medium: pengine: Revert group stickiness to the 'old way'
   + Medium: pengine: Use the correct attribute for action 'requires' (was prereq)
   + Medium: stonithd: Fix compilation without full heartbeat install
   + Medium: stonithd: exit with better code on empty host list
   + Medium: tools: Add a new regression test for CLI tools
   + Medium: tools: crm_resource - return with non-zero when a resource migration command is invalid
   + Medium: tools: crm_shadow - Allow the admin to start with an empty CIB (and no cluster connection)
   + Medium: xml: pacemaker-0.7 is now an alias for the 1.0 schema
 
 * Mon Sep 22 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.3
 - Update source tarball to revision: 33e677ab7764+ tip
 - Statistics:
     Changesets:      133
     Diff:            89 files changed, 7492 insertions(+), 1125 deletions(-)
 - Changes since f805e1b30103
   + Tools: add the crm cli program
   + Core: cl_malloc and friends were removed from Heartbeat
   + Core: Only call xmlCleanupParser() if we parsed anything.  Doing so unconditionally seems to cause a segfault
   + new hb2openais.sh conversion script
   + pengine: Bug LF:1950 - Ensure the current values for all notification variables are always set (even if empty)
   + pengine: Bug LF:1955 - Ensure unmanaged masters are unconditionally repromoted to ensure they are monitored correctly.
   + pengine: Bug LF:1955 - Fix another case of filtering causing unmanaged master failures
   + pengine: Bug LF:1955 - Unmanaged mode prevents master resources from being allocated correctly
   + pengine: Bug N:420538 - Anti-colocation caused a positive node preference
   + pengine: Correctly handle unmanaged resources to prevent them from being started elsewhere
   + pengine: crm_resource - Fix the --migrate command
   + pengine: Make stonith-enabled default to true and warn if no STONITH resources are found
   + pengine: Make sure orphaned clone children are created correctly
   + pengine: Monitors for unmanaged resources do not need to wait for start/promote/demote actions to complete
   + stonithd (LF 1951): fix remote stonith operations
   + Tools: crm_verify - clean up the upgrade logic to prevent crash on invalid configurations
   + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid
   + Medium: ais: Use the agreed service number
   + Medium: pengine: Allow external entities to ask for a node to be shot by creating a terminate=true transient node attribute
   + Medium: pengine: Bug LF:1950 - Notifications do not contain all documented resource state fields
   + Medium: pengine: Bug N:417585 - Do not restart group children whose individual score drops below zero
   + Medium: pengine: Implement a true maintenance mode
   + Medium: pengine: Print the correct message when stonith is disabled
   + Medium: stonithd: exit with better code on empty host list
   + Medium: xml: pacemaker-0.7 is now an alias for the 1.0 schema
 
 * Wed Aug 20 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.1
 - Update source tarball to revision: f805e1b30103+ tip
 - Statistics:
     Changesets:      184
     Diff:            513 files changed, 43408 insertions(+), 43783 deletions(-)
 - Changes since 0.7.0-19
   + Fix compilation when GNUTLS isn't found
   + admin: Fix use-after-free in crm_mon
   + Build: Remove testing code that prevented heartbeat-only builds
   + cib: Use single quotes so that the xpath queries for nvpairs will succeed
   + crmd: Always connect to stonithd when the TE starts and ensure we notice if it dies
   + crmd: Correctly handle a dead PE process
   + crmd: Make sure async-failures cause the failcount to be incremented
   + pengine: Bug LF:1941 - Handle failed clone instance probes when clone-max < #nodes
   + pengine: Parse resource ordering sets correctly
   + pengine: Prevent use-of-NULL - order->rsc_rh will not always be non-NULL
   + pengine: Unpack colocation sets correctly
   + Tools: crm_mon - Prevent use-of-NULL for orphaned resources
   + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid
   + Medium: ais: Allow transient clients to receive membership updates
   + Medium: ais: Avoid double-free in error path
   + Medium: ais: Include in the membership nodes for which we have not determined their hostname
   + Medium: ais: Spawn the PE from the ais plugin instead of the crmd
   + Medium: cib: By default, new configurations use the latest schema
   + Medium: cib: Clean up the CIB if it was already disconnected
   + Medium: cib: Only increment num_updates if something actually changed
   + Medium: cib: Prevent use-after-free in client after abnormal termination of the CIB
   + Medium: Core: Fix memory leak in xpath searches
   + Medium: Core: Get more details regarding parser errors
   + Medium: Core: Repair expand_plus_plus - do not call char2score on unexpanded values
   + Medium: Core: Switch to the libxml2 parser - it's significantly faster
   + Medium: Core: Use a libxml2 library function for xml -> text conversion
   + Medium: crmd: Asynchronous failure actions have no parameters
   + Medium: crmd: Avoid calling glib functions with NULL
   + Medium: crmd: Do not allow an election to promote a node from S_STARTING
   + Medium: crmd: Do not vote if we have not completed the local startup
   + Medium: crmd: Fix te_update_diff() now that get_object_root() functions differently
   + Medium: crmd: Fix the lrmd xpath expressions to not contain quotes
   + Medium: crmd: If we get a join offer during an election, better restart the election
   + Medium: crmd: No further processing is needed when using the LRMs API call for failing resources
   + Medium: crmd: Only update have-quorum if the value changed
   + Medium: crmd: Repair the input validation logic in do_te_invoke
   + Medium: cts: CIBs can no longer contain comments
   + Medium: cts: Enable a bunch of tests that were incorrectly disabled
   + Medium: cts: The libxml2 parser won't allow v1 resources to use integers as parameter names
   + Medium: Do not use the cluster UID and GID directly.  Look them up based on the configured value of HA_CCMUSER
   + Medium: Fix compilation when heartbeat is not supported
   + Medium: pengine: Allow groups to be involved in optional ordering constraints
   + Medium: pengine: Allow sets of operations to be reused by multiple resources
   + Medium: pengine: Bug LF:1941 - Mark extra clone instances as orphans and do not show inactive ones
   + Medium: pengine: Determine the correct migration-threshold during resource expansion
   + Medium: pengine: Implement no-quorum-policy=suicide (FATE #303619)
   + Medium: pengine: Clean up resources after stopping old copies of the PE
   + Medium: pengine: Teach the PE how to stop old copies of itself
   + Medium: Tools: Backport hb_report updates
   + Medium: Tools: cib_shadow - On create, spawn a new shell with CIB_shadow and PS1 set accordingly
   + Medium: Tools: Rename cib_shadow to crm_shadow
 
 * Fri Jul 18 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.0-19
 - Update source tarball to revision: 007c3a1c50f5 (unstable) tip
 - Statistics:
     Changesets:      108
     Diff:            216 files changed, 4632 insertions(+), 4173 deletions(-)
 - Changes added since unstable-0.7
   + admin: Fix use-after-free in crm_mon
   + ais: Change the tag for the ais plugin to "pacemaker" (used in openais.conf)
   + ais: Log terminated processes as an error
   + cib: Performance - Reorganize things to avoid calculating the XML diff twice
   + pengine: Bug LF:1941 - Handle failed clone instance probes when clone-max < #nodes
   + pengine: Fix memory leak in action2xml
   + pengine: Make OCF_ERR_ARGS a node-level error rather than a cluster-level one
   + pengine: Properly handle clones that are not installed on all nodes
   + Medium: admin: cibadmin - Show any validation errors if the upgrade failed
   + Medium: admin: cib_shadow - Implement --locate to display the underlying filename
   + Medium: admin: cib_shadow - Implement a --diff option
   + Medium: admin: cib_shadow - Implement a --switch option
   + Medium: admin: crm_resource - create more compact constraints that do not use lifetime (which is deprecated)
   + Medium: ais: Approximate born_on for OpenAIS based clusters
   + Medium: cib: Remove do_id_check, it is a poor substitute for ID validation by a schema
   + Medium: cib: Skip construction of pre-notify messages if no-one wants one
   + Medium: Core: Attempt to streamline some key functions to increase performance
   + Medium: Core: Clean up XML parser after validation
   + Medium: crmd: Detect and optimize the CRMs behavior when processing diffs of an LRM refresh
   + Medium: Fix memory leaks when resetting the name of an XML object
   + Medium: pengine: Prefer the current location if it is one of a group of nodes with the same (highest) score
 
 * Wed Jun 25 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.0
 - Update source tarball to revision: bde0c7db74fb tip
 - Statistics:
     Changesets:      439
     Diff:            676 files changed, 41310 insertions(+), 52071 deletions(-)
 - Changes added since stable-0.6
   + A new tool for setting up and invoking CTS
   + Admin: All tools now use --node (-N) for specifying node unames
   + Admin: All tools now use --xml-file (-x) and --xml-text (-X) for specifying where to find XML blobs
   + cib: Cleanup the API - remove redundant input fields
   + cib: Implement CIB_shadow - a facility for making and testing changes before uploading them to the cluster
   + cib: Make registering per-op callbacks an API call and renamed (for clarity) the API call for requesting notifications
   + Core: Add a facility for automatically upgrading old configurations
   + Core: Adopt libxml2 as the XML processing library - all external clients need to be recompiled
   + Core: Allow sending TLS messages larger than the MTU
   + Core: Fix parsing of time-only ISO dates
   + Core: Smarter handling of XML values containing quotes
   + Core: XML memory corruption - catch, and handle, cases where we are overwriting an attribute value with itself
   + Core: The xml ID type does not allow UUIDs that start with a number
   + Core: Implement XPath based versions of query/delete/replace/modify
   + Core: Remove some HA2.0.(3,4) compatibility code
   + crmd: Overhaul the detection of nodes that are starting vs. failed
   + pengine: Bug LF:1459 - Allow failures to expire
   + pengine: Have the PE do non-persistent configuration upgrades before performing calculations
   + pengine: Replace failure-stickiness with a simple 'migration-threshold'
   + tengine: Simplify the design by folding the tengine process into the crmd
   + Medium: Admin: Bug LF:1438 - Allow the list of all/active resource operations to be queried by crm_resource
   + Medium: Admin: Bug LF:1708 - crm_resource should print a warning if an attribute is already set as a meta attribute
   + Medium: Admin: Bug LF:1883 - crm_mon should display fail-count and operation history
   + Medium: Admin: Bug LF:1883 - crm_mon should display operation timing data
   + Medium: Admin: Bug N:371785 - crm_resource -C does not also clean up fail-count attributes
   + Medium: Admin: crm_mon - include timing data for failed actions
   + Medium: ais: Read options from the environment since objdb is not completely usable yet
   + Medium: cib: Add sections for op_defaults and rsc_defaults
   + Medium: cib: Better matching notification callbacks (for detecting duplicates and removal)
   + Medium: cib: Bug LF:1348 - Allow rules and attribute sets to be referenced for use in other objects
   + Medium: cib: BUG LF:1918 - By default, all cib calls now timeout after 30s
   + Medium: cib: Detect updates that decrease the version tuple
   + Medium: cib: Implement a client-side operation timeout - Requires LHA update
   + Medium: cib: Implement callbacks and async notifications for remote connections
   + Medium: cib: Make cib->cmds->update() an alias for modify at the API level (also implemented in cibadmin)
   + Medium: cib: Mark the CIB as disconnected if the IPC connection is terminated
   + Medium: cib: New call option 'cib_can_create' which can be passed to modify actions - allows the object to be created if it does not exist yet
   + Medium: cib: Reimplement get|set|delete attributes using XPath
   + Medium: cib: Remove some useless parts of the API
   + Medium: cib: Remove the 'attributes' scaffolding from the new format
   + Medium: cib: Implement the ability for clients to connect to remote servers
   + Medium: Core: Add support for validating xml against RelaxNG schemas
   + Medium: Core: Allow more than one item to be modified/deleted in XPath based operations
   + Medium: Core: Fix the sort_pairs function for creating sorted xml objects
   + Medium: Core: iso8601 - Implement subtract_duration and fix subtract_time
   + Medium: Core: Reduce the amount of xml copying
   + Medium: Core: Support value='value+=N' XML updates (in addition to value='value++')
   + Medium: crmd: Add support for lrm_ops->fail_rsc if its available
   + Medium: crmd: HB - watch link status for node leaving events
   + Medium: crmd: Bug LF:1924 - Improved handling of lrmd disconnects and shutdowns
   + Medium: crmd: Do not wait for actions with a start_delay over 5 minutes. Confirm them immediately
   + Medium: pengine: Bug LF:1328 - Do not fence nodes in clusters without managed resources
   + Medium: pengine: Bug LF:1461 - Give transient node attributes (in <status/>) preference over persistent ones (in <nodes/>)
   + Medium: pengine: Bug LF:1884, Bug LF:1885 - Implement N:M ordering and colocation constraints
   + Medium: pengine: Bug LF:1886 - Create a resource and operation 'defaults' config section
   + Medium: pengine: Bug LF:1892 - Allow recurring actions to be triggered at known times
   + Medium: pengine: Bug LF:1926 - Probes should complete before stop actions are invoked
   + Medium: pengine: Fix the standby when its set as a transient attribute
   + Medium: pengine: Implement a global 'stop-all-resources' option
   + Medium: pengine: Implement cibpipe, a tool for performing/simulating config changes "offline"
   + Medium: pengine: We do not allow colocation with specific clone instances
   + Medium: Tools: pingd - Implement a stack-independent version of pingd
   + Medium: xml: Ship an xslt for upgrading from 0.6 to 0.7
 
 * Thu Jun 19 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.5
 - Update source tarball to revision: b9fe723d1ac5 tip
 - Statistics:
     Changesets:      48
     Diff:            37 files changed, 1204 insertions(+), 234 deletions(-)
 - Changes since Pacemaker-0.6.4
   + Admin: Repair the ability to delete failcounts
   + ais: Audit IPC handling between the AIS plugin and CRM processes
   + ais: Have the plugin create needed /var/lib directories
   + ais: Make sure the sync and async connections are assigned correctly (not swapped)
   + cib: Correctly detect configuration changes - num_updates does not count
   + pengine: Apply stickiness values to the whole group, not the individual resources
   + pengine: Bug N:385265 - Ensure groups are migrated instead of remaining partially active on the current node
   + pengine: Bug N:396293 - Enforce mandatory group restarts due to ordering constraints
   + pengine: Correctly recover master instances found active on more than one node
   + pengine: Fix memory leaks reported by Valgrind
   + Medium: Admin: crm_mon - Misc improvements from Satomi Taniguchi
   + Medium: Bug LF:1900 - Resource stickiness should not allow placement in asynchronous clusters
   + Medium: crmd: Ensure joins are completed promptly when a node taking part dies
   + Medium: pengine: Avoid clone instance shuffling in more cases
   + Medium: pengine: Bug LF:1906 - Remove an optimization in native_merge_weights() causing group scores to behave erratically
   + Medium: pengine: Make use of target_rc data to correctly process resource operations
   + Medium: pengine: Prevent a possible use of NULL in sort_clone_instance()
   + Medium: tengine: Include target rc in the transition key - used to correctly determine operation failure
 
 * Thu May 22 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.4
 - Update source tarball to revision: 226d8e356924 tip
 - Statistics:
     Changesets:       55
     Diff:             199 files changed, 7103 insertions(+), 12378 deletions(-)
 - Changes since Pacemaker-0.6.3
   + crmd: Bug LF:1881 LF:1882 - Overhaul the logic for operation cancelation and deletion
   + crmd: Bug LF:1894 - Make sure cancelled recurring operations are cleaned out from the CIB
   + pengine: Bug N:387749 - Colocation with clones causes unnecessary clone instance shuffling
   + pengine: Ensure 'master' monitor actions are cancelled _before_ we demote the resource
   + pengine: Fix assert failure leading to core dump - make sure variable is properly initialized
   + pengine: Make sure 'slave' monitoring happens after the resource has been demoted
   + pengine: Prevent failure stickiness underflows (where too many failures become a _positive_ preference)
   + Medium: Admin: crm_mon - Only complain if the output file could not be opened
   + Medium: Common: filter_action_parameters - enable legacy handling only for older versions
   + Medium: pengine: Bug N:385265 - The failure stickiness of group children is ignored until it reaches -INFINITY
   + Medium: pengine: Implement master and clone colocation by excluding nodes rather than setting ones score to INFINITY (similar to cs: 756afc42dc51)
   + Medium: tengine: Bug LF:1875 - Correctly find actions to cancel when their node leaves the cluster
 
 * Wed Apr 23 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.3
 - Update source tarball to revision: fd8904c9bc67 tip
 - Statistics:
     Changesets:      117
     Diff:            354 files changed, 19094 insertions(+), 11338 deletions(-)
 - Changes since Pacemaker-0.6.2
   + Admin: Bug LF:1848 - crm_resource - Pass set name and id to delete_resource_attr() in the correct order
   + Build: SNMP has been moved to the management/pygui project
   + crmd: Bug LF1837 - Unmanaged resources prevent crmd from shutting down
   + crmd: Prevent use-after-free in lrm interface code (Patch based on work by Keisuke MORI)
   + pengine: Allow the cluster to make progress by not retrying failed demote actions
   + pengine: Anti-colocation with slave should not prevent master colocation
   + pengine: Bug LF 1768 - Wait more often for STONITH ops to complete before starting resources
   + pengine: Bug LF1836 - Allow is-managed-default=false to be overridden by individual resources
   + pengine: Bug LF185 - Prevent pointless master/slave instance shuffling by ignoring the master-pref of stopped instances
   + pengine: Bug N-191176 - Implement interleaved ordering for clone-to-clone scenarios
   + pengine: Bug N-347004 - Ensure clone notifications are always sent when an instance is stopped/started
   + pengine: Bug N-347004 - Include notification ordering is correct for interleaved clones
   + pengine: Bug PM-11 - Directly link probe_complete to starting clone instances
   + pengine: Bug PM1 - Fix setting failcounts when applied to complex resources
   + pengine: Bug PM12, LF1648 - Extensive revision of group ordering
   + pengine: Bug PM7 - Ensure masters are always demoted before they are stopped
   + pengine: Create probes after allocation to allow smarter handling of anonymous clones
   + pengine: Do not prioritize clone instances that must be moved
   + pengine: Fix error in previous commit that allowed more than the required number of masters to be promoted
   + pengine: Group start ordering fixes
   + pengine: Implement promote/demote ordering for cloned groups
   + tengine: Repair failcount updates
   + tengine: Use the correct offset when updating failcount
   + Medium: Admin: Add a summary output that can be easily parsed by CTS for audit purposes
   + Medium: Build: Make configure fail if bz2 or libxml2 are not present
   + Medium: Build: Re-instate a better default for LCRSODIR
   + Medium: CIB: Bug LF-1861 - Filter irrelevant error status from synchronous CIB clients
   + Medium: Core: Bug 1849 - Invalid conversion of ordinal leap year to gregorian date
   + Medium: Core: Drop compatibility code for 2.0.4 and 2.0.5 clusters
   + Medium: crmd: Bug LF-1860 - Automatically cancel recurring ops before demote and promote operations (not only stops)
   + Medium: crmd: Save the current CIB contents if we detect the PE crashed
   + Medium: pengine: Bug LF:1866 - Fix version check when applying compatibility handling for failed start operations
   + Medium: pengine: Bug LF:1866 - Restore the ability to have start failures not be fatal
   + Medium: pengine: Bug PM1 - Failcount applies to all instances of non-unique clone
   + Medium: pengine: Correctly set the state of partially active master/slave groups
   + Medium: pengine: Do not claim to be stopping an already stopped orphan
   + Medium: pengine: Ensure implies_left ordering constraints are always effective
   + Medium: pengine: Indicate each resources 'promotion' score
   + Medium: pengine: Prevent a possible use-of-NULL
   + Medium: pengine: Reprocess the current action if it changed (so that any prior dependencies are updated)
   + Medium: tengine: Bug LF-1859 - Wait for fail-count updates to complete before terminating the transition
   + Medium: tengine: Bug LF:1859 - Do not abort graphs due to our own failcount updates
   + Medium: tengine: Bug LF:1859 - Prevent the TE from interrupting itself
 
 * Thu Feb 14 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.2
 - Update source tarball to revision: 28b1a8c1868b tip
 - Statistics:
     Changesets:    11
     Diff:          7 files changed, 58 insertions(+), 18 deletions(-)
 - Changes since Pacemaker-0.6.1
   + haresources2cib.py: set default-action-timeout to the default (20s)
   + haresources2cib.py: update ra parameters lists
   + Medium: SNMP: Allow the snmp subagent to be built (patch from MATSUDA, Daiki)
   + Medium: Tools: Make sure the autoconf variables in haresources2cib are expanded
 
 * Tue Feb 12 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.1
 - Update source tarball to revision: e7152d1be933 tip
 - Statistics:
     Changesets:    25
     Diff:          37 files changed, 1323 insertions(+), 227 deletions(-)
 - Changes since Pacemaker-0.6.0
   + CIB: Ensure changes to top-level attributes (like admin_epoch) cause a disk write
   + CIB: Ensure the archived file hits the disk before returning
   + CIB: Repair the ability to do 'atomic increment' updates (value="value++")
   + crmd: Bug #7 - Connecting to the crmd immediately after startup causes use-of-NULL
   + Medium: CIB: Mask cib_diff_resync results from the caller - they do not need to know
   + Medium: crmd: Delay starting the IPC server until we are fully functional
   + Medium: CTS: Fix the startup patterns
   + Medium: pengine: Bug 1820 - Allow the first resource in a group to be migrated
   + Medium: pengine: Bug 1820 - Check the colocation dependencies of resources to be migrated
 
 * Mon Jan 14 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.0
 - This is the first release of the Pacemaker Cluster Resource Manager formerly part of Heartbeat.
 - For those looking for the GUI, mgmtd, CIM or TSA components, they are now found in
   the new pacemaker-pygui project.  Build dependencies prevent them from being
   included in Heartbeat (since the built-in CRM is no longer supported) and,
   being non-core components, are not included with Pacemaker.
 - Update source tarball to revision: c94b92d550cf
 - Statistics:
     Changesets:      347
     Diff:            2272 files changed, 132508 insertions(+), 305991 deletions(-)
 - Test hardware:
     + 6-node vmware cluster (sles10-sp1/256MB/vmware stonith) on a single host (opensuse10.3/2GB/2.66GHz Quad Core2)
     + 7-node EMC Centera cluster (sles10/512MB/2GHz Xeon/ssh stonith)
 - Notes: Heartbeat Stack
     + All testing was performed with STONITH enabled
     + The CRM was enabled using the "crm respawn" directive
 - Notes: OpenAIS Stack
     + This release contains a preview of support for the OpenAIS cluster stack
     + The current release of the OpenAIS project is missing two important
     patches that we require.  OpenAIS packages containing these patches are
     available for most major distributions at:
     http://download.opensuse.org/repositories/server:/ha-clustering
     + The OpenAIS stack is not currently recommended for use in clusters that
     have shared data as STONITH support is not yet implemented
     + pingd is not yet available for use with the OpenAIS stack
     + 3 significant OpenAIS issues were found during testing of 4 and 6 node
     clusters.  We are actively working together with the OpenAIS project to
     get these resolved.
 - Pending bugs encountered during testing:
     + OpenAIS   #1736 - Openais membership took 20s to stabilize
     + Heartbeat #1750 - ipc_bufpool_update: magic number in head does not match
     + OpenAIS   #1793 - Assertion failure in memb_state_gather_enter()
     + OpenAIS   #1796 - Cluster message corruption
 - Changes since Heartbeat-2.1.2-24
   + Add OpenAIS support
   + Admin: crm_uuid - Look in the right place for Heartbeat UUID files
   + admin: Exit and indicate a problem if the crmd exits while crmadmin is performing a query
   + cib: Fix CIB_OP_UPDATE calls that modify the whole CIB
   + cib: Fix compilation when supporting the heartbeat stack
   + cib: Fix memory leaks caused by the switch to get_message_xml()
   + cib: HA_VALGRIND_ENABLED needs to be set _and_ set to 1|yes|true
   + cib: Use get_message_xml() in preference to cl_get_struct()
   + cib: Use the return value from call to write() in cib_send_plaintext()
   + Core: ccm nodes can legitimately have a node id of 0
   + Core: Fix peer-process tracking for the Heartbeat stack
   + Core: Heartbeat does not send status notifications for nodes that were already part of the cluster.  Fake them instead
   + CRM: Add children to HA_Messages such that the field name matches F_XML_TAGNAME
   + crm: Adopt a more flexible approach to enabling Valgrind
   + crm: Fix compilation when bzip2 is not installed
   + CRM: Future-proof get_message_xml()
   + crmd: Filter election responses based on time not FSA state
   + crmd: Handle all possible peer states in crmd_ha_status_callback()
   + crmd: Make sure the current date/time is set - prevents use-of-NULL when evaluating rules
   + crmd: Relax an assertion regarding ccm membership instances
   + crmd: Use (node->processes&crm_proc_ais) to accurately update the CIB after replace operations
   + crmd: Heartbeat: Accurately record peer client status
   + pengine: Bug 1777 - Allow colocation with a resource in the Stopped state
   + pengine: Bug 1822 - Prevent use-of-NULL in PromoteRsc()
   + pengine: Implement three recovery policies based on op_status and op_rc
   + pengine: Parse fail-count correctly (it may be set to INFINITY)
   + pengine: Prevent graph-loop when stonith agents need to be moved around before a STONITH op
   + pengine: Prevent graph-loops when two operations have the same name+interval
   + tengine: Cancel active timers when destroying graphs
   + tengine: Ensure failcount is set correctly for failed stops/starts
   + tengine: Update failcount for operations that time out
   + Medium: admin: Prevent hang in crm_mon -1 when there is no cib connection - Patch from Junko IKEDA
   + Medium: cib: Require --force|-f when performing potentially dangerous commands with cibadmin
   + Medium: cib: Tweak the shutdown code
   + Medium: Common: Only count peer processes of active nodes
   + Medium: Core: Create generic cluster sign-in method
   + Medium: core: Fix compilation when Heartbeat support is disabled
   + Medium: Core: General cleanup for supporting two stacks
   + Medium: Core: iso8601 - Support parsing of time-only strings
   + Medium: core: Isolate more code that is only needed when SUPPORT_HEARTBEAT is enabled
   + Medium: crm: Improved logging of errors in the XML parser
   + Medium: crmd: Fix potential use-of-NULL in string comparison
   + Medium: crmd: Reimplement synchronizing of CIB queries and updates when invoking the PE
   + Medium: crm_mon: Indicate when a node is both in standby mode and offline
   + Medium: pengine: Bug 1822 - Do not try to promote groups if not all of it is active
   + Medium: pengine: on_fail=nothing is an alias for 'ignore' not 'restart'
   + Medium: pengine: Prevent a potential use-of-NULL in cron_range_satisfied()
   + snmp subagent: fix a problem on displaying an unmanaged group
   + snmp subagent: use the syslog setting
   + snmp: v2 support (thanks to Keisuke MORI)
   + snmp_subagent - made it not complain about some things if shutting down
diff --git a/GNUmakefile b/GNUmakefile
index e646bb6e4a..3cdecb5070 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -1,416 +1,417 @@
 #
 # Copyright 2008-2019 the Pacemaker project contributors
 #
 # The version control history for this file may have further details.
 #
 # This source code is licensed under the GNU General Public License version 2
 # or later (GPLv2+) WITHOUT ANY WARRANTY.
 #
 
 default: build
 .PHONY: default
 
 # The toplevel "clean" targets are generated from Makefile.am, not this file.
 # We can't use autotools' CLEANFILES, clean-local, etc. here. Instead, we
 # define this target, which Makefile.am can use as a dependency of clean-local.
 EXTRA_CLEAN_TARGETS	= ancillary-clean
 
 -include Makefile
 
 # The main purpose of this GNUmakefile is that its targets can be invoked
 # without having to call autogen.sh and configure first. That means automake
 # variables may or may not be defined. Here, we use the current working
 # directory if a relevant variable hasn't been defined.
 #
 # The idea is to keep generated artifacts in the build tree, in case a VPATH
 # build is in use, but in practice it would be difficult to make the targets
 # here usable from a different location than the source tree.
 abs_srcdir	?= $(shell pwd)
 abs_builddir	?= $(shell pwd)
 
 PACKAGE		?= pacemaker
 
 
 # Definitions that specify what various targets will apply to
 
 COMMIT  ?= HEAD
 TAG     ?= $(shell T=$$(git describe --all '$(COMMIT)' 2>/dev/null | sed -n 's|tags/\(.*\)|\1|p'); \
 	     test -n "$${T}" && echo "$${T}" \
 	       || git log --pretty=format:%H -n 1 '$(COMMIT)' 2>/dev/null || echo DIST)
 lparen = (
 rparen = )
 SHORTTAG ?= $(shell case $(TAG) in Pacemaker-*|DIST$(rparen) echo '$(TAG)' | cut -c11-;; \
 	      *$(rparen) git log --pretty=format:%h -n 1 '$(TAG)';; esac)
 SHORTTAG_ABBREV = $(shell printf %s '$(SHORTTAG)' | wc -c)
 
 LAST_RC		?= $(shell test -e /Volumes || git tag -l | grep Pacemaker | sort -Vr | grep rc | head -n 1)
 ifneq ($(origin VERSION), undefined)
 LAST_RELEASE	?= Pacemaker-$(VERSION)
 else
 LAST_RELEASE	?= $(shell git tag -l | grep Pacemaker | sort -Vr | grep -v rc | head -n 1)
 endif
 NEXT_RELEASE	?= $(shell echo $(LAST_RELEASE) | awk -F. '/[0-9]+\./{$$3+=1;OFS=".";print $$1,$$2,$$3}')
 
 
 # This Makefile can create 2 types of distributions:
 #
 # - "make dist" is automake's native functionality, based on the various
 #   dist/nodist make variables; it always uses the current sources
 #
 # - "make export" is a custom target based on git archive and relevant entries
 #   from .gitattributes; it defaults to current sources but can use any git tag
 #
 # Both types use the TARFILE name for the result, though they generate
 # different contents.
 distdir			= $(PACKAGE)-$(SHORTTAG)
 TARFILE			= $(abs_builddir)/$(PACKAGE)-$(SHORTTAG).tar.gz
 
 .PHONY: init
 init:
 	test -e $(top_srcdir)/configure || ./autogen.sh
 	test -e $(abs_builddir)/Makefile || $(abs_builddir)/configure
 
 .PHONY: build
 build: init
 	$(MAKE) $(AM_MAKEFLAGS) core
 
 export:
 	if [ ! -f "$(TARFILE)" ]; then						\
 	    if [ $(TAG) = dirty ]; then 					\
 		git commit -m "DO-NOT-PUSH" -a;					\
 		git archive --prefix=$(distdir)/ -o "$(TARFILE)" HEAD^{tree};	\
 		git reset --mixed HEAD^; 					\
 	    else								\
 		git archive --prefix=$(distdir)/ -o "$(TARFILE)" $(TAG)^{tree};	\
 	    fi;									\
 	    echo "`date`: Rebuilt $(TARFILE)";					\
 	else									\
 	    echo "`date`: Using existing tarball: $(TARFILE)";			\
 	fi
 
 ## RPM-related targets
 
 # Where to put RPM artifacts; possible values:
 #
 # - toplevel (default): RPM sources, spec, and source rpm in top-level build
 #   directory (everything else uses the usual defaults)
 #
 # - subtree: RPM sources (i.e. TARFILE) in top-level build directory,
-#   everything else in dedicated "rpmbuild" subdirectory of build tree
+#   everything else in dedicated "rpm" subdirectory of build tree
 RPMDEST         	?= toplevel
 
 RPM_SPEC_DIR_toplevel	= $(abs_builddir)
 RPM_SRCRPM_DIR_toplevel	= $(abs_builddir)
 RPM_OPTS_toplevel	= --define "_sourcedir $(abs_builddir)" 		\
 			  --define "_specdir   $(RPM_SPEC_DIR_toplevel)"	\
 			  --define "_srcrpmdir $(RPM_SRCRPM_DIR_toplevel)"
 
 RPM_SPEC_DIR_subtree	= $(abs_builddir)/rpm/SPECS
 RPM_SRCRPM_DIR_subtree	= $(abs_builddir)/rpm/SRPMS
 RPM_OPTS_subtree	= --define "_sourcedir $(abs_builddir)" 		\
 			  --define "_topdir $(abs_builddir)/rpm"
 
 RPM_SPEC_DIR	= $(RPM_SPEC_DIR_$(RPMDEST))
 RPM_SRCRPM_DIR	= $(RPM_SRCRPM_DIR_$(RPMDEST))
 RPM_OPTS	= $(RPM_OPTS_$(RPMDEST))
 
 WITH		?= --without doc
 BUILD_COUNTER	?= build.counter
 LAST_COUNT      = $(shell test ! -e $(BUILD_COUNTER) && echo 0; test -e $(BUILD_COUNTER) && cat $(BUILD_COUNTER))
 COUNT           = $(shell expr 1 + $(LAST_COUNT))
 SPECVERSION	?= $(COUNT)
 
 MOCK_DIR	= $(abs_builddir)/mock
 MOCK_OPTIONS	?= --resultdir=$(MOCK_DIR) --no-cleanup-after
 
 F	?= $(shell test ! -e /etc/fedora-release && echo 0; test -e /etc/fedora-release && rpm --eval %{fedora})
 ARCH	?= $(shell test ! -e /etc/fedora-release && uname -m; test -e /etc/fedora-release && rpm --eval %{_arch})
 MOCK_CFG	?= $(shell test -e /etc/fedora-release && echo fedora-$(F)-$(ARCH))
 
 # rpmbuild wrapper that translates "--with[out] FEATURE" into RPM macros
 #
 # Unfortunately, at least recent versions of rpm do not support the mentioned
 # switch.  To work around this, we can emulate the mechanism that rpm uses
 # internally: unfold the flags into respective macro definitions:
 #
 #    --with[out] FOO  ->  --define "_with[out]_FOO --with[out]-FOO"
 #
 # $(1) ... WITH string (e.g., --with pre_release --without doc)
 # $(2) ... options following the initial "rpmbuild" in the command
 # $(3) ... final arguments determined with $2 (e.g., pacemaker.spec)
 #
 # Note that if $(3) is a specfile, extra care is taken so as to reflect
 # pcmkversion correctly (using in-place modification).
 #
 # Also note that both ways to specify long option with an argument
 # (i.e., what getopt and, importantly, rpm itself support) can be used:
 #
 #    --with FOO
 #    --with=FOO
 rpmbuild-with = \
 	WITH=$$(getopt -o "" -l with:,without: -- $(1)) || exit 1; \
 	CMD='rpmbuild $(2)'; PREREL=0; \
 	eval set -- "$${WITH}"; \
 	while true; do \
 		case "$$1" in \
 		--with) CMD="$${CMD} --define \"_with_$$2 --with-$$2\""; \
 			[ "$$2" != pre_release ] || PREREL=1; shift 2;; \
 		--without) CMD="$${CMD} --define \"_without_$$2 --without-$$2\""; \
 		        [ "$$2" != pre_release ] || PREREL=0; shift 2;; \
 		--) shift ; break ;; \
 		*) echo "cannot parse WITH: $$1"; exit 1;; \
 		esac; \
 	done; \
 	case "$(3)" in \
 	*.spec) { [ $${PREREL} -eq 0 ] || [ $(LAST_RELEASE) = $(TAG) ]; } \
 		&& sed -i "s/^\(%global pcmkversion \).*/\1$$(echo $(LAST_RELEASE) | sed -e s:Pacemaker-:: -e s:-.*::)/" $(3) \
 		|| sed -i "s/^\(%global pcmkversion \).*/\1$$(echo $(NEXT_RELEASE) | sed -e s:Pacemaker-:: -e s:-.*::)/" $(3);; \
 	esac; \
 	CMD="$${CMD} $(3)"; \
 	eval "$${CMD}"
 
-$(RPM_SPEC_DIR)/$(PACKAGE).spec: rpm/pacemaker.spec.in
+# Depend on spec-clean so it gets rebuilt every time
+$(RPM_SPEC_DIR)/$(PACKAGE).spec: spec-clean rpm/pacemaker.spec.in
 	$(AM_V_at)$(MKDIR_P) $(RPM_SPEC_DIR)	# might not exist in VPATH build
 	$(AM_V_GEN)if [ x != x"`git ls-files -m rpm/pacemaker.spec.in 2>/dev/null`" ]; then	\
 	    cat $(abs_srcdir)/rpm/pacemaker.spec.in;							\
 	elif git cat-file -e $(TAG):rpm/pacemaker.spec.in 2>/dev/null; then		\
 	    git show $(TAG):rpm/pacemaker.spec.in;					\
 	elif git cat-file -e $(TAG):pacemaker.spec.in 2>/dev/null; then			\
 	    git show $(TAG):pacemaker.spec.in;						\
 	else 										\
 	    cat $(abs_srcdir)/rpm/pacemaker.spec.in;							\
 	fi | sed									\
 	    -e 's/global\ specversion\ .*/global\ specversion\ $(SPECVERSION)/' 	\
 	    -e 's/global\ commit\ .*/global\ commit\ $(SHORTTAG)/'			\
 	    -e 's/global\ commit_abbrev\ .*/global\ commit_abbrev\ $(SHORTTAG_ABBREV)/' \
 	    -e "s/PACKAGE_DATE/$$(date +'%a %b %d %Y')/"				\
 	    -e "s/PACKAGE_VERSION/$$(git describe --tags $(TAG) | sed -e s:Pacemaker-:: -e s:-.*::)/"	\
 	    > "$@"
 
 .PHONY: $(PACKAGE).spec
 $(PACKAGE).spec: $(RPM_SPEC_DIR)/$(PACKAGE).spec
 
 .PHONY: spec-clean
 spec-clean:
 	-rm -f $(RPM_SPEC_DIR)/$(PACKAGE).spec
 
 .PHONY: srpm
 srpm:	export srpm-clean $(RPM_SPEC_DIR)/$(PACKAGE).spec
 	if [ -e $(BUILD_COUNTER) ]; then					\
 		echo $(COUNT) > $(BUILD_COUNTER);				\
 	fi
 	$(call rpmbuild-with,$(WITH),-bs $(RPM_OPTS),$(RPM_SPEC_DIR)/$(PACKAGE).spec)
 
 .PHONY: srpm-clean
 srpm-clean:
 	-rm -f $(RPM_SRCRPM_DIR)/*.src.rpm
 
 .PHONY: chroot
 chroot: mock-$(MOCK_CFG) mock-install-$(MOCK_CFG) mock-sh-$(MOCK_CFG)
 	@echo "Done"
 
 .PHONY: mock-next
 mock-next:
 	$(MAKE) $(AM_MAKEFLAGS) F=$(shell expr 1 + $(F)) mock
 
 .PHONY: mock-rawhide
 mock-rawhide:
 	$(MAKE) $(AM_MAKEFLAGS) F=rawhide mock
 
 mock-install-%:
 	@echo "Installing packages"
 	mock --root=$* $(MOCK_OPTIONS) --install $(MOCK_DIR)/*.rpm \
 		vi sudo valgrind lcov gdb fence-agents psmisc
 
 .PHONY: mock-install
 mock-install: mock-install-$(MOCK_CFG)
 	@echo "Done"
 
 .PHONY: mock-sh
 mock-sh: mock-sh-$(MOCK_CFG)
 	@echo "Done"
 
 mock-sh-%:
 	@echo "Connecting"
 	mock --root=$* $(MOCK_OPTIONS) --shell
 	@echo "Done"
 
 mock-%: srpm mock-clean
 	mock $(MOCK_OPTIONS) --root=$* --no-cleanup-after --rebuild	\
 		$(WITH) $(RPM_SRCRPM_DIR)/*.src.rpm
 
 .PHONY: mock
 mock:   mock-$(MOCK_CFG)
 	@echo "Done"
 
 .PHONY: dirty
 dirty:
 	$(MAKE) $(AM_MAKEFLAGS) TAG=dirty mock
 
 .PHONY: mock-clean
 mock-clean:
 	-rm -rf $(MOCK_DIR)
 
 .PHONY: rpm-dep
 rpm-dep: $(RPM_SPEC_DIR)/$(PACKAGE).spec
 	sudo yum-builddep "$<"	# install build deps from the generated spec at its real path (not a cwd-relative name)
 
 # e.g. make WITH="--with pre_release" rpm
 .PHONY: rpm
 rpm:	srpm
 	@echo To create custom builds, edit the flags and options in $(PACKAGE).spec first
 	$(call rpmbuild-with,$(WITH),$(RPM_OPTS),--rebuild $(RPM_SRCRPM_DIR)/*.src.rpm)
 
 .PHONY: rpmlint
 rpmlint: $(RPM_SPEC_DIR)/$(PACKAGE).spec
 	rpmlint -f rpm/rpmlintrc "$<"
 
 .PHONY: release
 release:
 	$(MAKE) $(AM_MAKEFLAGS) TAG=$(LAST_RELEASE) rpm
 
 .PHONY: rc
 rc:
 	$(MAKE) $(AM_MAKEFLAGS) TAG=$(LAST_RC) rpm
 
 
 ## Static analysis via coverity
 
 # Aggressiveness (low, medium, or high)
 COVLEVEL	?= low
 
 # Generated outputs
 COVERITY_DIR	= $(abs_builddir)/coverity-$(TAG)
 COVTAR		= $(abs_builddir)/$(PACKAGE)-coverity-$(TAG).tgz
 COVEMACS	= $(abs_builddir)/$(TAG).coverity
 COVHTML		= $(COVERITY_DIR)/output/errors
 
 # Coverity outputs are phony so they get rebuilt every invocation
 
 .PHONY: $(COVERITY_DIR)
 $(COVERITY_DIR): init core-clean coverity-clean
 	$(AM_V_GEN)cov-build --dir "$@" $(MAKE) $(AM_MAKEFLAGS) core
 
 # Public coverity instance
 
 .PHONY: $(COVTAR)
 $(COVTAR): $(COVERITY_DIR)
 	$(AM_V_GEN)tar czf "$@" --transform="s@.*$(TAG)@cov-int@" "$<"
 
 .PHONY: coverity
 coverity: $(COVTAR)
 	@echo "Now go to https://scan.coverity.com/users/sign_in and upload:"
 	@echo "  $(COVTAR)"
 	@echo "then make core-clean coverity-clean"
 
 # Licensed coverity instance
 #
 # The prerequisites are a little hacky; rather than actually required, some
 # of them are designed so that things execute in the proper order (which is
 # not the same as GNU make's order-only prerequisites).
 
 .PHONY: coverity-analyze
 coverity-analyze: $(COVERITY_DIR)
 	@echo ""
 	@echo "Analyzing (waiting for coverity license if necessary) ..."
 	cov-analyze --dir "$<" --wait-for-license --security		\
 		--aggressiveness-level "$(COVLEVEL)"
 
 .PHONY: $(COVEMACS)
 $(COVEMACS): coverity-analyze
 	$(AM_V_GEN)cov-format-errors --dir "$(COVERITY_DIR)" --emacs-style > "$@"
 
 .PHONY: $(COVHTML)
 $(COVHTML): $(COVEMACS)
 	$(AM_V_GEN)cov-format-errors --dir "$(COVERITY_DIR)" --html-output "$@"
 
 .PHONY: coverity-corp
 coverity-corp: $(COVHTML)
 	$(MAKE) $(AM_MAKEFLAGS) core-clean
 	@echo "Done. See:"
 	@echo "  file://$(COVHTML)/index.html"
 	@echo "When no longer needed, make coverity-clean"
 
 # Remove all outputs regardless of tag
 .PHONY: coverity-clean
 coverity-clean:
 	-rm -rf "$(abs_builddir)"/coverity-*			\
 		"$(abs_builddir)"/$(PACKAGE)-coverity-*.tgz	\
 		"$(abs_builddir)"/*.coverity
 
 
 ## Change log generation
 
 summary:
 	@printf "\n* `date +"%a %b %d %Y"` `git config user.name` <`git config user.email`> $(NEXT_RELEASE)"
-	@printf "\n- Changesets: `git log --pretty=oneline $(LAST_RELEASE)..HEAD | wc -l`"
-	@printf "\n- Diff:      "
+	@printf "\n- Changesets: `git log --pretty=oneline --no-merges $(LAST_RELEASE)..HEAD | wc -l`"
+	@printf "\n- Diff:\n"
 	@git diff $(LAST_RELEASE)..HEAD --shortstat include lib daemons tools xml
 
 rc-changes:
 	@$(MAKE) $(AM_MAKEFLAGS) NEXT_RELEASE=$(shell echo $(LAST_RC) | sed s:-rc.*::) LAST_RELEASE=$(LAST_RC) changes
 
 changes: summary
 	@printf "\n- Features added since $(LAST_RELEASE)\n"
 	@git log --pretty=format:'  +%s' --abbrev-commit $(LAST_RELEASE)..HEAD | grep -e Feature: | sed -e 's@Feature:@@' | sort -uf
 	@printf "\n- Changes since $(LAST_RELEASE)\n"
 	@git log --pretty=format:'  +%s' --no-merges --abbrev-commit $(LAST_RELEASE)..HEAD \
 		| grep -e High: -e Fix: -e Bug | sed \
 			-e 's@\(Fix\|High\|Bug\):@@' \
 			-e 's@\(cib\|pacemaker-based\|based\):@CIB:@' \
 			-e 's@\(crmd\|pacemaker-controld\|controld\):@controller:@' \
 			-e 's@\(lrmd\|pacemaker-execd\|execd\):@executor:@' \
 			-e 's@\(Fencing\|stonithd\|stonith\|pacemaker-fenced\|fenced\):@fencing:@' \
 			-e 's@\(PE\|pengine\|pacemaker-schedulerd\|schedulerd\):@scheduler:@' \
 		| sort -uf
 
 authors:
 	git log $(LAST_RELEASE)..$(COMMIT) --format='%an' | sort -u
 
 changelog:
 	@$(MAKE) $(AM_MAKEFLAGS) changes > ChangeLog
 	@printf "\n">> ChangeLog
 	git show $(LAST_RELEASE):ChangeLog >> ChangeLog
 
 DO_NOT_INDENT = lib/gnu daemons/controld/controld_fsa.h
 
 indent:
 	find . -name "*.[ch]" -exec ./p-indent \{\} \;	# run p-indent on every .c and .h file under the current directory
 	git checkout HEAD -- $(DO_NOT_INDENT)	# restore the DO_NOT_INDENT paths from HEAD (plain checkout; "co" exists only as a user alias)
 
 rel-tags: tags
 	find . -name TAGS -exec sed -i 's:\(.*\)/\(.*\)/TAGS:\2/TAGS:g' \{\} \;
 
 CLANG_analyzer = $(shell which scan-build)
 CLANG_checkers = 
 
 # Use CPPCHECK_ARGS to pass extra cppcheck options, e.g.:
 # --enable={warning,style,performance,portability,information,all}
 # --inconclusive --std=posix
 CPPCHECK_ARGS ?=
 cppcheck:
 	cppcheck $(CPPCHECK_ARGS) -I include --max-configs=25 -q replace lib daemons tools
 
 clang:
 	test -e "$(CLANG_analyzer)"	# quoted: an empty path (scan-build not found) must fail instead of passing as one-argument test
 	scan-build $(CLANG_checkers:%=-enable-checker %) $(MAKE) $(AM_MAKEFLAGS) clean all
 
 # V3	= scandir unsetenv alphasort xalloc
 # V2	= setenv strerror strchrnul strndup
 # https://www.gnu.org/software/gnulib/manual/html_node/Initial-import.html#Initial-import
 # previously, this was crypto/md5, but got spoiled with streams/kernel crypto
 GNU_MODS	= crypto/md5-buffer
 # stdint appears to be surrogate only for C99-lacking environments
 GNU_MODS_AVOID	= stdint
 # only for plain crypto/md5: we make do without kernel-assisted crypto
 # GNU_MODS_AVOID	+= crypto/af_alg
 gnulib-update:
 	-test -e maint/gnulib \
 	  || git clone https://git.savannah.gnu.org/git/gnulib.git maint/gnulib
 	cd maint/gnulib && git pull
 	maint/gnulib/gnulib-tool \
 	  --source-base=lib/gnu --lgpl=2 --no-vc-files --no-conditional-dependencies \
 	  $(GNU_MODS_AVOID:%=--avoid %) --import $(GNU_MODS)
 
 ancillary-clean: spec-clean srpm-clean mock-clean coverity-clean
 	-rm -f $(TARFILE)
diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
index a82271aca8..4f16afbbbe 100644
--- a/cts/cts-scheduler.in
+++ b/cts/cts-scheduler.in
@@ -1,1450 +1,1450 @@
 #!@PYTHON@
 """ Regression tests for Pacemaker's scheduler
 """
 
 # Pacemaker targets compatibility with Python 2.7 and 3.2+
 from __future__ import print_function, unicode_literals, absolute_import, division
 
 __copyright__ = "Copyright 2004-2019 the Pacemaker project contributors"
 __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
 
 import io
 import os
 import re
 import sys
 import stat
 import shlex
 import shutil
 import argparse
 import subprocess
 
 DESC = """Regression tests for Pacemaker's scheduler"""
 
 # Each entry in TESTS is a group of tests, where each test consists of a
 # test base name, test description, and additional test arguments.
 # Test groups will be separated by newlines in output.
 TESTS = [
     [
         [ "simple1", "Offline" ],
         [ "simple2", "Start" ],
         [ "simple3", "Start 2" ],
         [ "simple4", "Start Failed" ],
         [ "simple6", "Stop Start" ],
         [ "simple7", "Shutdown" ],
         #[ "simple8", "Stonith" ],
         #[ "simple9", "Lower version" ],
         #[ "simple10", "Higher version" ],
         [ "simple11", "Priority (ne)" ],
         [ "simple12", "Priority (eq)" ],
         [ "simple8", "Stickiness" ],
     ],
     [
         [ "group1", "Group" ],
         [ "group2", "Group + Native" ],
         [ "group3", "Group + Group" ],
         [ "group4", "Group + Native (nothing)" ],
         [ "group5", "Group + Native (move)" ],
         [ "group6", "Group + Group (move)" ],
         [ "group7", "Group colocation" ],
         [ "group13", "Group colocation (cant run)" ],
         [ "group8", "Group anti-colocation" ],
         [ "group9", "Group recovery" ],
         [ "group10", "Group partial recovery" ],
         [ "group11", "Group target_role" ],
         [ "group14", "Group stop (graph terminated)" ],
         [ "group15", "Negative group colocation" ],
         [ "bug-1573", "Partial stop of a group with two children" ],
         [ "bug-1718", "Mandatory group ordering - Stop group_FUN" ],
         [ "bug-lf-2613", "Move group on failure" ],
         [ "bug-lf-2619", "Move group on clone failure" ],
         [ "group-fail", "Ensure stop order is preserved for partially active groups" ],
         [ "group-unmanaged", "No need to restart r115 because r114 is unmanaged" ],
         [ "group-unmanaged-stopped", "Make sure r115 is stopped when r114 fails" ],
         [ "group-dependents", "Account for the location preferences of things colocated with a group" ],
         [ "group-stop-ordering", "Ensure blocked group member stop does not force other member stops" ],
     ],
     [
         [ "rsc_dep1", "Must not" ],
         [ "rsc_dep3", "Must" ],
         [ "rsc_dep5", "Must not 3" ],
         [ "rsc_dep7", "Must 3" ],
         [ "rsc_dep10", "Must (but cant)" ],
         [ "rsc_dep2", "Must (running)" ],
         [ "rsc_dep8", "Must (running : alt)" ],
         [ "rsc_dep4", "Must (running + move)" ],
         [ "asymmetric", "Asymmetric - require explicit location constraints" ],
     ],
     [
         [ "orphan-0", "Orphan ignore" ],
         [ "orphan-1", "Orphan stop" ],
         [ "orphan-2", "Orphan stop, remove failcount" ],
     ],
     [
         [ "params-0", "Params: No change" ],
         [ "params-1", "Params: Changed" ],
         [ "params-2", "Params: Resource definition" ],
         [ "params-4", "Params: Reload" ],
         [ "params-5", "Params: Restart based on probe digest" ],
         [ "novell-251689", "Resource definition change + target_role=stopped" ],
         [ "bug-lf-2106", "Restart all anonymous clone instances after config change" ],
         [ "params-6", "Params: Detect reload in previously migrated resource" ],
         [ "nvpair-id-ref", "Support id-ref in nvpair with optional name" ],
         [ "not-reschedule-unneeded-monitor",
                      "Do not reschedule unneeded monitors while resource definitions have changed" ],
         [ "reload-becomes-restart", "Cancel reload if restart becomes required" ],
     ],
     [
         [ "target-0", "Target Role : baseline" ],
         [ "target-1", "Target Role : master" ],
         [ "target-2", "Target Role : invalid" ],
     ],
     [
         [ "base-score", "Set a node's default score for all nodes" ],
     ],
     [
         [ "date-1", "Dates", [ "-t",  "2005-020" ] ],
         [ "date-2", "Date Spec - Pass", [ "-t", "2005-020T12:30" ] ],
         [ "date-3", "Date Spec - Fail", [ "-t", "2005-020T11:30" ] ],
         [ "origin", "Timing of recurring operations", [ "-t", "2014-05-07 00:28:00" ] ],
         [ "probe-0", "Probe (anon clone)" ],
         [ "probe-1", "Pending Probe" ],
         [ "probe-2", "Correctly re-probe cloned groups" ],
         [ "probe-3", "Probe (pending node)" ],
         [ "probe-4", "Probe (pending node + stopped resource)" ],
         [ "standby", "Standby" ],
         [ "comments", "Comments" ],
     ],
     [
         [ "one-or-more-0", "Everything starts" ],
         [ "one-or-more-1", "Nothing starts because of A" ],
         [ "one-or-more-2", "D can start because of C" ],
         [ "one-or-more-3", "D cannot start because of B and C" ],
         [ "one-or-more-4", "D cannot start because of target-role" ],
         [ "one-or-more-5", "Start A and F even though C and D are stopped" ],
         [ "one-or-more-6", "Leave A running even though B is stopped" ],
         [ "one-or-more-7", "Leave A running even though C is stopped" ],
         [ "bug-5140-require-all-false", "Allow basegrp:0 to stop" ],
         [ "clone-require-all-1", "clone B starts node 3 and 4" ],
         [ "clone-require-all-2", "clone B remains stopped everywhere" ],
         [ "clone-require-all-3", "clone B stops everywhere because A stops everywhere" ],
         [ "clone-require-all-4", "clone B remains on node 3 and 4 with only one instance of A remaining" ],
         [ "clone-require-all-5", "clone B starts on node 1 3 and 4" ],
         [ "clone-require-all-6", "clone B remains active after shutting down instances of A" ],
         [ "clone-require-all-7",
           "clone A and B both start at the same time. all instances of A start before B" ],
         [ "clone-require-all-no-interleave-1", "C starts everywhere after A and B" ],
         [ "clone-require-all-no-interleave-2",
           "C starts on nodes 1, 2, and 4 with only one active instance of B" ],
         [ "clone-require-all-no-interleave-3",
           "C remains active when instance of B is stopped on one node and started on another" ],
         [ "one-or-more-unrunnable-instances", "Avoid dependencies on instances that won't ever be started" ],
     ],
     [
         [ "location-date-rules-1", "Use location constraints with ineffective date-based rules" ],
         [ "location-date-rules-2", "Use location constraints with effective date-based rules" ],
         [ "nvpair-date-rules-1", "Use nvpair blocks with a variety of date-based rules" ],
     ],
     [
         [ "order1", "Order start 1" ],
         [ "order2", "Order start 2" ],
         [ "order3", "Order stop" ],
         [ "order4", "Order (multiple)" ],
         [ "order5", "Order (move)" ],
         [ "order6", "Order (move w/ restart)" ],
         [ "order7", "Order (mandatory)" ],
         [ "order-optional", "Order (score=0)" ],
         [ "order-required", "Order (score=INFINITY)" ],
         [ "bug-lf-2171", "Prevent group start when clone is stopped" ],
         [ "order-clone", "Clone ordering should be able to prevent startup of dependent clones" ],
         [ "order-sets", "Ordering for resource sets" ],
         [ "order-serialize", "Serialize resources without inhibiting migration" ],
         [ "order-serialize-set", "Serialize a set of resources without inhibiting migration" ],
         [ "clone-order-primitive", "Order clone start after a primitive" ],
         [ "clone-order-16instances", "Verify ordering of 16 cloned resources" ],
         [ "order-optional-keyword", "Order (optional keyword)" ],
         [ "order-mandatory", "Order (mandatory keyword)" ],
         [ "bug-lf-2493",
           "Don't imply colocation requirements when applying ordering constraints with clones" ],
         [ "ordered-set-basic-startup", "Constraint set with default order settings" ],
         [ "ordered-set-natural", "Allow natural set ordering" ],
         [ "order-wrong-kind", "Order (error)" ],
     ],
     [
         [ "coloc-loop", "Colocation - loop" ],
         [ "coloc-many-one", "Colocation - many-to-one" ],
         [ "coloc-list", "Colocation - many-to-one with list" ],
         [ "coloc-group", "Colocation - groups" ],
         [ "coloc-slave-anti", "Anti-colocation with slave shouldn't prevent master colocation" ],
         [ "coloc-attr", "Colocation based on node attributes" ],
         [ "coloc-negative-group", "Negative colocation with a group" ],
         [ "coloc-intra-set", "Intra-set colocation" ],
         [ "bug-lf-2435", "Colocation sets with a negative score" ],
         [ "coloc-clone-stays-active",
           "Ensure clones don't get stopped/demoted because a dependent must stop" ],
         [ "coloc_fp_logic", "Verify floating point calculations in colocation are working" ],
         [ "colo_master_w_native",
           "cl#5070 - Verify promotion order is affected when colocating master to native rsc" ],
         [ "colo_slave_w_native",
           "cl#5070 - Verify promotion order is affected when colocating slave to native rsc" ],
         [ "anti-colocation-order",
           "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node" ],
         [ "anti-colocation-master", "Organize order of actions for master resources in anti-colocations" ],
         [ "anti-colocation-slave", "Organize order of actions for slave resources in anti-colocations" ],
         [ "enforce-colo1", "Always enforce B with A INFINITY" ],
         [ "complex_enforce_colo", "Always enforce B with A INFINITY. (make sure heat-engine stops)" ],
     ],
     [
         [ "rsc-sets-seq-true", "Resource Sets - sequential=false" ],
         [ "rsc-sets-seq-false", "Resource Sets - sequential=true" ],
         [ "rsc-sets-clone", "Resource Sets - Clone" ],
         [ "rsc-sets-master", "Resource Sets - Master" ],
         [ "rsc-sets-clone-1", "Resource Sets - Clone (lf#2404)" ],
     ],
     [
         [ "attrs1", "string: eq (and)" ],
         [ "attrs2", "string: lt / gt (and)" ],
         [ "attrs3", "string: ne (or)" ],
         [ "attrs4", "string: exists" ],
         [ "attrs5", "string: not_exists" ],
         [ "attrs6", "is_dc: true" ],
         [ "attrs7", "is_dc: false" ],
         [ "attrs8", "score_attribute" ],
         [ "per-node-attrs", "Per node resource parameters" ],
     ],
     [
         [ "mon-rsc-1", "Schedule Monitor - start" ],
         [ "mon-rsc-2", "Schedule Monitor - move" ],
         [ "mon-rsc-3", "Schedule Monitor - pending start" ],
         [ "mon-rsc-4", "Schedule Monitor - move/pending start" ],
     ],
     [
         [ "rec-rsc-0", "Resource Recover - no start" ],
         [ "rec-rsc-1", "Resource Recover - start" ],
         [ "rec-rsc-2", "Resource Recover - monitor" ],
         [ "rec-rsc-3", "Resource Recover - stop - ignore" ],
         [ "rec-rsc-4", "Resource Recover - stop - block" ],
         [ "rec-rsc-5", "Resource Recover - stop - fence" ],
         [ "rec-rsc-6", "Resource Recover - multiple - restart" ],
         [ "rec-rsc-7", "Resource Recover - multiple - stop" ],
         [ "rec-rsc-8", "Resource Recover - multiple - block" ],
         [ "rec-rsc-9", "Resource Recover - group/group" ],
         [ "monitor-recovery", "on-fail=block + resource recovery detected by recurring monitor" ],
         [ "stop-failure-no-quorum", "Stop failure without quorum" ],
         [ "stop-failure-no-fencing", "Stop failure without fencing available" ],
         [ "stop-failure-with-fencing", "Stop failure with fencing available" ],
         [ "multiple-active-block-group", "Support of multiple-active=block for resource groups" ],
         [ "multiple-monitor-one-failed",
           "Consider resource failed if any of the configured monitor operations failed" ],
     ],
     [
         [ "quorum-1", "No quorum - ignore" ],
         [ "quorum-2", "No quorum - freeze" ],
         [ "quorum-3", "No quorum - stop" ],
         [ "quorum-4", "No quorum - start anyway" ],
         [ "quorum-5", "No quorum - start anyway (group)" ],
         [ "quorum-6", "No quorum - start anyway (clone)" ],
         [ "bug-cl-5212", "No promotion with no-quorum-policy=freeze" ],
         [ "suicide-needed-inquorate", "no-quorum-policy=suicide: suicide necessary" ],
         [ "suicide-not-needed-initial-quorum",
           "no-quorum-policy=suicide: suicide not necessary at initial quorum" ],
         [ "suicide-not-needed-never-quorate",
           "no-quorum-policy=suicide: suicide not necessary if never quorate" ],
         [ "suicide-not-needed-quorate", "no-quorum-policy=suicide: suicide necessary if quorate" ],
     ],
     [
         [ "rec-node-1", "Node Recover - Startup   - no fence" ],
         [ "rec-node-2", "Node Recover - Startup   - fence" ],
         [ "rec-node-3", "Node Recover - HA down   - no fence" ],
         [ "rec-node-4", "Node Recover - HA down   - fence" ],
         [ "rec-node-5", "Node Recover - CRM down  - no fence" ],
         [ "rec-node-6", "Node Recover - CRM down  - fence" ],
         [ "rec-node-7", "Node Recover - no quorum - ignore" ],
         [ "rec-node-8", "Node Recover - no quorum - freeze" ],
         [ "rec-node-9", "Node Recover - no quorum - stop" ],
         [ "rec-node-10", "Node Recover - no quorum - stop w/fence" ],
         [ "rec-node-11", "Node Recover - CRM down w/ group - fence" ],
         [ "rec-node-12", "Node Recover - nothing active - fence" ],
         [ "rec-node-13", "Node Recover - failed resource + shutdown - fence" ],
         [ "rec-node-15", "Node Recover - unknown lrm section" ],
         [ "rec-node-14", "Serialize all stonith's" ],
     ],
     [
         [ "multi1", "Multiple Active (stop/start)" ],
     ],
     [
         [ "migrate-begin", "Normal migration" ],
         [ "migrate-success", "Completed migration" ],
         [ "migrate-partial-1", "Completed migration, missing stop on source" ],
         [ "migrate-partial-2", "Successful migrate_to only" ],
         [ "migrate-partial-3", "Successful migrate_to only, target down" ],
         [ "migrate-partial-4", "Migrate from the correct host after migrate_to+migrate_from" ],
         [ "bug-5186-partial-migrate", "Handle partial migration when src node loses membership" ],
         [ "migrate-fail-2", "Failed migrate_from" ],
         [ "migrate-fail-3", "Failed migrate_from + stop on source" ],
         [ "migrate-fail-4",
           "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target" ],
         [ "migrate-fail-5", "Failed migrate_from + stop on source and target" ],
         [ "migrate-fail-6", "Failed migrate_to" ],
         [ "migrate-fail-7", "Failed migrate_to + stop on source" ],
         [ "migrate-fail-8",
           "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target" ],
         [ "migrate-fail-9", "Failed migrate_to + stop on source and target" ],
         [ "migration-ping-pong", "Old migrate_to failure + successful migrate_from on same node" ],
         [ "migrate-stop", "Migration in a stopping stack" ],
         [ "migrate-start", "Migration in a starting stack" ],
         [ "migrate-stop_start", "Migration in a restarting stack" ],
         [ "migrate-stop-complex", "Migration in a complex stopping stack" ],
         [ "migrate-start-complex", "Migration in a complex starting stack" ],
         [ "migrate-stop-start-complex", "Migration in a complex moving stack" ],
         [ "migrate-shutdown", "Order the post-migration 'stop' before node shutdown" ],
         [ "migrate-1", "Migrate (migrate)" ],
         [ "migrate-2", "Migrate (stable)" ],
         [ "migrate-3", "Migrate (failed migrate_to)" ],
         [ "migrate-4", "Migrate (failed migrate_from)" ],
         [ "novell-252693", "Migration in a stopping stack" ],
         [ "novell-252693-2", "Migration in a starting stack" ],
         [ "novell-252693-3", "Non-Migration in a starting and stopping stack" ],
         [ "bug-1820", "Migration in a group" ],
         [ "bug-1820-1", "Non-migration in a group" ],
         [ "migrate-5", "Primitive migration with a clone" ],
         [ "migrate-fencing", "Migration after Fencing" ],
         [ "migrate-both-vms", "Migrate two VMs that have no colocation" ],
         [ "migration-behind-migrating-remote", "Migrate resource behind migrating remote connection" ],
         [ "1-a-then-bm-move-b", "Advanced migrate logic. A then B. migrate B" ],
         [ "2-am-then-b-move-a", "Advanced migrate logic, A then B, migrate A without stopping B" ],
         [ "3-am-then-bm-both-migrate", "Advanced migrate logic. A then B. migrate both" ],
         [ "4-am-then-bm-b-not-migratable", "Advanced migrate logic, A then B, B not migratable" ],
         [ "5-am-then-bm-a-not-migratable", "Advanced migrate logic. A then B. move both, a not migratable" ],
         [ "6-migrate-group", "Advanced migrate logic, migrate a group" ],
         [ "7-migrate-group-one-unmigratable",
           "Advanced migrate logic, migrate group mixed with allow-migrate true/false" ],
         [ "8-am-then-bm-a-migrating-b-stopping",
           "Advanced migrate logic, A then B, A migrating, B stopping" ],
         [ "9-am-then-bm-b-migrating-a-stopping",
           "Advanced migrate logic, A then B, B migrate, A stopping" ],
         [ "10-a-then-bm-b-move-a-clone",
           "Advanced migrate logic, A clone then B, migrate B while stopping A" ],
         [ "11-a-then-bm-b-move-a-clone-starting",
           "Advanced migrate logic, A clone then B, B moving while A is start/stopping" ],
         [ "a-promote-then-b-migrate", "A promote then B start. migrate B" ],
         [ "a-demote-then-b-migrate", "A demote then B stop. migrate B" ],
 
         # @TODO: If pacemaker implements versioned attributes, uncomment this test
         #[ "migrate-versioned", "Disable migration for versioned resources" ],
 
         [ "bug-lf-2422", "Dependency on partially active group - stop ocfs:*" ],
     ],
     [
         [ "clone-anon-probe-1", "Probe the correct (anonymous) clone instance for each node" ],
         [ "clone-anon-probe-2", "Avoid needless re-probing of anonymous clones" ],
         [ "clone-anon-failcount", "Merge failcounts for anonymous clones" ],
         [ "force-anon-clone-max", "Update clone-max properly when forcing a clone to be anonymous" ],
         [ "anon-instance-pending", "Assign anonymous clone instance numbers properly when action pending" ],
         [ "inc0", "Incarnation start" ],
         [ "inc1", "Incarnation start order" ],
         [ "inc2", "Incarnation silent restart, stop, move" ],
         [ "inc3", "Inter-incarnation ordering, silent restart, stop, move" ],
         [ "inc4", "Inter-incarnation ordering, silent restart, stop, move (ordered)" ],
         [ "inc5", "Inter-incarnation ordering, silent restart, stop, move (restart 1)" ],
         [ "inc6", "Inter-incarnation ordering, silent restart, stop, move (restart 2)" ],
         [ "inc7", "Clone colocation" ],
         [ "inc8", "Clone anti-colocation" ],
         [ "inc9", "Non-unique clone" ],
         [ "inc10", "Non-unique clone (stop)" ],
         [ "inc11", "Primitive colocation with clones" ],
         [ "inc12", "Clone shutdown" ],
         [ "cloned-group", "Make sure only the correct number of cloned groups are started" ],
         [ "cloned-group-stop", "Ensure stopping qpidd also stops glance and cinder" ],
         [ "clone-no-shuffle", "Don't prioritize allocation of instances that must be moved" ],
         [ "clone-max-zero", "Orphan processing with clone-max=0" ],
         [ "clone-anon-dup",
           "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" ],
         [ "bug-lf-2160", "Don't shuffle clones due to colocation" ],
         [ "bug-lf-2213", "clone-node-max enforcement for cloned groups" ],
         [ "bug-lf-2153", "Clone ordering constraints" ],
         [ "bug-lf-2361", "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable" ],
         [ "bug-lf-2317", "Avoid needless restart of primitive depending on a clone" ],
         [ "clone-colocate-instance-1", "Colocation with a specific clone instance (negative example)" ],
         [ "clone-colocate-instance-2", "Colocation with a specific clone instance" ],
         [ "clone-order-instance", "Ordering with specific clone instances" ],
         [ "bug-lf-2453", "Enforce mandatory clone ordering without colocation" ],
         [ "bug-lf-2508", "Correctly reconstruct the status of anonymous cloned groups" ],
         [ "bug-lf-2544", "Balanced clone placement" ],
         [ "bug-lf-2445", "Redistribute clones with node-max > 1 and stickiness = 0" ],
         [ "bug-lf-2574", "Avoid clone shuffle" ],
         [ "bug-lf-2581", "Avoid group restart due to unrelated clone (re)start" ],
         [ "bug-cl-5168", "Don't shuffle clones" ],
         [ "bug-cl-5170", "Prevent clone from starting with on-fail=block" ],
         [ "clone-fail-block-colocation", "Move colocated group when failed clone has on-fail=block" ],
         [ "clone-interleave-1",
           "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)" ],
         [ "clone-interleave-2", "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)" ],
         [ "clone-interleave-3",
           "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)" ],
         [ "rebalance-unique-clones", "Rebalance unique clone instances with no stickiness" ],
         [ "clone-requires-quorum-recovery", "Clone with requires=quorum on failed node needing recovery" ],
         [ "clone-requires-quorum",
           "Clone with requires=quorum with presumed-inactive instance on failed node" ],
     ],
     [
         [ "cloned_start_one", "order first clone then clone... first clone_min=2" ],
         [ "cloned_start_two", "order first clone then clone... first clone_min=2" ],
         [ "cloned_stop_one", "order first clone then clone... first clone_min=2" ],
         [ "cloned_stop_two", "order first clone then clone... first clone_min=2" ],
         [ "clone_min_interleave_start_one",
           "order first clone then clone... first clone_min=2 and then has interleave=true" ],
         [ "clone_min_interleave_start_two",
           "order first clone then clone... first clone_min=2 and then has interleave=true" ],
         [ "clone_min_interleave_stop_one",
           "order first clone then clone... first clone_min=2 and then has interleave=true" ],
         [ "clone_min_interleave_stop_two",
           "order first clone then clone... first clone_min=2 and then has interleave=true" ],
         [ "clone_min_start_one", "order first clone then primitive... first clone_min=2" ],
         [ "clone_min_start_two", "order first clone then primitive... first clone_min=2" ],
         [ "clone_min_stop_all", "order first clone then primitive... first clone_min=2" ],
         [ "clone_min_stop_one", "order first clone then primitive... first clone_min=2" ],
         [ "clone_min_stop_two", "order first clone then primitive... first clone_min=2" ],
     ],
     [
         [ "unfence-startup", "Clean unfencing" ],
         [ "unfence-definition", "Unfencing when the agent changes" ],
         [ "unfence-parameters", "Unfencing when the agent parameters changes" ],
         [ "unfence-device", "Unfencing when a cluster has only fence devices" ],
     ],
     [
         [ "master-0", "Stopped -> Slave" ],
         [ "master-1", "Stopped -> Promote" ],
         [ "master-2", "Stopped -> Promote : notify" ],
         [ "master-3", "Stopped -> Promote : master location" ],
         [ "master-4", "Started -> Promote : master location" ],
         [ "master-5", "Promoted -> Promoted" ],
         [ "master-6", "Promoted -> Promoted (2)" ],
         [ "master-7", "Promoted -> Fenced" ],
         [ "master-8", "Promoted -> Fenced -> Moved" ],
         [ "master-9", "Stopped + Promotable + No quorum" ],
         [ "master-10", "Stopped -> Promotable : notify with monitor" ],
         [ "master-11", "Stopped -> Promote : colocation" ],
         [ "novell-239082", "Demote/Promote ordering" ],
         [ "novell-239087", "Stable master placement" ],
         [ "master-12", "Promotion based solely on rsc_location constraints" ],
         [ "master-13", "Include preferences of colocated resources when placing master" ],
         [ "master-demote", "Ordering when actions depends on demoting a slave resource" ],
         [ "master-ordering", "Prevent resources from starting that need a master" ],
         [ "bug-1765", "Master-Master Colocation (do not stop the slaves)" ],
         [ "master-group", "Promotion of cloned groups" ],
         [ "bug-lf-1852", "Don't shuffle master/slave instances unnecessarily" ],
         [ "master-failed-demote", "Don't retry failed demote actions" ],
         [ "master-failed-demote-2", "Don't retry failed demote actions (notify=false)" ],
         [ "master-depend",
           "Ensure resources that depend on the master don't get allocated until the master does" ],
         [ "master-reattach", "Re-attach to a running master" ],
         [ "master-allow-start", "Don't include master score if it would prevent allocation" ],
         [ "master-colocation",
           "Allow master instances placemaker to be influenced by colocation constraints" ],
         [ "master-pseudo", "Make sure promote/demote pseudo actions are created correctly" ],
         [ "master-role", "Prevent target-role from promoting more than master-max instances" ],
         [ "bug-lf-2358", "Master-Master anti-colocation" ],
         [ "master-promotion-constraint", "Mandatory master colocation constraints" ],
         [ "unmanaged-master", "Ensure role is preserved for unmanaged resources" ],
         [ "master-unmanaged-monitor", "Start the correct monitor operation for unmanaged masters" ],
         [ "master-demote-2", "Demote does not clear past failure" ],
         [ "master-move", "Move master based on failure of colocated group" ],
         [ "master-probed-score", "Observe the promotion score of probed resources" ],
         [ "colocation_constraint_stops_master",
           "cl#5054 - Ensure master is demoted when stopped by colocation constraint" ],
         [ "colocation_constraint_stops_slave",
           "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint" ],
         [ "order_constraint_stops_master",
           "cl#5054 - Ensure master is demoted when stopped by order constraint" ],
         [ "order_constraint_stops_slave",
           "cl#5054 - Ensure slave is not demoted when stopped by order constraint" ],
         [ "master_monitor_restart", "cl#5072 - Ensure master monitor operation will start after promotion" ],
         [ "bug-rh-880249", "Handle replacement of an m/s resource with a primitive" ],
         [ "bug-5143-ms-shuffle", "Prevent master shuffling due to promotion score" ],
         [ "master-demote-block", "Block promotion if demote fails with on-fail=block" ],
         [ "master-dependent-ban",
           "Don't stop instances from being active because a dependent is banned from that host" ],
         [ "master-stop", "Stop instances due to location constraint with role=Started" ],
         [ "master-partially-demoted-group", "Allow partially demoted group to finish demoting" ],
         [ "bug-cl-5213", "Ensure role colocation with -INFINITY is enforced" ],
         [ "bug-cl-5219", "Allow unrelated resources with a common colocation target to remain promoted" ],
         [ "master-asymmetrical-order",
           "Fix the behaviors of multi-state resources with asymmetrical ordering" ],
         [ "master-notify", "Master promotion with notifies" ],
         [ "master-score-startup", "Use permanent master scores without LRM history" ],
         [ "failed-demote-recovery", "Recover resource in slave role after demote fails" ],
         [ "failed-demote-recovery-master", "Recover resource in master role after demote fails" ],
     ],
     [
         [ "history-1", "Correctly parse stateful-1 resource state" ],
     ],
     [
         [ "managed-0", "Managed (reference)" ],
         [ "managed-1", "Not managed - down" ],
         [ "managed-2", "Not managed - up" ],
         [ "bug-5028", "Shutdown should block if anything depends on an unmanaged resource" ],
         [ "bug-5028-detach", "Ensure detach still works" ],
         [ "bug-5028-bottom",
           "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" ],
         [ "unmanaged-stop-1",
           "cl#5155 - Block the stop of resources if any depending resource is unmanaged" ],
         [ "unmanaged-stop-2",
           "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged" ],
         [ "unmanaged-stop-3",
           "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged" ],
         [ "unmanaged-stop-4",
           "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged" ],
         [ "unmanaged-block-restart",
           "Block restart of resources if any dependent resource in a group is unmanaged" ],
     ],
     [
         [ "interleave-0", "Interleave (reference)" ],
         [ "interleave-1", "coloc - not interleaved" ],
         [ "interleave-2", "coloc - interleaved" ],
         [ "interleave-3", "coloc - interleaved (2)" ],
         [ "interleave-pseudo-stop", "Interleaved clone during stonith" ],
         [ "interleave-stop", "Interleaved clone during stop" ],
         [ "interleave-restart", "Interleaved clone during dependency restart" ],
     ],
     [
         [ "notify-0", "Notify reference" ],
         [ "notify-1", "Notify simple" ],
         [ "notify-2", "Notify simple, confirm" ],
         [ "notify-3", "Notify move, confirm" ],
         [ "novell-239079", "Notification priority" ],
         #[ "notify-2", "Notify - 764" ],
         [ "notifs-for-unrunnable", "Don't schedule notifications for an unrunnable action" ],
         [ "route-remote-notify", "Route remote notify actions through correct cluster node" ],
         [ "notify-behind-stopping-remote", "Don't schedule notifications behind stopped remote" ],
     ],
     [
         [ "594", "OSDL #594 - Unrunnable actions scheduled in transition" ],
         [ "662", "OSDL #662 - Two resources start on one node when incarnation_node_max = 1" ],
         [ "696", "OSDL #696 - CRM starts stonith RA without monitor" ],
         [ "726", "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop" ],
         [ "735", "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3" ],
         [ "764", "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1" ],
         [ "797", "OSDL #797 - Assert triggered: task_id_i > max_call_id" ],
         [ "829", "OSDL #829" ],
         [ "994",
           "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted" ],
         [ "994-2", "OSDL #994 - with a dependent resource" ],
         [ "1360", "OSDL #1360 - Clone stickiness" ],
         [ "1484", "OSDL #1484 - on_fail=stop" ],
         [ "1494", "OSDL #1494 - Clone stability" ],
         [ "unrunnable-1", "Unrunnable" ],
         [ "unrunnable-2", "Unrunnable 2" ],
         [ "stonith-0", "Stonith loop - 1" ],
         [ "stonith-1", "Stonith loop - 2" ],
         [ "stonith-2", "Stonith loop - 3" ],
         [ "stonith-3", "Stonith startup" ],
         [ "stonith-4", "Stonith node state" ],
         [ "dc-fence-ordering", "DC needs fencing while other nodes are shutting down" ],
         [ "bug-1572-1", "Recovery of groups depending on master/slave" ],
         [ "bug-1572-2", "Recovery of groups depending on master/slave when the master is never re-promoted" ],
         [ "bug-1685", "Depends-on-master ordering" ],
         [ "bug-1822", "Don't promote partially active groups" ],
         [ "bug-pm-11", "New resource added to a m/s group" ],
         [ "bug-pm-12", "Recover only the failed portion of a cloned group" ],
         [ "bug-n-387749", "Don't shuffle clone instances" ],
         [ "bug-n-385265",
           "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" ],
         [ "bug-n-385265-2",
           "Ensure groups are migrated instead of remaining partially active on the current node" ],
         [ "bug-lf-1920", "Correctly handle probes that find active resources" ],
         [ "bnc-515172", "Location constraint with multiple expressions" ],
         [ "colocate-primitive-with-clone", "Optional colocation with a clone" ],
         [ "use-after-free-merge", "Use-after-free in native_merge_weights" ],
         [ "bug-lf-2551", "STONITH ordering for stop" ],
         [ "bug-lf-2606", "Stonith implies demote" ],
         [ "bug-lf-2474", "Ensure resource op timeout takes precedence over op_defaults" ],
         [ "bug-suse-707150", "Prevent vm-01 from starting due to colocation/ordering" ],
         [ "bug-5014-A-start-B-start", "Verify when A starts B starts using symmetrical=false" ],
         [ "bug-5014-A-stop-B-started",
           "Verify when A stops B does not stop if it has already started using symmetric=false" ],
         [ "bug-5014-A-stopped-B-stopped",
           "Verify when A is stopped and B has not started, B does not start before A using symmetric=false" ],
         [ "bug-5014-CthenAthenB-C-stopped",
           "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts" ],
         [ "bug-5014-CLONE-A-start-B-start",
           "Verify when A starts B starts using clone resources with symmetric=false" ],
         [ "bug-5014-CLONE-A-stop-B-started",
           "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false" ],
         [ "bug-5014-GROUP-A-start-B-start",
           "Verify when A starts B starts when using group resources with symmetric=false" ],
         [ "bug-5014-GROUP-A-stopped-B-started",
           "Verify when A stops B does not stop if it has already started using group resources with symmetric=false" ],
         [ "bug-5014-GROUP-A-stopped-B-stopped",
           "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false" ],
         [ "bug-5014-ordered-set-symmetrical-false",
           "Verify ordered sets work with symmetrical=false" ],
         [ "bug-5014-ordered-set-symmetrical-true",
           "Verify ordered sets work with symmetrical=true" ],
         [ "bug-5007-masterslave_colocation",
           "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources" ],
         [ "bug-5038", "Prevent restart of anonymous clones when clone-max decreases" ],
         [ "bug-5025-1", "Automatically clean up failcount after resource config change with reload" ],
         [ "bug-5025-2", "Make sure clear failcount action isn't set when config does not change" ],
         [ "bug-5025-3", "Automatically clean up failcount after resource config change with restart" ],
         [ "bug-5025-4", "Clear failcount when last failure is a start op and rsc attributes changed" ],
         [ "failcount", "Ensure failcounts are correctly expired" ],
         [ "failcount-block", "Ensure failcounts are not expired when on-fail=block is present" ],
         [ "per-op-failcount", "Ensure per-operation failcount is handled and not passed to fence agent" ],
         [ "on-fail-ignore", "Ensure on-fail=ignore works even beyond migration-threshold" ],
         [ "monitor-onfail-restart", "bug-5058 - Monitor failure with on-fail set to restart" ],
         [ "monitor-onfail-stop", "bug-5058 - Monitor failure wiht on-fail set to stop" ],
         [ "bug-5059", "No need to restart p_stateful1:*" ],
         [ "bug-5069-op-enabled", "Test on-fail=ignore with failure when monitor is enabled" ],
         [ "bug-5069-op-disabled", "Test on-fail-ignore with failure when monitor is disabled" ],
         [ "obsolete-lrm-resource", "cl#5115 - Do not use obsolete lrm_resource sections" ],
         [ "expire-non-blocked-failure",
           "Ignore failure-timeout only if the failed operation has on-fail=block" ],
         [ "asymmetrical-order-move", "Respect asymmetrical ordering when trying to move resources" ],
         [ "asymmetrical-order-restart", "Respect asymmetrical ordering when restarting dependent resource" ],
         [ "start-then-stop-with-unfence", "Avoid graph loop with start-then-stop constraint plus unfencing" ],
         [ "order-expired-failure", "Order failcount cleanup after remote fencing" ],
         [ "year-2038", "Check handling of timestamps beyond 2038-01-19 03:14:08 UTC" ],
     
         [ "ignore_stonith_rsc_order1",
           "cl#5056- Ignore order constraint between stonith and non-stonith rsc" ],
         [ "ignore_stonith_rsc_order2",
           "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith" ],
         [ "ignore_stonith_rsc_order3", "cl#5056- Ignore order constraint, stonith clone and mixed group" ],
         [ "ignore_stonith_rsc_order4",
           "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group" ],
         [ "honor_stonith_rsc_order1",
           "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)" ],
         [ "honor_stonith_rsc_order2",
           "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)" ],
         [ "honor_stonith_rsc_order3",
           "cl#5056- Honor order constraint, stonith clones with nested pure stonith group" ],
         [ "honor_stonith_rsc_order4",
           "cl#5056- Honor order constraint, between two native stonith rscs" ],
         [ "multiply-active-stonith", "Multiply active stonith" ],
         [ "probe-timeout", "cl#5099 - Default probe timeout" ],
         [ "order-first-probes",
           "cl#5301 - respect order constraints when relevant resources are being probed" ],
         [ "concurrent-fencing", "Allow performing fencing operations in parallel" ],
     ],
     [
         [ "systemhealth1", "System Health ()               #1" ],
         [ "systemhealth2", "System Health ()               #2" ],
         [ "systemhealth3", "System Health ()               #3" ],
         [ "systemhealthn1", "System Health (None)           #1" ],
         [ "systemhealthn2", "System Health (None)           #2" ],
         [ "systemhealthn3", "System Health (None)           #3" ],
         [ "systemhealthm1", "System Health (Migrate On Red) #1" ],
         [ "systemhealthm2", "System Health (Migrate On Red) #2" ],
         [ "systemhealthm3", "System Health (Migrate On Red) #3" ],
         [ "systemhealtho1", "System Health (Only Green)     #1" ],
         [ "systemhealtho2", "System Health (Only Green)     #2" ],
         [ "systemhealtho3", "System Health (Only Green)     #3" ],
         [ "systemhealthp1", "System Health (Progessive)     #1" ],
         [ "systemhealthp2", "System Health (Progessive)     #2" ],
         [ "systemhealthp3", "System Health (Progessive)     #3" ],
     ],
     [
         [ "utilization", "Placement Strategy - utilization" ],
         [ "minimal", "Placement Strategy - minimal" ],
         [ "balanced", "Placement Strategy - balanced" ],
     ],
     [
         [ "placement-stickiness", "Optimized Placement Strategy - stickiness" ],
         [ "placement-priority", "Optimized Placement Strategy - priority" ],
         [ "placement-location", "Optimized Placement Strategy - location" ],
         [ "placement-capacity", "Optimized Placement Strategy - capacity" ],
     ],
     [
         [ "utilization-order1", "Utilization Order - Simple" ],
         [ "utilization-order2", "Utilization Order - Complex" ],
         [ "utilization-order3", "Utilization Order - Migrate" ],
         [ "utilization-order4", "Utilization Order - Live Migration (bnc#695440)" ],
         [ "utilization-shuffle",
           "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3" ],
         [ "load-stopped-loop", "Avoid transition loop due to load_stopped (cl#5044)" ],
         [ "load-stopped-loop-2",
           "cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering" ],
     ],
     [
         [ "colocated-utilization-primitive-1", "Colocated Utilization - Primitive" ],
         [ "colocated-utilization-primitive-2", "Colocated Utilization - Choose the most capable node" ],
         [ "colocated-utilization-group", "Colocated Utilization - Group" ],
         [ "colocated-utilization-clone", "Colocated Utilization - Clone" ],
         [ "utilization-check-allowed-nodes",
           "Only check the capacities of the nodes that can run the resource" ],
     ],
     [
         [ "reprobe-target_rc", "Ensure correct target_rc for reprobe of inactive resources" ],
         [ "node-maintenance-1", "cl#5128 - Node maintenance" ],
         [ "node-maintenance-2", "cl#5128 - Node maintenance (coming out of maintenance mode)" ],
         [ "shutdown-maintenance-node", "Do not fence a maintenance node if it shuts down cleanly" ],
         [ "rsc-maintenance", "Per-resource maintenance" ],
     ],
     [
         [ "not-installed-agent", "The resource agent is missing" ],
         [ "not-installed-tools", "Something the resource agent needs is missing" ],
     ],
     [
         [ "stopped-monitor-00", "Stopped Monitor - initial start" ],
         [ "stopped-monitor-01", "Stopped Monitor - failed started" ],
         [ "stopped-monitor-02", "Stopped Monitor - started multi-up" ],
         [ "stopped-monitor-03", "Stopped Monitor - stop started" ],
         [ "stopped-monitor-04", "Stopped Monitor - failed stop" ],
         [ "stopped-monitor-05", "Stopped Monitor - start unmanaged" ],
         [ "stopped-monitor-06", "Stopped Monitor - unmanaged multi-up" ],
         [ "stopped-monitor-07", "Stopped Monitor - start unmanaged multi-up" ],
         [ "stopped-monitor-08", "Stopped Monitor - migrate" ],
         [ "stopped-monitor-09", "Stopped Monitor - unmanage started" ],
         [ "stopped-monitor-10", "Stopped Monitor - unmanaged started multi-up" ],
         [ "stopped-monitor-11", "Stopped Monitor - stop unmanaged started" ],
         [ "stopped-monitor-12", "Stopped Monitor - unmanaged started multi-up (target-role=Stopped)" ],
         [ "stopped-monitor-20", "Stopped Monitor - initial stop" ],
         [ "stopped-monitor-21", "Stopped Monitor - stopped single-up" ],
         [ "stopped-monitor-22", "Stopped Monitor - stopped multi-up" ],
         [ "stopped-monitor-23", "Stopped Monitor - start stopped" ],
         [ "stopped-monitor-24", "Stopped Monitor - unmanage stopped" ],
         [ "stopped-monitor-25", "Stopped Monitor - unmanaged stopped multi-up" ],
         [ "stopped-monitor-26", "Stopped Monitor - start unmanaged stopped" ],
         [ "stopped-monitor-27", "Stopped Monitor - unmanaged stopped multi-up (target-role=Started)" ],
         [ "stopped-monitor-30", "Stopped Monitor - new node started" ],
         [ "stopped-monitor-31", "Stopped Monitor - new node stopped" ],
     ],
     [
         # This is a combo test to check:
         # - probe timeout defaults to the minimum-interval monitor's
         # - duplicate recurring operations are ignored
         # - if timeout spec is bad, the default timeout is used
         # - failure is blocked with on-fail=block even if ISO8601 interval is specified
         # - started/stopped role monitors are started/stopped on right nodes
         [ "intervals", "Recurring monitor interval handling" ],
     ],
     [
         [ "ticket-primitive-1", "Ticket - Primitive (loss-policy=stop, initial)" ],
         [ "ticket-primitive-2", "Ticket - Primitive (loss-policy=stop, granted)" ],
         [ "ticket-primitive-3", "Ticket - Primitive (loss-policy-stop, revoked)" ],
         [ "ticket-primitive-4", "Ticket - Primitive (loss-policy=demote, initial)" ],
         [ "ticket-primitive-5", "Ticket - Primitive (loss-policy=demote, granted)" ],
         [ "ticket-primitive-6", "Ticket - Primitive (loss-policy=demote, revoked)" ],
         [ "ticket-primitive-7", "Ticket - Primitive (loss-policy=fence, initial)" ],
         [ "ticket-primitive-8", "Ticket - Primitive (loss-policy=fence, granted)" ],
         [ "ticket-primitive-9", "Ticket - Primitive (loss-policy=fence, revoked)" ],
         [ "ticket-primitive-10", "Ticket - Primitive (loss-policy=freeze, initial)" ],
         [ "ticket-primitive-11", "Ticket - Primitive (loss-policy=freeze, granted)" ],
         [ "ticket-primitive-12", "Ticket - Primitive (loss-policy=freeze, revoked)" ],
         [ "ticket-primitive-13", "Ticket - Primitive (loss-policy=stop, standby, granted)" ],
         [ "ticket-primitive-14", "Ticket - Primitive (loss-policy=stop, granted, standby)" ],
         [ "ticket-primitive-15", "Ticket - Primitive (loss-policy=stop, standby, revoked)" ],
         [ "ticket-primitive-16", "Ticket - Primitive (loss-policy=demote, standby, granted)" ],
         [ "ticket-primitive-17", "Ticket - Primitive (loss-policy=demote, granted, standby)" ],
         [ "ticket-primitive-18", "Ticket - Primitive (loss-policy=demote, standby, revoked)" ],
         [ "ticket-primitive-19", "Ticket - Primitive (loss-policy=fence, standby, granted)" ],
         [ "ticket-primitive-20", "Ticket - Primitive (loss-policy=fence, granted, standby)" ],
         [ "ticket-primitive-21", "Ticket - Primitive (loss-policy=fence, standby, revoked)" ],
         [ "ticket-primitive-22", "Ticket - Primitive (loss-policy=freeze, standby, granted)" ],
         [ "ticket-primitive-23", "Ticket - Primitive (loss-policy=freeze, granted, standby)" ],
         [ "ticket-primitive-24", "Ticket - Primitive (loss-policy=freeze, standby, revoked)" ],
     ],
     [
         [ "ticket-group-1", "Ticket - Group (loss-policy=stop, initial)" ],
         [ "ticket-group-2", "Ticket - Group (loss-policy=stop, granted)" ],
         [ "ticket-group-3", "Ticket - Group (loss-policy-stop, revoked)" ],
         [ "ticket-group-4", "Ticket - Group (loss-policy=demote, initial)" ],
         [ "ticket-group-5", "Ticket - Group (loss-policy=demote, granted)" ],
         [ "ticket-group-6", "Ticket - Group (loss-policy=demote, revoked)" ],
         [ "ticket-group-7", "Ticket - Group (loss-policy=fence, initial)" ],
         [ "ticket-group-8", "Ticket - Group (loss-policy=fence, granted)" ],
         [ "ticket-group-9", "Ticket - Group (loss-policy=fence, revoked)" ],
         [ "ticket-group-10", "Ticket - Group (loss-policy=freeze, initial)" ],
         [ "ticket-group-11", "Ticket - Group (loss-policy=freeze, granted)" ],
         [ "ticket-group-12", "Ticket - Group (loss-policy=freeze, revoked)" ],
         [ "ticket-group-13", "Ticket - Group (loss-policy=stop, standby, granted)" ],
         [ "ticket-group-14", "Ticket - Group (loss-policy=stop, granted, standby)" ],
         [ "ticket-group-15", "Ticket - Group (loss-policy=stop, standby, revoked)" ],
         [ "ticket-group-16", "Ticket - Group (loss-policy=demote, standby, granted)" ],
         [ "ticket-group-17", "Ticket - Group (loss-policy=demote, granted, standby)" ],
         [ "ticket-group-18", "Ticket - Group (loss-policy=demote, standby, revoked)" ],
         [ "ticket-group-19", "Ticket - Group (loss-policy=fence, standby, granted)" ],
         [ "ticket-group-20", "Ticket - Group (loss-policy=fence, granted, standby)" ],
         [ "ticket-group-21", "Ticket - Group (loss-policy=fence, standby, revoked)" ],
         [ "ticket-group-22", "Ticket - Group (loss-policy=freeze, standby, granted)" ],
         [ "ticket-group-23", "Ticket - Group (loss-policy=freeze, granted, standby)" ],
         [ "ticket-group-24", "Ticket - Group (loss-policy=freeze, standby, revoked)" ],
     ],
     [
         [ "ticket-clone-1", "Ticket - Clone (loss-policy=stop, initial)" ],
         [ "ticket-clone-2", "Ticket - Clone (loss-policy=stop, granted)" ],
         [ "ticket-clone-3", "Ticket - Clone (loss-policy-stop, revoked)" ],
         [ "ticket-clone-4", "Ticket - Clone (loss-policy=demote, initial)" ],
         [ "ticket-clone-5", "Ticket - Clone (loss-policy=demote, granted)" ],
         [ "ticket-clone-6", "Ticket - Clone (loss-policy=demote, revoked)" ],
         [ "ticket-clone-7", "Ticket - Clone (loss-policy=fence, initial)" ],
         [ "ticket-clone-8", "Ticket - Clone (loss-policy=fence, granted)" ],
         [ "ticket-clone-9", "Ticket - Clone (loss-policy=fence, revoked)" ],
         [ "ticket-clone-10", "Ticket - Clone (loss-policy=freeze, initial)" ],
         [ "ticket-clone-11", "Ticket - Clone (loss-policy=freeze, granted)" ],
         [ "ticket-clone-12", "Ticket - Clone (loss-policy=freeze, revoked)" ],
         [ "ticket-clone-13", "Ticket - Clone (loss-policy=stop, standby, granted)" ],
         [ "ticket-clone-14", "Ticket - Clone (loss-policy=stop, granted, standby)" ],
         [ "ticket-clone-15", "Ticket - Clone (loss-policy=stop, standby, revoked)" ],
         [ "ticket-clone-16", "Ticket - Clone (loss-policy=demote, standby, granted)" ],
         [ "ticket-clone-17", "Ticket - Clone (loss-policy=demote, granted, standby)" ],
         [ "ticket-clone-18", "Ticket - Clone (loss-policy=demote, standby, revoked)" ],
         [ "ticket-clone-19", "Ticket - Clone (loss-policy=fence, standby, granted)" ],
         [ "ticket-clone-20", "Ticket - Clone (loss-policy=fence, granted, standby)" ],
         [ "ticket-clone-21", "Ticket - Clone (loss-policy=fence, standby, revoked)" ],
         [ "ticket-clone-22", "Ticket - Clone (loss-policy=freeze, standby, granted)" ],
         [ "ticket-clone-23", "Ticket - Clone (loss-policy=freeze, granted, standby)" ],
         [ "ticket-clone-24", "Ticket - Clone (loss-policy=freeze, standby, revoked)" ],
     ],
     [
         [ "ticket-master-1", "Ticket - Master (loss-policy=stop, initial)" ],
         [ "ticket-master-2", "Ticket - Master (loss-policy=stop, granted)" ],
         [ "ticket-master-3", "Ticket - Master (loss-policy-stop, revoked)" ],
         [ "ticket-master-4", "Ticket - Master (loss-policy=demote, initial)" ],
         [ "ticket-master-5", "Ticket - Master (loss-policy=demote, granted)" ],
         [ "ticket-master-6", "Ticket - Master (loss-policy=demote, revoked)" ],
         [ "ticket-master-7", "Ticket - Master (loss-policy=fence, initial)" ],
         [ "ticket-master-8", "Ticket - Master (loss-policy=fence, granted)" ],
         [ "ticket-master-9", "Ticket - Master (loss-policy=fence, revoked)" ],
         [ "ticket-master-10", "Ticket - Master (loss-policy=freeze, initial)" ],
         [ "ticket-master-11", "Ticket - Master (loss-policy=freeze, granted)" ],
         [ "ticket-master-12", "Ticket - Master (loss-policy=freeze, revoked)" ],
         [ "ticket-master-13", "Ticket - Master (loss-policy=stop, standby, granted)" ],
         [ "ticket-master-14", "Ticket - Master (loss-policy=stop, granted, standby)" ],
         [ "ticket-master-15", "Ticket - Master (loss-policy=stop, standby, revoked)" ],
         [ "ticket-master-16", "Ticket - Master (loss-policy=demote, standby, granted)" ],
         [ "ticket-master-17", "Ticket - Master (loss-policy=demote, granted, standby)" ],
         [ "ticket-master-18", "Ticket - Master (loss-policy=demote, standby, revoked)" ],
         [ "ticket-master-19", "Ticket - Master (loss-policy=fence, standby, granted)" ],
         [ "ticket-master-20", "Ticket - Master (loss-policy=fence, granted, standby)" ],
         [ "ticket-master-21", "Ticket - Master (loss-policy=fence, standby, revoked)" ],
         [ "ticket-master-22", "Ticket - Master (loss-policy=freeze, standby, granted)" ],
         [ "ticket-master-23", "Ticket - Master (loss-policy=freeze, granted, standby)" ],
         [ "ticket-master-24", "Ticket - Master (loss-policy=freeze, standby, revoked)" ],
     ],
     [
         [ "ticket-rsc-sets-1", "Ticket - Resource sets (1 ticket, initial)" ],
         [ "ticket-rsc-sets-2", "Ticket - Resource sets (1 ticket, granted)" ],
         [ "ticket-rsc-sets-3", "Ticket - Resource sets (1 ticket, revoked)" ],
         [ "ticket-rsc-sets-4", "Ticket - Resource sets (2 tickets, initial)" ],
         [ "ticket-rsc-sets-5", "Ticket - Resource sets (2 tickets, granted)" ],
         [ "ticket-rsc-sets-6", "Ticket - Resource sets (2 tickets, granted)" ],
         [ "ticket-rsc-sets-7", "Ticket - Resource sets (2 tickets, revoked)" ],
         [ "ticket-rsc-sets-8", "Ticket - Resource sets (1 ticket, standby, granted)" ],
         [ "ticket-rsc-sets-9", "Ticket - Resource sets (1 ticket, granted, standby)" ],
         [ "ticket-rsc-sets-10", "Ticket - Resource sets (1 ticket, standby, revoked)" ],
         [ "ticket-rsc-sets-11", "Ticket - Resource sets (2 tickets, standby, granted)" ],
         [ "ticket-rsc-sets-12", "Ticket - Resource sets (2 tickets, standby, granted)" ],
         [ "ticket-rsc-sets-13", "Ticket - Resource sets (2 tickets, granted, standby)" ],
         [ "ticket-rsc-sets-14", "Ticket - Resource sets (2 tickets, standby, revoked)" ],
         [ "cluster-specific-params", "Cluster-specific instance attributes based on rules" ],
         [ "site-specific-params", "Site-specific instance attributes based on rules" ],
     ],
     [
         [ "template-1", "Template - 1" ],
         [ "template-2", "Template - 2" ],
         [ "template-3", "Template - 3 (merge operations)" ],
         [ "template-coloc-1", "Template - Colocation 1" ],
         [ "template-coloc-2", "Template - Colocation 2" ],
         [ "template-coloc-3", "Template - Colocation 3" ],
         [ "template-order-1", "Template - Order 1" ],
         [ "template-order-2", "Template - Order 2" ],
         [ "template-order-3", "Template - Order 3" ],
         [ "template-ticket", "Template - Ticket" ],
         [ "template-rsc-sets-1", "Template - Resource Sets 1" ],
         [ "template-rsc-sets-2", "Template - Resource Sets 2" ],
         [ "template-rsc-sets-3", "Template - Resource Sets 3" ],
         [ "template-rsc-sets-4", "Template - Resource Sets 4" ],
         [ "template-clone-primitive", "Cloned primitive from template" ],
         [ "template-clone-group", "Cloned group from template" ],
         [ "location-sets-templates", "Resource sets and templates - Location" ],
         [ "tags-coloc-order-1", "Tags - Colocation and Order (Simple)" ],
         [ "tags-coloc-order-2", "Tags - Colocation and Order (Resource Sets with Templates)" ],
         [ "tags-location", "Tags - Location" ],
         [ "tags-ticket", "Tags - Ticket" ],
     ],
     [
         [ "container-1", "Container - initial" ],
         [ "container-2", "Container - monitor failed" ],
         [ "container-3", "Container - stop failed" ],
         [ "container-4", "Container - reached migration-threshold" ],
         [ "container-group-1", "Container in group - initial" ],
         [ "container-group-2", "Container in group - monitor failed" ],
         [ "container-group-3", "Container in group - stop failed" ],
         [ "container-group-4", "Container in group - reached migration-threshold" ],
         [ "container-is-remote-node", "Place resource within container when container is remote-node" ],
         [ "bug-rh-1097457", "Kill user defined container/contents ordering" ],
         [ "bug-cl-5247", "Graph loop when recovering m/s resource in a container" ],
         [ "bundle-order-startup", "Bundle startup ordering" ],
         [ "bundle-order-partial-start",
-          "Bundle startup ordering when some dependancies are already running" ],
+          "Bundle startup ordering when some dependencies are already running" ],
         [ "bundle-order-partial-start-2",
-          "Bundle startup ordering when some dependancies and the container are already running" ],
+          "Bundle startup ordering when some dependencies and the container are already running" ],
         [ "bundle-order-stop", "Bundle stop ordering" ],
-        [ "bundle-order-partial-stop", "Bundle startup ordering when some dependancies are already stopped" ],
+        [ "bundle-order-partial-stop", "Bundle startup ordering when some dependencies are already stopped" ],
         [ "bundle-order-stop-on-remote", "Stop nested resource after bringing up the connection" ],
         [ "bundle-order-startup-clone", "Prevent startup because bundle isn't promoted" ],
         [ "bundle-order-startup-clone-2", "Bundle startup with clones" ],
         [ "bundle-order-stop-clone", "Stop bundle because clone is stopping" ],
         [ "bundle-nested-colocation", "Colocation of nested connection resources" ],
         [ "bundle-order-fencing",
           "Order pseudo bundle fencing after parent node fencing if both are happening" ],
         [ "bundle-probe-order-1", "order 1" ],
         [ "bundle-probe-order-2", "order 2" ],
         [ "bundle-probe-order-3", "order 3" ],
         [ "bundle-probe-remotes", "Ensure remotes get probed too" ],
         [ "bundle-replicas-change", "Change bundle from 1 replica to multiple" ],
         [ "nested-remote-recovery", "Recover bundle's container hosted on remote node" ],
     ],
     [
         [ "whitebox-fail1", "Fail whitebox container rsc" ],
         [ "whitebox-fail2", "Fail cluster connection to guest node" ],
         [ "whitebox-fail3", "Failed containers should not run nested on remote nodes" ],
         [ "whitebox-start", "Start whitebox container with resources assigned to it" ],
         [ "whitebox-stop", "Stop whitebox container with resources assigned to it" ],
         [ "whitebox-move", "Move whitebox container with resources assigned to it" ],
         [ "whitebox-asymmetric", "Verify connection rsc opts-in based on container resource" ],
         [ "whitebox-ms-ordering", "Verify promote/demote can not occur before connection is established" ],
         [ "whitebox-ms-ordering-move", "Stop/Start cycle within a moving container" ],
         [ "whitebox-orphaned", "Properly shutdown orphaned whitebox container" ],
         [ "whitebox-orphan-ms", "Properly tear down orphan ms resources on remote-nodes" ],
         [ "whitebox-unexpectedly-running", "Recover container nodes the cluster did not start" ],
         [ "whitebox-migrate1", "Migrate both container and connection resource" ],
         [ "whitebox-imply-stop-on-fence",
           "imply stop action on container node rsc when host node is fenced" ],
         [ "whitebox-nested-group", "Verify guest remote-node works nested in a group" ],
         [ "guest-node-host-dies", "Verify guest node is recovered if host goes away" ],
         [ "guest-node-cleanup", "Order guest node connection recovery after container probe" ],
     ],
     [
         [ "remote-startup-probes", "Baremetal remote-node startup probes" ],
         [ "remote-startup", "Startup a newly discovered remote-nodes with no status" ],
         [ "remote-fence-unclean", "Fence unclean baremetal remote-node" ],
         [ "remote-fence-unclean2",
           "Fence baremetal remote-node after cluster node fails and connection can not be recovered" ],
         [ "remote-fence-unclean-3", "Probe failed remote nodes (triggers fencing)" ],
         [ "remote-move", "Move remote-node connection resource" ],
         [ "remote-disable", "Disable a baremetal remote-node" ],
         [ "remote-probe-disable", "Probe then stop a baremetal remote-node" ],
         [ "remote-orphaned", "Properly shutdown orphaned connection resource" ],
         [ "remote-orphaned2",
           "verify we can handle orphaned remote connections with active resources on the remote" ],
         [ "remote-recover", "Recover connection resource after cluster-node fails" ],
         [ "remote-stale-node-entry",
           "Make sure we properly handle leftover remote-node entries in the node section" ],
         [ "remote-partial-migrate",
           "Make sure partial migrations are handled before ops on the remote node" ],
         [ "remote-partial-migrate2",
           "Make sure partial migration target is prefered for remote connection" ],
         [ "remote-recover-fail", "Make sure start failure causes fencing if rsc are active on remote" ],
         [ "remote-start-fail",
           "Make sure a start failure does not result in fencing if no active resources are on remote" ],
         [ "remote-unclean2",
           "Make monitor failure always results in fencing, even if no rsc are active on remote" ],
         [ "remote-fence-before-reconnect", "Fence before clearing recurring monitor failure" ],
         [ "remote-recovery", "Recover remote connections before attempting demotion" ],
         [ "remote-recover-connection", "Optimistically recovery of only the connection" ],
         [ "remote-recover-all", "Fencing when the connection has no home" ],
         [ "remote-recover-no-resources", "Fencing when the connection has no home and no active resources" ],
         [ "remote-recover-unknown",
           "Fencing when the connection has no home and the remote has no operation history" ],
         [ "remote-reconnect-delay", "Waiting for remote reconnect interval to expire" ],
         [ "remote-connection-unrecoverable",
           "Remote connection host must be fenced, with connection unrecoverable" ],
     ],
     [
         [ "resource-discovery", "Exercises resource-discovery location constraint option" ],
         [ "rsc-discovery-per-node", "Disable resource discovery per node" ],
     ],
     
     # @TODO: If pacemaker implements versioned attributes, uncomment these tests
     #[
     #    [ "versioned-resources", "Start resources with #ra-version rules" ],
     #    [ "restart-versioned", "Restart resources on #ra-version change" ],
     #    [ "reload-versioned", "Reload resources on #ra-version change" ],
     #],
     #[
     #    [ "versioned-operations-1", "Use #ra-version to configure operations of native resources" ],
     #    [ "versioned-operations-2", "Use #ra-version to configure operations of stonith resources" ],
     #    [ "versioned-operations-3", "Use #ra-version to configure operations of master/slave resources" ],
     #    [ "versioned-operations-4", "Use #ra-version to configure operations of groups of the resources" ],
     #],
 ]
 
 
 # Constants substituted in the build process
 class BuildVars(object):
     """ Locations that the build process fills in when this script is generated """
 
     CRM_SCHEMA_DIRECTORY = "@CRM_SCHEMA_DIRECTORY@"
     BUILDDIR = "@abs_top_builddir@"
     SBINDIR = "@sbindir@"
 
 
 # These values must be kept in sync with include/crm/crm.h
 class CrmExit(object):
     """ Exit statuses used by this script (mirroring include/crm/crm.h) """
 
     OK = 0
     ERROR = 1
     NOT_INSTALLED = 5
     NOINPUT = 66
 
 
 def is_executable(path):
     """ Check whether the file at a given path is executable by its owner
 
     Only the owner-execute permission bit (S_IXUSR) is examined.
 
     Returns True if that bit is set, and False if it is not or if the path
     cannot be examined at all (for example, because it does not exist).
     """
 
     try:
         mode = os.stat(path).st_mode
     except OSError:
         return False
 
     # Always hand back a real boolean rather than the raw masked mode bits
     return bool(mode & stat.S_IXUSR)
 
 
 def diff(file1, file2, **kwargs):
     """ Run the external diff command on two files and return its exit status
 
     Any keyword arguments are handed to subprocess.call() unchanged.
     """
 
     command = [ "diff", "-u", "-N", "--ignore-all-space", "--ignore-blank-lines" ]
     command.extend([ file1, file2 ])
     return subprocess.call(command, **kwargs)
 
 
 def sort_file(filename):
     """ Rewrite a file in place with its lines in sorted order """
 
     with io.open(filename, "rt") as infile:
         contents = infile.readlines()
 
     contents.sort()
 
     with io.open(filename, "wt") as outfile:
         for entry in contents:
             outfile.write(entry)
 
 
 def remove_files(filenames):
     """ Delete each named file, silently skipping any that cannot be removed """
 
     for path in filenames:
         try:
             os.unlink(path)
         except OSError:
             continue
 
 
 def normalize(filename):
     """ Strip details from a file that should not affect comparisons
 
     The crm_feature_set and batch-limit attributes (along with their values)
     are deleted from every line. Nothing is done if the file does not exist.
     """
 
     # Compile the patterns on first use only, caching them on the function
     try:
         patterns = normalize.patterns
     except AttributeError:
         patterns = normalize.patterns = [
             re.compile(r'crm_feature_set="[^"]*"'),
             re.compile(r'batch-limit="[0-9]*"')
         ]
 
     if not os.path.isfile(filename):
         return
 
     with io.open(filename, "rt") as infile:
         original = infile.readlines()
 
     with io.open(filename, "wt") as outfile:
         for text in original:
             for regex in patterns:
                 text = regex.sub("", text)
             outfile.write(text)
 
 
 def cat(filename, dest=sys.stdout):
     """ Write the text contents of a file to an open file object (standard output by default) """
 
     with io.open(filename, "rt") as source:
         shutil.copyfileobj(source, dest)
 
 
 class CtsScheduler(object):
     """ Regression tests for Pacemaker's scheduler """
 
     def _parse_args(self, argv):
         """ Parse command-line arguments
 
         argv is the full argument list, including the program name as its
         first element.
 
         Sets self.args to the parsed options, and self.single_test_args to the
         list of arguments that follow "--run TEST" (empty if there are none).
         """
 
         parser = argparse.ArgumentParser(description=DESC)
 
         parser.add_argument('-V', '--verbose', action='count',
                             help='Display any differences from expected output')
 
         parser.add_argument('--run', metavar='TEST',
                             help=('Run only single specified test (any further '
                                   'arguments will be passed to crm_simulate)'))
 
         parser.add_argument('--update', action='store_true',
                             help='Update expected results with actual results')
 
         parser.add_argument('-b', '--binary', metavar='PATH',
                             help='Specify path to crm_simulate')
 
         parser.add_argument('-i', '--io-dir', metavar='PATH',
                             help='Specify path to regression test data directory')
 
         parser.add_argument('-o', '--out-dir', metavar='PATH',
                             help='Specify where intermediate and output files should go')
 
         parser.add_argument('-v', '--valgrind', action='store_true',
                             help='Run all commands under valgrind')
 
         parser.add_argument('--valgrind-dhat', action='store_true',
                             help='Run all commands under valgrind with heap analyzer')
 
         parser.add_argument('--valgrind-skip-output', action='store_true',
                             help='If running under valgrind, do not display output')
 
         parser.add_argument('--testcmd-options', metavar='OPTIONS', default='',
                             help='Additional options for command under test')
 
         # argparse can't handle "everything after --run TEST", so grab that
         self.single_test_args = []
         # narg counts the arguments examined so far, so inside the loop it is
         # always one more than the index of the argument being looked at
         narg = 0
         for arg in argv:
             narg = narg + 1
             if arg == '--run':
                 # Split argv into the part argparse should see and the rest
-                (argv, self.single_test_args) = (argv[:narg+2], argv[narg+2:])
+                (argv, self.single_test_args) = (argv[:narg+1], argv[narg+1:])
                 break
 
         # Skip the program name when handing the arguments to argparse
         self.args = parser.parse_args(argv[1:])
 
     def _error(self, s):
         print("      * ERROR:   %s" % s)
 
     def _failed(self, s):
         print("      * FAILED:  %s" % s)
 
     def _get_valgrind_cmd(self):
         """ Return command arguments needed (or not) to run valgrind """
 
         if self.args.valgrind:
             os.environ['G_SLICE'] = "always-malloc"
             return [
                 "valgrind",
                 "-q",
                 "--gen-suppressions=all",
                 "--time-stamp=yes",
                 "--trace-children=no",
                 "--show-reachable=no",
                 "--leak-check=full",
                 "--num-callers=20",
                 "--suppressions=%s/valgrind-pcmk.suppressions" % (self.test_home)
             ]
 
         if self.args.valgrind_dhat:
             os.environ['G_SLICE'] = "always-malloc"
             return [
                 "valgrind",
                 "--tool=exp-dhat",
                 "--time-stamp=yes",
                 "--trace-children=no",
                 "--show-top-n=100",
                 "--num-callers=4"
             ]
 
         return []
 
     def _get_simulator_cmd(self):
         """ Work out the command prefix for running the simulation binary
 
         Unless a binary was given on the command line, the one in the build
         tree is preferred, falling back to the installed one. The program
         exits with CrmExit.NOT_INSTALLED if the chosen binary is not
         executable. Any --testcmd-options are split shell-style and appended.
         """
 
         if self.args.binary is None:
             candidate = BuildVars.BUILDDIR + "/tools/crm_simulate"
             if not is_executable(candidate):
                 candidate = BuildVars.SBINDIR + "/crm_simulate"
             self.args.binary = candidate
 
         if is_executable(self.args.binary):
             return [ self.args.binary ] + shlex.split(self.args.testcmd_options)
 
         # @TODO it would be more pythonic to raise an exception
         self._error("Test binary " + self.args.binary + " not found")
         sys.exit(CrmExit.NOT_INSTALLED)
 
     def set_schema_env(self):
         """ Ensure schema directory environment variable is set, if possible """
 
         try:
             return os.environ['PCMK_schema_directory']
         except KeyError:
             for d in [ os.path.join(BuildVars.BUILDDIR, "xml"),
                        BuildVars.CRM_SCHEMA_DIRECTORY ]:
                 if os.path.isdir(d):
                     os.environ['PCMK_schema_directory'] = d
                     return d
             return None
 
     def __init__(self, argv=sys.argv):
         """ Set up a test run from command-line arguments
 
         argv is the full argument list, with the path of this executable as
         its first element.
         """
 
         self._parse_args(argv)
 
         # Directory containing this executable
         self.test_home = os.path.dirname(os.path.realpath(argv[0]))
 
         # Directory containing test data
         if self.args.io_dir is None:
             self.args.io_dir = os.path.join(self.test_home, "scheduler")
 
         # Directory for generated files, and for the collected diffs of failed tests
         if self.args.out_dir is None:
             self.args.out_dir = self.args.io_dir
             failed_dir = self.test_home
         else:
             failed_dir = self.args.out_dir
         self.failed_filename = os.path.join(failed_dir, ".regression.failed.diff")
         os.environ['CIB_shadow_dir'] = self.args.out_dir
         self.failed_file = None
 
         # In single test mode, accept either a test base name or the file name of a test input
         try:
             test_file = os.path.basename(self.args.run)
             self.args.run = os.path.splitext(test_file)[0]
         except (AttributeError, TypeError):
             pass # --run was not specified
 
         self.set_schema_env()
 
         # Command prefixes for running the simulation (the valgrind one may be empty)
         self.valgrind_args = self._get_valgrind_cmd()
         self.simulate_args = self._get_simulator_cmd()
 
         # Running totals
         self.num_failed = 0
         self.num_tests = 0
 
     def _compare_files(self, filename1, filename2):
         """ Add any differences between two files to the failed results
 
         Returns True if the files differ, in which case their unified diff
         followed by a blank line is appended to self.failed_file (which must
         be open for writing). Returns False if the files do not differ.
         """
 
         with io.open(os.devnull, "wt") as dev_null:
             if diff(filename1, filename2, stdout=dev_null) == 0:
                 return False
 
             # diff writes straight to the underlying file descriptor, so anything held in Python's
             # buffer has to be flushed before and after it to keep the results in order
             self.failed_file.flush()
             diff(filename1, filename2, stdout=self.failed_file, stderr=dev_null)
             self.failed_file.write("\n")
             self.failed_file.flush()
 
         return True
 
     def run_one(self, test_name, test_desc, test_args=[]):
         """ Run one scheduler test
 
         test_name is the base name shared by the test's input and expected
         output files, test_desc is a description to display, and test_args is
         a list of extra arguments for the command under test.
 
         The simulation is run twice (once for the summary, and once for the
         graph, dot file and scores), and the results are compared against the
         expected files in the I/O directory. Differences are appended to
         self.failed_file, which must already be open.
 
         Increments self.num_tests, and self.num_failed for counted failures.
         Returns CrmExit.OK, CrmExit.ERROR, or CrmExit.NOINPUT.
         """
 
         print("  Test %-25s %s" % ((test_name + ":"), test_desc))
 
         # Failures are accumulated here so that all checks run before returning
         did_fail = False
         self.num_tests = self.num_tests + 1
 
         # Test inputs
         input_filename = "%s/%s.xml" % (self.args.io_dir, test_name)
         expected_filename = "%s/%s.exp" % (self.args.io_dir, test_name)
         dot_expected_filename = "%s/%s.dot" % (self.args.io_dir, test_name)
         scores_filename = "%s/%s.scores" % (self.args.io_dir, test_name)
         summary_filename = "%s/%s.summary" % (self.args.io_dir, test_name)
         stderr_expected_filename = "%s/%s.stderr" % (self.args.io_dir, test_name)
 
         # (Intermediate) test outputs
         output_filename = "%s/%s.out" % (self.args.out_dir, test_name)
         dot_output_filename = "%s/%s.pe.dot" % (self.args.out_dir, test_name)
         score_output_filename = "%s/%s.scores.pe" % (self.args.out_dir, test_name)
         summary_output_filename = "%s/%s.summary.pe" % (self.args.out_dir, test_name)
         stderr_output_filename = "%s/%s.stderr.pe" % (self.args.out_dir, test_name)
         valgrind_output_filename = "%s/%s.valgrind" % (self.args.out_dir, test_name)
 
         # Common arguments for running test
         test_cmd = []
         if self.valgrind_args:
             test_cmd = self.valgrind_args + [ "--log-file=%s" % valgrind_output_filename ]
         test_cmd = test_cmd + self.simulate_args
 
         # @TODO It would be more pythonic to raise exceptions for errors,
         # then perhaps it would be nice to make a single-test class
 
         # Ensure necessary test inputs exist
         if not os.path.isfile(input_filename):
             self._error("No input")
             self.num_failed = self.num_failed + 1
             return CrmExit.NOINPUT
         # NOTE(review): unlike a missing input, missing expected output is not
         # counted in num_failed -- confirm that this is intended
         if not self.args.update and not os.path.isfile(expected_filename):
             self._error("no stored output")
             return CrmExit.NOINPUT
 
         # Run simulation to generate summary output
         if self.args.run: # Single test mode
             test_cmd_full = test_cmd + [ '-x', input_filename, '-S' ] + test_args
             print(" ".join(test_cmd_full))
         else:
             # @TODO Why isn't test_args added here?
             test_cmd_full = test_cmd + [ '-x', input_filename, '-S' ]
         # Standard error is merged into the summary file for this first run
         with io.open(summary_output_filename, "wt") as f:
             subprocess.call(test_cmd_full, stdout=f, stderr=subprocess.STDOUT, env=os.environ)
         if self.args.run:
             cat(summary_output_filename)
 
         # Re-run simulation to generate dot, graph, and scores
         test_cmd_full = test_cmd + [
             '-x', input_filename,
             '-D', dot_output_filename,
             '-G', output_filename,
             '-sSQ' ] + test_args
         # This time, standard output and standard error are captured separately
         with io.open(stderr_output_filename, "wt") as f_stderr, \
              io.open(score_output_filename,  "wt") as f_score:
             rc = subprocess.call(test_cmd_full, stdout=f_score, stderr=f_stderr, env=os.environ)
 
         # Check for test command failure
         if rc != CrmExit.OK:
             self._failed("Test returned: %d" % rc)
             did_fail = True
             print(" ".join(test_cmd_full))
 
         # Check for valgrind errors
         if self.valgrind_args and not self.args.valgrind_skip_output:
             # Any content at all in the valgrind log is treated as a failure
             if os.stat(valgrind_output_filename).st_size > 0:
                 self._failed("Valgrind reported errors")
                 did_fail = True
                 cat(valgrind_output_filename)
             remove_files([ valgrind_output_filename ])
 
         # Check for core dump
         if os.path.isfile("core"):
             self._failed("Core-file detected: core." + test_name)
             did_fail = True
             os.rename("core", "%s/core.%s" % (self.test_home, test_name))
 
         # Check any stderr output
         if os.path.isfile(stderr_expected_filename):
             # Output on stderr is expected for this test, so it must match
             if self._compare_files(stderr_expected_filename, stderr_output_filename):
                 self._failed("stderr changed")
                 did_fail = True
         elif os.stat(stderr_output_filename).st_size > 0:
             self._failed("Output was written to stderr")
             did_fail = True
             cat(stderr_output_filename)
         remove_files([ stderr_output_filename ])
 
         # Check whether output graph exists, and normalize it
         if (not os.path.isfile(output_filename)
             or os.stat(output_filename).st_size == 0):
             self._error("No graph produced")
             did_fail = True
             self.num_failed = self.num_failed + 1
             remove_files([ output_filename ])
             return CrmExit.ERROR
         normalize(output_filename)
 
         # Check whether dot output exists, and sort it
         if (not os.path.isfile(dot_output_filename) or
             os.stat(dot_output_filename).st_size == 0):
             self._error("No dot-file summary produced")
             did_fail = True
             self.num_failed = self.num_failed + 1
             remove_files([ dot_output_filename, output_filename ])
             return CrmExit.ERROR
         # Only the lines between the first and last are sorted and de-duplicated
         # NOTE(review): a dot file consisting of a single line would make
         # lines[-1] raise IndexError -- confirm that this cannot happen
         with io.open(dot_output_filename, "rt") as f:
             first_line = f.readline() # "digraph" line with opening brace
             lines = f.readlines()
             last_line = lines[-1] # closing brace
             del lines[-1]
             lines = sorted(set(lines)) # unique sort
         with io.open(dot_output_filename, "wt") as f:
             f.write(first_line)
             f.writelines(lines)
             f.write(last_line)
 
         # Check whether score output exists, and sort it
         if (not os.path.isfile(score_output_filename)
             or os.stat(score_output_filename).st_size == 0):
             self._error("No allocation scores produced")
             did_fail = True
             self.num_failed = self.num_failed + 1
             remove_files([ score_output_filename, output_filename ])
             return CrmExit.ERROR
         else:
             sort_file(score_output_filename)
 
         # In update mode, the actual results become the new expected results
         # before the comparisons below are made
         if self.args.update:
             shutil.copyfile(output_filename, expected_filename)
             shutil.copyfile(dot_output_filename, dot_expected_filename)
             shutil.copyfile(score_output_filename, scores_filename)
             shutil.copyfile(summary_output_filename, summary_filename)
             print("  Updated expected outputs")
 
         if self._compare_files(summary_filename, summary_output_filename):
             self._failed("summary changed")
             did_fail = True
 
         # The dot output is removed here only if it matched; a changed one is kept
         if self._compare_files(dot_expected_filename, dot_output_filename):
             self._failed("dot-file summary changed")
             did_fail = True
         else:
             remove_files([ dot_output_filename ])
 
         if self._compare_files(expected_filename, output_filename):
             self._failed("xml-file changed")
             did_fail = True
 
         if self._compare_files(scores_filename, score_output_filename):
             self._failed("scores-file changed")
             did_fail = True
 
         # The remaining intermediate outputs are removed whether or not they matched
         remove_files([ output_filename,
                        score_output_filename,
                        summary_output_filename])
 
         # However many checks failed, the test is counted as failed only once
         if did_fail:
             self.num_failed = self.num_failed + 1
             return CrmExit.ERROR
 
         return CrmExit.OK
 
     def run_all(self):
         """ Run every test in TESTS, printing a blank line after each group """
 
         for group in TESTS:
             for test in group:
                 # A test's third element, if present, holds extra arguments for the command under test
                 extra_args = test[2] if len(test) > 2 else []
                 self.run_one(test[0], test[1], extra_args)
             print()
 
     def _print_summary(self):
         """ Print a summary of parameters for this test run """
 
         print("Test home is:\t" + self.test_home)
         print("Test binary is:\t" + self.args.binary)
         if 'PCMK_schema_directory' in os.environ:
             print("Schema home is:\t" + os.environ['PCMK_schema_directory'])
         if self.valgrind_args != []:
             print("Activating memory testing with valgrind")
         print()
 
     def _test_results(self):
         """ Report the overall outcome of a full run and return its exit status
 
         Returns CrmExit.OK if no tests failed, otherwise CrmExit.ERROR. An
         empty file of collected diffs is removed.
         """
 
         if self.num_failed == 0:
             return CrmExit.OK
 
         have_diffs = (os.path.isfile(self.failed_filename)
                       and os.stat(self.failed_filename).st_size != 0)
 
         if not have_diffs:
             self._error("%d (of %d) tests failed (no diff results)" %
                 (self.num_failed, self.num_tests))
             if os.path.isfile(self.failed_filename):
                 os.remove(self.failed_filename)
 
         elif self.args.verbose:
             self._error("Results of %d failed tests (out of %d):" %
                 (self.num_failed, self.num_tests))
             cat(self.failed_filename)
 
         else:
             self._error("Results of %d failed tests (out of %d) are in %s" %
                 (self.num_failed, self.num_tests, self.failed_filename))
             self._error("Use -V to display them after running the tests")
 
         return CrmExit.ERROR
 
     def run(self):
         """ Run the requested test(s) and return an exit status """
 
         self._print_summary()
 
         # Opening the error log for writing empties it
         self.failed_file = io.open(self.failed_filename, "wt")
 
         if self.args.run is not None:
             # Single test mode: any collected diffs are always shown
             status = self.run_one(self.args.run, "Single shot", self.single_test_args)
             self.failed_file.close()
             cat(self.failed_filename)
             return status
 
         print("Performing the following tests from " + self.args.io_dir)
         print()
         self.run_all()
         print()
         self.failed_file.close()
         return self._test_results()
 
 
 # When executed as a script, run the tests and use the result as the exit status
 if __name__ == "__main__":
     sys.exit(CtsScheduler().run())
 
 # vim: set filetype=python expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=120:
diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c
index bbe2a5bc6d..f5552905dd 100644
--- a/daemons/controld/controld_throttle.c
+++ b/daemons/controld/controld_throttle.c
@@ -1,540 +1,554 @@
 /*
  * Copyright 2013-2019 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_internal.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include <unistd.h>
 #include <ctype.h>
 #include <dirent.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/cluster.h>
 
 #include <pacemaker-controld.h>
 
+/* These values don't need to be bits, but these particular values must be kept
+ * for backward compatibility during rolling upgrades.
+ */
 enum throttle_state_e {
-    throttle_extreme = 0x1000,
-    throttle_high = 0x0100,
-    throttle_med  = 0x0010,
-    throttle_low  = 0x0001,
-    throttle_none = 0x0000,
+    throttle_none       = 0x0000,
+    throttle_low        = 0x0001,
+    throttle_med        = 0x0010,
+    throttle_high       = 0x0100,
+    throttle_extreme    = 0x1000,
 };
 
 struct throttle_record_s {
     int max;
     enum throttle_state_e mode;
     char *node;
 };
 
 static int throttle_job_max = 0;
 static float throttle_load_target = 0.0;
 
 #define THROTTLE_FACTOR_LOW    1.2
 #define THROTTLE_FACTOR_MEDIUM 1.6
 #define THROTTLE_FACTOR_HIGH   2.0
 
 static GHashTable *throttle_records = NULL;
 static mainloop_timer_t *throttle_timer = NULL;
 
+static const char *
+load2str(enum throttle_state_e mode)
+{
+    switch (mode) {
+        case throttle_extreme:  return "extreme";
+        case throttle_high:     return "high";
+        case throttle_med:      return "medium";
+        case throttle_low:      return "low";
+        case throttle_none:     return "negligible";
+        default:                return "undetermined";
+    }
+}
+
 #if SUPPORT_PROCFS
 /*!
  * \internal
  * \brief Return name of /proc file containing the CIB daemon's load statistics
  *
  * \return Newly allocated memory with file name on success, NULL otherwise
  *
  * \note It is the caller's responsibility to free the return value.
  *       This will return NULL if the daemon is being run via valgrind.
  *       This should be called only on Linux systems.
  */
 static char *
 find_cib_loadfile(void)
 {
     int pid = crm_procfs_pid_of("pacemaker-based");
 
     return pid? crm_strdup_printf("/proc/%d/stat", pid) : NULL;
 }
 
 static bool
 throttle_cib_load(float *load)
 {
 /*
        /proc/[pid]/stat
               Status information about the process.  This is used by ps(1).  It is defined in /usr/src/linux/fs/proc/array.c.
 
               The fields, in order, with their proper scanf(3) format specifiers, are:
 
               pid %d      (1) The process ID.
 
               comm %s     (2) The filename of the executable, in parentheses.  This is visible whether or not the executable is swapped out.
 
               state %c    (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging.
 
               ppid %d     (4) The PID of the parent.
 
               pgrp %d     (5) The process group ID of the process.
 
               session %d  (6) The session ID of the process.
 
               tty_nr %d   (7) The controlling terminal of the process.  (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.)
 
               tpgid %d    (8) The ID of the foreground process group of the controlling terminal of the process.
 
               flags %u (%lu before Linux 2.6.22)
                           (9) The kernel flags word of the process.  For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h.  Details depend on the kernel version.
 
               minflt %lu  (10) The number of minor faults the process has made which have not required loading a memory page from disk.
 
               cminflt %lu (11) The number of minor faults that the process's waited-for children have made.
 
               majflt %lu  (12) The number of major faults the process has made which have required loading a memory page from disk.
 
               cmajflt %lu (13) The number of major faults that the process's waited-for children have made.
 
               utime %lu   (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)).  This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations.
 
               stime %lu   (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)).
  */
 
     static char *loadfile = NULL;
     static time_t last_call = 0;
     static long ticks_per_s = 0;
     static unsigned long last_utime, last_stime;
 
     char buffer[64*1024];
     FILE *stream = NULL;
     time_t now = time(NULL);
 
     if(load == NULL) {
         return FALSE;
     } else {
         *load = 0.0;
     }
 
     if(loadfile == NULL) {
         last_call = 0;
         last_utime = 0;
         last_stime = 0;
         loadfile = find_cib_loadfile();
         if (loadfile == NULL) {
             crm_warn("Couldn't find CIB load file");
             return FALSE;
         }
         ticks_per_s = sysconf(_SC_CLK_TCK);
         crm_trace("Found %s", loadfile);
     }
 
     stream = fopen(loadfile, "r");
     if(stream == NULL) {
         int rc = errno;
 
         crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc);
         free(loadfile); loadfile = NULL;
         return FALSE;
     }
 
     if(fgets(buffer, sizeof(buffer), stream)) {
         char *comm = calloc(1, 256);
         char state = 0;
         int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0;
         unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0;
 
         rc = sscanf(buffer,  "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
                     &pid, comm, &state,
                     &ppid, &pgrp, &session, &tty_nr, &tpgid,
                     &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);
         free(comm);
 
         if(rc != 15) {
             crm_err("Only %d of 15 fields found in %s", rc, loadfile);
             fclose(stream);
             return FALSE;
 
         } else if(last_call > 0
            && last_call < now
            && last_utime <= utime
            && last_stime <= stime) {
 
             time_t elapsed = now - last_call;
             unsigned long delta_utime = utime - last_utime;
             unsigned long delta_stime = stime - last_stime;
 
             *load = (delta_utime + delta_stime); /* Cast to a float before division */
             *load /= ticks_per_s;
             *load /= elapsed;
             crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed);
 
         } else {
             crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s);
         }
 
         last_call = now;
         last_utime = utime;
         last_stime = stime;
 
         fclose(stream);
         return TRUE;
     }
 
     fclose(stream);
     return FALSE;
 }
 
 static bool
 throttle_load_avg(float *load)
 {
     char buffer[256];
     FILE *stream = NULL;
     const char *loadfile = "/proc/loadavg";
 
     if(load == NULL) {
         return FALSE;
     }
 
     stream = fopen(loadfile, "r");
     if(stream == NULL) {
         int rc = errno;
         crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_strerror(rc), rc);
         return FALSE;
     }
 
     if(fgets(buffer, sizeof(buffer), stream)) {
         char *nl = strstr(buffer, "\n");
 
         /* Grab the 1-minute average, ignore the rest */
         *load = strtof(buffer, NULL);
         if(nl) { nl[0] = 0; }
 
         fclose(stream);
         return TRUE;
     }
 
     fclose(stream);
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Check a load value against throttling thresholds
  *
  * \param[in] load        Load value to check
  * \param[in] desc        Description of metric (for logging)
  * \param[in] thresholds  Low/medium/high/extreme thresholds
  *
  * \return Throttle mode corresponding to load value
  */
 static enum throttle_state_e
 throttle_check_thresholds(float load, const char *desc, float thresholds[4])
 {
     if (load > thresholds[3]) {
         crm_notice("Extreme %s detected: %f", desc, load);
         return throttle_extreme;
 
     } else if (load > thresholds[2]) {
         crm_notice("High %s detected: %f", desc, load);
         return throttle_high;
 
     } else if (load > thresholds[1]) {
         crm_info("Moderate %s detected: %f", desc, load);
         return throttle_med;
 
     } else if (load > thresholds[0]) {
         crm_debug("Noticeable %s detected: %f", desc, load);
         return throttle_low;
     }
 
     crm_trace("Negligible %s detected: %f", desc, load);
     return throttle_none;
 }
 
 static enum throttle_state_e
 throttle_handle_load(float load, const char *desc, int cores)
 {
     float normalize;
     float thresholds[4];
 
     if (cores == 1) {
         /* On a single core machine, a load of 1.0 is already too high */
         normalize = 0.6;
 
     } else {
         /* Normalize the load to be per-core */
         normalize = cores;
     }
     thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW;
     thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM;
     thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH;
     thresholds[3] = load + 1.0; /* never extreme */
 
     return throttle_check_thresholds(load, desc, thresholds);
 }
 #endif
 
 static enum throttle_state_e
 throttle_mode(void)
 {
+    enum throttle_state_e mode = throttle_none;
+
 #if SUPPORT_PROCFS
     unsigned int cores;
     float load;
     float thresholds[4];
-    enum throttle_state_e mode = throttle_none;
 
     cores = crm_procfs_num_cores();
     if(throttle_cib_load(&load)) {
         float cib_max_cpu = 0.95;
 
         /* The CIB is a single-threaded task and thus cannot consume
          * more than 100% of a CPU (and 1/cores of the overall system
          * load).
          *
          * On a many-cored system, the CIB might therefore be maxed out
          * (causing operations to fail or appear to fail) even though
          * the overall system load is still reasonable.
          *
          * Therefore, the 'normal' thresholds can not apply here, and we
          * need a special case.
          */
         if(cores == 1) {
             cib_max_cpu = 0.4;
         }
         if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) {
             cib_max_cpu = throttle_load_target;
         }
 
         thresholds[0] = cib_max_cpu * 0.8;
         thresholds[1] = cib_max_cpu * 0.9;
         thresholds[2] = cib_max_cpu;
         /* Can only happen on machines with a low number of cores */
         thresholds[3] = cib_max_cpu * 1.5;
 
-        mode |= throttle_check_thresholds(load, "CIB load", thresholds);
+        mode = throttle_check_thresholds(load, "CIB load", thresholds);
     }
 
     if(throttle_load_target <= 0) {
         /* If we ever make this a valid value, the cluster will at least behave as expected */
         return mode;
     }
 
     if(throttle_load_avg(&load)) {
-        crm_debug("Current load is %f across %u core(s)", load, cores);
-        mode |= throttle_handle_load(load, "CPU load", cores);
-    }
+        enum throttle_state_e cpu_load;
 
-    if(mode & throttle_extreme) {
-        return throttle_extreme;
-    } else if(mode & throttle_high) {
-        return throttle_high;
-    } else if(mode & throttle_med) {
-        return throttle_med;
-    } else if(mode & throttle_low) {
-        return throttle_low;
+        cpu_load = throttle_handle_load(load, "CPU load", cores);
+        if (cpu_load > mode) {
+            mode = cpu_load;
+        }
+        crm_debug("Current load is %f across %u core(s)", load, cores);
     }
 #endif // SUPPORT_PROCFS
-    return throttle_none;
+    return mode;
 }
 
 static void
 throttle_send_command(enum throttle_state_e mode)
 {
     xmlNode *xml = NULL;
     static enum throttle_state_e last = -1;
 
     if(mode != last) {
-        crm_info("New throttle mode: %.4x (was %.4x)", mode, last);
+        crm_info("New throttle mode: %s load (was %s)",
+                 load2str(mode), load2str(last));
         last = mode;
 
         xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
         crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode);
         crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max);
 
         send_cluster_message(NULL, crm_msg_crmd, xml, TRUE);
         free_xml(xml);
     }
 }
 
 static gboolean
 throttle_timer_cb(gpointer data)
 {
     throttle_send_command(throttle_mode());
     return TRUE;
 }
 
 static void
 throttle_record_free(gpointer p)
 {
     struct throttle_record_s *r = p;
     free(r->node);
     free(r);
 }
 
 void
 throttle_set_load_target(float target)
 {
     throttle_load_target = target;
 }
 
 void
 throttle_update_job_max(const char *preference)
 {
     int max = 0;
 
     throttle_job_max = 2 * crm_procfs_num_cores();
 
     if(preference) {
         /* Global preference from the CIB */
         max = crm_int_helper(preference, NULL);
         if(max > 0) {
             throttle_job_max = max;
         }
     }
 
     preference = getenv("PCMK_node_action_limit");
     if(preference) {
         /* Per-node override */
         max = crm_int_helper(preference, NULL);
         if(max > 0) {
             throttle_job_max = max;
         }
     }
 }
 
 void
 throttle_init(void)
 {
     if(throttle_records == NULL) {
         throttle_records = g_hash_table_new_full(
             crm_str_hash, g_str_equal, NULL, throttle_record_free);
         throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
     }
 
     throttle_update_job_max(NULL);
     mainloop_timer_start(throttle_timer);
 }
 
 void
 throttle_fini(void)
 {
     if (throttle_timer != NULL) {
         mainloop_timer_del(throttle_timer);
         throttle_timer = NULL;
     }
     if (throttle_records != NULL) {
         g_hash_table_destroy(throttle_records);
         throttle_records = NULL;
     }
 }
 
 int
 throttle_get_total_job_limit(int l)
 {
     /* Cluster-wide limit */
     GHashTableIter iter;
     int limit = l;
     int peers = crm_active_peers();
     struct throttle_record_s *r = NULL;
 
     g_hash_table_iter_init(&iter, throttle_records);
 
     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) {
         switch(r->mode) {
 
             case throttle_extreme:
                 if(limit == 0 || limit > peers/4) {
                     limit = QB_MAX(1, peers/4);
                 }
                 break;
 
             case throttle_high:
                 if(limit == 0 || limit > peers/2) {
                     limit = QB_MAX(1, peers/2);
                 }
                 break;
             default:
                 break;
         }
     }
     if(limit == l) {
         /* crm_trace("No change to batch-limit=%d", limit); */
 
     } else if(l == 0) {
         crm_trace("Using batch-limit=%d", limit);
 
     } else {
         crm_trace("Using batch-limit=%d instead of %d", limit, l);
     }
     return limit;
 }
 
 int
 throttle_get_job_limit(const char *node)
 {
     int jobs = 1;
     struct throttle_record_s *r = NULL;
 
     r = g_hash_table_lookup(throttle_records, node);
     if(r == NULL) {
         r = calloc(1, sizeof(struct throttle_record_s));
         r->node = strdup(node);
         r->mode = throttle_low;
         r->max = throttle_job_max;
         crm_trace("Defaulting to local values for unknown node %s", node);
 
         g_hash_table_insert(throttle_records, r->node, r);
     }
 
     switch(r->mode) {
         case throttle_extreme:
         case throttle_high:
             jobs = 1; /* At least one job must always be allowed */
             break;
         case throttle_med:
             jobs = QB_MAX(1, r->max / 4);
             break;
         case throttle_low:
             jobs = QB_MAX(1, r->max / 2);
             break;
         case throttle_none:
             jobs = QB_MAX(1, r->max);
             break;
         default:
             crm_err("Unknown throttle mode %.4x on %s", r->mode, node);
             break;
     }
     return jobs;
 }
 
 void
 throttle_update(xmlNode *xml)
 {
     int max = 0;
-    enum throttle_state_e mode = 0;
+    int mode = 0;
     struct throttle_record_s *r = NULL;
     const char *from = crm_element_value(xml, F_CRM_HOST_FROM);
 
-    crm_element_value_int(xml, F_CRM_THROTTLE_MODE, (int*)&mode);
+    crm_element_value_int(xml, F_CRM_THROTTLE_MODE, &mode);
     crm_element_value_int(xml, F_CRM_THROTTLE_MAX, &max);
 
     r = g_hash_table_lookup(throttle_records, from);
 
     if(r == NULL) {
         r = calloc(1, sizeof(struct throttle_record_s));
         r->node = strdup(from);
         g_hash_table_insert(throttle_records, r->node, r);
     }
 
     r->max = max;
-    r->mode = mode;
+    r->mode = (enum throttle_state_e) mode;
 
-    crm_debug("Host %s supports a maximum of %d jobs and throttle mode %.4x.  New job limit is %d",
-              from, max, mode, throttle_get_job_limit(from));
+    crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d",
+              from, load2str((enum throttle_state_e) mode), max,
+              throttle_get_job_limit(from));
 }
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 5b86f0f78a..9c27e6553d 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1,2059 +1,2064 @@
 /*
- * Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2009-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <sys/utsname.h>
 
 #include <stdlib.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <ctype.h>
 #include <regex.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/ipc.h>
 #include <crm/common/ipcs.h>
 #include <crm/cluster/internal.h>
 
 #include <crm/stonith-ng.h>
 #include <crm/fencing/internal.h>
 #include <crm/common/xml.h>
 
 #include <crm/common/util.h>
 #include <pacemaker-fenced.h>
 
 #define TIMEOUT_MULTIPLY_FACTOR 1.2
 
 /* When one fencer queries its peers for devices able to handle a fencing
  * request, each peer will reply with a list of such devices available to it.
  * Each reply will be parsed into a st_query_result_t, with each device's
  * information kept in a device_properties_t.
  */
 
 typedef struct device_properties_s {
     /* Whether access to this device has been verified */
     gboolean verified;
 
     /* The remaining members are indexed by the operation's "phase" */
 
     /* Whether this device has been executed in each phase */
     gboolean executed[st_phase_max];
     /* Whether this device is disallowed from executing in each phase */
     gboolean disallowed[st_phase_max];
     /* Action-specific timeout for each phase */
     int custom_action_timeout[st_phase_max];
     /* Action-specific maximum random delay for each phase */
     int delay_max[st_phase_max];
     /* Action-specific base delay for each phase */
     int delay_base[st_phase_max];
 } device_properties_t;
 
 typedef struct st_query_result_s {
     /* Name of peer that sent this result */
     char *host;
     /* Only try peers for non-topology based operations once */
     gboolean tried;
     /* Number of entries in the devices table */
     int ndevices;
     /* Devices available to this host that are capable of fencing the target */
     GHashTable *devices;
 } st_query_result_t;
 
 GHashTable *stonith_remote_op_list = NULL;
 
 void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer);
 static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
                                   int call_options);
 
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
 static int get_op_total_timeout(const remote_fencing_op_t *op,
                                 const st_query_result_t *chosen_peer);
 
 static gint
 sort_strings(gconstpointer a, gconstpointer b)
 {
     return strcmp(a, b);
 }
 
 static void
 free_remote_query(gpointer data)
 {
     if (data) {
         st_query_result_t *query = data;
 
         crm_trace("Free'ing query result from %s", query->host);
         g_hash_table_destroy(query->devices);
         free(query->host);
         free(query);
     }
 }
 
 void
 free_stonith_remote_op_list()
 {
     if (stonith_remote_op_list != NULL) {
         g_hash_table_destroy(stonith_remote_op_list);
         stonith_remote_op_list = NULL;
     }
 }
 
 struct peer_count_data {
     const remote_fencing_op_t *op;
     gboolean verified_only;
     int count;
 };
 
 /*!
  * \internal
  * \brief Increment a counter if a device has not been executed yet
  *
  * \param[in] key        Device ID (ignored)
  * \param[in] value      Device properties
  * \param[in] user_data  Peer count data
  */
 static void
 count_peer_device(gpointer key, gpointer value, gpointer user_data)
 {
     device_properties_t *props = (device_properties_t*)value;
     struct peer_count_data *data = user_data;
 
     if (!props->executed[data->op->phase]
         && (!data->verified_only || props->verified)) {
         ++(data->count);
     }
 }
 
 /*!
  * \internal
  * \brief Check the number of available devices in a peer's query results
  *
  * \param[in] op             Operation that results are for
  * \param[in] peer           Peer to count
  * \param[in] verified_only  Whether to count only verified devices
  *
  * \return Number of devices available to peer that were not already executed
  */
 static int
 count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
                    gboolean verified_only)
 {
     struct peer_count_data data;
 
     data.op = op;
     data.verified_only = verified_only;
     data.count = 0;
     if (peer) {
         g_hash_table_foreach(peer->devices, count_peer_device, &data);
     }
     return data.count;
 }
 
 /*!
  * \internal
  * \brief Search for a device in a query result
  *
  * \param[in] op      Operation that result is for
  * \param[in] peer    Query result for a peer
  * \param[in] device  Device ID to search for
  *
  * \return Device properties if found, NULL otherwise
  */
 static device_properties_t *
 find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer,
                  const char *device)
 {
     device_properties_t *props = g_hash_table_lookup(peer->devices, device);
 
     return (props && !props->executed[op->phase]
            && !props->disallowed[op->phase])? props : NULL;
 }
 
 /*!
  * \internal
  * \brief Find a device in a peer's device list and mark it as executed
  *
  * \param[in]     op                     Operation that peer result is for
  * \param[in,out] peer                   Peer with results to search
  * \param[in]     device                 ID of device to mark as done
  * \param[in]     verified_devices_only  Only consider verified devices
  *
  * \return TRUE if device was found and marked, FALSE otherwise
  */
 static gboolean
 grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer,
                  const char *device, gboolean verified_devices_only)
 {
     device_properties_t *props = find_peer_device(op, peer, device);
 
     if ((props == NULL) || (verified_devices_only && !props->verified)) {
         return FALSE;
     }
 
     crm_trace("Removing %s from %s (%d remaining)",
               device, peer->host, count_peer_devices(op, peer, FALSE));
     props->executed[op->phase] = TRUE;
     return TRUE;
 }
 
 static void
 clear_remote_op_timers(remote_fencing_op_t * op)
 {
     if (op->query_timer) {
         g_source_remove(op->query_timer);
         op->query_timer = 0;
     }
     if (op->op_timer_total) {
         g_source_remove(op->op_timer_total);
         op->op_timer_total = 0;
     }
     if (op->op_timer_one) {
         g_source_remove(op->op_timer_one);
         op->op_timer_one = 0;
     }
 }
 
 static void
 free_remote_op(gpointer data)
 {
     remote_fencing_op_t *op = data;
 
     crm_trace("Free'ing op %s for %s", op->id, op->target);
     crm_log_xml_debug(op->request, "Destroying");
 
     clear_remote_op_timers(op);
 
     free(op->id);
     free(op->action);
     free(op->delegate);
     free(op->target);
     free(op->client_id);
     free(op->client_name);
     free(op->originator);
 
     if (op->query_results) {
         g_list_free_full(op->query_results, free_remote_query);
     }
     if (op->request) {
         free_xml(op->request);
         op->request = NULL;
     }
     if (op->devices_list) {
         g_list_free_full(op->devices_list, free);
         op->devices_list = NULL;
     }
     g_list_free_full(op->automatic_list, free);
     g_list_free(op->duplicates);
     free(op);
 }
 
 void
 init_stonith_remote_op_hash_table(GHashTable **table)
 {
     if (*table == NULL) {
         *table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
     }
 }
 
 /*!
  * \internal
  * \brief Return an operation's originally requested action (before any remap)
  *
  * \param[in] op  Operation to check
  *
  * \return Operation's original action
  */
 static const char *
 op_requested_action(const remote_fencing_op_t *op)
 {
     return ((op->phase > st_phase_requested)? "reboot" : op->action);
 }
 
 /*!
  * \internal
  * \brief Remap a "reboot" operation to the "off" phase
  *
  * \param[in,out] op      Operation to remap
  */
 static void
 op_phase_off(remote_fencing_op_t *op)
 {
     crm_info("Remapping multiple-device reboot of %s (%s) to off",
              op->target, op->id);
     op->phase = st_phase_off;
 
     /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
      * memory allocation at each phase.
      */
     strcpy(op->action, "off");
 }
 
 /*!
  * \internal
  * \brief Advance a remapped reboot operation to the "on" phase
  *
  * \param[in,out] op  Operation to remap
  */
 static void
 op_phase_on(remote_fencing_op_t *op)
 {
     GListPtr iter = NULL;
 
     crm_info("Remapped off of %s complete, remapping to on for %s.%.8s",
              op->target, op->client_name, op->id);
     op->phase = st_phase_on;
     strcpy(op->action, "on");
 
     /* Skip devices with automatic unfencing, because the cluster will handle it
      * when the node rejoins.
      */
     for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
         GListPtr match = g_list_find_custom(op->devices_list, iter->data,
                                             sort_strings);
 
         if (match) {
             op->devices_list = g_list_remove(op->devices_list, match->data);
         }
     }
     g_list_free_full(op->automatic_list, free);
     op->automatic_list = NULL;
 
     /* Rewind device list pointer */
     op->devices = op->devices_list;
 }
 
 /*!
  * \internal
  * \brief Reset a remapped reboot operation
  *
  * \param[in,out] op  Operation to reset
  */
 static void
 undo_op_remap(remote_fencing_op_t *op)
 {
     if (op->phase > 0) {
         crm_info("Undoing remap of reboot of %s for %s.%.8s",
                  op->target, op->client_name, op->id);
         op->phase = st_phase_requested;
         strcpy(op->action, "reboot");
     }
 }
 
 static xmlNode *
 create_op_done_notify(remote_fencing_op_t * op, int rc)
 {
     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
 
     crm_xml_add_int(notify_data, "state", op->state);
     crm_xml_add_int(notify_data, F_STONITH_RC, rc);
     crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
     crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
     crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
     crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
     crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
 
     return notify_data;
 }
 
 void
 stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc)
 {
     static int count = 0;
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
     xmlNode *notify_data = create_op_done_notify(op, rc);
 
     count++;
     crm_trace("Broadcasting result to peers");
     crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
     crm_xml_add(bcast, F_SUBTYPE, "broadcast");
     crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
     crm_xml_add_int(bcast, "count", count);
     add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
     send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
     free_xml(notify_data);
     free_xml(bcast);
 
     return;
 }
 
 static void
 handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
 {
     xmlNode *notify_data = NULL;
     xmlNode *reply = NULL;
 
     if (op->notify_sent == TRUE) {
         /* nothing to do */
         return;
     }
 
     /* Do notification with a clean data object */
     notify_data = create_op_done_notify(op, rc);
     crm_xml_add_int(data, "state", op->state);
     crm_xml_add(data, F_STONITH_TARGET, op->target);
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
 
     reply = stonith_construct_reply(op->request, NULL, data, rc);
     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
 
     /* Send fencing OP reply to local client that initiated fencing */
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
 
     /* bcast to all local clients that the fencing operation happend */
     do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
     do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
 
     /* mark this op as having notify's already sent */
     op->notify_sent = TRUE;
     free_xml(reply);
     free_xml(notify_data);
 }
 
 static void
 handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
 {
     GListPtr iter = NULL;
 
     for (iter = op->duplicates; iter != NULL; iter = iter->next) {
         remote_fencing_op_t *other = iter->data;
 
         if (other->state == st_duplicate) {
             other->state = op->state;
             crm_debug("Performing duplicate notification for %s@%s.%.8s = %s",
                       other->client_name, other->originator, other->id,
                       pcmk_strerror(rc));
             remote_op_done(other, data, rc, TRUE);
 
         } else {
             // Possible if (for example) it timed out already
             crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name,
                     other->originator, other->state);
         }
     }
 }
 
 /*!
  * \internal
  * \brief Finalize a remote fencing operation
  *
  * This function has two code paths.
  *
  * Path 1. This node is the owner of the operation and needs to notify the
  *         cpg group of the operation's result via a broadcast.
  *
  * Path 2. The cpg broadcast is received, and every node notifies its local
  *         stonith clients of the operation's result.
  *
  * So the owner of the operation first notifies the cluster of the result,
  * and once that cpg notification is received back, it notifies its local
  * clients.
  *
  * Nodes that are passive watchers of the operation will receive the
  * broadcast and only need to notify their local clients that the operation
  * finished.
  *
  * \param op    The fencing operation to finalize
  * \param data  The XML reply (if present) of the last delegated fencing
  *              operation; if NULL, a temporary "remote-op" element is
  *              created here and freed before returning
  * \param rc    Result of the operation (logged via pcmk_strerror())
  * \param dup   Whether this operation is a duplicate; if so, the result is
  *              never broadcast, and the operation's own duplicates list is
  *              not processed
  */
 static void
 remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
 {
     int level = LOG_ERR;
     const char *subt = NULL;
     xmlNode *local_data = NULL;
 
     /* These happen on every call, whichever path is taken below */
     op->completed = time(NULL);
     clear_remote_op_timers(op);
     undo_op_remap(op);
 
     /* Notifications are sent at most once per operation */
     if (op->notify_sent == TRUE) {
         crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s",
                 op->action, op->target, op->delegate ? op->delegate : "<no-one>",
                 op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc));
         goto remote_op_done_cleanup;
     }
 
     /* If no delegate has been recorded yet, take it from the reply: prefer an
      * explicit delegate attribute anywhere in the XML, otherwise fall back to
      * the reply's origin. This is skipped for -ENODEV and -EHOSTUNREACH.
      */
     if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) {
         xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE);
         if(ndata) {
             op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE);
         } else { 
             op->delegate = crm_element_value_copy(data, F_ORIG);
         }
     }
 
     /* The code below reads from and passes on data, so substitute an empty
      * element when the caller has none (tracked in local_data for freeing)
      */
     if (data == NULL) {
         data = create_xml_node(NULL, "remote-op");
         local_data = data;
     }
 
     /* Tell everyone the operation is done, we will continue
      * with doing the local notifications once we receive
      * the broadcast back. */
     subt = crm_element_value(data, F_SUBTYPE);
     if (dup == FALSE && safe_str_neq(subt, "broadcast")) {
         /* Defer notification until the bcast message arrives */
         stonith_bcast_result_to_peers(op, rc);
         goto remote_op_done_cleanup;
     }
 
     /* Only a failure of an operation that this node originated (and that is
      * not a duplicate) is logged at error level; everything else is a notice
      */
     if (rc == pcmk_ok || dup) {
         level = LOG_NOTICE;
     } else if (safe_str_neq(op->originator, stonith_our_uname)) {
         level = LOG_NOTICE;
     }
 
     do_crm_log(level,
                "Operation %s of %s by %s for %s@%s.%.8s: %s",
                op->action, op->target, op->delegate ? op->delegate : "<no-one>",
                op->client_name, op->originator, op->id, pcmk_strerror(rc));
 
     handle_local_reply_and_notify(op, data, rc);
 
     /* Duplicates are finalized with dup=TRUE, so they do not recurse here */
     if (dup == FALSE) {
         handle_duplicates(op, data, rc);
     }
 
     /* Free non-essential parts of the record
      * Keep the record around so we can query the history
      */
     if (op->query_results) {
         g_list_free_full(op->query_results, free_remote_query);
         op->query_results = NULL;
     }
 
     if (op->request) {
         free_xml(op->request);
         op->request = NULL;
     }
 
   remote_op_done_cleanup:
     /* local_data is non-NULL only if the placeholder element was created */
     free_xml(local_data);
 }
 
 /*!
  * \internal
  * \brief Timer callback that treats a self-fencing operation as complete
  *
  * Marks the operation as done and finalizes it with a successful result and
  * no reply XML.
  *
  * \param userdata  Fencing operation (remote_fencing_op_t *)
  *
  * \return FALSE always (presumably so the timer is not rescheduled; confirm
  *         against where this callback is registered)
  */
 static gboolean
 remote_op_watchdog_done(gpointer userdata)
 {
     remote_fencing_op_t *op = userdata;
 
     op->op_timer_one = 0;
 
     /* "%.8s" prints at most the first 8 characters of the ID, like the other
      * operation log messages; "%8s" would only pad to a minimum width and
      * print the whole ID
      */
     crm_notice("Self-fencing (%s) by %s for %s.%.8s assumed complete",
                op->action, op->target, op->client_name, op->id);
     op->state = st_done;
     remote_op_done(op, NULL, pcmk_ok, FALSE);
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Timer callback for when one peer's fencing attempt has timed out
  *
  * Logs the timeout and calls call_remote_stonith() again without naming a
  * peer.
  *
  * \param userdata  Fencing operation (remote_fencing_op_t *)
  *
  * \return FALSE always
  */
 static gboolean
 remote_op_timeout_one(gpointer userdata)
 {
     remote_fencing_op_t *fence_op = (remote_fencing_op_t *) userdata;
 
     /* This timer has fired, so reset the field that tracked it */
     fence_op->op_timer_one = 0;
 
     crm_notice("Peer's fencing (%s) of %s for %s timed out" CRM_XS "id=%s",
                fence_op->action, fence_op->target, fence_op->client_name,
                fence_op->id);
 
     call_remote_stonith(fence_op, NULL);
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Timer callback for when an operation's total timeout has expired
  *
  * An operation that is already done is left alone. Otherwise the operation
  * is finalized: with success if it is in the "on" phase of a remapped reboot,
  * or as failed with -ETIME in any other phase.
  *
  * \param userdata  Fencing operation (remote_fencing_op_t *)
  *
  * \return FALSE always
  */
 static gboolean
 remote_op_timeout(gpointer userdata)
 {
     remote_fencing_op_t *fence_op = userdata;
     int result = pcmk_ok;
 
     fence_op->op_timer_total = 0;
 
     if (fence_op->state == st_done) {
         crm_debug("Action %s (%s) for %s (%s) already completed",
                   fence_op->action, fence_op->id, fence_op->target,
                   fence_op->client_name);
         return FALSE;
     }
 
     crm_debug("Action %s (%s) for %s (%s) timed out",
               fence_op->action, fence_op->id, fence_op->target,
               fence_op->client_name);
 
     /* A remapped reboot operation that times out in the "on" phase has
      * already completed its "off" phase successfully, so quit trying any
      * further devices and report success. Any other phase is a real failure.
      */
     if (fence_op->phase != st_phase_on) {
         fence_op->state = st_failed;
         result = -ETIME;
     }
 
     remote_op_done(fence_op, NULL, result, FALSE);
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Timer callback for when the wait for peer query replies has ended
  *
  * Nothing is done for an operation that is already done or executing. If any
  * query results were collected, fencing proceeds with them; otherwise the
  * operation is handled as a total timeout.
  *
  * \param data  Fencing operation (remote_fencing_op_t *)
  *
  * \return FALSE always
  */
 static gboolean
 remote_op_query_timeout(gpointer data)
 {
     remote_fencing_op_t *fence_op = data;
 
     fence_op->query_timer = 0;
 
     if (fence_op->state == st_done) {
         crm_debug("Operation %s for %s already completed", fence_op->id, fence_op->target);
         return FALSE;
     }
 
     if (fence_op->state == st_exec) {
         crm_debug("Operation %s for %s already in progress", fence_op->id, fence_op->target);
         return FALSE;
     }
 
     if (fence_op->query_results) {
         crm_debug("Query %s for %s complete: %d", fence_op->id, fence_op->target, fence_op->state);
         call_remote_stonith(fence_op, NULL);
         return FALSE;
     }
 
     crm_debug("Query %s for %s timed out: %d", fence_op->id, fence_op->target, fence_op->state);
 
     /* The total-timeout handler is invoked directly below, so remove its
      * pending timer first
      */
     if (fence_op->op_timer_total) {
         g_source_remove(fence_op->op_timer_total);
         fence_op->op_timer_total = 0;
     }
     remote_op_timeout(fence_op);
 
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Check whether a topology entry has no devices at any level
  *
  * \param tp  Topology entry to check (may be NULL)
  *
  * \return TRUE if \p tp is NULL or all of its levels are NULL, else FALSE
  */
 static gboolean
 topology_is_empty(stonith_topology_t *tp)
 {
     int level = 0;
 
     if (tp != NULL) {
         while (level < ST_LEVEL_MAX) {
             if (tp->levels[level] != NULL) {
                 return FALSE;
             }
             level++;
         }
     }
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Ensure a device is on an operation's automatic unfencing list
  *
  * The device ID is copied, and nothing is added if an entry that compares
  * equal (per sort_strings) is already present.
  *
  * \param[in,out] op      Operation to modify
  * \param[in]     device  Device ID to add
  */
 static void
 add_required_device(remote_fencing_op_t *op, const char *device)
 {
     if (g_list_find_custom(op->automatic_list, device, sort_strings) != NULL) {
         /* Already listed */
         return;
     }
     op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
 }
 
 /*!
  * \internal
  * \brief Remove a device from the automatic unfencing list
  *
  * If an entry comparing equal to \p device (per sort_strings) is found, both
  * its list link and its string are released.
  *
  * \param[in,out] op      Operation to modify
  * \param[in]     device  Device ID to remove
  *
  * \note \p device must not be used by the caller afterwards if it points at
  *       the list entry's own string, since that string is freed here.
  */
 static void
 remove_required_device(remote_fencing_op_t *op, const char *device)
 {
     GListPtr match = g_list_find_custom(op->automatic_list, device,
                                         sort_strings);
 
     if (match) {
         /* Entries are strdup()'d copies (see add_required_device()), so the
          * string has to be freed together with its link, or it is leaked
          */
         char *entry = match->data;
 
         op->automatic_list = g_list_delete_link(op->automatic_list, match);
         free(entry);
     }
 }
 
 /*!
  * \internal
  * \brief Replace an operation's device list with a deep copy of another list
  *
  * Any existing list (and its strings) is freed first. Afterwards, op->devices
  * points at the head of the new copy.
  *
  * \param[in,out] op       Operation to modify
  * \param[in]     devices  List of device ID strings to copy
  */
 static void
 set_op_device_list(remote_fencing_op_t * op, GListPtr devices)
 {
     GListPtr src = devices;
 
     if (op->devices_list != NULL) {
         g_list_free_full(op->devices_list, free);
     }
     op->devices_list = NULL;
 
     while (src != NULL) {
         op->devices_list = g_list_append(op->devices_list, strdup(src->data));
         src = src->next;
     }
 
     op->devices = op->devices_list;
 }
 
 /*!
  * \internal
  * \brief Check whether a node matches a topology target
  *
  * How the match is done depends on the entry's kind: 2 compares a node
  * attribute, 1 applies a regular expression to the node name, and 0 compares
  * the node name to the target string. Any other kind never matches.
  *
  * \param[in] tp    Topology table entry to check
  * \param[in] node  Name of node to check
  *
  * \return TRUE if node matches topology target
  */
 static gboolean
 topology_matches(const stonith_topology_t *tp, const char *node)
 {
     CRM_CHECK(node && tp && tp->target, return FALSE);
 
     if (tp->kind == 2) {
         /* This level targets by attribute, so tp->target is a NAME=VALUE pair
          * of a permanent attribute applied to targeted nodes. The test below
          * relies on the locally cached copy of the CIB, so if fencing needs to
          * be done before the initial CIB is received or after a malformed CIB
          * is received, then the topology will be unable to be used.
          */
         if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
             crm_notice("Matched %s with %s by attribute", node, tp->target);
             return TRUE;
         }
 
     } else if (tp->kind == 1) {
         /* This level targets by name, so tp->target is a regular expression
          * matching names of nodes to be targeted.
          */
         regex_t name_re;
 
         if (regcomp(&name_re, tp->target_pattern, REG_EXTENDED|REG_NOSUB) == 0) {
             int exec_rc = regexec(&name_re, node, 0, NULL, 0);
 
             regfree(&name_re);
             if (exec_rc == 0) {
                 crm_notice("Matched %s with %s by name", node, tp->target);
                 return TRUE;
             }
         } else {
             crm_info("Bad regex '%s' for fencing level", tp->target);
         }
 
     } else if (tp->kind == 0) {
         crm_trace("Testing %s against %s", node, tp->target);
         return safe_str_eq(tp->target, node);
     }
 
     crm_trace("No match for %s with %s", node, tp->target);
     return FALSE;
 }
 
 /*!
  * \brief Find the topology table entry that applies to a host
  *
  * The table is first looked up with the host name as key. If that finds
  * nothing, every entry is tested with topology_matches() and the first match
  * (in table iteration order) is returned.
  *
  * \param[in] host  Name of host to find an entry for
  *
  * \return Matching topology entry, or NULL if none matches
  */
 stonith_topology_t *
 find_topology_for_host(const char *host)
 {
     GHashTableIter scan;
     stonith_topology_t *entry = g_hash_table_lookup(topology, host);
 
     if (entry == NULL) {
         /* No entry under that key, so test each entry in turn */
         g_hash_table_iter_init(&scan, topology);
         while (g_hash_table_iter_next(&scan, NULL, (gpointer *) &entry)) {
             if (topology_matches(entry, host)) {
                 crm_trace("Found %s for %s in %d entries", entry->target, host, g_hash_table_size(topology));
                 return entry;
             }
         }
 
         crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
         return NULL;
     }
 
     crm_trace("Found %s for %s in %d entries", entry->target, host, g_hash_table_size(topology));
     return entry;
 }
 
 /*!
  * \internal
  * \brief Move a fencing operation on to its target's next topology level
  *
  * \param[in,out] op  Remote fencing operation to modify
  *
  * \return pcmk_ok if the device list was updated, or if there was nothing to
  *         do because no target was specified (i.e. queries) or the target
  *         has no topology; -EINVAL if there are no more topology levels to
  *         try
  */
 static int
 stonith_topology_next(remote_fencing_op_t * op)
 {
     stonith_topology_t *topo = NULL;
 
     /* Queries don't have a target set */
     if (op->target != NULL) {
         topo = find_topology_for_host(op->target);
     }
     if (topology_is_empty(topo)) {
         return pcmk_ok;
     }
 
     set_bit(op->call_options, st_opt_topology);
 
     /* This is a new level, so undo any remapping left over from previous */
     undo_op_remap(op);
 
     /* Always step forward once, then keep stepping past levels that have no
      * devices configured
      */
     for (op->level++;
          (op->level < ST_LEVEL_MAX) && (topo->levels[op->level] == NULL);
          op->level++) {
     }
 
     if (op->level >= ST_LEVEL_MAX) {
         crm_notice("All fencing options to fence %s for %s@%s.%.8s failed",
                    op->target, op->client_name, op->originator, op->id);
         return -EINVAL;
     }
 
     crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s",
               op->level, op->target, g_list_length(topo->levels[op->level]),
               op->client_name, op->originator, op->id);
     set_op_device_list(op, topo->levels[op->level]);
 
     if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
         /* A reboot has been requested for a topology level with multiple
          * devices. Instead of rebooting the devices sequentially, we will
          * turn them all off, then turn them all on again. (Think about
          * switched power outlets for redundant power supplies.)
          */
         op_phase_off(op);
     }
     return pcmk_ok;
 }
 
 /*!
  * \internal
  * \brief Merge an operation into an equivalent in-flight operation, if any
  *
  * Every known remote operation is examined. A candidate must still be in
  * progress, target the same node with the same requested action, come from a
  * different client, not have its own originator as its target, have an
  * originator that is still an active fencing peer, and not have outlived its
  * total timeout. When a candidate qualifies, \p op is appended to its
  * duplicates list and marked st_duplicate, so it receives that operation's
  * result instead of being executed itself.
  *
  * As a side effect, any candidate whose originator is no longer active is
  * marked st_failed.
  *
  * \param[in,out] op  Operation to check against the in-flight operations
  *
  * \note NOTE(review): the scan continues after a merge, so \p op could in
  *       principle be appended to more than one duplicates list - confirm
  *       whether that is intended.
  */
 static void
 merge_duplicates(remote_fencing_op_t * op)
 {
     GHashTableIter iter;
     remote_fencing_op_t *other = NULL;
 
     time_t now = time(NULL);
 
     g_hash_table_iter_init(&iter, stonith_remote_op_list);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
         crm_node_t *peer = NULL;
         const char *other_action = op_requested_action(other);
 
         if (other->state > st_exec) {
             /* Must be in-progress */
             continue;
         } else if (safe_str_neq(op->target, other->target)) {
             /* Must be for the same node */
             continue;
         } else if (safe_str_neq(op->action, other_action)) {
             crm_trace("Must be for the same action: %s vs. %s",
                       op->action, other_action);
             continue;
         } else if (safe_str_eq(op->client_name, other->client_name)) {
             crm_trace("Must be for different clients: %s", op->client_name);
             continue;
         } else if (safe_str_eq(other->target, other->originator)) {
             crm_trace("Can't be a suicide operation: %s", other->target);
             continue;
         }
 
         peer = crm_get_peer(0, other->originator);
         if(fencing_peer_active(peer) == FALSE) {
             crm_notice("Failing stonith action %s for node %s originating from %s@%s.%.8s: Originator is dead",
                        other->action, other->target, other->client_name, other->originator, other->id);
             other->state = st_failed;
             continue;
 
         } else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) {
             /* time_t is not guaranteed to be long, so convert explicitly to
              * a type that matches the conversion specifier
              */
             crm_info("Stonith action %s for node %s originating from %s@%s.%.8s is too old: %lld vs. %lld + %d",
                      other->action, other->target, other->client_name, other->originator, other->id,
                      (long long) now, (long long) other->created, other->total_timeout);
             continue;
         }
 
         /* There is another in-flight request to fence the same host
          * Piggyback on that instead.  If it fails, so do we.
          */
         other->duplicates = g_list_append(other->duplicates, op);
         if (other->total_timeout == 0) {
             crm_trace("Making a best-guess as to the timeout used");
             other->total_timeout = op->total_timeout =
                 TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
         }
         crm_notice
             ("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)",
              op->action, op->target, op->client_name, op->id, other->client_name, other->originator,
              other->id, other->total_timeout);
         report_timeout_period(op, other->total_timeout);
         op->state = st_duplicate;
     }
 }
 
 /*!
  * \internal
  * \brief Count the cached peers that fencing_peer_active() reports as active
  *
  * \return Number of active entries in the peer cache
  */
 static uint32_t fencing_active_peers(void)
 {
     uint32_t active = 0;
     crm_node_t *node = NULL;
     GHashTableIter peers;
 
     g_hash_table_iter_init(&peers, crm_peer_cache);
     while (g_hash_table_iter_next(&peers, NULL, (void **) &node)) {
         if (fencing_peer_active(node)) {
             active += 1;
         }
     }
     return active;
 }
 
 /*!
  * \brief Record a manual confirmation that a fencing target is down
  *
  * The operation is marked done with "a human" as its delegate, and is then
  * finalized with a successful result.
  *
  * \param[in]     msg  Confirmation request XML (searched for the target)
  * \param[in,out] op   Operation being confirmed
  *
  * \return -EINPROGRESS always
  */
 int
 stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
 {
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
 
     op->state = st_done;
     op->completed = time(NULL);
 
     /* create_remote_stonith_op() copies a delegate from the request when one
      * is present, so release any existing value before overwriting it;
      * otherwise that string would be leaked (free(NULL) is a no-op)
      */
     free(op->delegate);
     op->delegate = strdup("a human");
 
     crm_notice("Injecting manual confirmation that %s is safely off/down",
                crm_element_value(dev, F_STONITH_TARGET));
 
     remote_op_done(op, msg, pcmk_ok, FALSE);
 
     /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
     return -EINPROGRESS;
 }
 
 /*!
  * \internal
  * \brief Create a new remote stonith operation
  *
  * The new operation is added to the table of remote operations, filled in
  * from the request, checked against in-flight operations for duplicates,
  * and (unless it turned out to be a duplicate) announced to history
  * listeners.
  *
  * \param[in] client   ID of local stonith client that initiated the operation
  * \param[in] request  The request from the client that started the operation
  * \param[in] peer     TRUE if this operation is owned by another stonith peer
  *                     (an operation owned by one peer is stored on all peers,
  *                     but only the owner executes it; all nodes get the results
  *                     once the owner finishes execution)
  *
  * \return The operation (remote_fencing_op_t *): newly created, or the
  *         already-recorded one when a peer-owned operation with the same ID
  *         exists; NULL if a peer-owned request carries no operation ID
  */
 void *
 create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
 {
     remote_fencing_op_t *op = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
     int call_options = 0;
 
     init_stonith_remote_op_hash_table(&stonith_remote_op_list);
 
     /* If this operation is owned by another node, check to make
      * sure we haven't already created this operation. */
     if (peer && dev) {
         const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
 
         CRM_CHECK(op_id != NULL, return NULL);
 
         op = g_hash_table_lookup(stonith_remote_op_list, op_id);
         if (op) {
             crm_debug("%s already exists", op_id);
             return op;
         }
     }
 
     /* NOTE(review): the allocation result is used without a NULL check */
     op = calloc(1, sizeof(remote_fencing_op_t));
 
     crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
 
     /* A peer-owned operation keeps the ID its owner assigned; anything else
      * gets a freshly generated one
      */
     if (peer && dev) {
         op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
     } else {
         op->id = crm_generate_uuid();
     }
 
     /* The table key is the operation's own ID string */
     g_hash_table_replace(stonith_remote_op_list, op->id, op);
     CRM_LOG_ASSERT(g_hash_table_lookup(stonith_remote_op_list, op->id) != NULL);
     crm_trace("Created %s", op->id);
 
     op->state = st_query;
     op->replies_expected = fencing_active_peers();
     op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
     op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
     op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
     op->created = time(NULL);
 
     if (op->originator == NULL) {
         /* Local or relayed request */
         op->originator = strdup(stonith_our_uname);
     }
 
     /* A missing client ID is logged as an assertion failure but tolerated */
     CRM_LOG_ASSERT(client != NULL);
     if (client) {
         op->client_id = strdup(client);
     }
 
     op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
 
     op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
     op->request = copy_xml(request);    /* TODO: Figure out how to avoid this */
     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
     op->call_options = call_options;
 
     crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
 
     crm_trace("%s new stonith op: %s - %s of %s for %s",
               (peer
                && dev) ? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name);
 
     /* With st_opt_cs_nodeid, the target was given as a numeric node ID and
      * is replaced here by the corresponding node name when one is known
      */
     if (op->call_options & st_opt_cs_nodeid) {
         int nodeid = crm_atoi(op->target, NULL);
         crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
 
         /* Ensure the conversion only happens once */
         op->call_options &= ~st_opt_cs_nodeid;
 
         if (node && node->uname) {
             free(op->target);
             op->target = strdup(node->uname);
 
         } else {
             crm_warn("Could not expand nodeid '%s' into a host name", op->target);
         }
     }
 
     /* check to see if this is a duplicate operation of another in-flight operation */
     merge_duplicates(op);
 
     if (op->state != st_duplicate) {
         /* kick history readers */
         do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
     }
 
     /* safe to trim as long as that doesn't touch pending ops */
     stonith_fence_history_trim();
 
     return op;
 }
 
 /*!
  * \brief Start a fencing operation owned by this node
  *
  * Creates the operation and marks it as owned locally. Unless this is a
  * manual confirmation, the operation then moves to its first topology level
  * (if any), and a query is sent to the cluster with a timer started to
  * bound the wait for replies.
  *
  * \param[in] client      Local client that requested fencing (if NULL, the
  *                        client ID is taken from the request instead)
  * \param[in] request     Fencing request XML
  * \param[in] manual_ack  TRUE if the request is a manual confirmation, in
  *                        which case the operation is only created
  *
  * \return The operation, or NULL if the request specified no action
  */
 remote_fencing_op_t *
 initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
 {
     int query_timeout = 0;
     xmlNode *query = NULL;
     remote_fencing_op_t *op = NULL;
     const char *client_id = (client != NULL)? client->id
                             : crm_element_value(request, F_STONITH_CLIENTID);
 
     CRM_LOG_ASSERT(client_id != NULL);
     op = create_remote_stonith_op(client_id, request, FALSE);
     op->owner = TRUE;
 
     if (manual_ack) {
         crm_notice("Initiating manual confirmation for %s: %s",
                    op->target, op->id);
         return op;
     }
 
     CRM_CHECK(op->action, return NULL);
 
     if (stonith_topology_next(op) != pcmk_ok) {
         op->state = st_failed;
     }
 
     if (op->state == st_failed) {
         /* There is nothing to try, so finalize the operation straight away */
         crm_warn("Could not request peer fencing (%s) of %s "
                  CRM_XS " id=%s", op->action, op->target, op->id);
         remote_op_done(op, NULL, -EINVAL, FALSE);
         return op;
     }
 
     if (op->state == st_duplicate) {
         /* No query is sent for an operation merged into another one */
         crm_info("Requesting peer fencing (%s) of %s (duplicate) "
                  CRM_XS " id=%s", op->action, op->target, op->id);
         return op;
     }
 
     crm_notice("Requesting peer fencing (%s) of %s "
                CRM_XS " id=%s state=%d",
                op->action, op->target, op->id, op->state);
 
     /* Build the query and send it as a cluster message */
     query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
                               NULL, op->call_options);
     crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
     crm_xml_add(query, F_STONITH_TARGET, op->target);
     crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
     crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
     crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
     crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
     crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
     send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
     free_xml(query);
 
     /* The scaled timeout is stored as an int before converting to ms */
     query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
     op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
 
     return op;
 }
 
 /*! Bit flags restricting which peers find_best_peer() will consider */
 enum find_best_peer_options {
     /*! Skip checking the target peer for capable fencing devices */
     FIND_PEER_SKIP_TARGET   = (1 << 0),
     /*! Only check the target peer for capable fencing devices */
     FIND_PEER_TARGET_ONLY   = (1 << 1),
     /*! Skip peers and devices that are not verified */
     FIND_PEER_VERIFIED_ONLY = (1 << 2),
 };
 
 /*!
  * \internal
  * \brief Find the first queried peer able to run the next fencing step
  *
  * \param[in]     device   Device to look for (required when the operation
  *                         uses a topology)
  * \param[in,out] op       Fencing operation whose query results are searched
  * \param[in]     options  Flags limiting which peers are acceptable
  *
  * \return First acceptable peer in query-result order, or NULL if none
  */
 static st_query_result_t *
 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
 {
     GListPtr item = NULL;
     gboolean verified_only = ((options & FIND_PEER_VERIFIED_ONLY) != 0);
 
     /* With a topology, there is nothing to search for without a device */
     if (!device && is_set(op->call_options, st_opt_topology)) {
         return NULL;
     }
 
     for (item = op->query_results; item != NULL; item = item->next) {
         st_query_result_t *candidate = item->data;
 
         crm_trace("Testing result from %s for %s with %d devices: %d %x",
                   candidate->host, op->target, candidate->ndevices, candidate->tried, options);
 
         /* Apply the caller's restrictions regarding the target itself */
         if (((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(candidate->host, op->target))
             || ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(candidate->host, op->target))) {
             continue;
         }
 
         if (!is_set(op->call_options, st_opt_topology)) {
             if ((candidate->tried == FALSE)
                 && count_peer_devices(op, candidate, verified_only)) {
 
                 /* No topology: Use the current best peer */
                 crm_trace("Simple fencing");
                 return candidate;
             }
 
         } else if (grab_peer_device(op, candidate, device, verified_only)) {
             return candidate;
         }
     }
 
     return NULL;
 }
 
 /*!
  * \internal
  * \brief Choose a peer to execute the next step of a fencing operation
  *
  * Peers are considered in decreasing order of preference: a peer other than
  * the target with verified access, then a peer other than the target with
  * unverified access, then the target itself (except in the "on" phase of a
  * remapped reboot). If none is found and the operation uses a topology, the
  * search is repeated for each following topology level.
  *
  * \param[in,out] op  Fencing operation to choose a peer for (its topology
  *                    level may be advanced as a side effect)
  *
  * \return Chosen peer, or NULL if no peer was found or if query replies are
  *         still being awaited
  */
 static st_query_result_t *
 stonith_choose_peer(remote_fencing_op_t * op)
 {
     const char *device = NULL;
     st_query_result_t *peer = NULL;
     uint32_t active = fencing_active_peers();
 
     do {
         /* op->devices may change between iterations, because the loop
          * condition calls stonith_topology_next()
          */
         if (op->devices) {
             device = op->devices->data;
             crm_trace("Checking for someone to fence (%s) %s with %s",
                       op->action, op->target, device);
         } else {
             crm_trace("Checking for someone to fence (%s) %s",
                       op->action, op->target);
         }
 
         /* Best choice is a peer other than the target with verified access */
         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
         if (peer) {
             crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
             return peer;
         }
 
         /* While the query timer is still pending and fewer replies have
          * arrived than expected (capped at the number of active peers), do
          * not settle for a less preferred choice yet
          */
         if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
             crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
             return NULL;
         }
 
         /* If no other peer has verified access, next best is unverified access */
         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
         if (peer) {
             crm_trace("Found best unverified peer %s", peer->host);
             return peer;
         }
 
         /* If no other peer can do it, last option is self-fencing
          * (which is never allowed for the "on" phase of a remapped reboot)
          */
         if (op->phase != st_phase_on) {
             peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
             if (peer) {
                 crm_trace("%s will fence itself", peer->host);
                 return peer;
             }
         }
 
         /* Try the next fencing level if there is one (unless we're in the "on"
          * phase of a remapped "reboot", because we ignore errors in that case)
          */
     } while ((op->phase != st_phase_on)
              && is_set(op->call_options, st_opt_topology)
              && stonith_topology_next(op) == pcmk_ok);
 
     /* device still names the last device that was searched for, if any */
     crm_notice("Couldn't find anyone to fence (%s) %s with %s",
                op->action, op->target, (device? device : "any device"));
     return NULL;
 }
 
 /*!
  * \internal
  * \brief Get the timeout to use for one device in an operation's current phase
  *
  * \param[in] op      Fencing operation (supplies base timeout and phase)
  * \param[in] peer    Peer whose reported device properties are consulted
  *                    (may be NULL)
  * \param[in] device  ID of device to look up (may be NULL)
  *
  * \return The device's custom action timeout for the phase (or the base
  *         timeout if that is zero) plus its maximum delay for the phase; the
  *         plain base timeout if the peer, device, or device properties are
  *         unavailable
  */
 static int
 get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
                    const char *device)
 {
     device_properties_t *props = NULL;
     int action_timeout = op->base_timeout;
 
     if ((peer != NULL) && (device != NULL)) {
         props = g_hash_table_lookup(peer->devices, device);
     }
     if (props == NULL) {
         return op->base_timeout;
     }
 
     /* A nonzero custom timeout takes the place of the base timeout */
     if (props->custom_action_timeout[op->phase]) {
         action_timeout = props->custom_action_timeout[op->phase];
     }
     return action_timeout + props->delay_max[op->phase];
 }
 
 /* State handed to add_device_timeout() while summing a peer's device timeouts */
 struct timeout_data {
     const remote_fencing_op_t *op;      /* operation supplying the phase and base timeout */
     const st_query_result_t *peer;      /* peer whose devices are being summed */
     int total_timeout;                  /* running total of the device timeouts */
 };
 
 /*!
  * \internal
  * \brief Add a device's timeout to a running total, if it may still be used
  *
  * A device counts only if, for the operation's current phase, it has been
  * neither executed nor disallowed.
  *
  * \param[in]     key        GHashTable key (device ID)
  * \param[in]     value      GHashTable value (device properties)
  * \param[in,out] user_data  Timeout data (struct timeout_data *)
  */
 static void
 add_device_timeout(gpointer key, gpointer value, gpointer user_data)
 {
     struct timeout_data *totals = user_data;
     const device_properties_t *props = value;
     const char *device = key;
 
     if (props->executed[totals->op->phase]
         || props->disallowed[totals->op->phase]) {
         return;
     }
 
     totals->total_timeout += get_device_timeout(totals->op, totals->peer,
                                                 device);
 }
 
 static int
 get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
 {
     struct timeout_data timeout;
 
     timeout.op = op;
     timeout.peer = peer;
     timeout.total_timeout = 0;
 
     g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
 
     return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
 }
 
 /*!
  * \internal
  * \brief Estimate the total timeout needed for a fencing operation
  *
  * With a topology, this is the sum over every device at every level of the
  * timeout reported by the first queried peer that has the device. Without
  * one, it is the chosen peer's timeout, or the base timeout if no peer has
  * been chosen.
  *
  * \param[in] op           Fencing operation
  * \param[in] chosen_peer  Peer selected to execute (may be NULL)
  *
  * \return Estimated total timeout, or the base timeout if the estimate is 0
  */
 static int
 get_op_total_timeout(const remote_fencing_op_t *op,
                      const st_query_result_t *chosen_peer)
 {
     int sum = 0;
     stonith_topology_t *tp = find_topology_for_host(op->target);
 
     if (!is_set(op->call_options, st_opt_topology) || (tp == NULL)) {
         if (chosen_peer != NULL) {
             sum = get_peer_timeout(op, chosen_peer);
         } else {
             sum = op->base_timeout;
         }
 
     } else {
         int level = 0;
 
         /* Three nested walks: topology levels, then the devices configured
          * at each level, then the query results to find a peer that has the
          * device. Only the first such peer contributes a timeout per device.
          */
         for (level = 0; level < ST_LEVEL_MAX; level++) {
             GListPtr dev = NULL;
 
             for (dev = tp->levels[level]; dev != NULL; dev = dev->next) {
                 GListPtr result = NULL;
 
                 for (result = op->query_results; result != NULL; result = result->next) {
                     const st_query_result_t *peer = result->data;
 
                     if (find_peer_device(op, peer, dev->data)) {
                         sum += get_device_timeout(op, peer, dev->data);
                         break;
                     }
                 }
             }
         }
     }
 
     return (sum != 0)? sum : op->base_timeout;
 }
 
 /*!
  * \internal
  * \brief Tell an operation's client what timeout applies to its request
  *
  * Nothing is reported for synchronous calls, for operations whose request
  * has been released, or when the request lacks the client node, call ID or
  * client ID. A client on this node is updated directly; otherwise the update
  * is relayed to the client's node, and the operation's duplicates are
  * processed the same way.
  *
  * \param[in] op          Operation whose client should be told
  * \param[in] op_timeout  Timeout value to report
  *
  * \note NOTE(review): duplicates are only reached on the relay path, not
  *       when the client is local - confirm whether that is intended.
  */
 static void
 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
 {
     GListPtr item = NULL;
     xmlNode *relay = NULL;
     const char *client_node = NULL;
     const char *client_id = NULL;
     const char *call_id = NULL;
 
     /* There is no reason to report the timeout for a synchronous call. It
      * is impossible to use the reported timeout to do anything when the client
      * is blocking for the response.  This update is only important for
      * async calls that require a callback to report the results in.
      */
     if ((op->call_options & st_opt_sync_call) || (op->request == NULL)) {
         return;
     }
 
     crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id);
     client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
     call_id = crm_element_value(op->request, F_STONITH_CALLID);
     client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
     if ((client_node == NULL) || (call_id == NULL) || (client_id == NULL)) {
         return;
     }
 
     if (safe_str_eq(client_node, stonith_our_uname)) {
         /* The client is connected to this node, send the update directly to them */
         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
         return;
     }
 
     /* The client is connected to another node, relay this update to them */
     relay = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
     crm_xml_add(relay, F_STONITH_REMOTE_OP_ID, op->id);
     crm_xml_add(relay, F_STONITH_CLIENTID, client_id);
     crm_xml_add(relay, F_STONITH_CALLID, call_id);
     crm_xml_add_int(relay, F_STONITH_TIMEOUT, op_timeout);
     send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, relay, FALSE);
     free_xml(relay);
 
     item = op->duplicates;
     while (item != NULL) {
         remote_fencing_op_t *dup = item->data;
 
         crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id);
         report_timeout_period(item->data, op_timeout);
         item = item->next;
     }
 }
 
 /*!
  * \internal
  * \brief Advance an operation to the next device in its topology
  *
  * Depending on what remains, this either executes the next device (via
  * call_remote_stonith()) or marks the operation done and finalizes it (via
  * remote_op_done()).
  *
  * \param[in,out] op      Operation to advance
  * \param[in]     device  ID of device just completed (only used here to drop
  *                        it from the automatic unfencing list)
  * \param[in]     msg     XML reply that contained device result (if available)
  * \param[in]     rc      Return code of device's execution
  */
 static void
 advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
                     int rc)
 {
     /* Advance to the next device at this topology level, if any */
     if (op->devices) {
         op->devices = op->devices->next;
     }
 
     /* Handle automatic unfencing if an "on" action was requested */
     if ((op->phase == st_phase_requested) && safe_str_eq(op->action, "on")) {
         /* If the device we just executed was required, it's not anymore */
         remove_required_device(op, device);
 
         /* If there are no more devices at this topology level, run through any
          * remaining devices with automatic unfencing
          */
         if (op->devices == NULL) {
             op->devices = op->automatic_list;
         }
     }
 
     if ((op->devices == NULL) && (op->phase == st_phase_off)) {
         /* We're done with this level and with required devices, but we had
          * remapped "reboot" to "off", so start over with "on". If any devices
          * need to be turned back on, op->devices will be non-NULL after this.
          */
         op_phase_on(op);
     }
 
     /* op->devices now reflects every adjustment made above */
     if (op->devices) {
         /* Necessary devices remain, so execute the next one */
         crm_trace("Next for %s on behalf of %s@%s (rc was %d)",
                   op->target, op->originator, op->client_name, rc);
         call_remote_stonith(op, NULL);
     } else {
         /* We're done with all devices and phases, so finalize operation */
         crm_trace("Marking complex fencing op for %s as complete", op->target);
         op->state = st_done;
         remote_op_done(op, msg, rc, FALSE);
     }
 }
 
 /*!
  * \internal
  * \brief Ask a peer to execute the next step of a fencing operation
  *
  * Start the operation's total timer if it is not already running, pick a peer
  * (and, for topology operations, the current device), and send the fencing
  * request to that peer. When no peer is available, either continue past a
  * remapped "on", time out or fail the operation, wait for the target to
  * self-fence via watchdog, or keep waiting for more query replies.
  *
  * \param[in,out] op    Fencing operation to execute
  * \param[in]     peer  Peer to use, or NULL to have one chosen (a peer passed
  *                      in is replaced when the operation uses a topology and
  *                      still has devices to run)
  */
 void
 call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
 {
     const char *device = NULL;
     int timeout = op->base_timeout;
 
     crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state);
     if (peer == NULL && !is_set(op->call_options, st_opt_topology)) {
         peer = stonith_choose_peer(op);
     }
 
     /* The total timer is created only once per operation */
     if (!op->op_timer_total) {
         int total_timeout = get_op_total_timeout(op, peer);
 
         op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout;
         op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
         report_timeout_period(op, op->total_timeout);
         crm_info("Total timeout set to %d for peer's fencing of %s for %s"
                  CRM_XS "id=%s",
                  total_timeout, op->target, op->client_name, op->id);
     }
 
     if (is_set(op->call_options, st_opt_topology) && op->devices) {
         /* Ignore any peer preference, they might not have the device we need */
         /* When using topology, stonith_choose_peer() removes the device from
          * further consideration, so be sure to calculate timeout beforehand */
         peer = stonith_choose_peer(op);
 
         device = op->devices->data;
         timeout = get_device_timeout(op, peer, device);
     }
 
     if (peer) {
         int timeout_one = 0;
         xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
 
         crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
         crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
         crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
         crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
         crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
         crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
         crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
         crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
 
         if (device) {
             timeout_one = TIMEOUT_MULTIPLY_FACTOR *
                           get_device_timeout(op, peer, device);
             crm_notice("Requesting that '%s' perform op '%s %s' with '%s' " CRM_XS " for %s (%ds)", peer->host,
                      op->target, op->action, device, op->client_name, timeout_one);
             crm_xml_add(remote_op, F_STONITH_DEVICE, device);
             crm_xml_add(remote_op, F_STONITH_MODE, "slave");
 
         } else {
             timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
             crm_notice("Requesting that '%s' perform op '%s %s' " CRM_XS " for %s (%ds, %lds)",
                      peer->host, op->target, op->action, op->client_name, timeout_one, stonith_watchdog_timeout_ms);
             crm_xml_add(remote_op, F_STONITH_MODE, "smart");
 
         }
 
         op->state = st_exec;
         /* Replace any per-request timer left over from a previous attempt */
         if (op->op_timer_one) {
             g_source_remove(op->op_timer_one);
         }
 
         if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) {
             crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
                        stonith_watchdog_timeout_ms/1000, op->target,
                        op->action, op->client_name, op->id, device);
             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
 
             /* TODO check devices to verify watchdog will be in use */
         } else if(stonith_watchdog_timeout_ms > 0
                   && safe_str_eq(peer->host, op->target)
                   && safe_str_neq(op->action, "on")) {
             crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
                        stonith_watchdog_timeout_ms/1000, op->target,
                        op->action, op->client_name, op->id, device);
             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
 
         } else {
             op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
         }
 
 
         send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
         peer->tried = TRUE;
         free_xml(remote_op);
         return;
 
     } else if (op->phase == st_phase_on) {
         /* A remapped "on" cannot be executed, but the node was already
          * turned off successfully, so ignore the error and continue.
          */
         crm_warn("Ignoring %s 'on' failure (no capable peers) for %s after successful 'off'",
                  device, op->target);
         advance_op_topology(op, device, NULL, pcmk_ok);
         return;
 
     } else if (op->owner == FALSE) {
         crm_err("Fencing (%s) of %s for %s is not ours to control",
                 op->action, op->target, op->client_name);
 
     } else if (op->query_timer == 0) {
         /* We've exhausted all available peers */
         /* Arguments follow the "(action) target" order used by the other
          * messages in this function */
         crm_info("No remaining peers capable of fencing (%s) %s for %s (%d)",
                  op->action, op->target, op->client_name, op->state);
         CRM_LOG_ASSERT(op->state < st_done);
         remote_op_timeout(op);
 
     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
         int rc = -EHOSTUNREACH;
 
         /* if the operation never left the query state,
          * but we have all the expected replies, then no devices
          * are available to execute the fencing operation. */
 
         if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) {
             crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
                      stonith_watchdog_timeout_ms/1000, op->target,
                      op->action, op->client_name, op->id, device);
 
             op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
             return;
         }
 
         if (op->state == st_query) {
             crm_info("No peers (out of %d) have devices capable of fencing (%s) %s for %s (%d)",
                      op->replies, op->action, op->target, op->client_name,
                      op->state);
 
             rc = -ENODEV;
         } else {
             crm_info("No peers (out of %d) are capable of fencing (%s) %s for %s (%d)",
                      op->replies, op->action, op->target, op->client_name,
                      op->state);
         }
 
         op->state = st_failed;
         remote_op_done(op, NULL, rc, FALSE);
 
     } else if (device) {
         crm_info("Waiting for additional peers capable of fencing (%s) %s with %s for %s.%.8s",
                  op->action, op->target, device, op->client_name, op->id);
     } else {
         crm_info("Waiting for additional peers capable of fencing (%s) %s for %s.%.8s",
                  op->action, op->target, op->client_name, op->id);
     }
 }
 
 /*!
  * \internal
  * \brief GLib comparison callback ordering query results by device count
  *
  * \param[in] a  First list item (a query result)
  * \param[in] b  Second list item (a query result)
  *
  * \return Negative if \p a has more devices than \p b, 0 if both have the
  *         same number, positive if \p a has fewer (so results with more
  *         devices sort first)
  */
 static gint
 sort_peers(gconstpointer a, gconstpointer b)
 {
     const st_query_result_t *first = a;
     const st_query_result_t *second = b;
     gint delta = second->ndevices - first->ndevices;
 
     return delta;
 }
 
 /*!
  * \internal
  * \brief Check whether every device in the target's topology has been
  *        reported by at least one peer
  *
  * \param[in] op  Fencing operation whose query results should be checked
  *
  * \return TRUE if each device at every topology level was found in the query
  *         results, FALSE otherwise (including when the target has no topology)
  */
 static gboolean
 all_topology_devices_found(remote_fencing_op_t * op)
 {
     stonith_topology_t *topology = find_topology_for_host(op->target);
     gboolean ignore_target = FALSE;
     int level = 0;
 
     if (topology == NULL) {
         return FALSE;
     }
 
     /* When the target itself is being taken down, devices reported by the
      * target node are not counted
      */
     if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) {
         ignore_target = TRUE;
     }
 
     for (level = 0; level < ST_LEVEL_MAX; level++) {
         GListPtr dev_item = NULL;
 
         for (dev_item = topology->levels[level]; dev_item != NULL;
              dev_item = dev_item->next) {
 
             device_properties_t *found = NULL;
             GListPtr peer_item = op->query_results;
 
             /* Stop at the first peer that has this device */
             while ((peer_item != NULL) && (found == NULL)) {
                 st_query_result_t *peer = peer_item->data;
 
                 if (!ignore_target || !safe_str_eq(peer->host, op->target)) {
                     found = find_peer_device(op, peer, dev_item->data);
                 }
                 peer_item = peer_item->next;
             }
 
             if (found == NULL) {
                 return FALSE;
             }
         }
     }
 
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Read the properties of one device that depend on the action
  *
  * \param[in]     xml     XML element containing the properties
  * \param[in]     peer    Name of peer that sent XML (for logs)
  * \param[in]     device  Device ID (for logs, and for required-device tracking)
  * \param[in]     action  Action the properties relate to
  * \param[in,out] op      Operation that gains the device as required when the
  *                        XML marks it so for an "on" action
  * \param[in]     phase   Phase the properties relate to
  * \param[in,out] props   Device properties to update
  */
 static void
 parse_action_specific(xmlNode *xml, const char *peer, const char *device,
                       const char *action, remote_fencing_op_t *op,
                       enum st_remap_phase phase, device_properties_t *props)
 {
     int *action_timeout = &props->custom_action_timeout[phase];
     int *max_delay = &props->delay_max[phase];
     int *base_delay = &props->delay_base[phase];
 
     /* Each value is reset before being read from the XML */
     *action_timeout = 0;
     crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, action_timeout);
     if (*action_timeout != 0) {
         crm_trace("Peer %s with device %s returned %s action timeout %d",
                   peer, device, action, *action_timeout);
     }
 
     *max_delay = 0;
     crm_element_value_int(xml, F_STONITH_DELAY_MAX, max_delay);
     if (*max_delay != 0) {
         crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
                   peer, device, *max_delay, action);
     }
 
     *base_delay = 0;
     crm_element_value_int(xml, F_STONITH_DELAY_BASE, base_delay);
     if (*base_delay != 0) {
         crm_trace("Peer %s with device %s returned base delay %d for %s",
                   peer, device, *base_delay, action);
     }
 
     /* Devices with automatic unfencing are recorded as required for "on" */
     if (safe_str_eq(action, "on")) {
         int is_required = 0;
 
         crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &is_required);
         if (is_required != 0) {
             crm_trace("Peer %s requires device %s to execute for action %s",
                       peer, device, action);
             add_required_device(op, device);
         }
     }
 
     /* With a reboot remapped to off+on, a node may be permitted to run one
      * of the two actions but not the other.
      */
     if (crm_is_true(crm_element_value(xml, F_STONITH_ACTION_DISALLOWED))) {
         props->disallowed[phase] = TRUE;
         crm_trace("Peer %s is disallowed from executing %s for device %s",
                   peer, action, device);
     }
 }
 
 /*!
  * \internal
  * \brief Record one device from a peer's XML query reply
  *
  * \param[in]     xml       XML node containing device properties
  * \param[in,out] op        Operation that query and reply relate to
  * \param[in,out] result    Peer's results, which gain an entry for the device
  * \param[in]     device    ID of device being parsed
  */
 static void
 add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
                       st_query_result_t *result, const char *device)
 {
     device_properties_t *props = calloc(1, sizeof(device_properties_t));
     int is_verified = 0;
     xmlNode *phase_xml = NULL;
 
     /* Store the new (zeroed) entry under a copy of the device ID */
     CRM_ASSERT(props != NULL);
     g_hash_table_insert(result->devices, strdup(device), props);
 
     /* Verified (monitored) access makes a peer preferable */
     crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &is_verified);
     if (is_verified != 0) {
         crm_trace("Peer %s has confirmed a verified device %s",
                   result->host, device);
         props->verified = TRUE;
     }
 
     /* Properties for the action that was actually requested */
     parse_action_specific(xml, result->host, device, op_requested_action(op),
                           op, st_phase_requested, props);
 
     /* A reply to a "reboot" also carries child elements with the values for
      * "off" and "on", in case the reboot ends up being remapped.
      */
     phase_xml = __xml_first_child(xml);
     while (phase_xml != NULL) {
         const char *phase_id = ID(phase_xml);
 
         if (safe_str_eq(phase_id, "off")) {
             parse_action_specific(phase_xml, result->host, device, "off",
                                   op, st_phase_off, props);
 
         } else if (safe_str_eq(phase_id, "on")) {
             parse_action_specific(phase_xml, result->host, device, "on",
                                   op, st_phase_on, props);
         }
         phase_xml = __xml_next(phase_xml);
     }
 }
 
 /*!
  * \internal
  * \brief Build a result entry from a peer's XML query reply and add it to
  *        the operation's sorted list of results
  *
  * \param[in,out] op        Operation that query and reply relate to
  * \param[in]     host      Name of peer that sent this reply
  * \param[in]     ndevices  Number of devices the reply claims to contain
  * \param[in]     xml       XML node containing device list
  *
  * \return Newly allocated result structure with parsed reply (NULL if the
  *         structure could not be allocated)
  */
 static st_query_result_t *
 add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
 {
     st_query_result_t *result = calloc(1, sizeof(st_query_result_t));
     xmlNode *dev_xml = NULL;
 
     CRM_CHECK(result != NULL, return NULL);
     result->host = strdup(host);
     result->devices = crm_str_table_new();
 
     /* One child element per capable device; children without an ID are
      * skipped
      */
     dev_xml = __xml_first_child(xml);
     while (dev_xml != NULL) {
         const char *dev_id = ID(dev_xml);
 
         if (dev_id != NULL) {
             add_device_properties(dev_xml, op, result, dev_id);
         }
         dev_xml = __xml_next(dev_xml);
     }
 
     /* Count what was actually parsed, and complain if the claim differs */
     result->ndevices = g_hash_table_size(result->devices);
     CRM_CHECK(ndevices == result->ndevices,
               crm_err("Query claimed to have %d devices but %d found",
                       ndevices, result->ndevices));
 
     op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
     return result;
 }
 
 /*!
  * \internal
  * \brief Handle a peer's reply to our fencing query
  *
  * Parse a query result from XML and store it in the remote operation
  * table, and when enough replies have been received, issue a fencing request.
  *
  * \param[in] msg  XML reply received
  *
  * \return pcmk_ok on success, -EPROTO if the reply lacks the operation ID or
  *         the available-devices count, -EOPNOTSUPP if the operation is not
  *         in the remote operation table
  *
  * \note See initiate_remote_stonith_op() for how the XML query was initially
  *       formed, and stonith_query() for how the peer formed its XML reply.
  */
 int
 process_remote_stonith_query(xmlNode * msg)
 {
     int ndevices = 0;
     gboolean host_is_target = FALSE;
     gboolean have_all_replies = FALSE;
     const char *id = NULL;
     const char *host = NULL;
     remote_fencing_op_t *op = NULL;
     st_query_result_t *result = NULL;
     uint32_t replies_expected;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
 
     CRM_CHECK(dev != NULL, return -EPROTO);
 
     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
     CRM_CHECK(id != NULL, return -EPROTO);
 
     /* From here on, dev is the element carrying the device count; it is also
      * what gets passed to add_result() as the device list */
     dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
     CRM_CHECK(dev != NULL, return -EPROTO);
     crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);
 
     op = g_hash_table_lookup(stonith_remote_op_list, id);
     if (op == NULL) {
         crm_debug("Received query reply for unknown or expired operation %s",
                   id);
         return -EOPNOTSUPP;
     }
 
     /* Expect the smaller of the operation's expected reply count and the
      * current number of active peers */
-    replies_expected = QB_MIN(op->replies_expected, fencing_active_peers());
+    replies_expected = fencing_active_peers();
+    if (op->replies_expected < replies_expected) {
+        replies_expected = op->replies_expected;
+    }
     /* Count this reply; all replies only matter while still querying */
     if ((++op->replies >= replies_expected) && (op->state == st_query)) {
         have_all_replies = TRUE;
     }
     host = crm_element_value(msg, F_ORIG);
     host_is_target = safe_str_eq(host, op->target);
 
     crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s",
              op->replies, replies_expected, host,
              op->target, op->action, ndevices, id);
     /* A result is recorded only for peers that reported at least one device,
      * so result may still be NULL below */
     if (ndevices > 0) {
         result = add_result(op, host, ndevices, dev);
     }
 
     if (is_set(op->call_options, st_opt_topology)) {
         /* If we start the fencing before all the topology results are in,
          * it is possible fencing levels will be skipped because of the missing
          * query results. */
         if (op->state == st_query && all_topology_devices_found(op)) {
             /* All the query results are in for the topology, start the fencing ops. */
             crm_trace("All topology devices found");
             call_remote_stonith(op, result);
 
         } else if (have_all_replies) {
             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
                      replies_expected, op->replies);
             call_remote_stonith(op, NULL);
         }
 
     } else if (op->state == st_query) {
         int nverified = count_peer_devices(op, result, TRUE);
 
         /* We have a result for a non-topology fencing op that looks promising,
          * go ahead and start fencing before query timeout */
         if (result && (host_is_target == FALSE) && nverified) {
             /* we have a verified device living on a peer that is not the target */
             crm_trace("Found %d verified devices", nverified);
             call_remote_stonith(op, result);
 
         } else if (have_all_replies) {
             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
                      replies_expected, op->replies);
             call_remote_stonith(op, NULL);
 
         } else {
             crm_trace("Waiting for more peer results before launching fencing operation");
         }
 
     } else if (result && (op->state == st_done)) {
         /* The result stays in the operation's list; this only logs that it
          * arrived too late to be used */
         crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
                  result->host, result->ndevices, op->state);
     }
 
     return pcmk_ok;
 }
 
 /*!
  * \internal
  * \brief Handle a peer's reply to a fencing request
  *
  * Parse a fencing reply from XML, and either finalize the operation
  * or attempt another device as appropriate.
  *
  * \param[in] msg  XML reply received
  *
  * \return pcmk_ok for a handled broadcast result, -EPROTO if the reply lacks
  *         the operation ID or return code, -EOPNOTSUPP if the operation is
  *         unknown, otherwise the return code carried by (or, for an ignored
  *         remapped "on" failure, substituted for) the reply
  */
 int
 process_remote_stonith_exec(xmlNode * msg)
 {
     int rc = 0;
     const char *id = NULL;
     const char *device = NULL;
     remote_fencing_op_t *op = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
 
     CRM_CHECK(dev != NULL, return -EPROTO);
 
     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
     CRM_CHECK(id != NULL, return -EPROTO);
 
     dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
     CRM_CHECK(dev != NULL, return -EPROTO);
 
     crm_element_value_int(dev, F_STONITH_RC, &rc);
 
     device = crm_element_value(dev, F_STONITH_DEVICE);
 
     if (stonith_remote_op_list) {
         op = g_hash_table_lookup(stonith_remote_op_list, id);
     }
 
     if (op == NULL && rc == pcmk_ok) {
         /* Record successful fencing operations */
         const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
 
         op = create_remote_stonith_op(client_id, dev, TRUE);
     }
 
     if (op == NULL) {
         /* Could be for an event that began before we started */
         /* TODO: Record the op for later querying */
         crm_info("Received peer result of unknown or expired operation %s", id);
         return -EOPNOTSUPP;
     }
 
     if (op->devices && device && safe_str_neq(op->devices->data, device)) {
         crm_err("Received outdated reply for device %s (instead of %s) to "
                 "fence (%s) %s. Operation already timed out at peer level.",
                 device, (const char *) op->devices->data, op->action, op->target);
         return rc;
     }
 
     if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) {
         /* Logged as client@originator.id, with the ID shortened to its first
          * 8 characters as in the other messages of this file */
         crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)",
                   op->action, op->target, op->client_name, op->originator, op->id,
                   pcmk_strerror(rc), rc);
         if (rc == pcmk_ok) {
             op->state = st_done;
         } else {
             op->state = st_failed;
         }
         remote_op_done(op, msg, rc, FALSE);
         return pcmk_ok;
     } else if (safe_str_neq(op->originator, stonith_our_uname)) {
         /* If this isn't a remote level broadcast, and we are not the
          * originator of the operation, we should not be receiving this msg. */
         crm_err
             ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)",
              stonith_our_uname, device, op->target);
         return rc;
     }
 
     if (is_set(op->call_options, st_opt_topology)) {
         /* Within this block the device ID is read from the message itself,
          * shadowing the value read from the return-code element above */
         const char *device = crm_element_value(msg, F_STONITH_DEVICE);
 
         crm_notice("Call to %s for '%s %s' on behalf of %s@%s: %s (%d)",
                    device, op->target, op->action, op->client_name, op->originator,
                    pcmk_strerror(rc), rc);
 
         /* We own the op, and it is complete. broadcast the result to all nodes
          * and notify our local clients. */
         if (op->state == st_done) {
             remote_op_done(op, msg, rc, FALSE);
             return rc;
         }
 
         if ((op->phase == st_phase_on) && (rc != pcmk_ok)) {
             /* A remapped "on" failed, but the node was already turned off
              * successfully, so ignore the error and continue.
              */
             crm_warn("Ignoring %s 'on' failure (exit code %d) for %s after successful 'off'",
                      device, rc, op->target);
             rc = pcmk_ok;
         }
 
         if (rc == pcmk_ok) {
             /* An operation completed successfully. Try another device if
              * necessary, otherwise mark the operation as done. */
             advance_op_topology(op, device, msg, rc);
             return rc;
         } else {
             /* This device failed, time to try another topology level. If no other
              * levels are available, mark this operation as failed and report results. */
             if (stonith_topology_next(op) != pcmk_ok) {
                 op->state = st_failed;
                 remote_op_done(op, msg, rc, FALSE);
                 return rc;
             }
         }
     } else if (rc == pcmk_ok && op->devices == NULL) {
         crm_trace("All done for %s", op->target);
 
         op->state = st_done;
         remote_op_done(op, msg, rc, FALSE);
         return rc;
     } else if (rc == -ETIME && op->devices == NULL) {
         /* If the operation timed out don't bother retrying other peers. */
         op->state = st_failed;
         remote_op_done(op, msg, rc, FALSE);
         return rc;
     } else {
         /* fall-through and attempt other fencing action using another peer */
     }
 
     /* Retry on failure */
     crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
               op->client_name, rc);
     call_remote_stonith(op, NULL);
     return rc;
 }
 
 /*!
  * \internal
  * \brief Check whether a target was already fenced recently enough
  *
  * \param[in] tolerance  Number of seconds within which an earlier completed
  *                       operation still counts
  * \param[in] target     Name of the fencing target
  * \param[in] action     Fencing action to look for
  *
  * \return TRUE if a completed operation with the same target and action
  *         finished no more than \p tolerance seconds ago, FALSE otherwise
  *         (including when \p tolerance is not positive, or \p target or
  *         \p action is NULL)
  */
 gboolean
 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
 {
     GHashTableIter iter;
     time_t now = time(NULL);
     remote_fencing_op_t *rop = NULL;
 
     crm_trace("tolerance=%d, stonith_remote_op_list=%p", tolerance,
               stonith_remote_op_list);
 
     if ((tolerance <= 0) || (stonith_remote_op_list == NULL)
         || (target == NULL) || (action == NULL)) {
         return FALSE;
     }
 
     g_hash_table_iter_init(&iter, stonith_remote_op_list);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
         /* Remapped reboots need no special handling here: once an operation
          * is done, any remapping has already been undone
          */
         if ((strcmp(rop->target, target) == 0)
             && (rop->state == st_done)
             && (strcmp(rop->action, action) == 0)
             && ((rop->completed + tolerance) >= now)) {
 
             crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
                        target, action, tolerance, rop->delegate, rop->originator);
             return TRUE;
         }
     }
     return FALSE;
 }
diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c
index 17e69f0a87..d1e22a2972 100644
--- a/lib/common/mainloop.c
+++ b/lib/common/mainloop.c
@@ -1,1427 +1,1437 @@
 /*
  * Copyright 2004-2019 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU Lesser General Public License
  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_internal.h>
 
 #ifndef _GNU_SOURCE
 #  define _GNU_SOURCE
 #endif
 
 #include <stdlib.h>
+#include <string.h>
 #include <signal.h>
 #include <errno.h>
 
 #include <sys/wait.h>
 
 #include <crm/crm.h>
 #include <crm/common/xml.h>
 #include <crm/common/mainloop.h>
 #include <crm/common/ipcs.h>
 
 #include <qb/qbarray.h>
 
 /* Bookkeeping for one child process.
  * NOTE(review): presumably the structure behind mainloop_child_t (the type
  * used in the callback signature below); the typedef is not visible here.
  */
 struct mainloop_child_s {
     pid_t pid;
     char *desc;
     unsigned timerid;
     gboolean timeout;
     void *privatedata;
 
     enum mainloop_child_flags flags;
 
     /* Called when a process dies */
     void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
 };
 
 /* Custom GLib source used as a trigger.
  * NOTE(review): presumably the structure behind crm_trigger_t; the typedef
  * is not visible here. The functions in this file cast between GSource *
  * and crm_trigger_t *, which relies on the GSource being the first member.
  */
 struct trigger_s {
     GSource source;
     gboolean running;   /* set while a callback has reported its job as not yet complete */
     gboolean trigger;   /* set to request a dispatch, cleared when dispatching */
     void *user_data;    /* argument handed to the dispatch callback */
     guint id;           /* value returned by g_source_attach() */
 
 };
 
 /*!
  * \internal
  * \brief GLib "prepare" function for trigger sources
  *
  * \param[in]  source   Trigger source being prepared
  * \param[out] timeout  Set to the maximum poll time (in ms) wanted
  *
  * \return Whether the trigger has been set
  */
 static gboolean
 crm_trigger_prepare(GSource * source, gint * timeout)
 {
     /* Sources built on g_source_add_poll() (such as cluster-glue's FD and
      * IPC sources) do not set a timeout in their prepare functions, so
      * mainloop's poll() can block until one of them has an event. Any
      * other kind of source that has no poll descriptor (this one, or
      * g_idle_*) would then go unprocessed until something fd-based
      * happens.
      *
      * The timeout set here applies to all sources and caps how long
      * poll() may take, so always ask for a smallish value: large enough
      * that we are not in constant motion, small enough to bound how long
      * signal handling can be delayed.
      */
     *timeout = 500;             /* Timeout in ms */
 
     return ((crm_trigger_t *) source)->trigger;
 }
 
 /*!
  * \internal
  * \brief GLib "check" function for trigger sources
  *
  * \param[in] source  Trigger source being checked
  *
  * \return Whether the trigger has been set
  */
 static gboolean
 crm_trigger_check(GSource * source)
 {
     return ((crm_trigger_t *) source)->trigger;
 }
 
 /*!
  * \internal
  * \brief GLib "dispatch" function for trigger sources
  *
  * \param[in] source    Trigger source being dispatched
  * \param[in] callback  Function to call with the trigger's user data (may be
  *                      NULL)
  * \param[in] userdata  (ignored)
  *
  * \return TRUE if there is no callback, if a job is still running, or if the
  *         callback returned a negative value; otherwise the callback's
  *         return value
  */
 static gboolean
 crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata)
 {
     crm_trigger_t *trig = (crm_trigger_t *) source;
     int result = TRUE;
 
     if (trig->running) {
         /* The previous job has to finish before another one is started */
         return TRUE;
     }
     trig->trigger = FALSE;
 
     if (callback == NULL) {
         return result;
     }
 
     result = callback(trig->user_data);
     if (result >= 0) {
         return result;
     }
 
     /* A negative result means the job was started but has not finished */
     crm_trace("Trigger handler %p not yet complete", trig);
     trig->running = TRUE;
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief GLib "finalize" function for trigger sources (only logs a trace)
  *
  * \param[in] source  Trigger source being finalized
  */
 static void
 crm_trigger_finalize(GSource * source)
 {
     crm_trace("Trigger %p destroyed", source);
 }
 
 /* g_source_refcount() exists only to feed log messages. The first variant
  * below is compiled out; it reads the reference count through a local copy
  * of a GSource layout. The variant that is actually built always reports 0.
  */
 #if 0
 struct _GSourceCopy
 {
   gpointer callback_data;
   GSourceCallbackFuncs *callback_funcs;
 
   const GSourceFuncs *source_funcs;
   guint ref_count;
 
   GMainContext *context;
 
   gint priority;
   guint flags;
   guint source_id;
 
   GSList *poll_fds;
   
   GSource *prev;
   GSource *next;
 
   char    *name;
 
   void *priv;
 };
 
 static int
 g_source_refcount(GSource * source)
 {
     /* Duplicating the contents of private header files is a necessary evil */
     if (source) {
         struct _GSourceCopy *evil = (struct _GSourceCopy*)source;
         return evil->ref_count;
     }
     return 0;
 }
 #else
 /* Active variant: the reference count is not inspected, so 0 is returned
  * for every source */
 static int g_source_refcount(GSource * source)
 {
     return 0;
 }
 #endif
 
 static GSourceFuncs crm_trigger_funcs = {
     crm_trigger_prepare,
     crm_trigger_check,
     crm_trigger_dispatch,
     crm_trigger_finalize,
 };
 
 /*!
  * \internal
  * \brief Initialize a newly created trigger source and attach it to the
  *        default main context
  *
  * \param[in,out] source    Source to set up (allocated as a crm_trigger_t)
  * \param[in]     priority  Priority to give the source
  * \param[in]     dispatch  Callback to run when triggered (may be NULL)
  * \param[in]     userdata  Argument to pass to \p dispatch
  *
  * \return \p source, as a trigger
  */
 static crm_trigger_t *
 mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data),
                        gpointer userdata)
 {
     crm_trigger_t *trig = (crm_trigger_t *) source;
 
     /* A new trigger starts out unattached and unset */
     trig->id = 0;
     trig->trigger = FALSE;
     trig->user_data = userdata;
 
     if (dispatch != NULL) {
         g_source_set_callback(source, dispatch, trig, NULL);
     }
 
     g_source_set_priority(source, priority);
     g_source_set_can_recurse(source, FALSE);
 
     crm_trace("Setup %p with ref-count=%u", source, g_source_refcount(source));
     trig->id = g_source_attach(source, NULL);
     crm_trace("Attached %p with ref-count=%u", source, g_source_refcount(source));
 
     return trig;
 }
 
 /*!
  * \brief Mark a trigger's job as finished
  *
  * Clears the "running" flag, which the dispatch function sets when a
  * callback returns a negative value and checks before starting another job.
  *
  * \param[in,out] trig  Trigger whose job has completed (must not be NULL)
  */
 void
 mainloop_trigger_complete(crm_trigger_t * trig)
 {
     crm_trace("Trigger handler %p complete", trig);
     trig->running = FALSE;
 }
 
 /*!
  * \brief Create a trigger source and attach it to the mainloop
  *
  * If dispatch returns:
  *  -1: Job running but not complete
  *   0: Remove the trigger from mainloop
  *   1: Leave the trigger in mainloop
  *
  * \param[in] priority  Priority to give the new source
  * \param[in] dispatch  Callback to run when the trigger is set
  * \param[in] userdata  Argument to pass to \p dispatch
  *
  * \return Newly created trigger
  */
 crm_trigger_t *
 mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata)
 {
     GSource *source = NULL;
 
     /* The trigger structure must be larger than the GSource it starts with */
     CRM_ASSERT(sizeof(crm_trigger_t) > sizeof(GSource));
     source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t));
     CRM_ASSERT(source != NULL);
 
     return mainloop_setup_trigger(source, priority, dispatch, userdata);
 }
 
 /*!
  * \brief Set a trigger's pending flag
  *
  * \param[in,out] source  Trigger to set (NULL is ignored)
  */
 void
 mainloop_set_trigger(crm_trigger_t * source)
 {
     if (source == NULL) {
         return;
     }
     source->trigger = TRUE;
 }
 
 /*!
  * \brief Remove a trigger from the mainloop and drop the caller's reference
  *
  * \param[in] source  Trigger to destroy (NULL is ignored)
  *
  * \return TRUE (always)
  */
 gboolean
 mainloop_destroy_trigger(crm_trigger_t * source)
 {
     GSource *gsource = (GSource *) source;
     int refs = 0;
 
     if (gsource == NULL) {
         return TRUE;
     }
 
     refs = g_source_refcount(gsource);
     if (refs > 2) {
         crm_info("Trigger %p is still referenced %u times", gsource, refs);
     }
 
     // Remove from mainloop, ref_count--
     g_source_destroy(gsource);
 
     /* The caller no longer carries a reference to source.
      *
      * At this point the source should be freed, unless it is currently
      * being processed, in which case mainloop holds an additional reference
      * and the source will be freed once that processing completes.
      */
     g_source_unref(gsource);
 
     return TRUE;
 }
 
 // Define a custom glib source for signal handling
 
 // Data structure for the custom glib source used for signal handling
 typedef struct signal_s {
     crm_trigger_t trigger;      // embedded trigger; must be first so the struct can be cast to crm_trigger_t/GSource
     void (*handler) (int sig);  // function called from the mainloop when the signal is dispatched (may be NULL)
     int signal;                 // signal number this entry was registered for
 } crm_signal_t;
 
 // Table of registered signal sources, indexed by signal number (NULL = none)
 static crm_signal_t *crm_signals[NSIG];
 
 /*!
  * \internal
  * \brief Dispatch an event from custom glib source for signals
  *
  * Given a signal event, log it (except for SIGCHLD), clear the event trigger,
  * and call any registered signal handler.
  *
  * \param[in] source    glib source that triggered this dispatch
  * \param[in] callback  (ignored)
  * \param[in] userdata  (ignored)
  *
  * \return TRUE (always), so the source stays in the mainloop
  */
 static gboolean
 crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata)
 {
     crm_signal_t *entry = (crm_signal_t *) source;
     const int signum = entry->signal;
 
     if (signum != SIGCHLD) {
         crm_notice("Caught '%s' signal "CRM_XS" %d (%s handler)",
                    strsignal(signum), signum,
                    (entry->handler? "invoking" : "no"));
     }
 
     // Clear the pending flag before running the handler
     entry->trigger.trigger = FALSE;
 
     if (entry->handler != NULL) {
         entry->handler(signum);
     }
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Handle a signal by setting a trigger for signal source
  *
  * \param[in] sig  Signal number that was received
  *
  * \note This is the true signal handler for the mainloop signal source, and
  *       must be async-safe. Signals that are out of range or have no
  *       registered source are ignored.
  */
 static void
 mainloop_signal_handler(int sig)
 {
     if ((sig <= 0) || (sig >= NSIG)) {
         return;
     }
     if (crm_signals[sig] == NULL) {
         return;
     }
     mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]);
 }
 
 // Functions implementing our custom glib source for signal handling
 static GSourceFuncs crm_signal_funcs = {
     crm_trigger_prepare,
     crm_trigger_check,
     crm_signal_dispatch,
     crm_trigger_finalize,
 };
 
 /*!
  * \internal
  * \brief Set a true signal handler
  *
  * signal()-like interface to sigaction(). The handler is installed with
  * SA_RESTART and an empty signal mask.
  *
  * \param[in] sig       Signal number to register handler for
  * \param[in] dispatch  Signal handler
  *
  * \return The previous value of the signal handler, or SIG_ERR on error
  * \note The dispatch function must be async-safe.
  */
 sighandler_t
 crm_signal_handler(int sig, sighandler_t dispatch)
 {
     struct sigaction new_action;
     struct sigaction prev_action;
 
     memset(&new_action, 0, sizeof(new_action));
     new_action.sa_handler = dispatch;
     new_action.sa_flags = SA_RESTART;
 
     if (sigemptyset(&new_action.sa_mask) < 0) {
         crm_err("Could not set handler for signal %d: %s",
                 sig, pcmk_strerror(errno));
         return SIG_ERR;
     }
 
     if (sigaction(sig, &new_action, &prev_action) < 0) {
         crm_err("Could not set handler for signal %d: %s",
                 sig, pcmk_strerror(errno));
         return SIG_ERR;
     }
 
     return prev_action.sa_handler;
 }
 
 /*!
  * \brief Install a true signal handler (use crm_signal_handler() instead)
  *
  * \param[in] sig       Signal number to register handler for
  * \param[in] dispatch  Signal handler
  *
  * \return TRUE if crm_signal_handler() did not return SIG_ERR, else FALSE
  * \deprecated
  */
 gboolean
 crm_signal(int sig, void (*dispatch) (int sig))
 {
     return crm_signal_handler(sig, dispatch) != SIG_ERR;
 }
 
 static void
 mainloop_destroy_signal_entry(int sig)
 {
     crm_signal_t *tmp = crm_signals[sig];
 
     crm_signals[sig] = NULL;
 
     crm_trace("Destroying signal %d", sig);
     mainloop_destroy_trigger((crm_trigger_t *) tmp);
 }
 
 /*!
  * \internal
  * \brief Add a signal handler to a mainloop
  *
  * \param[in] sig       Signal number to handle
  * \param[in] dispatch  Signal handler function
  *
  * \note The true signal handler merely sets a mainloop trigger to call this
  *       dispatch function via the mainloop. Therefore, the dispatch function
  *       does not need to be async-safe.
  */
 gboolean
 mainloop_add_signal(int sig, void (*dispatch) (int sig))
 {
     GSource *source = NULL;
     int priority = G_PRIORITY_HIGH - 1;
 
     if (sig == SIGTERM) {
         /* TERM is higher priority than other signals,
          *   signals are higher priority than other ipc.
          * Yes, minus: smaller is "higher"
          */
         priority--;
     }
 
     if (sig >= NSIG || sig < 0) {
         crm_err("Signal %d is out of range", sig);
         return FALSE;
 
     } else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) {
         crm_trace("Signal handler for %d is already installed", sig);
         return TRUE;
 
     } else if (crm_signals[sig] != NULL) {
         crm_err("Different signal handler for %d is already installed", sig);
         return FALSE;
     }
 
     CRM_ASSERT(sizeof(crm_signal_t) > sizeof(GSource));
     source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t));
 
     crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL);
     CRM_ASSERT(crm_signals[sig] != NULL);
 
     crm_signals[sig]->handler = dispatch;
     crm_signals[sig]->signal = sig;
 
     if (crm_signal_handler(sig, mainloop_signal_handler) == SIG_ERR) {
         mainloop_destroy_signal_entry(sig);
         return FALSE;
     }
 #if 0
     /* If we want signals to interrupt mainloop's poll(), instead of waiting for
      * the timeout, then we should call siginterrupt() below
      *
      * For now, just enforce a low timeout
      */
     if (siginterrupt(sig, 1) < 0) {
         crm_perror(LOG_INFO, "Could not enable system call interruptions for signal %d", sig);
     }
 #endif
 
     return TRUE;
 }
 
 /*!
  * \brief Restore a signal's default disposition and remove its mainloop source
  *
  * \param[in] sig  Signal number to stop handling
  *
  * \return TRUE on success (including when no source was registered), FALSE
  *         if \p sig is out of range or the disposition could not be reset
  */
 gboolean
 mainloop_destroy_signal(int sig)
 {
     if (sig >= NSIG || sig < 0) {
         crm_err("Signal %d is out of range", sig);
         return FALSE;
 
     // SIG_DFL states the intent explicitly instead of relying on it being NULL
     } else if (crm_signal_handler(sig, SIG_DFL) == SIG_ERR) {
         crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig);
         return FALSE;
 
     } else if (crm_signals[sig] == NULL) {
         return TRUE;
     }
     mainloop_destroy_signal_entry(sig);
     return TRUE;
 }
 
 // Map from file descriptor to the adaptor used for libqb poll requests
 static qb_array_t *gio_map = NULL;
 
 /*!
  * \brief Free the descriptor map and destroy all mainloop signal sources
  */
 void
 mainloop_cleanup(void) 
 {
     if (gio_map) {
         qb_array_free(gio_map);
 
         /* Reset so that a later mainloop_add_ipc_server_with_prio() call
          * creates a fresh map instead of using freed memory
          */
         gio_map = NULL;
     }
 
     for (int sig = 0; sig < NSIG; ++sig) {
         mainloop_destroy_signal_entry(sig);
     }
 }
 
 /*
  * libqb...
  *
  * Adaptor that lets a libqb poll request be serviced by a glib I/O watch.
  * Entries live in gio_map, indexed by file descriptor.
  */
 struct gio_to_qb_poll {
     int32_t is_used;            // incremented per watch added, decremented by gio_poll_destroy()
     guint source;               // glib source ID of the current watch (0 if none)
     int32_t events;             // events being watched (always includes HUP, NVAL and ERR)
     void *data;                 // passed through to fn
     qb_ipcs_dispatch_fn_t fn;   // libqb function called by gio_read_socket()
     enum qb_loop_priority p;    // libqb priority the watch was requested with
 };
 
 /*!
  * \internal
  * \brief glib watch callback that forwards an I/O event to libqb
  *
  * \param[in] gio        Channel the event occurred on
  * \param[in] condition  I/O condition(s) that occurred
  * \param[in] data       Adaptor (struct gio_to_qb_poll) for the descriptor
  *
  * \return TRUE (keep the watch) if the libqb dispatch function returned 0,
  *         otherwise FALSE
  */
 static gboolean
 gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data)
 {
     struct gio_to_qb_poll *entry = data;
     gint fd = g_io_channel_unix_get_fd(gio);
     int32_t rc = 0;
 
     crm_trace("%p.%d %d", data, fd, condition);
 
     /* If this assert gets hit, then there is a race condition between
      * when we destroy a fd and when mainloop actually gives it up
      */
     CRM_ASSERT(entry->is_used > 0);
 
     rc = entry->fn(fd, condition, entry->data);
     return (rc == 0);
 }
 
 /*!
  * \internal
  * \brief Destroy notifier for a libqb adaptor's glib watch
  *
  * Drops one use of the adaptor; once no uses remain, its source ID is
  * cleared.
  *
  * \param[in,out] data  Adaptor (struct gio_to_qb_poll) the watch belonged to
  */
 static void
 gio_poll_destroy(gpointer data)
 {
     struct gio_to_qb_poll *entry = data;
 
     entry->is_used -= 1;
     CRM_ASSERT(entry->is_used >= 0);
 
     if (entry->is_used != 0) {
         return;
     }
     crm_trace("Marking adaptor %p unused", entry);
     entry->source = 0;
 }
 
 /*!
  * \internal
  * \brief Convert libqb's poll priority into GLib's one
  *
  * \param[in] prio  libqb's poll priority (#QB_LOOP_MED assumed as fallback)
  *
  * \return  best matching GLib's priority
  */
 static gint
 conv_prio_libqb2glib(enum qb_loop_priority prio)
 {
     switch (prio) {
         case QB_LOOP_LOW:
             return G_PRIORITY_LOW;
 
         case QB_LOOP_HIGH:
             return G_PRIORITY_HIGH;
 
         case QB_LOOP_MED:
             return G_PRIORITY_DEFAULT;
 
         default:
             crm_trace("Invalid libqb's loop priority %d, assuming QB_LOOP_MED",
                       prio);
             return G_PRIORITY_DEFAULT;
     }
 }
 
 /*!
  * \internal
  * \brief Convert libqb's poll priority to rate limiting spec
  *
  * \param[in] prio  libqb's poll priority (#QB_LOOP_MED assumed as fallback)
  *
  * \return  best matching rate limiting spec
  *
  * \note This is an inversion of what libqb's qb_ipcs_request_rate_limit does
  */
 static enum qb_ipcs_rate_limit
 conv_libqb_prio2ratelimit(enum qb_loop_priority prio)
 {
     switch (prio) {
         case QB_LOOP_LOW:
             return QB_IPCS_RATE_SLOW;
 
         case QB_LOOP_HIGH:
             return QB_IPCS_RATE_FAST;
 
         case QB_LOOP_MED:
             return QB_IPCS_RATE_NORMAL;
 
         default:
             crm_trace("Invalid libqb's loop priority %d, assuming QB_LOOP_MED",
                       prio);
             return QB_IPCS_RATE_NORMAL;
     }
 }
 
 /*!
  * \internal
  * \brief Add or replace the mainloop watch for a libqb-managed descriptor
  *
  * \param[in] p     libqb priority for the watch
  * \param[in] fd    File descriptor to watch
  * \param[in] evts  Events to watch for (HUP, NVAL and ERR are always added)
  * \param[in] data  Data to pass to \p fn
  * \param[in] fn    Function to call when an event occurs
  * \param[in] add   Nonzero to add a new watch, zero to modify an existing one
  *
  * \return 0 on success, otherwise a negative error code (the lookup result,
  *         -EEXIST, -ENOENT, -ENOMEM or -EINVAL)
  */
 static int32_t
 gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts,
                          void *data, qb_ipcs_dispatch_fn_t fn, int32_t add)
 {
     struct gio_to_qb_poll *entry = NULL;
     GIOChannel *io = NULL;
     int32_t rc = qb_array_index(gio_map, fd, (void **) &entry);
 
     if (rc < 0) {
         crm_err("Array lookup failed for fd=%d: %d", fd, rc);
         return rc;
     }
 
     crm_trace("Adding fd=%d to mainloop as adaptor %p", fd, entry);
 
     if (add) {
         if (entry->source) {
             crm_err("Adaptor for descriptor %d is still in-use", fd);
             return -EEXIST;
         }
     } else if (!entry->is_used) {
         crm_err("Adaptor for descriptor %d is not in-use", fd);
         return -ENOENT;
     }
 
     /* channel is created with ref_count = 1 */
     io = g_io_channel_unix_new(fd);
     if (io == NULL) {
         crm_err("No memory left to add fd=%d", fd);
         return -ENOMEM;
     }
 
     // When modifying, the previous watch must go before the new one is added
     if (entry->source) {
         g_source_remove(entry->source);
         entry->source = 0;
     }
 
     /* Because unlike the poll() API, glib doesn't tell us about HUPs by default */
     evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR);
 
     entry->p = p;
     entry->data = data;
     entry->events = evts;
     entry->fn = fn;
     entry->is_used++;
     entry->source = g_io_add_watch_full(io, conv_prio_libqb2glib(p), evts,
                                         gio_read_socket, entry,
                                         gio_poll_destroy);
 
     /* Now that mainloop holds a reference to the channel, thanks to
      * g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
      *
      * This means that the channel will be freed by:
      * g_main_context_dispatch()
      *  -> g_source_destroy_internal()
      *      -> g_source_callback_unref()
      * shortly after gio_poll_destroy() completes
      */
     g_io_channel_unref(io);
 
     crm_trace("Added to mainloop with gsource id=%d", entry->source);
     return (entry->source > 0)? 0 : -EINVAL;
 }
 
 /* libqb dispatch_add handler: start watching fd by calling
  * gio_poll_dispatch_update() with add set to QB_TRUE */
 static int32_t
 gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts,
                       void *data, qb_ipcs_dispatch_fn_t fn)
 {
     return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_TRUE);
 }
 
 /* libqb dispatch_mod handler: replace the existing watch on fd by calling
  * gio_poll_dispatch_update() with add set to QB_FALSE */
 static int32_t
 gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts,
                       void *data, qb_ipcs_dispatch_fn_t fn)
 {
     return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_FALSE);
 }
 
 static int32_t
 gio_poll_dispatch_del(int32_t fd)
 {
     struct gio_to_qb_poll *adaptor;
 
     crm_trace("Looking for fd=%d", fd);
     if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) {
         if (adaptor->source) {
             g_source_remove(adaptor->source);
             adaptor->source = 0;
         }
     }
     return 0;
 }
 
 /* Poll handler table given to libqb via qb_ipcs_poll_handlers_set(), so that
  * its descriptor watches are serviced by the glib mainloop; no job_add
  * handler is provided */
 struct qb_ipcs_poll_handlers gio_poll_funcs = {
     .job_add = NULL,
     .dispatch_add = gio_poll_dispatch_add,
     .dispatch_mod = gio_poll_dispatch_mod,
     .dispatch_del = gio_poll_dispatch_del,
 };
 
 /*!
  * \internal
  * \brief Choose the libqb IPC transport to use
  *
  * A recognized value of the PCMK_ipc_type environment variable overrides
  * \p requested; otherwise QB_IPC_NATIVE is mapped to shared memory.
  *
  * \param[in] requested  Transport asked for by the caller
  *
  * \return Transport to use
  */
 static enum qb_ipc_type
 pick_ipc_type(enum qb_ipc_type requested)
 {
     const char *env = getenv("PCMK_ipc_type");
 
     if (env != NULL) {
         if (strcmp("shared-mem", env) == 0) {
             return QB_IPC_SHM;
         }
         if (strcmp("socket", env) == 0) {
             return QB_IPC_SOCKET;
         }
         if (strcmp("posix", env) == 0) {
             return QB_IPC_POSIX_MQ;
         }
         if (strcmp("sysv", env) == 0) {
             return QB_IPC_SYSV_MQ;
         }
     }
 
     if (requested == QB_IPC_NATIVE) {
         /* We prefer shared memory because the server never blocks on
          * send.  If part of a message fits into the socket, libqb
          * needs to block until the remainder can be sent also.
          * Otherwise the client will wait forever for the remaining
          * bytes.
          */
         return QB_IPC_SHM;
     }
     return requested;
 }
 
 /*!
  * \brief Create an IPC server at medium priority and add it to the mainloop
  *
  * \param[in] name       Name passed to libqb for the server
  * \param[in] type       Requested IPC transport
  * \param[in] callbacks  libqb service handlers for the server
  *
  * \return Result of mainloop_add_ipc_server_with_prio() with QB_LOOP_MED
  */
 qb_ipcs_service_t *
 mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
                         struct qb_ipcs_service_handlers *callbacks)
 {
     return mainloop_add_ipc_server_with_prio(name, type, callbacks, QB_LOOP_MED);
 }
 
 /*!
  * \brief Create an IPC server with a given priority and add it to the mainloop
  *
  * \param[in] name       Name passed to libqb for the server
  * \param[in] type       Requested IPC transport (see pick_ipc_type())
  * \param[in] callbacks  libqb service handlers for the server
  * \param[in] prio       libqb priority; anything other than QB_LOOP_MED also
  *                       requests the matching libqb rate limit
  *
  * \return Newly created server, or NULL if it could not be created or started
  */
 qb_ipcs_service_t *
 mainloop_add_ipc_server_with_prio(const char *name, enum qb_ipc_type type,
                                   struct qb_ipcs_service_handlers *callbacks,
                                   enum qb_loop_priority prio)
 {
     int rc = 0;
     qb_ipcs_service_t *server = NULL;
 
     if (gio_map == NULL) {
         gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1);
     }
 
     crm_client_init();
     server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks);
 
     if (server == NULL) {
         /* Report the actual failure rather than the still-zero rc; errno is
          * presumably set by the failed creation (verify against libqb), and
          * is captured here before logging can overwrite it
          */
         rc = errno;
         crm_err("Could not create %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc);
         return NULL;
     }
 
     if (prio != QB_LOOP_MED) {
         qb_ipcs_request_rate_limit(server, conv_libqb_prio2ratelimit(prio));
     }
 
 #ifdef HAVE_IPCS_GET_BUFFER_SIZE
     /* All clients should use at least ipc_buffer_max as their buffer size */
     qb_ipcs_enforce_buffer_size(server, crm_ipc_default_buffer_size());
 #endif
 
     // Have libqb use the glib-backed poll handlers for its descriptors
     qb_ipcs_poll_handlers_set(server, &gio_poll_funcs);
 
     rc = qb_ipcs_run(server);
     if (rc < 0) {
         crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc);
         return NULL;
     }
 
     return server;
 }
 
 /*!
  * \brief Destroy an IPC server
  *
  * \param[in] server  Server to destroy (NULL is ignored)
  */
 void
 mainloop_del_ipc_server(qb_ipcs_service_t * server)
 {
     if (server == NULL) {
         return;
     }
     qb_ipcs_destroy(server);
 }
 
 /* A file descriptor (optionally an IPC connection) watched by the mainloop */
 struct mainloop_io_s {
     char *name;         // heap copy of the name given at creation (used in log messages)
     void *userdata;     // caller data handed to the dispatch and destroy callbacks
 
     int fd;             // descriptor being watched
     guint source;       // glib source ID returned by g_io_add_watch_full()
     crm_ipc_t *ipc;     // IPC connection for IPC clients, NULL for plain descriptors
     GIOChannel *channel;    // channel created for fd; mainloop_add_fd() drops its own reference after adding the watch
 
     // Called for each message read from an IPC client; a negative return drops the connection
     int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata);
     // Called when a plain descriptor is readable; a negative return drops the connection
     int (*dispatch_fn_io) (gpointer userdata);
     // Called once from mainloop_gio_destroy() when the watch is removed
     void (*destroy_fn) (gpointer userdata);
 
 };
 
 /*!
  * \internal
  * \brief glib watch callback for descriptors added via mainloop_add_fd()
  *
  * For readable IPC connections, up to 10 messages are read and dispatched
  * per invocation; for plain descriptors, the I/O dispatch function is called
  * once. The watch is dropped if a dispatch function returns a negative value,
  * the IPC connection is no longer connected, or a HUP/NVAL/ERR condition
  * was reported.
  *
  * \param[in] gio        Channel the event occurred on
  * \param[in] condition  I/O condition(s) that occurred
  * \param[in] data       Client (mainloop_io_t) the watch belongs to
  *
  * \return TRUE to keep the watch, FALSE to remove it
  */
 static gboolean
 mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data)
 {
     mainloop_io_t *io = data;
     const gboolean readable = ((condition & G_IO_IN) != 0);
     gboolean keep = TRUE;
 
     CRM_ASSERT(io->fd == g_io_channel_unix_get_fd(gio));
 
     if (readable && io->ipc) {
         long rc = 0;
 
         // Bound the work done per invocation so other sources get a turn
         for (int remaining = 10; remaining > 0; remaining--) {
             rc = crm_ipc_read(io->ipc);
             if (rc <= 0) {
                 crm_trace("Message acquisition from %s[%p] failed: %s (%ld)",
                           io->name, io, pcmk_strerror(rc), rc);
 
             } else if (io->dispatch_fn_ipc) {
                 const char *buffer = crm_ipc_buffer(io->ipc);
 
                 crm_trace("New message from %s[%p] = %ld (I/O condition=%d)", io->name, io, rc, condition);
                 if (io->dispatch_fn_ipc(buffer, rc, io->userdata) < 0) {
                     crm_trace("Connection to %s no longer required", io->name);
                     keep = FALSE;
                 }
             }
 
             if (!keep || (rc <= 0)) {
                 break;
             }
         }
 
     } else if (readable) {
         crm_trace("New message from %s[%p] %u", io->name, io, condition);
         if (io->dispatch_fn_io
             && (io->dispatch_fn_io(io->userdata) < 0)) {
 
             crm_trace("Connection to %s no longer required", io->name);
             keep = FALSE;
         }
     }
 
     if (io->ipc && crm_ipc_connected(io->ipc) == FALSE) {
         crm_err("Connection to %s closed " CRM_XS "client=%p condition=%d",
                 io->name, io, condition);
         keep = FALSE;
 
     } else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) {
         crm_trace("The connection %s[%p] has been closed (I/O condition=%d)",
                   io->name, io, condition);
         keep = FALSE;
 
     } else if (!readable) {
         /* Only IN, HUP, NVAL and ERR are watched for, so anything else is
          * unexpected. For reference, the GIOCondition bits are:
          *
          *   G_IO_IN   (1)   There is data to read.
          *   G_IO_PRI  (2)   There is urgent data to read.
          *   G_IO_OUT  (4)   Data can be written (without blocking).
          *   G_IO_ERR  (8)   Error condition.
          *   G_IO_HUP  (16)  Hung up (the connection has been broken,
          *                   usually for pipes and sockets).
          *   G_IO_NVAL (32)  Invalid request. The file descriptor is not open.
          */
         crm_err("Strange condition: %d", condition);
     }
 
     /* keep == FALSE results in mainloop_gio_destroy() being called
      * just before the source is removed from mainloop
      */
     return keep;
 }
 
 /*!
  * \internal
  * \brief Destroy notifier for watches added via mainloop_add_fd()
  *
  * Closes any IPC connection, calls the client's destroy callback (at most
  * once), destroys the IPC connection, and frees the client.
  *
  * \param[in] c  Client (mainloop_io_t) whose watch is being removed
  */
 static void
 mainloop_gio_destroy(gpointer c)
 {
     mainloop_io_t *io = c;
     char *saved_name = strdup(io->name);
     void (*on_destroy) (gpointer userdata) = NULL;
     crm_ipc_t *conn = NULL;
 
     /* io->source is valid but about to be destroyed (ref_count == 0) in gmain.c
      * io->channel will still have ref_count > 0... should be == 1
      */
     crm_trace("Destroying client %s[%p]", saved_name, c);
 
     if (io->ipc) {
         crm_ipc_close(io->ipc);
     }
 
     on_destroy = io->destroy_fn;
     if (on_destroy) {
         // Clear first so the callback cannot be invoked a second time
         io->destroy_fn = NULL;
         on_destroy(io->userdata);
     }
 
     // Read the connection only now, after the destroy callback has run
     conn = io->ipc;
     if (conn) {
         io->ipc = NULL;
         crm_ipc_destroy(conn);
     }
 
     crm_trace("Destroyed client %s[%p]", saved_name, c);
 
     free(io->name);
     io->name = NULL;
     free(io);
 
     free(saved_name);
 }
 
 /*!
  * \brief Connect to an IPC server and add the connection to the mainloop
  *
  * \param[in] name       Name of the IPC server to connect to
  * \param[in] priority   glib priority for the connection's watch
  * \param[in] max_size   Buffer size passed to crm_ipc_new()
  * \param[in] userdata   Data to pass to the callbacks
  * \param[in] callbacks  Dispatch and destroy callbacks (may be NULL, in which
  *                       case none are registered)
  *
  * \return Newly added client, or NULL if the connection could not be made
  *         or added (any connection object is closed and destroyed)
  */
 mainloop_io_t *
 mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata,
                         struct ipc_client_callbacks *callbacks)
 {
     mainloop_io_t *client = NULL;
     crm_ipc_t *conn = crm_ipc_new(name, max_size);
 
     if (conn && crm_ipc_connect(conn)) {
         int32_t fd = crm_ipc_get_fd(conn);
 
         /* No fd callbacks are passed here; the IPC-specific ones are set
          * below once the client exists
          */
         client = mainloop_add_fd(name, priority, fd, userdata, NULL);
     }
 
     if (client == NULL) {
         crm_perror(LOG_TRACE, "Connection to %s failed", name);
         if (conn) {
             crm_ipc_close(conn);
             crm_ipc_destroy(conn);
         }
         return NULL;
     }
 
     client->ipc = conn;
     if (callbacks != NULL) {
         // Tolerate missing callbacks, as mainloop_add_fd() does
         client->destroy_fn = callbacks->destroy;
         client->dispatch_fn_ipc = callbacks->dispatch;
     }
     return client;
 }
 
 /*!
  * \brief Remove an IPC client from the mainloop
  *
  * \param[in] client  Client to remove; handled exactly as by mainloop_del_fd()
  */
 void
 mainloop_del_ipc_client(mainloop_io_t * client)
 {
     mainloop_del_fd(client);
 }
 
 /*!
  * \brief Get the IPC connection belonging to a mainloop client
  *
  * \param[in] client  Client to inspect (may be NULL)
  *
  * \return The client's IPC connection, or NULL if \p client is NULL
  */
 crm_ipc_t *
 mainloop_get_ipc_client(mainloop_io_t * client)
 {
     return (client != NULL)? client->ipc : NULL;
 }
 
 /*!
  * \brief Watch a file descriptor from the mainloop
  *
  * \param[in] name       Name for the client (copied; used in log messages)
  * \param[in] priority   glib priority for the watch
  * \param[in] fd         File descriptor to watch for input, hang-up and errors
  * \param[in] userdata   Data to pass to the callbacks
  * \param[in] callbacks  Dispatch and destroy callbacks (may be NULL)
  *
  * \return Newly allocated client, or NULL if \p fd is negative (errno is set
  *         to EINVAL) or memory could not be allocated
  */
 mainloop_io_t *
 mainloop_add_fd(const char *name, int priority, int fd, void *userdata,
                 struct mainloop_fd_callbacks * callbacks)
 {
     mainloop_io_t *io = NULL;
 
     if (fd < 0) {
         errno = EINVAL;
         return NULL;
     }
 
     io = calloc(1, sizeof(mainloop_io_t));
     if (io == NULL) {
         return NULL;
     }
 
     io->name = strdup(name);
     io->userdata = userdata;
     io->fd = fd;
 
     if (callbacks != NULL) {
         io->destroy_fn = callbacks->destroy;
         io->dispatch_fn_io = callbacks->dispatch;
     }
 
     io->channel = g_io_channel_unix_new(fd);
     io->source = g_io_add_watch_full(io->channel, priority,
                                      (G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR),
                                      mainloop_gio_callback, io,
                                      mainloop_gio_destroy);
 
     /* Now that mainloop holds a reference to the channel, thanks to
      * g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
      *
      * This means that the channel will be freed by:
      * g_main_context_dispatch() or g_source_remove()
      *  -> g_source_destroy_internal()
      *      -> g_source_callback_unref()
      * shortly after mainloop_gio_destroy() completes
      */
     g_io_channel_unref(io->channel);
 
     crm_trace("Added connection %d for %s[%p].%d", io->source, io->name, io, fd);
     return io;
 }
 
 /*!
  * \brief Stop watching a descriptor added with mainloop_add_fd()
  *
  * \param[in] client  Client to remove (NULL is ignored)
  */
 void
 mainloop_del_fd(mainloop_io_t * client)
 {
     if (client == NULL) {
         return;
     }
 
     crm_trace("Removing client %s[%p]", client->name, client);
     if (client->source == 0) {
         return;
     }
 
     /* Results in mainloop_gio_destroy() being called just
      * before the source is removed from mainloop
      */
     g_source_remove(client->source);
 }
 
 // List of tracked child processes (mainloop_child_t entries)
 static GListPtr child_list = NULL;
 
 /* Return the process ID stored in a tracked child (child must not be NULL) */
 pid_t
 mainloop_child_pid(mainloop_child_t * child)
 {
     return child->pid;
 }
 
 /* Return the description stored in a tracked child (child must not be NULL);
  * the string remains owned by the child */
 const char *
 mainloop_child_name(mainloop_child_t * child)
 {
     return child->desc;
 }
 
 /* Return a tracked child's timeout member (child must not be NULL). Within
  * this file that member is a flag: FALSE at creation, set to TRUE by
  * child_timeout_callback() once the child has timed out */
 int
 mainloop_child_timeout(mainloop_child_t * child)
 {
     return child->timeout;
 }
 
 /* Return the private data pointer stored in a tracked child (child must not
  * be NULL) */
 void *
 mainloop_child_userdata(mainloop_child_t * child)
 {
     return child->privatedata;
 }
 
 /* Reset a tracked child's private data pointer to NULL (child must not be
  * NULL); the data it pointed to is not freed here */
 void
 mainloop_clear_child_userdata(mainloop_child_t * child)
 {
     child->privatedata = NULL;
 }
 
 /*!
  * \internal
  * \brief Free a tracked child, cancelling its timer if one is pending
  *
  * \param[in] child  Tracked child to free (must not be NULL)
  */
 static void
 child_free(mainloop_child_t *child)
 {
     const guint pending_timer = child->timerid;
 
     if (pending_timer != 0) {
         crm_trace("Removing timer %d", pending_timer);
         g_source_remove(pending_timer);
         child->timerid = 0;
     }
 
     free(child->desc);
     free(child);
 }
 
 /*!
  * \internal
  * \brief Send SIGKILL to a tracked child or to its process group
  *
  * \param[in] child  Tracked child to kill; only the process itself is
  *                   signalled if the mainloop_leave_pid_group flag is set,
  *                   otherwise its whole process group is
  *
  * \return 0 on success, otherwise the negative errno from kill()
  */
 static int
 child_kill_helper(mainloop_child_t *child)
 {
     int rc;
     if (child->flags & mainloop_leave_pid_group) {
         crm_debug("Kill pid %d only. leave group intact.", child->pid);
         rc = kill(child->pid, SIGKILL);
     } else {
         crm_debug("Kill pid %d's group", child->pid);
         rc = kill(-child->pid, SIGKILL);
     }
 
     if (rc < 0) {
         // Capture errno immediately, since logging below may overwrite it
         int saved_errno = errno;
 
         if (saved_errno != ESRCH) {
             crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid);
         }
         return -saved_errno;
     }
     return 0;
 }
 
 /*!
  * \internal
  * \brief Timer callback run when a tracked child exceeds its timeout
  *
  * On first expiry, the child is killed, marked as timed out, and a
  * five-second follow-up timer is scheduled; if that also expires, the
  * failure to die is logged and nothing more is done.
  *
  * \param[in,out] p  Tracked child (mainloop_child_t) that timed out
  *
  * \return FALSE (always), so the expired timer is not repeated
  */
 static gboolean
 child_timeout_callback(gpointer p)
 {
     mainloop_child_t *child = p;
     int rc = 0;
 
     // The timer that invoked us is finished, whatever happens below
     child->timerid = 0;
     if (child->timeout) {
         crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid);
         return FALSE;
     }
 
     rc = child_kill_helper(child);
     if (rc == -ESRCH) { // child_kill_helper() returns negative errno values
         /* Nothing left to do. pid doesn't exist */
         return FALSE;
     }
 
     child->timeout = TRUE;
     crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid);
 
     child->timerid = g_timeout_add(5000, child_timeout_callback, child);
     return FALSE;
 }
 
 /* Check on a tracked child with waitpid() and, when its status shows it is
  * finished (or the wait failed), invoke the child's callback with the core
  * flag, signal number and exit code derived from that status.
  *
  * The result tells callers whether the child entry is done with and may be
  * removed from child_list.
  *
  * NOTE(review): in the added lines, WCOREDUMP() is tested in an else-if that
  * follows the WIFSIGNALED() branch, so it is only reached when the child was
  * neither exited nor signalled; it looks as if "core" can no longer be set
  * for a child killed by a signal -- confirm whether that is intended.
  */
-static gboolean
+static bool
 child_waitpid(mainloop_child_t *child, int flags)
 {
     int rc = 0;
     int core = 0;
     int signo = 0;
     int status = 0;
     int exitcode = 0;
+    bool callback_needed = true;
 
     rc = waitpid(child->pid, &status, flags);
-    if(rc == 0) {
-        crm_perror(LOG_DEBUG, "wait(%d) = %d", child->pid, rc);
-        return FALSE;
-
-    } else if(rc != child->pid) {
+    if (rc == 0) { // WNOHANG in flags, and child status is not available
+        crm_trace("Child process %d (%s) still active",
+                  child->pid, child->desc);
+        callback_needed = false;
+
+    } else if (rc != child->pid) {
+        /* According to POSIX, possible conditions:
+         * - child->pid was non-positive (process group or any child),
+         *   and rc is specific child
+         * - errno ECHILD (pid does not exist or is not child)
+         * - errno EINVAL (invalid flags)
+         * - errno EINTR (caller interrupted by signal)
+         *
+         * @TODO Handle these cases more specifically.
+         */
         signo = SIGCHLD;
         exitcode = 1;
-        status = 1;
-        crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid);
-
-    } else {
-        crm_trace("Managed process %d exited: %p", child->pid, child);
-
-        if (WIFEXITED(status)) {
-            exitcode = WEXITSTATUS(status);
-            crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode);
-
-        } else if (WIFSIGNALED(status)) {
-            signo = WTERMSIG(status);
-            crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo);
-        }
-#ifdef WCOREDUMP
-        if (WCOREDUMP(status)) {
-            core = 1;
-            crm_err("Managed process %d (%s) dumped core", child->pid, child->desc);
-        }
+        crm_notice("Wait for child process %d (%s) interrupted: %s",
+                   child->pid, child->desc, pcmk_strerror(errno));
+
+    } else if (WIFEXITED(status)) {
+        exitcode = WEXITSTATUS(status);
+        crm_trace("Child process %d (%s) exited with status %d",
+                  child->pid, child->desc, exitcode);
+
+    } else if (WIFSIGNALED(status)) {
+        signo = WTERMSIG(status);
+        crm_trace("Child process %d (%s) exited with signal %d (%s)",
+                  child->pid, child->desc, signo, strsignal(signo));
+
+#ifdef WCOREDUMP // AIX, SunOS, maybe others
+    } else if (WCOREDUMP(status)) {
+        core = 1;
+        crm_err("Child process %d (%s) dumped core",
+                child->pid, child->desc);
 #endif
+
+    } else { // flags must contain WUNTRACED and/or WCONTINUED to reach this
+        crm_trace("Child process %d (%s) stopped or continued",
+                  child->pid, child->desc);
+        callback_needed = false;
     }
 
-    if (child->callback) {
+    if (callback_needed && child->callback) {
         child->callback(child, child->pid, core, signo, exitcode);
     }
-    return TRUE;
+    return callback_needed;
 }
 
 /* Mainloop-side SIGCHLD handler: poll every tracked child without blocking
  * (WNOHANG) and remove and free the entries that child_waitpid() reports as
  * finished. The next list element is saved before an entry is removed, so
  * removal during iteration is safe. The signal argument is not used.
  */
 static void
 child_death_dispatch(int signal)
 {
-    GListPtr iter = child_list;
-    gboolean exited;
-
-    while(iter) {
-        GListPtr saved = NULL;
+    for (GList *iter = child_list; iter; ) {
+        GList *saved = iter;
         mainloop_child_t *child = iter->data;
-        exited = child_waitpid(child, WNOHANG);
 
-        saved = iter;
         iter = iter->next;
-
-        if (exited == FALSE) {
-            continue;
+        if (child_waitpid(child, WNOHANG)) {
+            crm_trace("Removing completed process %d from child list",
+                      child->pid);
+            child_list = g_list_remove_link(child_list, saved);
+            g_list_free(saved);
+            child_free(child);
         }
-        crm_trace("Removing process entry %p for %d", child, child->pid);
-
-        child_list = g_list_remove_link(child_list, saved);
-        g_list_free(saved);
-        child_free(child);
     }
 }
 
 /* One-shot callback (scheduled with g_timeout_add() by
  * mainloop_child_add_with_flags()) that starts SIGCHLD handling from within
  * the mainloop. The argument is not used. Returns FALSE so that it is not
  * called again.
  */
 static gboolean
 child_signal_init(gpointer p)
 {
     crm_trace("Installed SIGCHLD handler");
     /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */
     mainloop_add_signal(SIGCHLD, child_death_dispatch);
 
     /* In case they terminated before the signal handler was installed */
     child_death_dispatch(SIGCHLD);
     return FALSE;
 }
 
 /* Kill the tracked child with the given pid and, where possible, reap it.
  *
  * Returns FALSE if pid is not in child_list or if child_waitpid() did not
  * report the child as finished. Returns TRUE if the child was reaped and
  * freed here, or if it is already gone (ESRCH) and is left for the SIGCHLD
  * handler to clean up.
  */
 int
 mainloop_child_kill(pid_t pid)
 {
     GListPtr iter;
     mainloop_child_t *child = NULL;
     mainloop_child_t *match = NULL;
     /* It is impossible to block SIGKILL, this allows us to
      * call waitpid without WNOHANG flag.*/
     int waitflags = 0, rc = 0;
 
     for (iter = child_list; iter != NULL && match == NULL; iter = iter->next) {
         child = iter->data;
         if (pid == child->pid) {
             match = child;
         }
     }
 
     if (match == NULL) {
         return FALSE;
     }
 
     rc = child_kill_helper(match);
     if(rc == -ESRCH) {
-        /* It's gone, but hasn't shown up in waitpid() yet
-         *
-         * Wait until we get SIGCHLD and let child_death_dispatch()
-         * clean it up as normal (so we get the correct return
-         * code/status)
-         *
-         * The blocking alternative would be to call:
-         *    child_waitpid(match, 0);
+        /* It's gone, but hasn't shown up in waitpid() yet. Wait until we get
+         * SIGCHLD and let handler clean it up as normal (so we get the correct
+         * return code/status). The blocking alternative would be to call
+         * child_waitpid(match, 0).
          */
-        crm_trace("Waiting for child %d to be reaped by child_death_dispatch()", match->pid);
+        crm_trace("Waiting for signal that child process %d completed",
+                  match->pid);
         return TRUE;
 
     } else if(rc != 0) {
         /* If KILL for some other reason set the WNOHANG flag since we
          * can't be certain what happened.
          */
         waitflags = WNOHANG;
     }
 
-    if (child_waitpid(match, waitflags) == FALSE) {
+    if (!child_waitpid(match, waitflags)) {
         /* not much we can do if this occurs */
         return FALSE;
     }
 
     child_list = g_list_remove(child_list, match);
     child_free(match);
     return TRUE;
 }
 
 /* Create/Log a new tracked process
  * To track a process group, use -pid
+ *
+ * @TODO Using a non-positive pid (i.e. any child, or process group) would
+ *       likely not be useful since we will free the child after the first
+ *       completed process.
  */
 void
 mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc, void *privatedata, enum mainloop_child_flags flags, 
                    void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode))
 {
     /* Parameters:
      *   pid          process to track
      *   timeout      if nonzero, interval passed to g_timeout_add() after
      *                which child_timeout_callback() kills the process
      *   desc         description to copy into the entry (may be NULL)
      *   privatedata  caller data retrievable via mainloop_child_userdata()
      *   flags        e.g. mainloop_leave_pid_group (see child_kill_helper())
      *   callback     called when the process is found to have finished
      */
     static bool need_init = TRUE;
 
     /* Zero the entry so that no member is ever left indeterminate; in
      * particular, child_free() frees desc unconditionally
      */
     mainloop_child_t *child = g_new0(mainloop_child_t, 1);
 
     child->pid = pid;
     child->timerid = 0;
     child->timeout = FALSE;
     child->privatedata = privatedata;
     child->callback = callback;
     child->flags = flags;
     child->desc = NULL;
 
     if(desc) {
         child->desc = strdup(desc);
     }
 
     if (timeout) {
         child->timerid = g_timeout_add(timeout, child_timeout_callback, child);
     }
 
     child_list = g_list_append(child_list, child);
 
     if(need_init) {
         need_init = FALSE;
         /* SIGCHLD processing has to be invoked from mainloop.
          * We do not want it to be possible to both add a child pid
          * to mainloop, and have the pid's exit callback invoked within
          * the same callstack. */
         g_timeout_add(1, child_signal_init, NULL);
     }
 }
 
 void
 mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata,
                    void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode))
 {
     mainloop_child_add_with_flags(pid, timeout, desc, privatedata, 0, callback);
 }
 
 struct mainloop_timer_s {
         guint id;
         guint period_ms;
         bool repeat;
         char *name;
         GSourceFunc cb;
         void *userdata;
 };
 
 static gboolean mainloop_timer_cb(gpointer user_data)
 {
     int id = 0;
     bool repeat = FALSE;
     struct mainloop_timer_s *t = user_data;
 
     CRM_ASSERT(t != NULL);
 
     id = t->id;
     t->id = 0; /* Ensure it's unset during callbacks so that
                 * mainloop_timer_running() works as expected
                 */
 
     if(t->cb) {
         crm_trace("Invoking callbacks for timer %s", t->name);
         repeat = t->repeat;
         if(t->cb(t->userdata) == FALSE) {
             crm_trace("Timer %s complete", t->name);
             repeat = FALSE;
         }
     }
 
     if(repeat) {
         /* Restore if repeating */
         t->id = id;
     }
 
     return repeat;
 }
 
 bool mainloop_timer_running(mainloop_timer_t *t)
 {
     if(t && t->id != 0) {
         return TRUE;
     }
     return FALSE;
 }
 
 void mainloop_timer_start(mainloop_timer_t *t)
 {
     mainloop_timer_stop(t);
     if(t && t->period_ms > 0) {
         crm_trace("Starting timer %s", t->name);
         t->id = g_timeout_add(t->period_ms, mainloop_timer_cb, t);
     }
 }
 
 void mainloop_timer_stop(mainloop_timer_t *t)
 {
     if(t && t->id != 0) {
         crm_trace("Stopping timer %s", t->name);
         g_source_remove(t->id);
         t->id = 0;
     }
 }
 
 guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms)
 {
     guint last = 0;
 
     if(t) {
         last = t->period_ms;
         t->period_ms = period_ms;
     }
 
     if(t && t->id != 0 && last != t->period_ms) {
         mainloop_timer_start(t);
     }
     return last;
 }
 
 
 mainloop_timer_t *
 mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata)
 {
     mainloop_timer_t *t = calloc(1, sizeof(mainloop_timer_t));
 
     if(t) {
         if(name) {
             t->name = crm_strdup_printf("%s-%u-%d", name, period_ms, repeat);
         } else {
             t->name = crm_strdup_printf("%p-%u-%d", t, period_ms, repeat);
         }
         t->id = 0;
         t->period_ms = period_ms;
         t->repeat = repeat;
         t->cb = cb;
         t->userdata = userdata;
         crm_trace("Created timer %s with %p %p", t->name, userdata, t->userdata);
     }
     return t;
 }
 
 void
 mainloop_timer_del(mainloop_timer_t *t)
 {
     if(t) {
         crm_trace("Destroying timer %s", t->name);
         mainloop_timer_stop(t);
         free(t->name);
         free(t);
     }
 }
 
 /*
  * Helpers to make sure certain events aren't lost at shutdown
  */
 
 static gboolean
 drain_timeout_cb(gpointer user_data)
 {
     bool *timeout_popped = (bool*) user_data;
 
     *timeout_popped = TRUE;
     return FALSE;
 }
 
 /*!
  * \brief Process main loop events while a certain condition is met
  *
  * \param[in] mloop     Main loop to process
  * \param[in] timer_ms  Don't process longer than this amount of time
  * \param[in] check     Function that returns TRUE if events should be processed
  *
  * \note This function is intended to be called at shutdown if certain important
  *       events should not be missed. The caller would likely quit the main loop
  *       or exit after calling this function. The check() function will be
  *       passed the remaining timeout in milliseconds.
  */
 void
 pcmk_drain_main_loop(GMainLoop *mloop, guint timer_ms, bool (*check)(guint))
 {
     bool timeout_popped = FALSE;
     guint timer = 0;
     GMainContext *ctx = NULL;
 
     CRM_CHECK(mloop && check, return);
 
     ctx = g_main_loop_get_context(mloop);
     if (ctx) {
         time_t start_time = time(NULL);
 
         timer = g_timeout_add(timer_ms, drain_timeout_cb, &timeout_popped);
         while (!timeout_popped
                && check(timer_ms - (time(NULL) - start_time) * 1000)) {
             g_main_context_iteration(ctx, TRUE);
         }
     }
     if (!timeout_popped && (timer > 0)) {
         g_source_remove(timer);
     }
 }
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 8d3cdb80ad..c5a5072375 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -1,2575 +1,2561 @@
 /*
  * Copyright 2004-2019 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU Lesser General Public License
  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_internal.h>
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 #include <crm/common/util.h>
 
 #include <ctype.h>
 #include <glib.h>
+#include <stdbool.h>
 
 #include <crm/pengine/rules.h>
 #include <crm/pengine/internal.h>
 
 #include <unpack.h>
 
 extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root);
 void print_str_str(gpointer key, gpointer value, gpointer user_data);
 gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data);
 void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
                       pe_working_set_t * data_set);
 static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key,
                                          gboolean include_disabled);
 
 #if ENABLE_VERSIONED_ATTRS
 pe_rsc_action_details_t *
 pe_rsc_action_details(pe_action_t *action)
 {
     pe_rsc_action_details_t *details;
 
     CRM_CHECK(action != NULL, return NULL);
 
     if (action->action_details == NULL) {
         action->action_details = calloc(1, sizeof(pe_rsc_action_details_t));
         CRM_CHECK(action->action_details != NULL, return NULL);
     }
 
     details = (pe_rsc_action_details_t *) action->action_details;
     if (details->versioned_parameters == NULL) {
         details->versioned_parameters = create_xml_node(NULL,
                                                         XML_TAG_OP_VER_ATTRS);
     }
     if (details->versioned_meta == NULL) {
         details->versioned_meta = create_xml_node(NULL, XML_TAG_OP_VER_META);
     }
     return details;
 }
 
 static void
 pe_free_rsc_action_details(pe_action_t *action)
 {
     pe_rsc_action_details_t *details;
 
     if ((action == NULL) || (action->action_details == NULL)) {
         return;
     }
 
     details = (pe_rsc_action_details_t *) action->action_details;
 
     if (details->versioned_parameters) {
         free_xml(details->versioned_parameters);
     }
     if (details->versioned_meta) {
         free_xml(details->versioned_meta);
     }
 
     action->action_details = NULL;
 }
 #endif
 
 /*!
  * \internal
  * \brief Check whether we can fence a particular node
  *
  * \param[in] data_set  Working set for cluster
  * \param[in] node      Name of node to check
  *
  * \return TRUE if node can be fenced, FALSE otherwise
  *
  * \note This function should only be called for cluster nodes and
  *       remote nodes; guest nodes are fenced by stopping their container
  *       resource, so fence execution requirements do not apply to them.
  */
 bool pe_can_fence(pe_working_set_t * data_set, node_t *node)
 {
     if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
         return FALSE; /* Turned off */
 
     } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) {
         return FALSE; /* No devices */
 
     } else if (is_set(data_set->flags, pe_flag_have_quorum)) {
         return TRUE;
 
     } else if (data_set->no_quorum_policy == no_quorum_ignore) {
         return TRUE;
 
     } else if(node == NULL) {
         return FALSE;
 
     } else if(node->details->online) {
         crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname);
         return TRUE;
     }
 
     crm_trace("Cannot fence %s", node->details->uname);
     return FALSE;
 }
 
 node_t *
 node_copy(const node_t *this_node)
 {
     node_t *new_node = NULL;
 
     CRM_CHECK(this_node != NULL, return NULL);
 
     new_node = calloc(1, sizeof(node_t));
     CRM_ASSERT(new_node != NULL);
 
     crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node);
 
     new_node->rsc_discover_mode = this_node->rsc_discover_mode;
     new_node->weight = this_node->weight;
     new_node->fixed = this_node->fixed;
     new_node->details = this_node->details;
 
     return new_node;
 }
 
 /* any node in list1 or list2 and not in the other gets a score of -INFINITY */
 void
 node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores)
 {
     GHashTable *result = hash;
     node_t *other_node = NULL;
     GListPtr gIter = list;
 
     GHashTableIter iter;
     node_t *node = NULL;
 
     g_hash_table_iter_init(&iter, hash);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
 
         other_node = pe_find_node_id(list, node->details->id);
         if (other_node == NULL) {
             node->weight = -INFINITY;
         } else if (merge_scores) {
             node->weight = merge_weights(node->weight, other_node->weight);
         }
     }
 
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         other_node = pe_hash_table_lookup(result, node->details->id);
 
         if (other_node == NULL) {
             node_t *new_node = node_copy(node);
 
             new_node->weight = -INFINITY;
             g_hash_table_insert(result, (gpointer) new_node->details->id, new_node);
         }
     }
 }
 
 GHashTable *
 node_hash_from_list(GListPtr list)
 {
     GListPtr gIter = list;
     GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
                                                free);
 
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
         node_t *n = node_copy(node);
 
         g_hash_table_insert(result, (gpointer) n->details->id, n);
     }
 
     return result;
 }
 
 GListPtr
 node_list_dup(GListPtr list1, gboolean reset, gboolean filter)
 {
     GListPtr result = NULL;
     GListPtr gIter = list1;
 
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *new_node = NULL;
         node_t *this_node = (node_t *) gIter->data;
 
         if (filter && this_node->weight < 0) {
             continue;
         }
 
         new_node = node_copy(this_node);
         if (reset) {
             new_node->weight = 0;
         }
         if (new_node != NULL) {
             result = g_list_prepend(result, new_node);
         }
     }
 
     return result;
 }
 
 gint
 sort_node_uname(gconstpointer a, gconstpointer b)
 {
     const char *name_a = ((const node_t *) a)->details->uname;
     const char *name_b = ((const node_t *) b)->details->uname;
 
     while (*name_a && *name_b) {
         if (isdigit(*name_a) && isdigit(*name_b)) {
             // If node names contain a number, sort numerically
 
             char *end_a = NULL;
             char *end_b = NULL;
             long num_a = strtol(name_a, &end_a, 10);
             long num_b = strtol(name_b, &end_b, 10);
 
             // allow ordering e.g. 007 > 7
             size_t len_a = end_a - name_a;
             size_t len_b = end_b - name_b;
 
             if (num_a < num_b) {
                 return -1;
             } else if (num_a > num_b) {
                 return 1;
             } else if (len_a < len_b) {
                 return -1;
             } else if (len_a > len_b) {
                 return 1;
             }
             name_a = end_a;
             name_b = end_b;
         } else {
             // Compare non-digits case-insensitively
             int lower_a = tolower(*name_a);
             int lower_b = tolower(*name_b);
 
             if (lower_a < lower_b) {
                 return -1;
             } else if (lower_a > lower_b) {
                 return 1;
             }
             ++name_a;
             ++name_b;
         }
     }
     if (!*name_a && *name_b) {
         return -1;
     } else if (*name_a && !*name_b) {
         return 1;
     }
     return 0;
 }
 
 void
 dump_node_scores_worker(int level, const char *file, const char *function, int line,
                         resource_t * rsc, const char *comment, GHashTable * nodes)
 {
     GHashTable *hash = nodes;
     GHashTableIter iter;
     node_t *node = NULL;
 
     if (rsc) {
         hash = rsc->allowed_nodes;
     }
 
     if (rsc && is_set(rsc->flags, pe_rsc_orphan)) {
         /* Don't show the allocation scores for orphans */
         return;
     }
 
     if (level == 0) {
         char score[128];
         int len = sizeof(score);
         /* For now we want this in sorted order to keep the regression tests happy */
         GListPtr gIter = NULL;
         GListPtr list = g_hash_table_get_values(hash);
 
         list = g_list_sort(list, sort_node_uname);
 
         gIter = list;
         for (; gIter != NULL; gIter = gIter->next) {
             node_t *node = (node_t *) gIter->data;
             /* This function is called a whole lot, use stack allocated score */
             score2char_stack(node->weight, score, len);
 
             if (rsc) {
                 printf("%s: %s allocation score on %s: %s\n",
                        comment, rsc->id, node->details->uname, score);
             } else {
                 printf("%s: %s = %s\n", comment, node->details->uname, score);
             }
         }
 
         g_list_free(list);
 
     } else if (hash) {
         char score[128];
         int len = sizeof(score);
         g_hash_table_iter_init(&iter, hash);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
             /* This function is called a whole lot, use stack allocated score */
             score2char_stack(node->weight, score, len);
 
             if (rsc) {
                 do_crm_log_alias(LOG_TRACE, file, function, line,
                                  "%s: %s allocation score on %s: %s", comment, rsc->id,
                                  node->details->uname, score);
             } else {
                 do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment,
                                  node->details->uname, score);
             }
         }
     }
 
     if (rsc && rsc->children) {
         GListPtr gIter = NULL;
 
         gIter = rsc->children;
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child = (resource_t *) gIter->data;
 
             dump_node_scores_worker(level, file, function, line, child, comment, nodes);
         }
     }
 }
 
 static void
 append_dump_text(gpointer key, gpointer value, gpointer user_data)
 {
     char **dump_text = user_data;
     char *new_text = crm_strdup_printf("%s %s=%s",
                                        *dump_text, (char *)key, (char *)value);
 
     free(*dump_text);
     *dump_text = new_text;
 }
 
 void
 dump_node_capacity(int level, const char *comment, node_t * node)
 {
     char *dump_text = crm_strdup_printf("%s: %s capacity:",
                                         comment, node->details->uname);
 
     g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text);
 
     if (level == 0) {
         fprintf(stdout, "%s\n", dump_text);
     } else {
         crm_trace("%s", dump_text);
     }
 
     free(dump_text);
 }
 
 void
 dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node)
 {
     char *dump_text = crm_strdup_printf("%s: %s utilization on %s:",
                                         comment, rsc->id, node->details->uname);
 
     g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text);
 
     if (level == 0) {
         fprintf(stdout, "%s\n", dump_text);
     } else {
         crm_trace("%s", dump_text);
     }
 
     free(dump_text);
 }
 
 gint
 sort_rsc_index(gconstpointer a, gconstpointer b)
 {
     const resource_t *resource1 = (const resource_t *)a;
     const resource_t *resource2 = (const resource_t *)b;
 
     if (a == NULL && b == NULL) {
         return 0;
     }
     if (a == NULL) {
         return 1;
     }
     if (b == NULL) {
         return -1;
     }
 
     if (resource1->sort_index > resource2->sort_index) {
         return -1;
     }
 
     if (resource1->sort_index < resource2->sort_index) {
         return 1;
     }
 
     return 0;
 }
 
 gint
 sort_rsc_priority(gconstpointer a, gconstpointer b)
 {
     const resource_t *resource1 = (const resource_t *)a;
     const resource_t *resource2 = (const resource_t *)b;
 
     if (a == NULL && b == NULL) {
         return 0;
     }
     if (a == NULL) {
         return 1;
     }
     if (b == NULL) {
         return -1;
     }
 
     if (resource1->priority > resource2->priority) {
         return -1;
     }
 
     if (resource1->priority < resource2->priority) {
         return 1;
     }
 
     return 0;
 }
 
 action_t *
 custom_action(resource_t * rsc, char *key, const char *task,
               node_t * on_node, gboolean optional, gboolean save_action,
               pe_working_set_t * data_set)
 {
     action_t *action = NULL;
     GListPtr possible_matches = NULL;
 
     CRM_CHECK(key != NULL, return NULL);
     CRM_CHECK(task != NULL, free(key); return NULL);
 
     if (save_action && rsc != NULL) {
         possible_matches = find_actions(rsc->actions, key, on_node);
     } else if(save_action) {
 #if 0
         action = g_hash_table_lookup(data_set->singletons, key);
 #else
         /* More expensive but takes 'node' into account */
         possible_matches = find_actions(data_set->actions, key, on_node);
 #endif
     }
 
     if(data_set->singletons == NULL) {
         data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);
     }
 
     if (possible_matches != NULL) {
         if (g_list_length(possible_matches) > 1) {
             pe_warn("Action %s for %s on %s exists %d times",
                     task, rsc ? rsc->id : "<NULL>",
                     on_node ? on_node->details->uname : "<NULL>", g_list_length(possible_matches));
         }
 
         action = g_list_nth_data(possible_matches, 0);
         pe_rsc_trace(rsc, "Found existing action %d (%s) for %s (%s) on %s",
                      action->id, action->uuid,
                      (rsc? rsc->id : "no resource"), task,
                      (on_node? on_node->details->uname : "no node"));
         g_list_free(possible_matches);
     }
 
     if (action == NULL) {
         if (save_action) {
             pe_rsc_trace(rsc, "Creating %s action %d: %s for %s (%s) on %s",
                          (optional? "optional" : " mandatory"),
                          data_set->action_id, key,
                          (rsc? rsc->id : "no resource"), task,
                          (on_node? on_node->details->uname : "no node"));
         }
 
         action = calloc(1, sizeof(action_t));
         if (save_action) {
             action->id = data_set->action_id++;
         } else {
             action->id = 0;
         }
         action->rsc = rsc;
         CRM_ASSERT(task != NULL);
         action->task = strdup(task);
         if (on_node) {
             action->node = node_copy(on_node);
         }
         action->uuid = strdup(key);
 
         pe_set_action_bit(action, pe_action_runnable);
         if (optional) {
             pe_set_action_bit(action, pe_action_optional);
         } else {
             pe_clear_action_bit(action, pe_action_optional);
         }
 
         action->extra = crm_str_table_new();
         action->meta = crm_str_table_new();
 
         if (save_action) {
             data_set->actions = g_list_prepend(data_set->actions, action);
             if(rsc == NULL) {
                 g_hash_table_insert(data_set->singletons, action->uuid, action);
             }
         }
 
         if (rsc != NULL) {
             action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE);
 
             unpack_operation(action, action->op_entry, rsc->container, data_set);
 
             if (save_action) {
                 rsc->actions = g_list_prepend(rsc->actions, action);
             }
         }
 
         if (save_action) {
             pe_rsc_trace(rsc, "Action %d created", action->id);
         }
     }
 
     if (!optional && is_set(action->flags, pe_action_optional)) {
         pe_rsc_trace(rsc, "Unset optional on action %d", action->id);
         pe_clear_action_bit(action, pe_action_optional);
     }
 
     if (rsc != NULL) {
         enum action_tasks a_task = text2task(action->task);
         int warn_level = LOG_TRACE;
 
         if (save_action) {
             warn_level = LOG_WARNING;
         }
 
         if (is_set(action->flags, pe_action_have_node_attrs) == FALSE
             && action->node != NULL && action->op_entry != NULL) {
             pe_set_action_bit(action, pe_action_have_node_attrs);
             pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS,
                                        action->node->details->attrs,
                                        action->extra, NULL, FALSE, data_set);
         }
 
         if (is_set(action->flags, pe_action_pseudo)) {
             /* leave untouched */
 
         } else if (action->node == NULL) {
             pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid);
             pe_clear_action_bit(action, pe_action_runnable);
 
         } else if (is_not_set(rsc->flags, pe_rsc_managed)
                    && g_hash_table_lookup(action->meta,
                                           XML_LRM_ATTR_INTERVAL_MS) == NULL) {
             crm_debug("Action %s (unmanaged)", action->uuid);
             pe_rsc_trace(rsc, "Set optional on %s", action->uuid);
             pe_set_action_bit(action, pe_action_optional);
 /*   			action->runnable = FALSE; */
 
         } else if (action->node->details->online == FALSE
                    && (!pe__is_guest_node(action->node)
                        || action->node->details->remote_requires_reset)) {
             pe_clear_action_bit(action, pe_action_runnable);
             do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)",
                        action->uuid, action->node->details->uname);
             if (is_set(action->rsc->flags, pe_rsc_managed)
                 && save_action && a_task == stop_rsc
                 && action->node->details->unclean == FALSE) {
                 pe_fence_node(data_set, action->node, "resource actions are unrunnable");
             }
 
         } else if (action->node->details->pending) {
             pe_clear_action_bit(action, pe_action_runnable);
             do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)",
                        action->uuid, action->node->details->uname);
 
         } else if (action->needs == rsc_req_nothing) {
             pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid);
             pe_action_set_reason(action, NULL, TRUE);
             pe_set_action_bit(action, pe_action_runnable);
 #if 0
             /*
              * No point checking this
              * - if we don't have quorum we can't stonith anyway
              */
         } else if (action->needs == rsc_req_stonith) {
             crm_trace("Action %s requires only stonith", action->uuid);
             action->runnable = TRUE;
 #endif
         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE
                    && data_set->no_quorum_policy == no_quorum_stop) {
             pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "no quorum", pe_action_runnable, TRUE);
             crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid);
 
         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE
                    && data_set->no_quorum_policy == no_quorum_freeze) {
             pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role));
             if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) {
                 pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "quorum freeze", pe_action_runnable, TRUE);
                 pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)",
                              action->node->details->uname, action->uuid);
             }
 
         } else if(is_not_set(action->flags, pe_action_runnable)) {
             pe_rsc_trace(rsc, "Action %s is runnable", action->uuid);
             //pe_action_set_reason(action, NULL, TRUE);
             pe_set_action_bit(action, pe_action_runnable);
         }
 
         if (save_action) {
             switch (a_task) {
                 case stop_rsc:
                     set_bit(rsc->flags, pe_rsc_stopping);
                     break;
                 case start_rsc:
                     clear_bit(rsc->flags, pe_rsc_starting);
                     if (is_set(action->flags, pe_action_runnable)) {
                         set_bit(rsc->flags, pe_rsc_starting);
                     }
                     break;
                 default:
                     break;
             }
         }
     }
 
     free(key);
     return action;
 }
 
 static const char *
 unpack_operation_on_fail(action_t * action)
 {
 
     const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
 
     if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) {
         crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id);
         return NULL;
     } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) {
         /* demote on_fail defaults to master monitor value if present */
         xmlNode *operation = NULL;
         const char *name = NULL;
         const char *role = NULL;
         const char *on_fail = NULL;
         const char *interval_spec = NULL;
         const char *enabled = NULL;
 
         CRM_CHECK(action->rsc != NULL, return NULL);
 
         for (operation = __xml_first_child_element(action->rsc->ops_xml);
              operation && !value; operation = __xml_next_element(operation)) {
 
             if (!crm_str_eq((const char *)operation->name, "op", TRUE)) {
                 continue;
             }
             name = crm_element_value(operation, "name");
             role = crm_element_value(operation, "role");
             on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL);
             enabled = crm_element_value(operation, "enabled");
             interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
             if (!on_fail) {
                 continue;
             } else if (enabled && !crm_is_true(enabled)) {
                 continue;
             } else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) {
                 continue;
             } else if (crm_parse_interval_spec(interval_spec) == 0) {
                 continue;
             }
 
             value = on_fail;
         }
     }
 
     return value;
 }
 
 static xmlNode *
 find_min_interval_mon(resource_t * rsc, gboolean include_disabled)
 {
     guint interval_ms = 0;
     guint min_interval_ms = G_MAXUINT;
     const char *name = NULL;
     const char *value = NULL;
     const char *interval_spec = NULL;
     xmlNode *op = NULL;
     xmlNode *operation = NULL;
 
     for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL;
          operation = __xml_next_element(operation)) {
 
         if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
             name = crm_element_value(operation, "name");
             interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
             value = crm_element_value(operation, "enabled");
             if (!include_disabled && value && crm_is_true(value) == FALSE) {
                 continue;
             }
 
             if (safe_str_neq(name, RSC_STATUS)) {
                 continue;
             }
 
             interval_ms = crm_parse_interval_spec(interval_spec);
 
             if (interval_ms && (interval_ms < min_interval_ms)) {
                 min_interval_ms = interval_ms;
                 op = operation;
             }
         }
     }
 
     return op;
 }
 
 static int
 unpack_start_delay(const char *value, GHashTable *meta)
 {
     int start_delay = 0;
 
     if (value != NULL) {
         start_delay = crm_get_msec(value);
 
         if (start_delay < 0) {
             start_delay = 0;
         }
 
         if (meta) {
             g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay));
         }
     }
 
     return start_delay;
 }
 
-static int
-unpack_interval_origin(const char *value, GHashTable *meta, xmlNode *xml_obj,
-                       guint interval_ms, crm_time_t *now)
+// true if value contains valid, non-NULL interval origin for recurring op
+static bool
+unpack_interval_origin(const char *value, xmlNode *xml_obj, guint interval_ms,
+                       crm_time_t *now, long long *start_delay)
 {
-    int start_delay = 0;
-
-    if ((interval_ms > 0) && (value != NULL)) {
-        crm_time_t *origin = crm_time_new(value);
-
-        if (origin && now) {
-            crm_time_t *delay = NULL;
-            int rc = crm_time_compare(origin, now);
-            long long delay_s = 0;
-            int interval_sec = interval_ms / 1000;
-
-            crm_trace("Origin: %s, interval: %d", value, interval_sec);
+    long long result = 0;
+    guint interval_sec = interval_ms / 1000;
+    crm_time_t *origin = NULL;
 
-            /* If 'origin' is in the future, find the most recent "multiple" that occurred in the past */
-            while(rc > 0) {
-                crm_time_add_seconds(origin, -interval_sec);
-                rc = crm_time_compare(origin, now);
-            }
-
-            /* Now find the first "multiple" that occurs after 'now' */
-            while (rc < 0) {
-                crm_time_add_seconds(origin, interval_sec);
-                rc = crm_time_compare(origin, now);
-            }
-
-            delay = crm_time_calculate_duration(origin, now);
+    // Ignore unspecified values, non-recurring ops, and sub-second intervals
+    if ((value == NULL) || (interval_sec == 0) || (now == NULL)) {
+        return false;
+    }
 
-            crm_time_log(LOG_TRACE, "origin", origin,
-                         crm_time_log_date | crm_time_log_timeofday |
-                         crm_time_log_with_timezone);
-            crm_time_log(LOG_TRACE, "now", now,
-                         crm_time_log_date | crm_time_log_timeofday |
-                         crm_time_log_with_timezone);
-            crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration);
+    // Parse interval origin from text
+    origin = crm_time_new(value);
+    if (origin == NULL) {
+        crm_config_err("Operation '%s' contains invalid " XML_OP_ATTR_ORIGIN
+                       " '%s'",
+                       (ID(xml_obj)? ID(xml_obj) : "(unspecified)"), value);
+        return false;
+    }
 
-            delay_s = crm_time_get_seconds(delay);
-            if (delay_s < 0) {
-                delay_s = 0;
-            }
-            start_delay = delay_s * 1000;
+    // Get seconds since origin (negative if origin is in the future)
+    result = crm_time_get_seconds(now) - crm_time_get_seconds(origin);
+    crm_time_free(origin);
 
-            if (xml_obj) {
-                crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj));
-            }
+    // Calculate seconds from closest interval to now (interval_sec is nonzero)
+    result = result % interval_sec;
 
-            if (meta) {
-                g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY),
-                                     crm_itoa(start_delay));
-            }
+    // Calculate seconds remaining until next interval
+    result = ((result <= 0)? 0 : interval_sec) - result;
+    crm_info("Calculated a start delay of %llds for operation '%s'",
+             result,
+             (ID(xml_obj)? ID(xml_obj) : "(unspecified)"));
 
-            crm_time_free(origin);
-            crm_time_free(delay);
-        } else if (!origin && xml_obj) {
-            crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s",
-                           ID(xml_obj), value);
-        }
+    if (start_delay != NULL) {
+        *start_delay = result * 1000; // milliseconds
     }
-
-    return start_delay;
+    return true;
 }
 
 static int
 unpack_timeout(const char *value)
 {
     int timeout = crm_get_msec(value);
 
     if (timeout < 0) {
         timeout = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
     }
     return timeout;
 }
 
 int
 pe_get_configured_timeout(resource_t *rsc, const char *action, pe_working_set_t *data_set)
 {
     xmlNode *child = NULL;
     const char *timeout = NULL;
     int timeout_ms = 0;
 
     for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP);
          child != NULL; child = crm_next_same_xml(child)) {
         if (safe_str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME))) {
             timeout = crm_element_value(child, XML_ATTR_TIMEOUT);
             break;
         }
     }
 
     if (timeout == NULL && data_set->op_defaults) {
         GHashTable *action_meta = crm_str_table_new();
         pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS,
                                    NULL, action_meta, NULL, FALSE, data_set);
         timeout = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT);
     }
 
     // @TODO check meta-attributes (including versioned meta-attributes)
     // @TODO maybe use min-interval monitor timeout as default for monitors
 
     timeout_ms = crm_get_msec(timeout);
     if (timeout_ms < 0) {
         timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
     }
     return timeout_ms;
 }
 
 #if ENABLE_VERSIONED_ATTRS
 static void
 unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj,
                       guint interval_ms, crm_time_t *now)
 {
     xmlNode *attrs = NULL;
     xmlNode *attr = NULL;
 
     for (attrs = __xml_first_child_element(versioned_meta); attrs != NULL;
          attrs = __xml_next_element(attrs)) {
 
         for (attr = __xml_first_child_element(attrs); attr != NULL;
              attr = __xml_next_element(attr)) {
 
             const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
             const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
 
             if (safe_str_eq(name, XML_OP_ATTR_START_DELAY)) {
                 int start_delay = unpack_start_delay(value, NULL);
 
                 crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
             } else if (safe_str_eq(name, XML_OP_ATTR_ORIGIN)) {
-                int start_delay = unpack_interval_origin(value, NULL, xml_obj,
-                                                         interval_ms, now);
+                long long start_delay = 0;
 
-                crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_START_DELAY);
-                crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
+                if (unpack_interval_origin(value, xml_obj, interval_ms, now,
+                                           &start_delay)) {
+                    crm_xml_add(attr, XML_NVPAIR_ATTR_NAME,
+                                XML_OP_ATTR_START_DELAY);
+                    crm_xml_add_ll(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
+                }
             } else if (safe_str_eq(name, XML_ATTR_TIMEOUT)) {
                 int timeout = unpack_timeout(value);
 
                 crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, timeout);
             }
         }
     }
 }
 #endif
 
 /*!
  * \brief Unpack operation XML into an action structure
  *
  * Unpack an operation's meta-attributes (normalizing the interval, timeout,
  * and start delay values as integer milliseconds), requirements, and
  * failure policy.
  *
  * \param[in,out] action     Action to unpack into
  * \param[in]     xml_obj    Operation XML (or NULL if all defaults)
  * \param[in]     container  Resource that contains affected resource, if any
  * \param[in]     data_set   Cluster state
  */
 void
 unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
                  pe_working_set_t * data_set)
 {
     guint interval_ms = 0;
     int timeout = 0;
     char *value_ms = NULL;
     const char *value = NULL;
     const char *field = NULL;
     char *default_timeout = NULL;
 #if ENABLE_VERSIONED_ATTRS
     pe_rsc_action_details_t *rsc_details = NULL;
 #endif
 
     CRM_CHECK(action && action->rsc, return);
 
     // Cluster-wide <op_defaults> <meta_attributes>
     pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, NULL,
                                action->meta, NULL, FALSE, data_set);
 
     // Probe timeouts default differently, so handle timeout default later
     default_timeout = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
     if (default_timeout) {
         default_timeout = strdup(default_timeout);
         g_hash_table_remove(action->meta, XML_ATTR_TIMEOUT);
     }
 
     if (xml_obj) {
         xmlAttrPtr xIter = NULL;
 
         // <op> <meta_attributes> take precedence over defaults
         pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, NULL,
                                    action->meta, NULL, TRUE, data_set);
 
 #if ENABLE_VERSIONED_ATTRS
         rsc_details = pe_rsc_action_details(action);
         pe_unpack_versioned_attributes(data_set->input, xml_obj,
                                        XML_TAG_ATTR_SETS, NULL,
                                        rsc_details->versioned_parameters,
                                        data_set->now, NULL);
         pe_unpack_versioned_attributes(data_set->input, xml_obj,
                                        XML_TAG_META_SETS, NULL,
                                        rsc_details->versioned_meta,
                                        data_set->now, NULL);
 #endif
 
         /* Anything set as an <op> XML property has highest precedence.
          * This ensures we use the name and interval from the <op> tag.
          */
         for (xIter = xml_obj->properties; xIter; xIter = xIter->next) {
             const char *prop_name = (const char *)xIter->name;
             const char *prop_value = crm_element_value(xml_obj, prop_name);
 
             g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value));
         }
     }
 
     g_hash_table_remove(action->meta, "id");
 
     // Normalize interval to milliseconds
     field = XML_LRM_ATTR_INTERVAL;
     value = g_hash_table_lookup(action->meta, field);
     if (value != NULL) {
         interval_ms = crm_parse_interval_spec(value);
 
     } else if ((xml_obj == NULL) && !strcmp(action->task, RSC_STATUS)) {
         /* An orphaned recurring monitor will not have any XML. However, we
          * want the interval to be set, so the action can be properly detected
          * as a recurring monitor. Parse it from the key in this case.
          */
         parse_op_key(action->uuid, NULL, NULL, &interval_ms);
     }
     if (interval_ms > 0) {
         value_ms = crm_strdup_printf("%u", interval_ms);
         g_hash_table_replace(action->meta, strdup(field), value_ms);
 
     } else if (value) {
         g_hash_table_remove(action->meta, field);
     }
 
     // Handle timeout default, now that we know the interval
     if (g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT)) {
         free(default_timeout);
 
     } else {
         // Probe timeouts default to minimum-interval monitor's
         if (safe_str_eq(action->task, RSC_STATUS) && (interval_ms == 0)) {
 
             xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE);
 
             if (min_interval_mon) {
                 value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT);
                 if (value) {
                     crm_trace("\t%s defaults to minimum-interval monitor's timeout '%s'",
                               action->uuid, value);
                     free(default_timeout);
                     default_timeout = strdup(value);
                 }
             }
         }
 
         if (default_timeout) {
             g_hash_table_insert(action->meta, strdup(XML_ATTR_TIMEOUT),
                                 default_timeout);
         }
     }
 
     if (safe_str_neq(action->task, RSC_START)
         && safe_str_neq(action->task, RSC_PROMOTE)) {
         action->needs = rsc_req_nothing;
         value = "nothing (not start/promote)";
 
     } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) {
         action->needs = rsc_req_stonith;
         value = "fencing (resource)";
 
     } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) {
         action->needs = rsc_req_quorum;
         value = "quorum (resource)";
 
     } else {
         action->needs = rsc_req_nothing;
         value = "nothing (resource)";
     }
 
     pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->uuid, value);
 
     value = unpack_operation_on_fail(action);
 
     if (value == NULL) {
 
     } else if (safe_str_eq(value, "block")) {
         action->on_fail = action_fail_block;
         g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block"));
         value = "block"; // The above could destroy the original string
 
     } else if (safe_str_eq(value, "fence")) {
         action->on_fail = action_fail_fence;
         value = "node fencing";
 
         if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
             crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense");
             action->on_fail = action_fail_stop;
             action->fail_role = RSC_ROLE_STOPPED;
             value = "stop resource";
         }
 
     } else if (safe_str_eq(value, "standby")) {
         action->on_fail = action_fail_standby;
         value = "node standby";
 
     } else if (safe_str_eq(value, "ignore")
                || safe_str_eq(value, "nothing")) {
         action->on_fail = action_fail_ignore;
         value = "ignore";
 
     } else if (safe_str_eq(value, "migrate")) {
         action->on_fail = action_fail_migrate;
         value = "force migration";
 
     } else if (safe_str_eq(value, "stop")) {
         action->on_fail = action_fail_stop;
         action->fail_role = RSC_ROLE_STOPPED;
         value = "stop resource";
 
     } else if (safe_str_eq(value, "restart")) {
         action->on_fail = action_fail_recover;
         value = "restart (and possibly migrate)";
 
     } else if (safe_str_eq(value, "restart-container")) {
         if (container) {
             action->on_fail = action_fail_restart_container;
             value = "restart container (and possibly migrate)";
 
         } else {
             value = NULL;
         }
 
     } else {
         pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value);
         value = NULL;
     }
 
     /* defaults */
     if (value == NULL && container) {
         action->on_fail = action_fail_restart_container;
         value = "restart container (and possibly migrate) (default)";
 
     /* For remote nodes, ensure that any failure that results in dropping an
      * active connection to the node results in fencing of the node.
      *
      * There are only two action failures that don't result in fencing.
      * 1. probes - probe failures are expected.
      * 2. start - a start failure indicates that an active connection does not already
      * exist. The user can set op on-fail=fence if they really want to fence start
      * failures. */
     } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) &&
                 (pe__resource_is_remote_conn(action->rsc, data_set) &&
                !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && (interval_ms == 0)) &&
                 (safe_str_neq(action->task, CRMD_ACTION_START)))) {
 
         if (!is_set(action->rsc->flags, pe_rsc_managed)) {
             action->on_fail = action_fail_stop;
             action->fail_role = RSC_ROLE_STOPPED;
             value = "stop unmanaged remote node (enforcing default)";
 
         } else {
             if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
                 value = "fence remote node (default)";
             } else {
                 value = "recover remote node connection (default)";
             }
 
             if (action->rsc->remote_reconnect_ms) {
                 action->fail_role = RSC_ROLE_STOPPED;
             }
             action->on_fail = action_fail_reset_remote;
         }
 
     } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) {
         if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
             action->on_fail = action_fail_fence;
             value = "resource fence (default)";
 
         } else {
             action->on_fail = action_fail_block;
             value = "resource block (default)";
         }
 
     } else if (value == NULL) {
         action->on_fail = action_fail_recover;
         value = "restart (and possibly migrate) (default)";
     }
 
     pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value);
 
     value = NULL;
     if (xml_obj != NULL) {
         value = g_hash_table_lookup(action->meta, "role_after_failure");
         if (value) {
             pe_warn_once(pe_wo_role_after,
                         "Support for role_after_failure is deprecated and will be removed in a future release");
         }
     }
     if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) {
         action->fail_role = text2role(value);
     }
     /* defaults */
     if (action->fail_role == RSC_ROLE_UNKNOWN) {
         if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) {
             action->fail_role = RSC_ROLE_SLAVE;
         } else {
             action->fail_role = RSC_ROLE_STARTED;
         }
     }
     pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task,
                  role2text(action->fail_role));
 
     value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY);
     if (value) {
         unpack_start_delay(value, action->meta);
     } else {
+        long long start_delay = 0;
+
         value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN);
-        unpack_interval_origin(value, action->meta, xml_obj, interval_ms,
-                               data_set->now);
+        if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now,
+                                   &start_delay)) {
+            g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY),
+                                 crm_strdup_printf("%lld", start_delay));
+        }
     }
 
     value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
     timeout = unpack_timeout(value);
     g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), crm_itoa(timeout));
 
 #if ENABLE_VERSIONED_ATTRS
     unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms,
                           data_set->now);
 #endif
 }
 
 static xmlNode *
 find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled)
 {
     guint interval_ms = 0;
     gboolean do_retry = TRUE;
     char *local_key = NULL;
     const char *name = NULL;
     const char *value = NULL;
     const char *interval_spec = NULL;
     char *match_key = NULL;
     xmlNode *op = NULL;
     xmlNode *operation = NULL;
 
   retry:
     for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL;
          operation = __xml_next_element(operation)) {
         if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
             name = crm_element_value(operation, "name");
             interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
             value = crm_element_value(operation, "enabled");
             if (!include_disabled && value && crm_is_true(value) == FALSE) {
                 continue;
             }
 
             interval_ms = crm_parse_interval_spec(interval_spec);
             match_key = generate_op_key(rsc->id, name, interval_ms);
             if (safe_str_eq(key, match_key)) {
                 op = operation;
             }
             free(match_key);
 
             if (rsc->clone_name) {
                 match_key = generate_op_key(rsc->clone_name, name, interval_ms);
                 if (safe_str_eq(key, match_key)) {
                     op = operation;
                 }
                 free(match_key);
             }
 
             if (op != NULL) {
                 free(local_key);
                 return op;
             }
         }
     }
 
     free(local_key);
     if (do_retry == FALSE) {
         return NULL;
     }
 
     do_retry = FALSE;
     if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) {
         local_key = generate_op_key(rsc->id, "migrate", 0);
         key = local_key;
         goto retry;
 
     } else if (strstr(key, "_notify_")) {
         local_key = generate_op_key(rsc->id, "notify", 0);
         key = local_key;
         goto retry;
     }
 
     return NULL;
 }
 
 xmlNode *
 find_rsc_op_entry(resource_t * rsc, const char *key)
 {
     return find_rsc_op_entry_helper(rsc, key, FALSE);
 }
 
 void
 print_node(const char *pre_text, node_t * node, gboolean details)
 {
     if (node == NULL) {
         crm_trace("%s%s: <NULL>", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
         return;
     }
 
     CRM_ASSERT(node->details);
     crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)",
               pre_text == NULL ? "" : pre_text,
               pre_text == NULL ? "" : ": ",
               node->details->online ? "" : "Unavailable/Unclean ",
               node->details->uname, node->weight, node->fixed ? "True" : "False");
 
     if (details) {
         char *pe_mutable = strdup("\t\t");
         GListPtr gIter = node->details->running_rsc;
 
         crm_trace("\t\t===Node Attributes");
         g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable);
         free(pe_mutable);
 
         crm_trace("\t\t=== Resources");
 
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *rsc = (resource_t *) gIter->data;
 
             print_resource(LOG_TRACE, "\t\t", rsc, FALSE);
         }
     }
 }
 
 /*
  * Used by the HashTable for-loop
  */
 void
 print_str_str(gpointer key, gpointer value, gpointer user_data)
 {
     crm_trace("%s%s %s ==> %s",
               user_data == NULL ? "" : (char *)user_data,
               user_data == NULL ? "" : ": ", (char *)key, (char *)value);
 }
 
 void
 print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details)
 {
     long options = pe_print_log | pe_print_pending;
 
     if (rsc == NULL) {
         do_crm_log(log_level - 1, "%s%s: <NULL>",
                    pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
         return;
     }
     if (details) {
         options |= pe_print_details;
     }
     rsc->fns->print(rsc, pre_text, options, &log_level);
 }
 
 void
 pe_free_action(action_t * action)
 {
     if (action == NULL) {
         return;
     }
     g_list_free_full(action->actions_before, free);     /* action_wrapper_t* */
     g_list_free_full(action->actions_after, free);      /* action_wrapper_t* */
     if (action->extra) {
         g_hash_table_destroy(action->extra);
     }
     if (action->meta) {
         g_hash_table_destroy(action->meta);
     }
 #if ENABLE_VERSIONED_ATTRS
     if (action->rsc) {
         pe_free_rsc_action_details(action);
     }
 #endif
     free(action->cancel_task);
     free(action->reason);
     free(action->task);
     free(action->uuid);
     free(action->node);
     free(action);
 }
 
 GListPtr
 find_recurring_actions(GListPtr input, node_t * not_on_node)
 {
     const char *value = NULL;
     GListPtr result = NULL;
     GListPtr gIter = input;
 
     CRM_CHECK(input != NULL, return NULL);
 
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS);
         if (value == NULL) {
             /* skip */
         } else if (safe_str_eq(value, "0")) {
             /* skip */
         } else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) {
             /* skip */
         } else if (not_on_node == NULL) {
             crm_trace("(null) Found: %s", action->uuid);
             result = g_list_prepend(result, action);
 
         } else if (action->node == NULL) {
             /* skip */
         } else if (action->node->details != not_on_node->details) {
             crm_trace("Found: %s", action->uuid);
             result = g_list_prepend(result, action);
         }
     }
 
     return result;
 }
 
 enum action_tasks
 get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic)
 {
     enum action_tasks task = text2task(name);
 
     if (rsc == NULL) {
         return task;
 
     } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) {
         switch (task) {
             case stopped_rsc:
             case started_rsc:
             case action_demoted:
             case action_promoted:
                 crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id);
                 return task - 1;
                 break;
             default:
                 break;
         }
     }
     return task;
 }
 
 action_t *
 find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node)
 {
     GListPtr gIter = NULL;
 
     CRM_CHECK(uuid || task, return NULL);
 
     for (gIter = input; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         if (uuid != NULL && safe_str_neq(uuid, action->uuid)) {
             continue;
 
         } else if (task != NULL && safe_str_neq(task, action->task)) {
             continue;
 
         } else if (on_node == NULL) {
             return action;
 
         } else if (action->node == NULL) {
             continue;
 
         } else if (on_node->details == action->node->details) {
             return action;
         }
     }
 
     return NULL;
 }
 
 GListPtr
 find_actions(GListPtr input, const char *key, const node_t *on_node)
 {
     GListPtr gIter = input;
     GListPtr result = NULL;
 
     CRM_CHECK(key != NULL, return NULL);
 
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         if (safe_str_neq(key, action->uuid)) {
             crm_trace("%s does not match action %s", key, action->uuid);
             continue;
 
         } else if (on_node == NULL) {
             crm_trace("Action %s matches (ignoring node)", key);
             result = g_list_prepend(result, action);
 
         } else if (action->node == NULL) {
             crm_trace("Action %s matches (unallocated, assigning to %s)",
                       key, on_node->details->uname);
 
             action->node = node_copy(on_node);
             result = g_list_prepend(result, action);
 
         } else if (on_node->details == action->node->details) {
             crm_trace("Action %s on %s matches", key, on_node->details->uname);
             result = g_list_prepend(result, action);
 
         } else {
             crm_trace("Action %s on node %s does not match requested node %s",
                       key, action->node->details->uname,
                       on_node->details->uname);
         }
     }
 
     return result;
 }
 
 GList *
 find_actions_exact(GList *input, const char *key, const pe_node_t *on_node)
 {
     GList *result = NULL;
 
     CRM_CHECK(key != NULL, return NULL);
 
     if (on_node == NULL) {
         crm_trace("Not searching for action %s because node not specified",
                   key);
         return NULL;
     }
 
     for (GList *gIter = input; gIter != NULL; gIter = gIter->next) {
         pe_action_t *action = (pe_action_t *) gIter->data;
 
         if (action->node == NULL) {
             crm_trace("Skipping comparison of %s vs action %s without node",
                       key, action->uuid);
 
         } else if (safe_str_neq(key, action->uuid)) {
             crm_trace("Desired action %s doesn't match %s", key, action->uuid);
 
         } else if (safe_str_neq(on_node->details->id,
                                 action->node->details->id)) {
             crm_trace("Action %s desired node ID %s doesn't match %s",
                       key, on_node->details->id, action->node->details->id);
 
         } else {
             crm_trace("Action %s matches", key);
             result = g_list_prepend(result, action);
         }
     }
 
     return result;
 }
 
 /*!
  * \brief Find all actions of given type for a resource
  *
  * \param[in] rsc           Resource to search
  * \param[in] node          Find only actions scheduled on this node
  * \param[in] task          Action name to search for
  * \param[in] require_node  If TRUE, NULL node or action node will not match
  *
  * \return List of actions found (or NULL if none)
  * \note If node is not NULL and require_node is FALSE, matching actions
  *       without a node will be assigned to node.
  */
 GList *
 pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node,
                      const char *task, bool require_node)
 {
     GList *result = NULL;
     char *key = generate_op_key(rsc->id, task, 0);
 
     if (require_node) {
         result = find_actions_exact(rsc->actions, key, node);
     } else {
         result = find_actions(rsc->actions, key, node);
     }
     free(key);
     return result;
 }
 
 static void
 resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag)
 {
     node_t *match = NULL;
 
     if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never))
         && safe_str_eq(tag, "symmetric_default")) {
         /* This string comparision may be fragile, but exclusive resources and
          * exclusive nodes should not have the symmetric_default constraint
          * applied to them.
          */
         return;
 
     } else if (rsc->children) {
         GListPtr gIter = rsc->children;
 
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child_rsc = (resource_t *) gIter->data;
 
             resource_node_score(child_rsc, node, score, tag);
         }
     }
 
     pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score);
     match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
     if (match == NULL) {
         match = node_copy(node);
         g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match);
     }
     match->weight = merge_weights(match->weight, score);
 }
 
 void
 resource_location(resource_t * rsc, node_t * node, int score, const char *tag,
                   pe_working_set_t * data_set)
 {
     if (node != NULL) {
         resource_node_score(rsc, node, score, tag);
 
     } else if (data_set != NULL) {
         GListPtr gIter = data_set->nodes;
 
         for (; gIter != NULL; gIter = gIter->next) {
             node_t *node_iter = (node_t *) gIter->data;
 
             resource_node_score(rsc, node_iter, score, tag);
         }
 
     } else {
         GHashTableIter iter;
         node_t *node_iter = NULL;
 
         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) {
             resource_node_score(rsc, node_iter, score, tag);
         }
     }
 
     if (node == NULL && score == -INFINITY) {
         if (rsc->allocated_to) {
             crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname);
             free(rsc->allocated_to);
             rsc->allocated_to = NULL;
         }
     }
 }
 
 #define sort_return(an_int, why) do {					\
 	free(a_uuid);						\
 	free(b_uuid);						\
 	crm_trace("%s (%d) %c %s (%d) : %s",				\
 		  a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=',	\
 		  b_xml_id, b_call_id, why);				\
 	return an_int;							\
     } while(0)
 
 gint
 sort_op_by_callid(gconstpointer a, gconstpointer b)
 {
     int a_call_id = -1;
     int b_call_id = -1;
 
     char *a_uuid = NULL;
     char *b_uuid = NULL;
 
     const xmlNode *xml_a = a;
     const xmlNode *xml_b = b;
 
     const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID);
     const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID);
 
     if (safe_str_eq(a_xml_id, b_xml_id)) {
         /* We have duplicate lrm_rsc_op entries in the status
          * section which is unlikely to be a good thing
          *    - we can handle it easily enough, but we need to get
          *    to the bottom of why it's happening.
          */
         pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
         sort_return(0, "duplicate");
     }
 
     crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
     crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
 
     if (a_call_id == -1 && b_call_id == -1) {
         /* both are pending ops so it doesn't matter since
          *   stops are never pending
          */
         sort_return(0, "pending");
 
     } else if (a_call_id >= 0 && a_call_id < b_call_id) {
         sort_return(-1, "call id");
 
     } else if (b_call_id >= 0 && a_call_id > b_call_id) {
         sort_return(1, "call id");
 
     } else if (b_call_id >= 0 && a_call_id == b_call_id) {
         /*
          * The op and last_failed_op are the same
          * Order on last-rc-change
          */
         time_t last_a = -1;
         time_t last_b = -1;
 
         crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
         crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
 
         crm_trace("rc-change: %lld vs %lld",
                   (long long) last_a, (long long) last_b);
         if (last_a >= 0 && last_a < last_b) {
             sort_return(-1, "rc-change");
 
         } else if (last_b >= 0 && last_a > last_b) {
             sort_return(1, "rc-change");
         }
         sort_return(0, "rc-change");
 
     } else {
         /* One of the inputs is a pending operation
          * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
          */
 
         int a_id = -1;
         int b_id = -1;
 
         const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC);
         const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC);
 
         CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
         if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL,
                                      NULL)) {
             sort_return(0, "bad magic a");
         }
         if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL,
                                      NULL)) {
             sort_return(0, "bad magic b");
         }
         /* try to determine the relative age of the operation...
          * some pending operations (e.g. a start) may have been superseded
          *   by a subsequent stop
          *
          * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
          */
         if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) {
             /*
              * some of the logic in here may be redundant...
              *
              * if the UUID from the TE doesn't match then one better
              *   be a pending operation.
              * pending operations don't survive between elections and joins
              *   because we query the LRM directly
              */
 
             if (b_call_id == -1) {
                 sort_return(-1, "transition + call");
 
             } else if (a_call_id == -1) {
                 sort_return(1, "transition + call");
             }
 
         } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
             sort_return(-1, "transition");
 
         } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
             sort_return(1, "transition");
         }
     }
 
     /* we should never end up here */
     CRM_CHECK(FALSE, sort_return(0, "default"));
 
 }
 
 /*!
  * \brief Get the epoch time a working set should treat as "now"
  *
  * \param[in,out] data_set  Working set whose "now" is used; if it has none
  *                          yet, a new one is created and stored in it
  *
  * \return Seconds since the epoch for the working set's "now", or the
  *         current system time if \p data_set is NULL
  */
 time_t
 get_effective_time(pe_working_set_t * data_set)
 {
     if (data_set == NULL) {
         crm_trace("Defaulting to 'now'");
         return time(NULL);
     }
 
     if (data_set->now == NULL) {
         crm_trace("Recording a new 'now'");
         data_set->now = crm_time_new(NULL);
     }
     return crm_time_get_seconds_since_epoch(data_set->now);
 }
 
 /*!
  * \brief Read a resource's explicitly configured target role
  *
  * \param[in]  rsc   Resource whose meta-attributes are consulted
  * \param[out] role  Where to store the configured role (must not be NULL)
  *
  * \return TRUE if \p role was set, FALSE if no usable target role is
  *         configured (unset, "started", "default", unknown, or a role that
  *         is rejected below)
  */
 gboolean
 get_target_role(resource_t * rsc, enum rsc_role_e * role)
 {
     const char *configured = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
     enum rsc_role_e parsed = RSC_ROLE_UNKNOWN;
 
     CRM_CHECK(role != NULL, return FALSE);
 
     if (configured == NULL) {
         return FALSE;
     }
     if (safe_str_eq("started", configured) || safe_str_eq("default", configured)) {
         return FALSE;
     }
 
     parsed = text2role(configured);
     if (parsed == RSC_ROLE_UNKNOWN) {
         crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value_or(configured));
         return FALSE;
     }
 
     if (parsed > RSC_ROLE_STARTED) {
         // Roles beyond "started" are only accepted for promotable clones
         if (!is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) {
             crm_config_err("%s is not part of a promotable clone resource, a %s of '%s' makes no sense",
                            rsc->id, XML_RSC_ATTR_TARGET_ROLE, configured);
             return FALSE;
         }
         if (parsed > RSC_ROLE_SLAVE) {
             /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */
             return FALSE;
         }
     }
 
     *role = parsed;
     return TRUE;
 }
 
 /*!
  * \brief Order one action before another
  *
  * Record \p rh_action in \p lh_action's actions_after list and \p lh_action
  * in \p rh_action's actions_before list, each wrapped together with the
  * requested ordering type.
  *
  * \param[in,out] lh_action  Action that goes first
  * \param[in,out] rh_action  Action that goes second
  * \param[in]     order      Ordering flags to store with the relation
  *
  * \return TRUE if a new ordering was recorded, FALSE if nothing was added
  *         (no ordering requested, a NULL action, or \p lh_action already has
  *         an entry for \p rh_action sharing a flag with \p order)
  */
 gboolean
 order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order)
 {
     GListPtr gIter = NULL;
     action_wrapper_t *wrapper = NULL;
     GListPtr list = NULL;
 
     if (order == pe_order_none) {
         return FALSE;
     }
 
     if (lh_action == NULL || rh_action == NULL) {
         return FALSE;
     }
 
     crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid);
 
     /* Ensure we never create a dependency on ourselves... it's happened */
     CRM_ASSERT(lh_action != rh_action);
 
     /* Filter dups, otherwise update_action_states() has too much work to do */
     gIter = lh_action->actions_after;
     for (; gIter != NULL; gIter = gIter->next) {
         action_wrapper_t *after = (action_wrapper_t *) gIter->data;
 
         if (after->action == rh_action && (after->type & order)) {
             return FALSE;
         }
     }
 
     // Forward link: rh_action comes after lh_action
     wrapper = calloc(1, sizeof(action_wrapper_t));
     CRM_ASSERT(wrapper != NULL); // previously dereferenced without a check
     wrapper->action = rh_action;
     wrapper->type = order;
 
     list = lh_action->actions_after;
     list = g_list_prepend(list, wrapper);
     lh_action->actions_after = list;
 
     // Backward link: lh_action comes before rh_action
     wrapper = calloc(1, sizeof(action_wrapper_t));
     CRM_ASSERT(wrapper != NULL);
     wrapper->action = lh_action;
     wrapper->type = order;
     list = rh_action->actions_before;
     list = g_list_prepend(list, wrapper);
     rh_action->actions_before = list;
     return TRUE;
 }
 
 /*!
  * \brief Find or create a pseudo-action by name
  *
  * \param[in]     name      Name used both as lookup key and as task
  * \param[in,out] data_set  Working set whose singletons table (if any) is
  *                          searched first
  *
  * \return Existing action from the singletons table, or a newly created
  *         action flagged as pseudo and runnable
  */
 action_t *
 get_pseudo_op(const char *name, pe_working_set_t * data_set)
 {
     action_t *pseudo = NULL;
 
     if (data_set->singletons != NULL) {
         pseudo = g_hash_table_lookup(data_set->singletons, name);
     }
     if (pseudo != NULL) {
         return pseudo;
     }
 
     pseudo = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
     set_bit(pseudo->flags, pe_action_pseudo);
     set_bit(pseudo->flags, pe_action_runnable);
     return pseudo;
 }
 
 /*!
  * \brief Free a ticket object (usable as a hash table value destructor)
  *
  * \param[in] data  Ticket to free, including its state table and ID
  */
 void
 destroy_ticket(gpointer data)
 {
     ticket_t *doomed = (ticket_t *) data;
 
     if (doomed->state != NULL) {
         g_hash_table_destroy(doomed->state);
     }
     free(doomed->id);
     free(doomed);
 }
 
 /*!
  * \brief Find a ticket in a working set, creating it if necessary
  *
  * \param[in]     ticket_id  ID of the ticket to look up
  * \param[in,out] data_set   Working set whose ticket table is used (the
  *                           table itself is created on first use)
  *
  * \return Ticket entry owned by the working set's ticket table, or NULL if
  *         \p ticket_id is NULL or empty, or on allocation failure
  */
 ticket_t *
 ticket_new(const char *ticket_id, pe_working_set_t * data_set)
 {
     ticket_t *ticket = NULL;
 
     if (ticket_id == NULL || ticket_id[0] == '\0') {
         return NULL;
     }
 
     if (data_set->tickets == NULL) {
         data_set->tickets =
             g_hash_table_new_full(crm_str_hash, g_str_equal, free,
                                   destroy_ticket);
     }
 
     ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
     if (ticket == NULL) {
 
         ticket = calloc(1, sizeof(ticket_t));
         if (ticket == NULL) {
             crm_err("Cannot allocate ticket '%s'", ticket_id);
             return NULL;
         }
 
         crm_trace("Creating ticket entry for %s", ticket_id);
 
         // New tickets start out not granted, never granted, and not standby
         ticket->id = strdup(ticket_id);
         ticket->granted = FALSE;
         ticket->last_granted = -1;
         ticket->standby = FALSE;
         ticket->state = crm_str_table_new();
 
         // The table gets its own copy of the ID as key
         g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket);
     }
 
     return ticket;
 }
 
 /*!
  * \internal
  * \brief Remove XML attributes based on membership in a name list
  *
  * Each attribute name is searched for in \p param_string surrounded by single
  * spaces (" name ").
  *
  * \param[in,out] param_set     XML element whose attributes are filtered
  * \param[in]     param_string  Space-delimited list of parameter names
  * \param[in]     need_present  If true, drop attributes that are not listed;
  *                              otherwise drop attributes that are listed
  */
 static void
 filter_parameters(xmlNode * param_set, const char *param_string, bool need_present)
 {
     xmlAttrPtr attr = NULL;
 
     if ((param_set == NULL) || (param_string == NULL)) {
         return;
     }
 
     attr = param_set->properties;
     while (attr != NULL) {
         const char *attr_name = (const char *) attr->name;
         char *needle = crm_strdup_printf(" %s ", attr_name);
         bool listed = (strstr(param_string, needle) != NULL);
 
         free(needle);
 
         // Advance first, because the current attribute may be removed below
         attr = attr->next;
 
         if (need_present && !listed) {
             crm_trace("%s not found in %s", attr_name, param_string);
             xml_remove_prop(param_set, attr_name);
 
         } else if (!need_present && listed) {
             crm_trace("%s found in %s", attr_name, param_string);
             xml_remove_prop(param_set, attr_name);
         }
     }
 }
 
 #if ENABLE_VERSIONED_ATTRS
 /*!
  * \internal
  * \brief Add versioned parameters for an agent version to a parameter set
  *
  * \param[in]     versioned_params  XML of versioned parameters to unpack
  * \param[in]     ra_version        Resource agent version to unpack for
  * \param[in,out] params            XML element receiving name/value pairs
  */
 static void
 append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNode *params)
 {
     GHashTableIter entry;
     gpointer name = NULL;
     gpointer setting = NULL;
     GHashTable *unpacked = pe_unpack_versioned_parameters(versioned_params, ra_version);
 
     g_hash_table_iter_init(&entry, unpacked);
     while (g_hash_table_iter_next(&entry, &name, &setting)) {
         crm_xml_add(params, (const char *) name, (const char *) setting);
     }
     g_hash_table_destroy(unpacked);
 }
 #endif
 
 /*!
  * \internal
  * \brief Calculate action digests and store in node's digest cache
  *
  * If the node's digest cache already has an entry for \p key, that entry is
  * returned unchanged. Otherwise the action's parameters are assembled from
  * the resource, a temporary action, and (if enabled) versioned attributes,
  * filtered, and hashed into a new cache entry.
  *
  * \param[in] rsc          Resource that action was for
  * \param[in] task         Name of action performed
  * \param[in] key          Action's task key
  * \param[in] node         Node action was performed on
  * \param[in] xml_op       XML of operation in CIB status (if available)
  * \param[in] calc_secure  Whether to calculate secure digest
  * \param[in] data_set     Cluster working set
  *
  * \return Pointer to node's digest cache entry
  */
 static op_digest_cache_t *
 rsc_action_digest(pe_resource_t *rsc, const char *task, const char *key,
                   pe_node_t *node, xmlNode *xml_op, bool calc_secure,
                   pe_working_set_t *data_set)
 {
     op_digest_cache_t *data = NULL;
 
     data = g_hash_table_lookup(node->details->digest_cache, key);
     if (data == NULL) {
         GHashTable *local_rsc_params = crm_str_table_new();
         action_t *action = custom_action(rsc, strdup(key), task, node, TRUE, FALSE, data_set);
 #if ENABLE_VERSIONED_ATTRS
         xmlNode *local_versioned_params = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS);
         const char *ra_version = NULL;
 #endif
 
         const char *op_version;
         const char *restart_list = NULL;
         const char *secure_list = " passwd password ";
 
         data = calloc(1, sizeof(op_digest_cache_t));
         CRM_ASSERT(data != NULL);
 
         get_rsc_attributes(local_rsc_params, rsc, node, data_set);
 #if ENABLE_VERSIONED_ATTRS
         pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set);
 #endif
 
         data->params_all = create_xml_node(NULL, XML_TAG_PARAMS);
 
         // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
         if (pe__add_bundle_remote_name(rsc, data->params_all,
                                        XML_RSC_ATTR_REMOTE_RA_ADDR)) {
             crm_trace("Set address for bundle connection %s (on %s)",
                       rsc->id, node->details->uname);
         }
 
         g_hash_table_foreach(local_rsc_params, hash2field, data->params_all);
         g_hash_table_foreach(action->extra, hash2field, data->params_all);
         g_hash_table_foreach(rsc->parameters, hash2field, data->params_all);
         g_hash_table_foreach(action->meta, hash2metafield, data->params_all);
 
         if(xml_op) {
             /* When history is available, the lists recorded with the
              * operation replace the built-in defaults (and may be NULL)
              */
             secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE);
             restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART);
 
             op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
 #if ENABLE_VERSIONED_ATTRS
             ra_version = crm_element_value(xml_op, XML_ATTR_RA_VERSION);
 #endif
 
         } else {
             op_version = CRM_FEATURE_SET;
         }
 
 #if ENABLE_VERSIONED_ATTRS
         append_versioned_params(local_versioned_params, ra_version, data->params_all);
         append_versioned_params(rsc->versioned_parameters, ra_version, data->params_all);
 
         {
             pe_rsc_action_details_t *details = pe_rsc_action_details(action);
             append_versioned_params(details->versioned_parameters, ra_version, data->params_all);
         }
 
         // Only needed while unpacking above; previously leaked
         free_xml(local_versioned_params);
 #endif
 
         filter_action_parameters(data->params_all, op_version);
 
         g_hash_table_destroy(local_rsc_params);
         pe_free_action(action);
 
         data->digest_all_calc = calculate_operation_digest(data->params_all, op_version);
 
         if (calc_secure) {
             // Secure digest: all parameters minus those named in secure_list
             data->params_secure = copy_xml(data->params_all);
             if(secure_list) {
                 filter_parameters(data->params_secure, secure_list, FALSE);
             }
             data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version);
         }
 
         if(xml_op && crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
             // Restart digest: only the parameters named in restart_list
             data->params_restart = copy_xml(data->params_all);
             if (restart_list) {
                 filter_parameters(data->params_restart, restart_list, TRUE);
             }
             data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version);
         }
 
         g_hash_table_insert(node->details->digest_cache, strdup(key), data);
     }
 
     return data;
 }
 
 /*!
  * \brief Compare an operation's recorded digests against current ones
  *
  * \param[in] rsc       Resource the operation was for
  * \param[in] xml_op    Operation history XML holding the recorded digests
  * \param[in] node      Node whose digest cache is used (must not be NULL)
  * \param[in] data_set  Cluster working set
  *
  * \return Node's digest cache entry, with its rc member set to the result
  *         of the comparison
  */
 op_digest_cache_t *
 rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
                       pe_working_set_t * data_set)
 {
     op_digest_cache_t *entry = NULL;
     char *op_key = NULL;
     guint interval_ms = 0;
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     const char *interval_spec = crm_element_value(xml_op,
                                                   XML_LRM_ATTR_INTERVAL_MS);
     const char *op_version = NULL;
     const char *recorded_all = NULL;
     const char *recorded_restart = NULL;
 
     CRM_ASSERT(node != NULL);
 
     op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
     recorded_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
     recorded_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
 
     interval_ms = crm_parse_ms(interval_spec);
     op_key = generate_op_key(rsc->id, task, interval_ms);
     entry = rsc_action_digest(rsc, task, op_key, node, xml_op,
                               is_set(data_set->flags, pe_flag_sanitized),
                               data_set);
 
     if ((recorded_restart != NULL) && (entry->digest_restart_calc != NULL)
         && (strcmp(entry->digest_restart_calc, recorded_restart) != 0)) {
         pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s",
                  op_key, node->details->uname,
                  crm_str(recorded_restart), entry->digest_restart_calc,
                  op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
         entry->rc = RSC_DIGEST_RESTART;
 
     } else if (recorded_all == NULL) {
         /* it is unknown what the previous op digest was */
         entry->rc = RSC_DIGEST_UNKNOWN;
 
     } else if (strcmp(recorded_all, entry->digest_all_calc) != 0) {
         pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (%s:%s) %s",
                  op_key, node->details->uname,
                  crm_str(recorded_all), entry->digest_all_calc,
                  (interval_ms > 0)? "reschedule" : "reload",
                  op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
         entry->rc = RSC_DIGEST_ALL;
 
     } else {
         entry->rc = RSC_DIGEST_MATCH;
     }
 
     free(op_key);
     return entry;
 }
 
 /*!
  * \internal
  * \brief Build the unfencing summary string for one fence device
  *
  * The summary joins the device's resource ID, its agent type, and a
  * parameter digest (for all parameters or only non-private ones) with
  * colons. Stored in a special node attribute, it lets changes to the agent
  * type or parameters be detected, so that unfencing can be redone or safely
  * skipped when the device's history is cleaned.
  *
  * \param[in] rsc_id        Fence device resource ID
  * \param[in] agent_type    Fence device agent
  * \param[in] param_digest  Fence device parameter digest
  *
  * \return Newly allocated "<rsc_id>:<agent_type>:<param_digest>" string
  * \note The caller is responsible for freeing the result.
  */
 static inline char *
 create_unfencing_summary(const char *rsc_id, const char *agent_type,
                          const char *param_digest)
 {
     char *summary = crm_strdup_printf("%s:%s:%s", rsc_id, agent_type,
                                       param_digest);
 
     return summary;
 }
 
 /*!
  * \internal
  * \brief Check whether a node's unfencing summary covers a fence device
  *
  * Build the summary for the device's current definition and look for it
  * inside the summary list stored for the node.
  *
  * \param[in] rsc_id        Fence device's resource ID
  * \param[in] agent         Fence device's agent type
  * \param[in] digest_calc   Fence device's current parameter digest
  * \param[in] node_summary  Value of node's special unfencing node attribute
  *                          (a comma-separated list of unfencing summaries for
  *                          all devices that have unfenced this node)
  *
  * \return TRUE if digest matches, FALSE otherwise (including when any
  *         argument is NULL)
  */
 static bool
 unfencing_digest_matches(const char *rsc_id, const char *agent,
                          const char *digest_calc, const char *node_summary)
 {
     char *needle = NULL;
     bool found = FALSE;
 
     if ((rsc_id == NULL) || (agent == NULL) || (digest_calc == NULL)
         || (node_summary == NULL)) {
         return FALSE;
     }
 
     needle = create_unfencing_summary(rsc_id, agent, digest_calc);
 
     /* The digest was calculated including the device ID and agent,
      * so there is no risk of collision using strstr().
      */
     found = (strstr(node_summary, needle) != NULL);
     crm_trace("Calculated unfencing digest '%s' %sfound in '%s'",
               needle, found? "" : "not ", node_summary);
     free(needle);
     return found;
 }
 
 /* Magic string used as the action name when building digest cache keys for
  * unfencing checks. It is deliberately not a real action name (i.e. "on"),
  * so check_action_definition() won't confuse these entries with real actions.
  */
 #define STONITH_DIGEST_TASK "stonith-on"
 
 /*!
  * \internal
  * \brief Calculate fence device digests and compare with a node's summaries
  *
  * The device's current digests are compared first against the node's
  * full-parameter unfencing summary and then against its secure-parameter
  * summary; the outcome is stored in the cache entry's rc member.
  *
  * \param[in] rsc       Fence device resource
  * \param[in] agent     Fence device's agent type
  * \param[in] node      Node with digest cache to use
  * \param[in] data_set  Cluster working set
  *
  * \return Node's digest cache entry
  */
 static op_digest_cache_t *
 fencing_action_digest_cmp(pe_resource_t *rsc, const char *agent,
                           pe_node_t *node, pe_working_set_t *data_set)
 {
     const char *summary_all = NULL;
 
     // Calculate device's current parameter digests
     char *cache_key = generate_op_key(rsc->id, STONITH_DIGEST_TASK, 0);
     op_digest_cache_t *entry = rsc_action_digest(rsc, STONITH_DIGEST_TASK,
                                                  cache_key, node, NULL, TRUE,
                                                  data_set);
 
     free(cache_key);
 
     summary_all = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL);
 
     if (summary_all == NULL) {
         // Node has no special unfencing summary node attribute
         entry->rc = RSC_DIGEST_UNKNOWN;
 
     } else if (unfencing_digest_matches(rsc->id, agent, entry->digest_all_calc,
                                         summary_all)) {
         // Full parameter digest matches
         entry->rc = RSC_DIGEST_MATCH;
 
     } else if (unfencing_digest_matches(rsc->id, agent,
                                         entry->digest_secure_calc,
                                         pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE))) {
         // Secure parameter digest matches
         entry->rc = RSC_DIGEST_MATCH;
         if (is_set(data_set->flags, pe_flag_stdout)) {
             printf("Only 'private' parameters to %s for unfencing %s changed\n",
                    rsc->id, node->details->uname);
         }
 
     } else {
         // Parameters don't match
         entry->rc = RSC_DIGEST_ALL;
         if (is_set(data_set->flags, (pe_flag_sanitized|pe_flag_stdout))
             && entry->digest_secure_calc) {
             char *suggestion = create_unfencing_summary(rsc->id, agent,
                                                         entry->digest_secure_calc);
 
             printf("Parameters to %s for unfencing %s changed, try '%s'\n",
                    rsc->id, node->details->uname, suggestion);
             free(suggestion);
         }
     }
     return entry;
 }
 
 /*!
  * \brief Get the ID to display for a resource
  *
  * \param[in] rsc  Resource to get ID for
  *
  * \return ID from the resource's XML if the unique flag is not set,
  *         otherwise the resource's own id member
  */
 const char *rsc_printable_id(resource_t *rsc)
 {
     return is_not_set(rsc->flags, pe_rsc_unique)? ID(rsc->xml) : rsc->id;
 }
 
 /*!
  * \brief Clear a flag on a resource and all of its descendants
  *
  * \param[in,out] rsc   Resource at the top of the subtree
  * \param[in]     flag  Flag bit(s) to clear
  */
 void
 clear_bit_recursive(resource_t * rsc, unsigned long long flag)
 {
     GListPtr child = NULL;
 
     clear_bit(rsc->flags, flag);
     for (child = rsc->children; child != NULL; child = child->next) {
         clear_bit_recursive((resource_t *) child->data, flag);
     }
 }
 
 /*!
  * \brief Set a flag on a resource and all of its descendants
  *
  * \param[in,out] rsc   Resource at the top of the subtree
  * \param[in]     flag  Flag bit(s) to set
  */
 void
 set_bit_recursive(resource_t * rsc, unsigned long long flag)
 {
     GListPtr child = NULL;
 
     set_bit(rsc->flags, flag);
     for (child = rsc->children; child != NULL; child = child->next) {
         set_bit_recursive((resource_t *) child->data, flag);
     }
 }
 
 /*!
  * \internal
  * \brief Collect fence devices that provide or require unfencing
  *
  * Resources with children are searched recursively; childless resources are
  * kept only if they are fence devices whose "provides" or "requires"
  * meta-attribute is "unfencing".
  *
  * \param[in] candidates  Resources to examine
  * \param[in] matches     List collected so far (may be NULL)
  *
  * \return \p matches with any newly found devices prepended
  */
 static GListPtr
 find_unfencing_devices(GListPtr candidates, GListPtr matches) 
 {
     GListPtr item = NULL;
 
     for (item = candidates; item != NULL; item = item->next) {
         resource_t *rsc = item->data;
         const char *provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES);
         const char *requires = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REQUIRES);
 
         if (rsc->children != NULL) {
             matches = find_unfencing_devices(rsc->children, matches);
             continue;
         }
         if (is_not_set(rsc->flags, pe_rsc_fence_device)) {
             continue;
         }
         if (crm_str_eq(provides, "unfencing", FALSE)
             || crm_str_eq(requires, "unfencing", FALSE)) {
             matches = g_list_prepend(matches, rsc);
         }
     }
     return matches;
 }
 
 
 /*!
  * \internal
  * \brief Append one "<id>:<agent>:<digest>," entry to a growing string
  *
  * \param[in,out] list      Heap-allocated, NUL-terminated string to extend
  * \param[in,out] list_len  Current length of \p *list (without terminator)
  * \param[in]     rsc_id    Fence device resource ID
  * \param[in]     agent     Fence device agent type
  * \param[in]     digest    Parameter digest to record
  */
 static void
 append_unfencing_digest(char **list, size_t *list_len, const char *rsc_id,
                         const char *agent, const char *digest)
 {
     char *entry = crm_strdup_printf("%s:%s:%s,", rsc_id, agent, digest);
     size_t entry_len = strlen(entry);
     char *grown = realloc(*list, *list_len + entry_len + 1);
 
     CRM_ASSERT(grown != NULL);
     memcpy(grown + *list_len, entry, entry_len + 1); // includes terminator
     *list = grown;
     *list_len += entry_len;
     free(entry);
 }
 
 /*!
  * \brief Find or create a fencing action for a node
  *
  * For guest and remote nodes (when unfencing is enabled), a newly created
  * action also records the current digests of all unfencing-capable devices
  * in its meta-attributes, and becomes required if any device's definition
  * changed.
  *
  * \param[in]     node      Node to be fenced
  * \param[in]     op        Fencing action name (NULL to use the working
  *                          set's stonith_action)
  * \param[in]     optional  Whether the action may remain optional
  * \param[in]     reason    Text describing why fencing is needed (or NULL)
  * \param[in,out] data_set  Cluster working set
  *
  * \return Fencing action (existing singleton or newly created)
  */
 action_t *
 pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set)
 {
     char *op_key = NULL;
     action_t *stonith_op = NULL;
 
     if(op == NULL) {
         op = data_set->stonith_action;
     }
 
     op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op);
 
     if(data_set->singletons) {
         stonith_op = g_hash_table_lookup(data_set->singletons, op_key);
     }
 
     if(stonith_op == NULL) {
         stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set);
 
         add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
         add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
         add_hash_param(stonith_op->meta, "stonith_action", op);
 
         if (pe__is_guest_or_remote_node(node)
             && is_set(data_set->flags, pe_flag_enable_unfencing)) {
             /* Extra work to detect device changes on remotes
              *
              * We may do this for all nodes in the future, but for now
              * the check_action_definition() based stuff works fine.
              */
             size_t digests_all_len = 0;
             size_t digests_secure_len = 0;
 
             /* Start from empty strings so the stored values are always
              * NUL-terminated, even when no device matches; the lists then
              * grow as needed instead of overflowing a fixed-size buffer.
              */
             char *digests_all = calloc(1, sizeof(char));
             char *digests_secure = calloc(1, sizeof(char));
             GListPtr matches = find_unfencing_devices(data_set->resources, NULL);
 
             CRM_ASSERT((digests_all != NULL) && (digests_secure != NULL));
 
             for (GListPtr gIter = matches; gIter != NULL; gIter = gIter->next) {
                 resource_t *match = gIter->data;
                 const char *agent = g_hash_table_lookup(match->meta,
                                                         XML_ATTR_TYPE);
                 op_digest_cache_t *data = NULL;
 
                 data = fencing_action_digest_cmp(match, agent, node, data_set);
                 if(data->rc == RSC_DIGEST_ALL) {
                     optional = FALSE;
                     crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id);
                     if (is_set(data_set->flags, pe_flag_stdout)) {
                         fprintf(stdout, "  notice: Unfencing %s (remote): because the definition of %s changed\n", node->details->uname, match->id);
                     }
                 }
 
                 append_unfencing_digest(&digests_all, &digests_all_len,
                                         match->id, agent,
                                         data->digest_all_calc);
                 append_unfencing_digest(&digests_secure, &digests_secure_len,
                                         match->id, agent,
                                         data->digest_secure_calc);
             }
 
             // The list nodes are ours; the resources they point to are not
             g_list_free(matches);
 
             // The meta table takes ownership of both digest strings
             g_hash_table_insert(stonith_op->meta,
                                 strdup(XML_OP_ATTR_DIGESTS_ALL),
                                 digests_all);
             g_hash_table_insert(stonith_op->meta,
                                 strdup(XML_OP_ATTR_DIGESTS_SECURE),
                                 digests_secure);
         }
 
     } else {
         // An existing action was found, so the lookup key is not needed
         free(op_key);
     }
 
     if(optional == FALSE && pe_can_fence(data_set, node)) {
         pe_action_required(stonith_op, NULL, reason);
     } else if(reason && stonith_op->reason == NULL) {
         stonith_op->reason = strdup(reason);
     }
 
     return stonith_op;
 }
 
 /*!
  * \brief Schedule unfencing ("on") of a node, or of a device's allowed nodes
  *
  * \param[in]     rsc         Resource prompting the unfencing (or NULL);
  *                            ignored unless it is a fence device
  * \param[in]     node        Node to unfence; if it is NULL or not usable,
  *                            \p rsc's allowed nodes are tried instead
  * \param[in]     reason      Text passed on to the fencing action
  * \param[in]     dependency  Action to order after the unfencing (or NULL)
  * \param[in,out] data_set    Cluster working set
  */
 void
 trigger_unfencing(
     resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set) 
 {
     if (is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
         /* No resources require it */
         return;
     }
 
     if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_fence_device)) {
         /* Wasn't a stonith device */
         return;
     }
 
     if ((node != NULL) && node->details->online
         && !node->details->unclean && !node->details->shutdown) {
         action_t *unfence = pe_fence_op(node, "on", FALSE, reason, data_set);
 
         if (dependency != NULL) {
             order_actions(unfence, dependency, pe_order_optional);
         }
         return;
     }
 
     if (rsc != NULL) {
         GHashTableIter allowed;
         node_t *candidate = NULL;
 
         g_hash_table_iter_init(&allowed, rsc->allowed_nodes);
         while (g_hash_table_iter_next(&allowed, NULL, (void **) &candidate)) {
             if (candidate->details->online && !candidate->details->unclean
                 && !candidate->details->shutdown) {
                 trigger_unfencing(rsc, candidate, reason, dependency, data_set);
             }
         }
     }
 }
 
 /*!
  * \brief Add an object reference to a tag, creating the tag if needed
  *
  * \param[in,out] tags      Table of tags, keyed by tag name
  * \param[in]     tag_name  Name of tag to add the reference to
  * \param[in]     obj_ref   ID of the referenced object
  *
  * \return TRUE if the reference is present afterwards (newly added or
  *         already there), FALSE on invalid arguments or allocation failure
  */
 gboolean
 add_tag_ref(GHashTable * tags, const char * tag_name,  const char * obj_ref)
 {
     tag_t *entry = NULL;
     GListPtr ref = NULL;
 
     CRM_CHECK(tags && tag_name && obj_ref, return FALSE);
 
     entry = g_hash_table_lookup(tags, tag_name);
     if (entry == NULL) {
         entry = calloc(1, sizeof(tag_t));
         if (entry == NULL) {
             return FALSE;
         }
         entry->id = strdup(tag_name);
         entry->refs = NULL;
         g_hash_table_insert(tags, strdup(tag_name), entry);
     }
 
     // Nothing more to do if the reference is already recorded
     for (ref = entry->refs; ref != NULL; ref = ref->next) {
         if (crm_str_eq((const char *) ref->data, obj_ref, TRUE)) {
             return TRUE;
         }
     }
 
     entry->refs = g_list_append(entry->refs, strdup(obj_ref));
     crm_trace("Added: tag=%s ref=%s", entry->id, obj_ref);
     return TRUE;
 }
 
 /*!
  * \brief Change an action's flags and record why
  *
  * Depending on which flag \p flags contains, the flag bits are either cleared
  * from or set on \p action, and a reason string is built from the kind of
  * change, the \p reason action, and \p text.
  *
  * \param[in]     function   Caller's function name (passed to the bit helpers)
  * \param[in]     line       Caller's line number (passed to the bit helpers)
  * \param[in,out] action     Action whose flags and reason are updated
  * \param[in]     reason     Action that caused the change (or NULL)
  * \param[in]     text       Additional explanatory text (or NULL)
  * \param[in]     flags      Flag(s) being changed
  * \param[in]     overwrite  Whether an existing reason may be replaced
  */
 void pe_action_set_flag_reason(const char *function, long line,
                                pe_action_t *action, pe_action_t *reason, const char *text,
                                enum pe_action_flags flags, bool overwrite)
 {
     bool unset = FALSE;
     bool update = FALSE;
     const char *change = NULL;
 
     /* Decide whether the flag is being cleared (unset) or set, and which word
      * describes the change. The first matching flag wins.
      */
     if(is_set(flags, pe_action_runnable)) {
         unset = TRUE;
         change = "unrunnable";
     } else if(is_set(flags, pe_action_optional)) {
         unset = TRUE;
         change = "required";
     } else if(is_set(flags, pe_action_migrate_runnable)) {
         unset = TRUE;
         overwrite = TRUE; // this case always replaces any existing reason
         change = "unrunnable";
     } else if(is_set(flags, pe_action_dangle)) {
         change = "dangling";
     } else if(is_set(flags, pe_action_requires_any)) {
         change = "required";
     } else {
         // Unrecognized flag: logged, but the flag is still set below
         crm_err("Unknown flag change to %x by %s: 0x%s",
                 flags, action->uuid, (reason? reason->uuid : "0"));
     }
 
     // Apply the change only if it would actually alter the action's flags
     if(unset) {
         if(is_set(action->flags, flags)) {
             action->flags = crm_clear_bit(function, line, action->uuid, action->flags, flags);
             update = TRUE;
         }
 
     } else {
         if(is_not_set(action->flags, flags)) {
             action->flags = crm_set_bit(function, line, action->uuid, action->flags, flags);
             update = TRUE;
         }
     }
 
     if((change && update) || text) {
         char *reason_text = NULL;
         if(reason == NULL) {
             // No causing action: use the caller's text as the reason directly
             pe_action_set_reason(action, text, overwrite);
 
         } else if(reason->rsc == NULL) {
             /* NOTE(review): when text is NULL the %c argument is 0, which
              * presumably ends the resulting string right after the task name
              * -- confirm against crm_strdup_printf()
              */
             reason_text = crm_strdup_printf("%s %s%c %s", change, reason->task, text?':':0, text?text:"");
         } else {
             reason_text = crm_strdup_printf("%s %s %s%c %s", change, reason->rsc->id, reason->task, text?':':0, text?text:"NA");
         }
 
         /* reason_text is only non-NULL when reason is non-NULL, so reason is
          * safe to dereference here; the built text is not applied when both
          * actions belong to the same resource
          */
         if(reason_text && action->rsc != reason->rsc) {
             pe_action_set_reason(action, reason_text, overwrite);
         }
         free(reason_text);
     }
  }
 
 /*!
  * \brief Set the reason text recorded for an action
  *
  * \param[in,out] action     Action to update
  * \param[in]     reason     New reason text, copied if used (or NULL)
  * \param[in]     overwrite  If true, discard any existing reason first;
  *                           otherwise an existing reason is kept
  */
 void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite) 
 {
     if ((action->reason != NULL) && overwrite) {
         pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, reason);
         free(action->reason);
         action->reason = NULL;
     }
 
     // Only fill in a reason when none is set (or it was just discarded)
     if ((action->reason == NULL) && (reason != NULL)) {
         pe_rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, reason);
         action->reason = strdup(reason);
     }
 }
 
 /*!
  * \internal
  * \brief Check whether shutdown has been requested for a node
  *
  * \param[in] node  Node to check
  *
  * \return TRUE if the node's shutdown attribute is set to anything other
  *         than "0", FALSE otherwise
  * \note Unlike node->details->shutdown, this can be used before that member
  *       has been determined (and in fact to determine it), and it
  *       distinguishes a requested shutdown from the implicit shutdown of
  *       remote nodes whose connection is stopping.
  */
 bool
 pe__shutdown_requested(pe_node_t *node)
 {
     const char *requested = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
 
     if (requested == NULL) {
         return false;
     }
     return strcmp(requested, "0") != 0;
 }
 
 /*!
  * \internal
  * \brief Move a data set's "recheck by" time earlier if appropriate
  *
  * The stored time is replaced only when \p recheck lies after the effective
  * "now" and is sooner than the stored time (or no time is stored yet).
  *
  * \param[in]     recheck   Epoch time when recheck should happen
  * \param[in,out] data_set  Current working set
  */
 void
 pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set)
 {
     if (recheck <= get_effective_time(data_set)) {
         return; // not in the future, nothing to schedule
     }
 
     if ((data_set->recheck_by == 0) || (data_set->recheck_by > recheck)) {
         data_set->recheck_by = recheck;
     }
 }
 
 /*!
  * \internal
  * \brief Wrapper for pe_unpack_nvpairs() using a cluster working set
  *
  * Supplies the working set's input and "now" to pe_unpack_nvpairs(), and if
  * that call reports a time of next change, feeds it to
  * pe__update_recheck_time().
  */
 void
 pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name,
                            GHashTable *node_hash, GHashTable *hash,
                            const char *always_first, gboolean overwrite,
                            pe_working_set_t *data_set)
 {
     crm_time_t *changes_at = crm_time_new_undefined();
 
     pe_unpack_nvpairs(data_set->input, xml_obj, set_name, node_hash, hash,
                       always_first, overwrite, data_set->now, changes_at);
 
     if (crm_time_is_defined(changes_at)) {
         pe__update_recheck_time((time_t) crm_time_get_seconds_since_epoch(changes_at),
                                 data_set);
     }
     crm_time_free(changes_at);
 }
diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c
index 3326acdeca..9892c06688 100644
--- a/tools/crm_attribute.c
+++ b/tools/crm_attribute.c
@@ -1,341 +1,350 @@
 /*
  * Copyright 2004-2019 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_internal.h>
 
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <libgen.h>
 #include <time.h>
 
 #include <sys/param.h>
 #include <sys/types.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 #include <crm/common/ipc.h>
 #include <crm/common/util.h>
 #include <crm/cluster.h>
 
 #include <crm/cib.h>
 #include <crm/attrd.h>
 #include <sys/utsname.h>
 
 // Set to TRUE by -q/-Q; main() then prints only the value when answering a query
 gboolean BE_QUIET = FALSE;
 // Requested operation: 'G' query (default), 'v' update, 'D' delete;
 // main() changes it to 'u' when an attribute pattern is given
 char command = 'G';
 
 // Node name from -N/-U; main() may replace it with the result of
 // attrd_get_target() or get_local_node_name()
 const char *dest_uname = NULL;
 // Filled in by query_node_uuid() in main() for dest_uname
 char *dest_node = NULL;
 // Copy of the -s argument
 char *set_name = NULL;
 // Copy of the -i argument
 char *attr_id = NULL;
 // Copy of the -n argument; replaced by attr_pattern when a pattern is given
 char *attr_name = NULL;
 // Copy of the -P argument
 char *attr_pattern = NULL;
 // Raw -t/-l argument (or set by -z); main() maps "reboot"/"forever" to CIB
 // section names and picks a default when it is left NULL
 const char *type = NULL;
 // Assigned by the 'r' case in main(); not otherwise used in main()
 const char *rsc_id = NULL;
 // Option argument recorded by the -G/-D/-v cases
 const char *attr_value = NULL;
 // -d argument: value reported by a query when the read returns -ENXIO
 const char *attr_default = NULL;
 // Set to XML_TAG_UTILIZATION by -z, otherwise NULL
 const char *set_type = NULL;
 
 /* *INDENT-OFF* */
 /*
  * Command-line options accepted by crm_attribute, handed to crm_set_options()
  * in main().
  *
  * As used in this table, each entry gives the long option name, whether the
  * option takes an argument (1) or not (0), a third field that is always 0
  * here, the short option character that main()'s switch dispatches on, the
  * help text, and an optional trailing flag. (struct crm_option is declared
  * elsewhere; confirm the field meanings against its declaration.)
  *
  * Entries named "-spacer-" appear to exist only to carry help-text headings
  * and examples. Entries with NULL help text and a trailing 1 are alternate
  * spellings that carry no help text; presumably the 1 hides them from the
  * help output (TODO confirm).
  */
 static struct crm_option long_options[] = {
     /* Top-level Options */
     {"help",    0, 0, '?', "\tThis text"},
     {"version", 0, 0, '$', "\tVersion information"  },
     {"verbose", 0, 0, 'V', "\tIncrease debug output"},
     {"quiet",   0, 0, 'q', "\tPrint only the value on stdout\n"},
 
     {"name",    1, 0, 'n', "Name of the attribute/option to operate on"},
     {"pattern", 1, 0, 'P', "Pattern matching names of attributes (only with -v/-D and -l reboot)"},
 
     {"-spacer-",    0, 0, '-', "\nCommands:"},
     {"query",       0, 0, 'G', "\tQuery the current value of the attribute/option"},
     {"update",      1, 0, 'v', "Update the value of the attribute/option"},
     {"delete",      0, 0, 'D', "\tDelete the attribute/option"},
 
     {"-spacer-",    0, 0, '-', "\nAdditional Options:"},
     {"node",        1, 0, 'N', "Set an attribute for the named node (instead of a cluster option).  See also: -l"},
     {"type",        1, 0, 't', "Which part of the configuration to update/delete/query the option in"},
     {"-spacer-",    0, 0, '-', "\t\t\tValid values: crm_config, rsc_defaults, op_defaults, tickets"},
     {"lifetime",    1, 0, 'l', "Lifetime of the node attribute"},
     {"-spacer-",    0, 0, '-', "\t\t\tValid values: reboot, forever"},
     {"utilization", 0, 0, 'z', "Set an utilization attribute for the node."},
     {"set-name",    1, 0, 's', "(Advanced) The attribute set in which to place the value"},
     {"id",	    1, 0, 'i', "\t(Advanced) The ID used to identify the attribute"},
     {"default",     1, 0, 'd', "(Advanced) The default value to display if none is found in the configuration"},
 
     {"inhibit-policy-engine", 0, 0, '!', NULL, 1},
 
     /* legacy */
     {"quiet",       0, 0, 'Q', NULL, 1},
     {"node-uname",  1, 0, 'U', NULL, 1},
     {"get-value",   0, 0, 'G', NULL, 1},
     {"delete-attr", 0, 0, 'D', NULL, 1},
     {"attr-value",  1, 0, 'v', NULL, 1},
     {"attr-name",   1, 0, 'n', NULL, 1},
     {"attr-id",     1, 0, 'i', NULL, 1},
 
     {"-spacer-",	1, 0, '-', "\nExamples:", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', "Add a new node attribute called 'location' with the value of 'office' for host 'myhost':", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_attribute --node myhost --name location --update office", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Query the value of the 'location' node attribute for host 'myhost':", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_attribute --node myhost --name location --query", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Change the value of the 'location' node attribute for host 'myhost':", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_attribute --node myhost --name location --update backoffice", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Delete the 'location' node attribute for host 'myhost':", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_attribute --node myhost --name location --delete", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Query the value of the cluster-delay cluster option:", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_attribute --type crm_config --name cluster-delay --query", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Query the value of the cluster-delay cluster option. Only print the value:", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_attribute --type crm_config --name cluster-delay --query --quiet", pcmk_option_example},
 
     {0, 0, 0, 0}
 };
 /* *INDENT-ON* */
 
 /*!
  * \brief Entry point for the crm_attribute command-line tool
  *
  * Parses the command line, signs on to the CIB manager, works out which CIB
  * section (and, where relevant, which node) the request applies to, and then
  * queries, updates, or deletes the attribute. Updates and deletions of
  * status-section attributes are first offered to the attribute manager via
  * attrd_update_delegate(), unless CIB_file or CIB_shadow is set in the
  * environment; if that call is skipped or does not return pcmk_ok, the CIB is
  * modified directly.
  *
  * \param[in] argc  Number of command-line arguments
  * \param[in] argv  Command-line arguments
  *
  * \return The function ends by calling crm_exit() with an exit code derived
  *         from the operation's result (CRM_EX_USAGE for usage errors)
  */
 int
 main(int argc, char **argv)
 {
     cib_t *the_cib = NULL;
     int rc = pcmk_ok;
 
     int cib_opts = cib_sync_call;
     int argerr = 0;
     int flag;
 
     int option_index = 0;
     int is_remote_node = 0;
 
+    bool try_attrd = true;
+
     crm_log_cli_init("crm_attribute");
     crm_set_options(NULL, "<command> -n <attribute> [options]", long_options,
                     "Manage node's attributes and cluster options."
                     "\n\nAllows node attributes and cluster options to be queried, modified and deleted.\n");
 
     if (argc < 2) {
         crm_help('?', CRM_EX_USAGE);
     }
 
     while (1) {
         flag = crm_get_option(argc, argv, &option_index);
         if (flag == -1)
             break;
 
         switch (flag) {
             case 'V':
                 crm_bump_log_level(argc, argv);
                 break;
             case '$':
             case '?':
                 crm_help(flag, CRM_EX_OK);
                 break;
             case 'G':
                 command = flag;
                 attr_value = optarg;
                 break;
             case 'D':
             case 'v':
                 // Unlike queries, modifying commands also log the full command line
                 command = flag;
                 attr_value = optarg;
                 crm_log_args(argc, argv);
                 break;
             case 'q':
             case 'Q':
                 BE_QUIET = TRUE;
                 break;
             case 'U':
             case 'N':
                 dest_uname = strdup(optarg);
                 break;
             case 's':
                 set_name = strdup(optarg);
                 break;
             // --lifetime and --type share one variable; the lifetime keywords
             // are translated to CIB section names after option parsing
             case 'l':
             case 't':
                 type = optarg;
                 break;
             case 'z':
                 type = XML_CIB_TAG_NODES;
                 set_type = XML_TAG_UTILIZATION;
                 break;
             case 'n':
                 attr_name = strdup(optarg);
                 break;
             case 'P':
                 attr_pattern = strdup(optarg);
                 break;
             case 'i':
                 attr_id = strdup(optarg);
                 break;
             // NOTE(review): long_options has no entry with short option 'r',
             // and rsc_id is not used anywhere else in this function
             case 'r':
                 rsc_id = optarg;
                 break;
             case 'd':
                 attr_default = optarg;
                 break;
             case '!':
                 crm_warn("Inhibiting notifications for this update");
                 cib_opts |= cib_inhibit_notify;
                 break;
             default:
                 printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
                 ++argerr;
                 break;
         }
     }
 
     // Leftover non-option arguments are echoed but not counted as errors
     if (optind < argc) {
         printf("non-option ARGV-elements: ");
         while (optind < argc)
             printf("%s ", argv[optind++]);
         printf("\n");
     }
 
     if (optind > argc) {
         ++argerr;
     }
 
     if (argerr) {
         crm_help('?', CRM_EX_USAGE);
     }
 
     the_cib = cib_new();
     rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command);
 
     if (rc != pcmk_ok) {
         fprintf(stderr, "Error connecting to the CIB manager: %s\n",
                 pcmk_strerror(rc));
         crm_exit(crm_errno2exit(rc));
     }
 
     // A node was named without a type/lifetime: default to "forever"
     if (type == NULL && dest_uname != NULL) {
 	    type = "forever";
     }
 
     // Translate lifetime keywords into the CIB section used for them
     if (safe_str_eq(type, "reboot")) {
         type = XML_CIB_TAG_STATUS;
 
     } else if (safe_str_eq(type, "forever")) {
         type = XML_CIB_TAG_NODES;
     }
 
     if (type == NULL && dest_uname == NULL) {
         /* we're updating cluster options - don't populate dest_node */
         type = XML_CIB_TAG_CRMCONFIG;
 
     // Intentionally empty: explicitly requested cluster options skip the node lookup too
     } else if (safe_str_eq(type, XML_CIB_TAG_CRMCONFIG)) {
     } else if (safe_str_neq(type, XML_CIB_TAG_TICKETS)) {
         /* If we are being called from a resource agent via the cluster,
          * the correct local node name will be passed as an environment
          * variable. Otherwise, we have to ask the cluster.
          */
         dest_uname = attrd_get_target(dest_uname);
         if (dest_uname == NULL) {
             dest_uname = get_local_node_name();
         }
 
         rc = query_node_uuid(the_cib, dest_uname, &dest_node, &is_remote_node);
         if (pcmk_ok != rc) {
             fprintf(stderr, "Could not map name=%s to a UUID\n", dest_uname);
             the_cib->cmds->signoff(the_cib);
             cib_delete(the_cib);
             crm_exit(crm_errno2exit(rc));
         }
     }
 
     // NOTE(review): unlike the UUID failure path above, the two usage-error
     // exits below call crm_exit() without signing off from the CIB first
     if ((command == 'D') && (attr_name == NULL) && (attr_pattern == NULL)) {
         fprintf(stderr, "Error: must specify attribute name or pattern to delete\n");
         crm_exit(CRM_EX_USAGE);
     }
 
     if (attr_pattern) {
         if (((command != 'v') && (command != 'D'))
             || safe_str_neq(type, XML_CIB_TAG_STATUS)) {
 
             fprintf(stderr, "Error: pattern can only be used with till-reboot update or delete\n");
             crm_exit(CRM_EX_USAGE);
         }
         // Pattern requests use command 'u', with the pattern taking the
         // place of the attribute name
         command = 'u';
         free(attr_name);
         attr_name = attr_pattern;
     }
 
-    if (((command == 'v') || (command == 'D') || (command == 'u'))
-        && safe_str_eq(type, XML_CIB_TAG_STATUS)
+    // Only go through attribute manager for transient attributes
+    try_attrd = safe_str_eq(type, XML_CIB_TAG_STATUS);
+
+    // Don't try to contact attribute manager if we're using a file as CIB
+    if (getenv("CIB_file") || getenv("CIB_shadow")) {
+        try_attrd = FALSE;
+    }
+
+    if (((command == 'v') || (command == 'D') || (command == 'u')) && try_attrd
         && pcmk_ok == attrd_update_delegate(NULL, command, dest_uname, attr_name,
                                             attr_value, type, set_name, NULL, NULL,
                                             is_remote_node?attrd_opt_remote:attrd_opt_none)) {
         crm_info("Update %s=%s sent via pacemaker-attrd",
                  attr_name, ((command == 'D')? "<none>" : attr_value));
 
     // The branches below are reached when the attribute manager was not used
     // or its call did not return pcmk_ok; they operate on the CIB directly
     } else if (command == 'D') {
         rc = delete_attr_delegate(the_cib, cib_opts, type, dest_node, set_type, set_name,
                                   attr_id, attr_name, attr_value, TRUE, NULL);
 
         if (rc == -ENXIO) {
             /* Nothing to delete...
              * which means it's not there...
              * which is what the admin wanted
              */
             rc = pcmk_ok;
         }
 
     } else if (command == 'v') {
         CRM_LOG_ASSERT(type != NULL);
         CRM_LOG_ASSERT(attr_name != NULL);
         CRM_LOG_ASSERT(attr_value != NULL);
 
         rc = update_attr_delegate(the_cib, cib_opts, type, dest_node, set_type, set_name,
                                   attr_id, attr_name, attr_value, TRUE, NULL, is_remote_node ? "remote" : NULL);
 
     } else {                    /* query */
 
         char *read_value = NULL;
 
         rc = read_attr_delegate(the_cib, type, dest_node, set_type, set_name,
                                 attr_id, attr_name, &read_value, TRUE, NULL);
 
         // Fall back to the --default value when the read returns -ENXIO
         if (rc == -ENXIO && attr_default) {
             read_value = strdup(attr_default);
             rc = pcmk_ok;
         }
 
         crm_info("Read %s=%s %s%s",
                  attr_name, crm_str(read_value), set_name ? "in " : "", set_name ? set_name : "");
 
         if (rc == -ENOTUNIQ) {
             // Multiple matches (already displayed) are not error for queries
             rc = pcmk_ok;
 
         } else if (BE_QUIET == FALSE) {
             fprintf(stdout, "%s%s %s%s %s%s value=%s\n",
                     type ? "scope=" : "", type ? type : "",
                     attr_id ? "id=" : "", attr_id ? attr_id : "",
                     attr_name ? "name=" : "", attr_name ? attr_name : "",
                     read_value ? read_value : "(null)");
 
         } else if (read_value != NULL) {
             fprintf(stdout, "%s\n", read_value);
         }
         free(read_value);
     }
 
     // Queries have already reset -ENOTUNIQ above, so this hint is only
     // reachable for updates and deletions
     if (rc == -ENOTUNIQ) {
         printf("Please choose from one of the matches above and supply the 'id' with --attr-id\n");
 
     } else if (rc != pcmk_ok) {
         fprintf(stderr, "Error performing operation: %s\n", pcmk_strerror(rc));
     }
 
     the_cib->cmds->signoff(the_cib);
     cib_delete(the_cib);
     crm_exit(crm_errno2exit(rc));
 }
diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index df5b2128d8..89e8a79305 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -1,1414 +1,1417 @@
 /*
  * Copyright 2004-2019 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
  */
 
 #include <crm_resource.h>
 #include <pacemaker-internal.h>
 
 #include <sys/param.h>
 
 #include <crm/crm.h>
 #include <crm/stonith-ng.h>
 
 #include <stdio.h>
 #include <sys/types.h>
 #include <unistd.h>
 
 #include <stdlib.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <libgen.h>
 #include <time.h>
 
 // NOTE(review): not referenced in the visible part of this file; presumably
 // set by --quiet and read by code elsewhere -- confirm
 bool BE_QUIET = FALSE;
 // Set to TRUE by the --master option in main()
 bool scope_master = FALSE;
 // CIB call options; initialised to cib_sync_call
 int cib_options = cib_sync_call;
 
 // Main loop created and run by start_mainloop(); NULL until then
 static GMainLoop *mainloop = NULL;
 
 // Seconds start_mainloop() allows before resource_ipc_timeout() is called
 #define MESSAGE_TIMEOUT_S 60
 
 /*!
  * \internal
  * \brief Timer callback registered by start_mainloop()
  *
  * Reports on stderr and in the log that no messages arrived within
  * MESSAGE_TIMEOUT_S seconds, then calls crm_exit(CRM_EX_TIMEOUT).
  *
  * \param[in] data  Ignored
  */
 static gboolean
 resource_ipc_timeout(gpointer data)
 {
     const int waited_s = MESSAGE_TIMEOUT_S;
 
     fprintf(stderr, "Aborting because no messages received in %d seconds\n",
             waited_s);
     crm_err("No messages received in %d seconds", waited_s);
     crm_exit(CRM_EX_TIMEOUT);
 }
 
 /*!
  * \internal
  * \brief "destroy" handler installed in crm_callbacks
  *
  * Logs that the controller connection was terminated and calls
  * crm_exit(CRM_EX_DISCONNECT).
  *
  * \param[in] user_data  Ignored
  */
 static void
 resource_ipc_connection_destroy(gpointer user_data)
 {
     (void) user_data;
 
     crm_info("Connection to controller was terminated");
     crm_exit(CRM_EX_DISCONNECT);
 }
 
 /*!
  * \internal
  * \brief Run a main loop while controller replies are outstanding
  *
  * Does nothing when crmd_replies_needed is 0. Otherwise creates the main
  * loop, announces how many replies are awaited (on stderr and in the log),
  * schedules resource_ipc_timeout() after MESSAGE_TIMEOUT_S seconds, and runs
  * the loop.
  */
 static void
 start_mainloop(void)
 {
     const char *plural = NULL;
 
     if (crmd_replies_needed == 0) {
         return;
     }
     plural = (crmd_replies_needed == 1)? "y" : "ies";
 
     mainloop = g_main_loop_new(NULL, FALSE);
 
     // No trailing newline: resource_ipc_callback() appends a "." per reply
     fprintf(stderr, "Waiting for %d repl%s from the controller",
             crmd_replies_needed, plural);
     crm_debug("Waiting for %d repl%s from the controller",
               crmd_replies_needed, plural);
 
     g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL);
     g_main_loop_run(mainloop);
 }
 
 /*!
  * \internal
  * \brief "dispatch" handler installed in crm_callbacks
  *
  * Parses the buffer as XML, prints a progress dot, traces the message, and
  * decrements crmd_replies_needed. Once that count reaches 0 while the main
  * loop is running, prints " OK" and calls crm_exit(CRM_EX_OK).
  *
  * \param[in] buffer    Text of the received message
  * \param[in] length    Ignored
  * \param[in] userdata  Ignored
  *
  * \return 0
  */
 static int
 resource_ipc_callback(const char *buffer, ssize_t length, gpointer userdata)
 {
     bool finished = false;
     xmlNode *msg = string2xml(buffer);
 
     fprintf(stderr, ".");
     crm_log_xml_trace(msg, "[inbound]");
 
     crmd_replies_needed--;
 
     finished = (crmd_replies_needed == 0) && (mainloop != NULL)
                && g_main_loop_is_running(mainloop);
     if (finished) {
         fprintf(stderr, " OK\n");
         crm_debug("Got all the replies we expected");
         crm_exit(CRM_EX_OK);
     }
 
     free_xml(msg);
     return 0;
 }
 
 /*!
  * \internal
  * \brief List comparison function ordering two strings with strcmp()
  *
  * \param[in] a  First string
  * \param[in] b  Second string
  *
  * \return Result of strcmp() on the two strings
  */
 static int
 compare_id(gconstpointer a, gconstpointer b)
 {
     const char *first = a;
     const char *second = b;
 
     return strcmp(first, second);
 }
 
 /*!
  * \internal
  * \brief Build a sorted list of the IDs of XML_CONS_TAG_RSC_LOCATION elements
  *
  * Searches the constraints section found under \p root and inserts the ID of
  * every match into a list kept in compare_id() order.
  *
  * \param[in] root  XML to look up the constraints section in
  *
  * \return List of ID strings (the values returned by ID() are stored as-is,
  *         not copied), or NULL if nothing matched
  */
 static GListPtr
 build_constraint_list(xmlNode *root)
 {
     GListPtr sorted_ids = NULL;
     xmlNode *constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, root);
     xmlXPathObjectPtr found = xpath_search(constraints,
                                            "//" XML_CONS_TAG_RSC_LOCATION);
     int i = 0;
 
     while (i < numXpathResults(found)) {
         xmlNode *location = getXpathResult(found, i);
 
         sorted_ids = g_list_insert_sorted(sorted_ids, (gpointer) ID(location),
                                           compare_id);
         i++;
     }
 
     freeXpathObject(found);
     return sorted_ids;
 }
 
 // IPC client callbacks: resource_ipc_callback() is the dispatch handler and
 // resource_ipc_connection_destroy() is the destroy handler
 struct ipc_client_callbacks crm_callbacks = {
     .dispatch = resource_ipc_callback,
     .destroy = resource_ipc_connection_destroy,
 };
 
 
 /* short option letters still available: EJkKXyZ */
 
 /* *INDENT-OFF* */
 /*
  * Command-line options accepted by crm_resource, handed to crm_set_options()
  * in main().
  *
  * As used in this table, each entry lists the long option name, whether an
  * argument is required, a third field that is always NULL here, the short
  * option character (0 for options that have only a long name, which main()
  * then matches by name), the help text, and optional flags such as
  * pcmk_option_hidden. Continuation lines of help text start with four tabs so
  * they line up under the first line.
  */
 static struct crm_option long_options[] = {
     /* Top-level Options */
     {
         "help", no_argument, NULL, '?',
         "\t\tDisplay this text and exit"
     },
     {
         "version", no_argument, NULL, '$',
         "\t\tDisplay version information and exit"
     },
     {
         "verbose", no_argument, NULL, 'V',
         "\t\tIncrease debug output (may be specified multiple times)"
     },
     {
         "quiet", no_argument, NULL, 'Q',
         "\t\tBe less descriptive in results"
     },
     {
         "resource", required_argument, NULL, 'r',
         "\tResource ID"
     },
 
     { "-spacer-", no_argument, NULL, '-', "\nQueries:" },
     {
         "list", no_argument, NULL, 'L',
         "\t\tList all cluster resources with status"},
     {
         "list-raw", no_argument, NULL, 'l',
         "\t\tList IDs of all instantiated resources (individual members rather than groups etc.)"
     },
     {
         "list-cts", no_argument, NULL, 'c',
         NULL, pcmk_option_hidden
     },
     {
         "list-operations", no_argument, NULL, 'O',
         "\tList active resource operations, optionally filtered by --resource and/or --node"
     },
     {
         "list-all-operations", no_argument, NULL, 'o',
         "List all resource operations, optionally filtered by --resource and/or --node"
     },
     {
         "list-standards", no_argument, NULL, 0,
         "\tList supported standards"
     },
     {
         "list-ocf-providers", no_argument, NULL, 0,
         "List all available OCF providers"
     },
     {
         "list-agents", required_argument, NULL, 0,
         "List all agents available for the named standard and/or provider."
     },
     {
         "list-ocf-alternatives", required_argument, NULL, 0,
         "List all available providers for the named OCF agent"
     },
     {
         "show-metadata", required_argument, NULL, 0,
         "Show the metadata for the named class:provider:agent"
     },
     {
         "query-xml", no_argument, NULL, 'q',
         "\tShow XML configuration of resource (after any template expansion)"
     },
     {
         "query-xml-raw", no_argument, NULL, 'w',
         "\tShow XML configuration of resource (before any template expansion)"
     },
     {
         "get-parameter", required_argument, NULL, 'g',
         "Display named parameter for resource.\n"
         "\t\t\t\tUse instance attribute unless --meta or --utilization is specified"
     },
     {
         "get-property", required_argument, NULL, 'G',
         "Display named property of resource ('class', 'type', or 'provider') (requires --resource)",
         pcmk_option_hidden
     },
     {
         "locate", no_argument, NULL, 'W',
         "\t\tShow node(s) currently running resource"
     },
     {
         "stack", no_argument, NULL, 'A',
         "\t\tDisplay the prerequisites and dependents of a resource"
     },
     {
         "constraints", no_argument, NULL, 'a',
         "\tDisplay the (co)location constraints that apply to a resource"
     },
     {
         "why", no_argument, NULL, 'Y',
         "\t\tShow why resources are not running, optionally filtered by --resource and/or --node"
     },
 
     { "-spacer-", no_argument, NULL, '-', "\nCommands:" },
     {
         "validate", no_argument, NULL, 0,
         "\t\tValidate resource configuration by calling agent's validate-all action.\n"
         "\t\t\t\tThe configuration may be specified either by giving an existing\n"
         "\t\t\t\tresource name with -r, or by specifying --class, --agent, and\n"
         "\t\t\t\t--provider arguments, along with any number of --option arguments."
     },
     {
         "cleanup", no_argument, NULL, 'C',
         "\t\tIf resource has any past failures, clear its history and fail count.\n"
         "\t\t\t\tOptionally filtered by --resource, --node, --operation, and --interval (otherwise all).\n"
         "\t\t\t\t--operation and --interval apply to fail counts, but entire history is always cleared,\n"
-        "\t\t\t\tto allow current state to be rechecked.\n"
+        "\t\t\t\tto allow current state to be rechecked. If the named resource is part of a group, or\n"
+        "\t\t\t\tone numbered instance of a clone or bundled resource, the clean-up applies to the\n"
+        "\t\t\t\twhole collective resource unless --force is given."
     },
     {
         "refresh", no_argument, NULL, 'R',
         "\t\tDelete resource's history (including failures) so its current state is rechecked.\n"
-        "\t\t\t\tOptionally filtered by --resource and --node (otherwise all).\n"
-        "\t\t\t\tUnless --force is specified, resource's group or clone (if any) will also be refreshed."
+        "\t\t\t\tOptionally filtered by --resource and --node (otherwise all). If the named resource is\n"
+        "\t\t\t\tpart of a group, or one numbered instance of a clone or bundled resource, the refresh\n"
+        "\t\t\t\tapplies to the whole collective resource unless --force is given."
     },
     {
         "set-parameter", required_argument, NULL, 'p',
         "Set named parameter for resource (requires -v).\n"
         "\t\t\t\tUse instance attribute unless --meta or --utilization is specified."
     },
     {
         "delete-parameter", required_argument, NULL, 'd',
         "Delete named parameter for resource.\n"
         "\t\t\t\tUse instance attribute unless --meta or --utilization is specified."
     },
     {
         "set-property", required_argument, NULL, 'S',
         "Set named property of resource ('class', 'type', or 'provider') (requires -r, -t, -v)",
         pcmk_option_hidden
     },
 
     { "-spacer-", no_argument, NULL, '-', "\nResource location:" },
     {
         "move", no_argument, NULL, 'M',
         "\t\tCreate a constraint to move resource. If --node is specified, the constraint\n"
         "\t\t\t\twill be to move to that node, otherwise it will be to ban the current node.\n"
         "\t\t\t\tUnless --force is specified, this will return an error if the resource is\n"
         "\t\t\t\talready running on the specified node. If --force is specified, this will\n"
         "\t\t\t\talways ban the current node. Optional: --lifetime, --master.\n"
         "\t\t\t\tNOTE: This may prevent the resource from running on its previous location\n"
         "\t\t\t\tuntil the implicit constraint expires or is removed with --clear."
     },
     {
         "ban", no_argument, NULL, 'B',
         "\t\tCreate a constraint to keep resource off a node. Optional: --node, --lifetime, --master.\n"
         "\t\t\t\tNOTE: This will prevent the resource from running on the affected node\n"
         "\t\t\t\tuntil the implicit constraint expires or is removed with --clear.\n"
         "\t\t\t\tIf --node is not specified, it defaults to the node currently running the resource\n"
         "\t\t\t\tfor primitives and groups, or the master for promotable clones with promoted-max=1\n"
         "\t\t\t\t(all other situations result in an error as there is no sane default).\n"
     },
     {
         "clear", no_argument, NULL, 'U',
         "\t\tRemove all constraints created by the --ban and/or --move commands.\n"
         "\t\t\t\tRequires: --resource. Optional: --node, --master, --expired.\n"
         "\t\t\t\tIf --node is not specified, all constraints created by --ban and --move\n"
         "\t\t\t\twill be removed for the named resource. If --node and --force are specified,\n"
         "\t\t\t\tany constraint created by --move will be cleared, even if it is not for the specified node.\n"
         "\t\t\t\tIf --expired is specified, only those constraints whose lifetimes have expired will\n"
         "\t\t\t\tbe removed.\n"
     },
     {
         "expired", no_argument, NULL, 'e',
         "\t\tModifies the --clear argument to remove constraints with expired lifetimes.\n"
     },
     {
         "lifetime", required_argument, NULL, 'u',
         "\tLifespan (as ISO 8601 duration) of created constraints (with -B, -M)\n"
         "\t\t\t\t(see https://en.wikipedia.org/wiki/ISO_8601#Durations)"
     },
     {
         "master", no_argument, NULL, 0,
         "\t\tLimit scope of command to the Master role (with -B, -M, -U).\n"
         "\t\t\t\tFor -B and -M, the previous master may remain active in the Slave role."
     },
 
     { "-spacer-", no_argument, NULL, '-', "\nAdvanced Commands:" },
     {
         "delete", no_argument, NULL, 'D',
         "\t\t(Advanced) Delete a resource from the CIB. Required: -t"
     },
     {
         "fail", no_argument, NULL, 'F',
         "\t\t(Advanced) Tell the cluster this resource has failed"
     },
     {
         "restart", no_argument, NULL, 0,
         "\t\t(Advanced) Tell the cluster to restart this resource and anything that depends on it"
     },
     {
         "wait", no_argument, NULL, 0,
         "\t\t(Advanced) Wait until the cluster settles into a stable state"
     },
     {
         "force-demote", no_argument, NULL, 0,
         "\t(Advanced) Bypass the cluster and demote a resource on the local node.\n"
         "\t\t\t\tUnless --force is specified, this will refuse to do so if the cluster\n"
         "\t\t\t\tbelieves the resource is a clone instance already running on the local node."
     },
     {
         "force-stop", no_argument, NULL, 0,
         "\t(Advanced) Bypass the cluster and stop a resource on the local node."
     },
     {
         "force-start", no_argument, NULL, 0,
         "\t(Advanced) Bypass the cluster and start a resource on the local node.\n"
         "\t\t\t\tUnless --force is specified, this will refuse to do so if the cluster\n"
         "\t\t\t\tbelieves the resource is a clone instance already running on the local node."
     },
     {
         "force-promote", no_argument, NULL, 0,
         "\t(Advanced) Bypass the cluster and promote a resource on the local node.\n"
         "\t\t\t\tUnless --force is specified, this will refuse to do so if the cluster\n"
         "\t\t\t\tbelieves the resource is a clone instance already running on the local node."
     },
     {
         "force-check", no_argument, NULL, 0,
         "\t(Advanced) Bypass the cluster and check the state of a resource on the local node."
     },
 
     { "-spacer-", no_argument, NULL, '-', "\nValidate Options:" },
     {
         "class", required_argument, NULL, 0,
         "\tThe standard the resource agent conforms to (for example, ocf).\n"
         "\t\t\t\tUse with --agent, --provider, --option, and --validate."
     },
     {
         "agent", required_argument, NULL, 0,
         "\tThe agent to use (for example, IPaddr).\n"
         "\t\t\t\tUse with --class, --provider, --option, and --validate."
     },
     {
         "provider", required_argument, NULL, 0,
         "\tThe vendor that supplies the resource agent (for example, heartbeat).\n"
         "\t\t\t\tuse with --class, --agent, --option, and --validate."
     },
     {
         "option", required_argument, NULL, 0,
         "\tSpecify a device configuration parameter as NAME=VALUE\n"
         "\t\t\t\t(may be specified multiple times).  Use with --validate\n"
         "\t\t\t\tand without the -r option."
     },
 
     { "-spacer-", no_argument, NULL, '-', "\nAdditional Options:" },
     {
         "node", required_argument, NULL, 'N',
         "\tNode name"
     },
     {
         "recursive", no_argument, NULL, 0,
         "\tFollow colocation chains when using --set-parameter"
     },
     {
         "resource-type", required_argument, NULL, 't',
         "Resource XML element (primitive, group, etc.) (with -D)"
     },
     {
         "parameter-value", required_argument, NULL, 'v',
         "Value to use with -p"
     },
     {
         "meta", no_argument, NULL, 'm',
         "\t\tUse resource meta-attribute instead of instance attribute (with -p, -g, -d)"
     },
     {
         "utilization", no_argument, NULL, 'z',
         "\tUse resource utilization attribute instead of instance attribute (with -p, -g, -d)"
     },
     {
         "operation", required_argument, NULL, 'n',
         "\tOperation to clear instead of all (with -C -r)"
     },
     {
         "interval", required_argument, NULL, 'I',
         "\tInterval of operation to clear (default 0) (with -C -r -n)"
     },
     {
         "set-name", required_argument, NULL, 's',
         "\t(Advanced) XML ID of attributes element to use (with -p, -d)"
     },
     {
         "nvpair", required_argument, NULL, 'i',
         "\t(Advanced) XML ID of nvpair element to use (with -p, -d)"
     },
     {
         "timeout", required_argument, NULL, 'T',
         "\t(Advanced) Abort if command does not finish in this time (with --restart, --wait, --force-*)"
     },
     {
         "force", no_argument, NULL, 'f',
         "\t\tIf making CIB changes, do so regardless of quorum.\n"
         "\t\t\t\tSee help for individual commands for additional behavior.\n"
     },
     {
         "xml-file", required_argument, NULL, 'x',
         NULL, pcmk_option_hidden
     },
 
     /* legacy options */
     {"host-uname", required_argument, NULL, 'H', NULL, pcmk_option_hidden},
 
     {"-spacer-", 1, NULL, '-', "\nExamples:", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', "List the available OCF agents:", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', " crm_resource --list-agents ocf", pcmk_option_example},
     {"-spacer-", 1, NULL, '-', "List the available OCF agents from the linux-ha project:", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', " crm_resource --list-agents ocf:heartbeat", pcmk_option_example},
     {"-spacer-", 1, NULL, '-', "Move 'myResource' to a specific node:", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', " crm_resource --resource myResource --move --node altNode", pcmk_option_example},
     {"-spacer-", 1, NULL, '-', "Allow (but not force) 'myResource' to move back to its original location:", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', " crm_resource --resource myResource --clear", pcmk_option_example},
     {"-spacer-", 1, NULL, '-', "Stop 'myResource' (and anything that depends on it):", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', " crm_resource --resource myResource --set-parameter target-role --meta --parameter-value Stopped", pcmk_option_example},
     {"-spacer-", 1, NULL, '-', "Tell the cluster not to manage 'myResource':", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', "The cluster will not attempt to start or stop the resource under any circumstances."},
     {"-spacer-", 1, NULL, '-', "Useful when performing maintenance tasks on a resource.", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', " crm_resource --resource myResource --set-parameter is-managed --meta --parameter-value false", pcmk_option_example},
     {"-spacer-", 1, NULL, '-', "Erase the operation history of 'myResource' on 'aNode':", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', "The cluster will 'forget' the existing resource state (including any errors) and attempt to recover the resource."},
     {"-spacer-", 1, NULL, '-', "Useful when a resource had failed permanently and has been repaired by an administrator.", pcmk_option_paragraph},
     {"-spacer-", 1, NULL, '-', " crm_resource --resource myResource --cleanup --node aNode", pcmk_option_example},
 
     {0, 0, 0, 0}
 };
 /* *INDENT-ON* */
 
 
 /*
  * Entry point for the crm_resource command-line tool.
  *
  * Overall flow:
  *   1. Parse options.  Short-option commands are recorded in rsc_cmd; commands
  *      that exist only as long options set rsc_cmd to 0 and record their name
  *      in rsc_long_cmd.  Some informational long options (listing agents,
  *      standards, metadata) are handled immediately and exit.
  *   2. Collect any trailing name=value arguments as agent parameter overrides
  *      (only meaningful for long-option commands).
  *   3. If --class/--agent/--provider were given, validate from the command
  *      line alone and exit without contacting the CIB.
  *   4. Otherwise connect to the CIB manager, build the working set and look up
  *      the resource when the command needs them, connect to the controller
  *      when required, and dispatch on the command.
  *   5. Clean up at the "bail" label and finish by calling crm_exit() with the
  *      resulting exit code.
  *
  * argc, argv: the process command line.
  */
 int
 main(int argc, char **argv)
 {
     char rsc_cmd = 'L';
 
     const char *v_class = NULL;
     const char *v_agent = NULL;
     const char *v_provider = NULL;
     char *name = NULL;
     char *value = NULL;
     GHashTable *validate_options = NULL;
 
     const char *rsc_id = NULL;
     const char *host_uname = NULL;
     const char *prop_name = NULL;
     const char *prop_value = NULL;
     const char *rsc_type = NULL;
     const char *prop_id = NULL;
     const char *prop_set = NULL;
     const char *rsc_long_cmd = NULL;
     const char *longname = NULL;
     const char *operation = NULL;
     const char *interval_spec = NULL;
     const char *cib_file = getenv("CIB_file");
     GHashTable *override_params = NULL;
 
     char *xml_file = NULL;
     crm_ipc_t *crmd_channel = NULL;
     pe_working_set_t *data_set = NULL;
     xmlNode *cib_xml_copy = NULL;
     cib_t *cib_conn = NULL;
     resource_t *rsc = NULL;
     bool recursive = FALSE;
     char *our_pid = NULL;
 
     bool validate_cmdline = FALSE; /* whether we are just validating based on command line options */
     bool require_resource = TRUE; /* whether command requires that resource be specified */
     bool require_dataset = TRUE;  /* whether command requires populated dataset instance */
     bool require_crmd = FALSE;    // whether command requires controller connection
     bool clear_expired = FALSE;
 
     int rc = pcmk_ok;
     int is_ocf_rc = 0;            // nonzero when rc is used directly as the exit code
     int option_index = 0;
     int timeout_ms = 0;
     int argerr = 0;               // number of command-line usage errors seen so far
     int flag;
     int find_flags = 0;           // Flags to use when searching for resource
     crm_exit_t exit_code = CRM_EX_OK;
 
     crm_log_cli_init("crm_resource");
     crm_set_options(NULL, "(query|command) [options]", long_options,
                     "Perform tasks related to cluster resources.\nAllows resources to be queried (definition and location), modified, and moved around the cluster.\n");
 
     validate_options = crm_str_table_new();
 
     while (1) {
         flag = crm_get_option_long(argc, argv, &option_index, &longname);
         if (flag == -1)
             break;
 
         switch (flag) {
             case 0: /* long options with no short equivalent */
                 if (safe_str_eq("master", longname)) {
                     scope_master = TRUE;
 
                 } else if(safe_str_eq(longname, "recursive")) {
                     recursive = TRUE;
 
                 } else if (safe_str_eq("wait", longname)) {
                     rsc_cmd = flag;
                     rsc_long_cmd = longname;
                     require_resource = FALSE;
                     require_dataset = FALSE;
 
                 } else if (
                     safe_str_eq("validate", longname)
                     || safe_str_eq("restart", longname)
                     || safe_str_eq("force-demote",  longname)
                     || safe_str_eq("force-stop",    longname)
                     || safe_str_eq("force-start",   longname)
                     || safe_str_eq("force-promote", longname)
                     || safe_str_eq("force-check",   longname)) {
                     rsc_cmd = flag;
                     rsc_long_cmd = longname;
                     find_flags = pe_find_renamed|pe_find_anon;
                     crm_log_args(argc, argv);
 
                 } else if (safe_str_eq("list-ocf-providers", longname)
                            || safe_str_eq("list-ocf-alternatives", longname)
                            || safe_str_eq("list-standards", longname)) {
                     // Informational query: print the list and exit right away
                     const char *text = NULL;
                     lrmd_list_t *list = NULL;
                     lrmd_list_t *iter = NULL;
                     lrmd_t *lrmd_conn = lrmd_api_new();
 
                     if (safe_str_eq("list-ocf-providers", longname)
                         || safe_str_eq("list-ocf-alternatives", longname)) {
                         rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, optarg, &list);
                         text = "OCF providers";
 
                     } else if (safe_str_eq("list-standards", longname)) {
                         rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list);
                         text = "standards";
                     }
 
                     if (rc > 0) {
                         for (iter = list; iter != NULL; iter = iter->next) {
                             printf("%s\n", iter->val);
                         }
                         lrmd_list_freeall(list);
 
                     } else if (optarg) {
                         fprintf(stderr, "No %s found for %s\n", text, optarg);
                         exit_code = CRM_EX_NOSUCH;
 
                     } else {
                         fprintf(stderr, "No %s found\n", text);
                         exit_code = CRM_EX_NOSUCH;
                     }
 
                     lrmd_api_delete(lrmd_conn);
                     crm_exit(exit_code);
 
                 } else if (safe_str_eq("show-metadata", longname)) {
                     // Informational query: print agent metadata and exit
                     char *standard = NULL;
                     char *provider = NULL;
                     char *type = NULL;
                     char *metadata = NULL;
                     lrmd_t *lrmd_conn = lrmd_api_new();
 
                     rc = crm_parse_agent_spec(optarg, &standard, &provider, &type);
                     if (rc == pcmk_ok) {
                         rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard,
                                                            provider, type,
                                                            &metadata, 0);
                     } else {
                         fprintf(stderr,
                                 "'%s' is not a valid agent specification\n",
                                 optarg);
                         rc = -ENXIO;
                     }
 
                     if (metadata) {
                         printf("%s\n", metadata);
                     } else {
                         fprintf(stderr, "Metadata query for %s failed: %s\n",
                                 optarg, pcmk_strerror(rc));
                         exit_code = crm_errno2exit(rc);
                     }
                     lrmd_api_delete(lrmd_conn);
                     crm_exit(exit_code);
 
                 } else if (safe_str_eq("list-agents", longname)) {
                     // Informational query: print matching agents and exit
                     lrmd_list_t *list = NULL;
                     lrmd_list_t *iter = NULL;
                     char *provider = strchr (optarg, ':');
                     lrmd_t *lrmd_conn = lrmd_api_new();
 
                     // Split "standard:provider" in place at the first colon
                     if (provider) {
                         *provider++ = 0;
                     }
                     rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, optarg, provider);
 
                     if (rc > 0) {
                         for (iter = list; iter != NULL; iter = iter->next) {
                             printf("%s\n", iter->val);
                         }
                         lrmd_list_freeall(list);
                     } else {
                         fprintf(stderr, "No agents found for standard=%s, provider=%s\n",
                                 optarg, (provider? provider : "*"));
                         exit_code = CRM_EX_NOSUCH;
                     }
                     lrmd_api_delete(lrmd_conn);
                     crm_exit(exit_code);
 
                 } else if (safe_str_eq("class", longname)) {
                     if (!(pcmk_get_ra_caps(optarg) & pcmk_ra_cap_params)) {
                         if (BE_QUIET == FALSE) {
                             fprintf(stdout, "Standard %s does not support parameters\n",
                                     optarg);
                         }
 
                         crm_exit(exit_code);
                     } else {
                         v_class = optarg;
                     }
 
                     validate_cmdline = TRUE;
                     require_resource = FALSE;
 
                 } else if (safe_str_eq("agent", longname)) {
                     validate_cmdline = TRUE;
                     require_resource = FALSE;
                     v_agent = optarg;
 
                 } else if (safe_str_eq("provider", longname)) {
                     validate_cmdline = TRUE;
                     require_resource = FALSE;
                     v_provider = optarg;
 
                 } else if (safe_str_eq("option", longname)) {
                     crm_info("Scanning: --option %s", optarg);
 
                     /* Start from a clean slate so that a failed scan cannot
                      * leave us holding pointers that validate_options already
                      * holds from an earlier --option.
                      */
                     name = NULL;
                     value = NULL;
                     rc = pcmk_scan_nvpair(optarg, &name, &value);
                     if (rc != 2) {
                         fprintf(stderr, "Invalid option: --option %s: %s\n", optarg, pcmk_strerror(rc));
                         argerr++;
 
                         // Drop whatever half of the pair was produced, if any
                         free(name);
                         free(value);
                     } else {
                         crm_info("Got: '%s'='%s'", name, value);
 
                         // Only a complete pair may go into the table
                         g_hash_table_replace(validate_options, name, value);
                     }
                     name = NULL;
                     value = NULL;
 
                 } else {
                     crm_err("Unhandled long option: %s", longname);
                 }
                 break;
             case 'V':
                 resource_verbose++;
                 crm_bump_log_level(argc, argv);
                 break;
             case '$':
             case '?':
                 crm_help(flag, CRM_EX_OK);
                 break;
             case 'x':
                 xml_file = strdup(optarg);
                 break;
             case 'Q':
                 BE_QUIET = TRUE;
                 break;
             case 'm':
                 attr_set_type = XML_TAG_META_SETS;
                 break;
             case 'z':
                 attr_set_type = XML_TAG_UTILIZATION;
                 break;
             case 'u':
                 move_lifetime = strdup(optarg);
                 break;
             case 'f':
                 do_force = TRUE;
                 crm_log_args(argc, argv);
                 break;
             case 'i':
                 prop_id = optarg;
                 break;
             case 's':
                 prop_set = optarg;
                 break;
             case 'r':
                 rsc_id = optarg;
                 break;
             case 'v':
                 prop_value = optarg;
                 break;
             case 't':
                 rsc_type = optarg;
                 break;
             case 'T':
                 timeout_ms = crm_get_msec(optarg);
                 break;
             case 'e':
                 clear_expired = TRUE;
                 require_resource = FALSE;
                 break;
 
             case 'C':
             case 'R':
                 crm_log_args(argc, argv);
                 require_resource = FALSE;
                 // With CIB_file set there is no live controller to talk to
                 if (cib_file == NULL) {
                     require_crmd = TRUE;
                 }
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_anon;
                 break;
 
             case 'n':
                 operation = optarg;
                 break;
 
             case 'I':
                 interval_spec = optarg;
                 break;
 
             case 'D':
                 require_dataset = FALSE;
                 crm_log_args(argc, argv);
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_any;
                 break;
 
             case 'F':
                 require_crmd = TRUE;
                 crm_log_args(argc, argv);
                 rsc_cmd = flag;
                 break;
 
             case 'U':
             case 'B':
             case 'M':
                 crm_log_args(argc, argv);
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_anon;
                 break;
 
             case 'c':
             case 'L':
             case 'l':
             case 'O':
             case 'o':
                 require_resource = FALSE;
                 rsc_cmd = flag;
                 break;
 
             case 'Y':
                 require_resource = FALSE;
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_anon;
                 break;
 
             case 'q':
             case 'w':
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_any;
                 break;
 
             case 'W':
             case 'A':
             case 'a':
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_anon;
                 break;
 
             case 'S':
                 require_dataset = FALSE;
                 crm_log_args(argc, argv);
                 prop_name = optarg;
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_any;
                 break;
 
             case 'p':
             case 'd':
                 crm_log_args(argc, argv);
                 prop_name = optarg;
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_any;
                 break;
 
             case 'G':
             case 'g':
                 prop_name = optarg;
                 rsc_cmd = flag;
                 find_flags = pe_find_renamed|pe_find_any;
                 break;
 
             case 'H':
             case 'N':
                 crm_trace("Option %c => %s", flag, optarg);
                 host_uname = optarg;
                 break;
 
             default:
                 CMD_ERR("Argument code 0%o (%c) is not (?yet?) supported", flag, flag);
                 ++argerr;
                 break;
         }
     }
 
     // Catch the case where the user didn't specify a command
     if (rsc_cmd == 'L') {
         require_resource = FALSE;
     }
 
     // --expired without --clear/-U doesn't make sense
     if (clear_expired == TRUE && rsc_cmd != 'U') {
         CMD_ERR("--expired requires --clear or -U");
         argerr++;
     }
 
     // Trailing name=value arguments override agent parameters, but only for
     // commands given as long options (rsc_cmd == 0)
     if (optind < argc
         && argv[optind] != NULL
         && rsc_cmd == 0
         && rsc_long_cmd) {
 
         override_params = crm_str_table_new();
         while (optind < argc && argv[optind] != NULL) {
             /* +1 leaves room for the terminating NUL that sscanf() writes:
              * the name can be as long as the whole argument when the
              * argument contains no '='.
              */
             size_t buf_len = strlen(argv[optind]) + 1;
             char *param_name = calloc(1, buf_len);
             char *param_value = calloc(1, buf_len);
             int scanned = 0;
 
             if (param_name == NULL || param_value == NULL) {
                 free(param_value);
                 free(param_name);
                 rc = -ENOMEM;
                 goto bail;
             }
 
             scanned = sscanf(argv[optind], "%[^=]=%s", param_name, param_value);
             if (scanned == 2) {
                 g_hash_table_replace(override_params, param_name, param_value);
 
             } else {
                 CMD_ERR("Error parsing '%s' as a name=value pair for --%s", argv[optind], rsc_long_cmd);
                 free(param_value);
                 free(param_name);
                 argerr++;
             }
             optind++;
         }
 
     } else if (optind < argc && argv[optind] != NULL && rsc_cmd == 0) {
         CMD_ERR("non-option ARGV-elements: ");
         while (optind < argc && argv[optind] != NULL) {
             CMD_ERR("[%d of %d] %s ", optind, argc, argv[optind]);
             optind++;
             argerr++;
         }
     }
 
     if (optind > argc) {
         ++argerr;
     }
 
     // Sanity check validating from command line parameters.  If everything checks out,
     // go ahead and run the validation.  This way we don't need a CIB connection.
     if (validate_cmdline == TRUE) {
         // -r cannot be used with any of --class, --agent, or --provider
         if (rsc_id != NULL) {
             CMD_ERR("--resource cannot be used with --class, --agent, and --provider");
             argerr++;
 
         // If --class, --agent, or --provider are given, --validate must also be given.
         } else if (!safe_str_eq(rsc_long_cmd, "validate")) {
             CMD_ERR("--class, --agent, and --provider require --validate");
             argerr++;
 
         // Not all of --class, --agent, and --provider need to be given.  Not all
         // classes support the concept of a provider.  Check that what we were given
         // is valid.
         } else if (crm_str_eq(v_class, "stonith", TRUE)) {
             if (v_provider != NULL) {
                 CMD_ERR("stonith does not support providers");
                 argerr++;
 
             } else if (stonith_agent_exists(v_agent, 0) == FALSE) {
                 CMD_ERR("%s is not a known stonith agent", v_agent ? v_agent : "");
                 argerr++;
             }
 
         } else if (resources_agent_exists(v_class, v_provider, v_agent) == FALSE) {
             CMD_ERR("%s:%s:%s is not a known resource",
                     v_class ? v_class : "",
                     v_provider ? v_provider : "",
                     v_agent ? v_agent : "");
             argerr++;
         }
 
         if (argerr == 0) {
             rc = cli_resource_execute_from_params("test", v_class, v_provider, v_agent,
                                                   "validate-all", validate_options,
                                                   override_params, timeout_ms);
             exit_code = crm_errno2exit(rc);
             crm_exit(exit_code);
         }
     }
 
     if (argerr) {
         CMD_ERR("Invalid option(s) supplied, use --help for valid usage");
         crm_exit(CRM_EX_USAGE);
     }
 
     our_pid = crm_getpid_s();
 
     if (do_force) {
         crm_debug("Forcing...");
         cib_options |= cib_quorum_override;
     }
 
     if (require_resource && !rsc_id) {
         CMD_ERR("Must supply a resource id with -r");
         rc = -ENXIO;
         goto bail;
     }
 
     // Looking up a resource needs the working set, whatever the command said
     if (find_flags && rsc_id) {
         require_dataset = TRUE;
     }
 
     /* Establish a connection to the CIB manager */
     cib_conn = cib_new();
     if (cib_conn == NULL) {
         CMD_ERR("Could not create CIB connection");
         rc = -ENOTCONN;
         goto bail;
     }
     rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command);
     if (rc != pcmk_ok) {
         CMD_ERR("Error connecting to the CIB manager: %s", pcmk_strerror(rc));
         goto bail;
     }
 
     /* Populate working set from XML file if specified or CIB query otherwise */
     if (require_dataset) {
         if (xml_file != NULL) {
             cib_xml_copy = filename2xml(xml_file);
 
         } else {
             rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call);
         }
 
         if(rc != pcmk_ok) {
             goto bail;
         }
 
         /* Populate the working set instance */
         data_set = pe_new_working_set();
         if (data_set == NULL) {
             rc = -ENOMEM;
             goto bail;
         }
         rc = update_working_set_xml(data_set, &cib_xml_copy);
         if (rc != pcmk_ok) {
             goto bail;
         }
         cluster_status(data_set);
     }
 
     // If command requires that resource exist if specified, find it
     if (find_flags && rsc_id) {
         rsc = pe_find_resource_with_flags(data_set->resources, rsc_id,
                                           find_flags);
         if (rsc == NULL) {
             CMD_ERR("Resource '%s' not found", rsc_id);
             rc = -ENXIO;
             goto bail;
         }
     }
 
     // Establish a connection to the controller if needed
     if (require_crmd) {
         xmlNode *xml = NULL;
         mainloop_io_t *source =
             mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks);
         crmd_channel = mainloop_get_ipc_client(source);
 
         if (crmd_channel == NULL) {
             CMD_ERR("Error connecting to the controller");
             rc = -ENOTCONN;
             goto bail;
         }
 
         xml = create_hello_message(our_pid, crm_system_name, "0", "1");
         crm_ipc_send(crmd_channel, xml, 0, 0, NULL);
         free_xml(xml);
     }
 
     /* Handle rsc_cmd appropriately */
     if (rsc_cmd == 'L') {
         rc = pcmk_ok;
         cli_resource_print_list(data_set, FALSE);
 
     } else if (rsc_cmd == 'l') {
         int found = 0;
         GListPtr lpc = NULL;
 
         rc = pcmk_ok;
         for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) {
             rsc = (resource_t *) lpc->data;
 
             found++;
             cli_resource_print_raw(rsc);
         }
 
         if (found == 0) {
             printf("NO resources configured\n");
             rc = -ENXIO;
         }
 
     } else if (rsc_cmd == 0 && rsc_long_cmd && safe_str_eq(rsc_long_cmd, "restart")) {
         /* We don't pass data_set because rsc needs to stay valid for the entire
          * lifetime of cli_resource_restart(), but it will reset and update the
          * working set multiple times, so it needs to use its own copy.
          */
         rc = cli_resource_restart(rsc, host_uname, timeout_ms, cib_conn);
 
     } else if (rsc_cmd == 0 && rsc_long_cmd && safe_str_eq(rsc_long_cmd, "wait")) {
         rc = wait_till_stable(timeout_ms, cib_conn);
 
     } else if (rsc_cmd == 0 && rsc_long_cmd) {
         // validate, force-(stop|start|demote|promote|check)
         rc = cli_resource_execute(rsc, rsc_id, rsc_long_cmd, override_params,
                                   timeout_ms, cib_conn, data_set);
         // A non-negative result is passed through as the exit code (see bail)
         if (rc >= 0) {
             is_ocf_rc = 1;
         }
 
     } else if (rsc_cmd == 'A' || rsc_cmd == 'a') {
         GListPtr lpc = NULL;
         xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS,
                                                    data_set->input);
 
         unpack_constraints(cib_constraints, data_set);
 
         // Constraints apply to group/clone, not member/instance
         rsc = uber_parent(rsc);
 
         // Reset the per-resource flag before each colocation listing
         for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) {
             resource_t *r = (resource_t *) lpc->data;
 
             clear_bit(r->flags, pe_rsc_allocating);
         }
 
         cli_resource_print_colocation(rsc, TRUE, rsc_cmd == 'A', 1);
 
         fprintf(stdout, "* %s\n", rsc->id);
         cli_resource_print_location(rsc, NULL);
 
         for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) {
             resource_t *r = (resource_t *) lpc->data;
 
             clear_bit(r->flags, pe_rsc_allocating);
         }
 
         cli_resource_print_colocation(rsc, FALSE, rsc_cmd == 'A', 1);
 
     } else if (rsc_cmd == 'c') {
         GListPtr lpc = NULL;
 
         rc = pcmk_ok;
         for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) {
             rsc = (resource_t *) lpc->data;
             cli_resource_print_cts(rsc);
         }
         cli_resource_print_cts_constraints(data_set);
 
     } else if (rsc_cmd == 'F') {
         rc = cli_resource_fail(crmd_channel, host_uname, rsc_id, data_set);
         if (rc == pcmk_ok) {
             start_mainloop();
         }
 
     } else if (rsc_cmd == 'O') {
         rc = cli_resource_print_operations(rsc_id, host_uname, TRUE, data_set);
 
     } else if (rsc_cmd == 'o') {
         rc = cli_resource_print_operations(rsc_id, host_uname, FALSE, data_set);
 
     } else if (rsc_cmd == 'W') {
         rc = cli_resource_search(rsc, rsc_id, data_set);
         if (rc >= 0) {
             rc = pcmk_ok;
         }
 
     } else if (rsc_cmd == 'q') {
         rc = cli_resource_print(rsc, data_set, TRUE);
 
     } else if (rsc_cmd == 'w') {
         rc = cli_resource_print(rsc, data_set, FALSE);
 
     } else if (rsc_cmd == 'Y') {
         node_t *dest = NULL;
 
         if (host_uname) {
             dest = pe_find_node(data_set->nodes, host_uname);
             if (dest == NULL) {
                 rc = -pcmk_err_node_unknown;
                 goto bail;
             }
         }
         cli_resource_why(cib_conn, data_set->resources, rsc, dest);
         rc = pcmk_ok;
 
     } else if (rsc_cmd == 'U') {
         GListPtr before = NULL;
         GListPtr after = NULL;
         GListPtr remaining = NULL;
         GListPtr ele = NULL;
         node_t *dest = NULL;
 
         // Unless quiet, snapshot the constraints so removals can be reported
         if (BE_QUIET == FALSE) {
             before = build_constraint_list(data_set->input);
         }
 
         if (clear_expired == TRUE) {
             rc = cli_resource_clear_all_expired(data_set->input, cib_conn, rsc_id, host_uname, scope_master);
 
         } else if (host_uname) {
             dest = pe_find_node(data_set->nodes, host_uname);
             if (dest == NULL) {
                 rc = -pcmk_err_node_unknown;
                 if (BE_QUIET == FALSE) {
                     g_list_free(before);
                 }
                 goto bail;
             }
             rc = cli_resource_clear(rsc_id, dest->details->uname, NULL, cib_conn, TRUE);
 
         } else {
             rc = cli_resource_clear(rsc_id, NULL, data_set->nodes, cib_conn, TRUE);
         }
 
         if (BE_QUIET == FALSE) {
             // Re-read the CIB and report constraints that are no longer there
             rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call);
             if (rc != pcmk_ok) {
                 CMD_ERR("Could not get modified CIB: %s\n", pcmk_strerror(rc));
                 g_list_free(before);
                 goto bail;
             }
 
             data_set->input = cib_xml_copy;
             cluster_status(data_set);
 
             after = build_constraint_list(data_set->input);
             remaining = subtract_lists(before, after, (GCompareFunc) strcmp);
 
             for (ele = remaining; ele != NULL; ele = ele->next) {
                 printf("Removing constraint: %s\n", (char *) ele->data);
             }
 
             g_list_free(before);
             g_list_free(after);
             g_list_free(remaining);
         }
 
     } else if (rsc_cmd == 'M' && host_uname) {
         rc = cli_resource_move(rsc, rsc_id, host_uname, cib_conn, data_set);
 
     } else if (rsc_cmd == 'B' && host_uname) {
         node_t *dest = pe_find_node(data_set->nodes, host_uname);
 
         if (dest == NULL) {
             rc = -pcmk_err_node_unknown;
             goto bail;
         }
         rc = cli_resource_ban(rsc_id, dest->details->uname, NULL, cib_conn);
 
     } else if (rsc_cmd == 'B' || rsc_cmd == 'M') {
         // No node given: ban the resource from wherever it is currently active
         pe_node_t *current = NULL;
         unsigned int nactive = 0;
 
         current = pe__find_active_requires(rsc, &nactive);
 
         if (nactive == 1) {
             rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn);
 
         } else if (is_set(rsc->flags, pe_rsc_promotable)) {
             int count = 0;
             GListPtr iter = NULL;
 
             // Look for a single child in the master role and use its node
             current = NULL;
             for(iter = rsc->children; iter; iter = iter->next) {
                 resource_t *child = (resource_t *)iter->data;
                 enum rsc_role_e child_role = child->fns->state(child, TRUE);
 
                 if(child_role == RSC_ROLE_MASTER) {
                     count++;
                     current = pe__current_node(child);
                 }
             }
 
             if(count == 1 && current) {
                 rc = cli_resource_ban(rsc_id, current->details->uname, NULL, cib_conn);
 
             } else {
                 rc = -EINVAL;
                 exit_code = CRM_EX_USAGE;
                 CMD_ERR("Resource '%s' not moved: active in %d locations (promoted in %d).",
                         rsc_id, nactive, count);
                 CMD_ERR("To prevent '%s' from running on a specific location, "
                         "specify a node.", rsc_id);
                 CMD_ERR("To prevent '%s' from being promoted at a specific "
                         "location, specify a node and the master option.",
                         rsc_id);
             }
 
         } else {
             rc = -EINVAL;
             exit_code = CRM_EX_USAGE;
             CMD_ERR("Resource '%s' not moved: active in %d locations.", rsc_id, nactive);
             CMD_ERR("To prevent '%s' from running on a specific location, "
                     "specify a node.", rsc_id);
         }
 
     } else if (rsc_cmd == 'G') {
         rc = cli_resource_print_property(rsc, prop_name, data_set);
 
     } else if (rsc_cmd == 'S') {
         xmlNode *msg_data = NULL;
 
         if ((rsc_type == NULL) || !strlen(rsc_type)) {
             CMD_ERR("Must specify -t with resource type");
             rc = -ENXIO;
             goto bail;
 
         } else if ((prop_value == NULL) || !strlen(prop_value)) {
             CMD_ERR("Must supply -v with new value");
             rc = -EINVAL;
             goto bail;
         }
 
         CRM_LOG_ASSERT(prop_name != NULL);
 
         msg_data = create_xml_node(NULL, rsc_type);
         crm_xml_add(msg_data, XML_ATTR_ID, rsc_id);
         crm_xml_add(msg_data, prop_name, prop_value);
 
         rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, cib_options);
         free_xml(msg_data);
 
     } else if (rsc_cmd == 'g') {
         rc = cli_resource_print_attribute(rsc, prop_name, data_set);
 
     } else if (rsc_cmd == 'p') {
         if (prop_value == NULL || strlen(prop_value) == 0) {
             CMD_ERR("You need to supply a value with the -v option");
             rc = -EINVAL;
             goto bail;
         }
 
         /* coverity[var_deref_model] False positive */
         rc = cli_resource_update_attribute(rsc, rsc_id, prop_set, prop_id,
                                            prop_name, prop_value, recursive,
                                            cib_conn, data_set);
 
     } else if (rsc_cmd == 'd') {
         /* coverity[var_deref_model] False positive */
         rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id,
                                            prop_name, cib_conn, data_set);
 
     } else if ((rsc_cmd == 'C') && rsc) {
         // Unless forced, operate on the topmost parent of the named resource
         if (do_force == FALSE) {
             rsc = uber_parent(rsc);
         }
         crmd_replies_needed = 0;
 
         crm_debug("Erasing failures of %s (%s requested) on %s",
                   rsc->id, rsc_id, (host_uname? host_uname: "all nodes"));
         rc = cli_resource_delete(crmd_channel, host_uname, rsc,
                                  operation, interval_spec, TRUE, data_set);
 
         if ((rc == pcmk_ok) && !BE_QUIET) {
             // Show any reasons why resource might stay stopped
             cli_resource_check(cib_conn, rsc);
         }
 
         if (rc == pcmk_ok) {
             start_mainloop();
         }
 
     } else if (rsc_cmd == 'C') {
         rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval_spec,
                              data_set);
         if (rc == pcmk_ok) {
             start_mainloop();
         }
 
     } else if ((rsc_cmd == 'R') && rsc) {
         // Unless forced, operate on the topmost parent of the named resource
         if (do_force == FALSE) {
             rsc = uber_parent(rsc);
         }
         crmd_replies_needed = 0;
 
         crm_debug("Re-checking the state of %s (%s requested) on %s",
                   rsc->id, rsc_id, (host_uname? host_uname: "all nodes"));
         rc = cli_resource_delete(crmd_channel, host_uname, rsc,
                                  NULL, 0, FALSE, data_set);
 
         if ((rc == pcmk_ok) && !BE_QUIET) {
             // Show any reasons why resource might stay stopped
             cli_resource_check(cib_conn, rsc);
         }
 
         if (rc == pcmk_ok) {
             start_mainloop();
         }
 
     } else if (rsc_cmd == 'R') {
         const char *router_node = host_uname;
         xmlNode *msg_data = NULL;
         xmlNode *cmd = NULL;
         int attr_options = attrd_opt_none;
 
         if (host_uname) {
             node_t *node = pe_find_node(data_set->nodes, host_uname);
 
             // Requests for a guest/remote node are routed via the node
             // currently running its connection resource
             if (pe__is_guest_or_remote_node(node)) {
                 node = pe__current_node(node->details->remote_rsc);
                 if (node == NULL) {
                     CMD_ERR("No cluster connection to Pacemaker Remote node %s detected",
                             host_uname);
                     rc = -ENXIO;
                     goto bail;
                 }
                 router_node = node->details->uname;
                 attr_options |= attrd_opt_remote;
             }
         }
 
         // No controller connection was made (CIB_file is set): nothing to send
         if (crmd_channel == NULL) {
             printf("Dry run: skipping clean-up of %s due to CIB_file\n",
                    host_uname? host_uname : "all nodes");
             rc = pcmk_ok;
             goto bail;
         }
 
         msg_data = create_xml_node(NULL, "crm-resource-reprobe-op");
         crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname);
         if (safe_str_neq(router_node, host_uname)) {
             crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node);
         }
 
         cmd = create_request(CRM_OP_REPROBE, msg_data, router_node,
                              CRM_SYSTEM_CRMD, crm_system_name, our_pid);
         free_xml(msg_data);
 
         crm_debug("Re-checking the state of all resources on %s", host_uname?host_uname:"all nodes");
 
         rc = attrd_clear_delegate(NULL, host_uname, NULL, NULL, NULL, NULL,
                                   attr_options);
 
         if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) {
             start_mainloop();
         }
 
         free_xml(cmd);
 
     } else if (rsc_cmd == 'D') {
         xmlNode *msg_data = NULL;
 
         if (rsc_type == NULL) {
             CMD_ERR("You need to specify a resource type with -t");
             rc = -ENXIO;
             goto bail;
         }
 
         msg_data = create_xml_node(NULL, rsc_type);
         crm_xml_add(msg_data, XML_ATTR_ID, rsc_id);
 
         rc = cib_conn->cmds->remove(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, cib_options);
         free_xml(msg_data);
 
     } else {
         CMD_ERR("Unknown command: %c", rsc_cmd);
     }
 
   bail:
 
     // Common exit path: release what was acquired, then map rc to an exit code
     free(our_pid);
     pe_free_working_set(data_set);
     if (cib_conn != NULL) {
         cib_conn->cmds->signoff(cib_conn);
         cib_delete(cib_conn);
     }
 
     if (is_ocf_rc) {
         exit_code = rc;
 
     } else if (rc != pcmk_ok) {
         CMD_ERR("Error performing operation: %s", pcmk_strerror(rc));
         if (rc == -pcmk_err_no_quorum) {
             CMD_ERR("To ignore quorum, use the force option");
         }
         // Keep a more specific exit code if a command branch already set one
         if (exit_code == CRM_EX_OK) {
             exit_code = crm_errno2exit(rc);
         }
     }
 
     crm_exit(exit_code);
 }