diff --git a/ChangeLog b/ChangeLog
index 8df588a977..a75ea4de8f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,2287 +1,2287 @@
 * Thu Jan 14 2016 Ken Gaillot <kgaillot@redhat.com> Pacemaker-1.1.14-1
 - Update source tarball to revision: f0b585a
 - Changesets: 724
 - Diff:        179 files changed, 13142 insertions(+), 7695 deletions(-)
 
 - Features added since Pacemaker-1.1.13
   + crm_resource: Indicate common reasons why a resource may not start after a cleanup
   + crm_resource: New --force-promote and --force-demote options for debugging
   + fencing: Support targeting fencing topologies by node name pattern or node attribute
   + fencing: Remap sequential topology reboots to all-off-then-all-on
   + pengine: Allow resources to start and stop as soon as their state is known on all nodes
   + pengine: Include a list of all and available nodes with clone notifications
   + pengine: Addition of the clone resource clone-min metadata option
   + pengine: Support of multiple-active=block for resource groups
   + remote: Resources that create guest nodes can be included in a group resource
   + remote: reconnect_interval option for remote nodes to delay reconnect after fence
 
 - Changes since Pacemaker-1.1.13
   + improve support for building on FreeBSD and Debian
   + fix multiple memory issues (leaks, use-after-free, double free, use-of-NULL) in components and tools
   + cib: Do not terminate due to badly behaving clients
   + cman: handle corosync-invented node names of the form Node{id} for peers not in its node list
   + controld: replace bashism
   + crm_node: Display node state with -l and quorum status with -q, if available
   + crmd: resources would sometimes be restarted when only non-unique parameters changed
   + crmd: fence remote node after connection failure only once
   + crmd: handle resources named the same as cluster nodes
   + crmd: Pre-emptively fail in-flight actions when lrmd connections fail
   + crmd: Record actions in the CIB as failed if we cannot execute them
   + crm_report: Enable password sanitizing by default
   + crm_report: Allow log file discovery to be disabled
   + crm_resource: Allow the resource configuration to be modified for --force-{check,start,..} calls
   + crm_resource: Compensate for -C and -p being called with the child resource for clones
   + crm_resource: Correctly clean up all children for anonymous cloned groups
   + crm_resource: Correctly clean up failcounts for inactive anonymous clones
   + crm_resource: Correctly observe --force when deleting and updating attributes
   + crm_shadow: Fix "crm_shadow --diff"
   + crm_simulate: Prevent segfault on arches with 64bit time_t
   + fencing: ensure "required"/"automatic" only apply to "on" actions
   + fencing: Return a provider for the internal fencing agent "#watchdog" instead of logging an error
   + fencing: ignore stderr output of fence agents (often used for debug messages)
   + fencing: fix issue where deleting a fence device attribute can delete the device
   + libcib: potential user input overflow
   + libcluster: overhaul peer cache management
   + log: make syslog less noisy
   + log: fix various misspellings in log messages
   + lrmd: cancel currently pending STONITH op if stonithd connection is lost
   + lrmd: Finalize all pending and recurring operations when cleaning up a resource
   + pengine: Bug cl#5247 - Imply resources running on a container are stopped when the container is stopped
   + pengine: cl#5235 - Prevent graph loops that can be introduced by "load_stopped -> migrate_to" ordering
   + pengine: Correctly bypass fencing for resources that do not require it
   + pengine: do not timeout remote node recurring monitor op failure until after fencing
   + pengine: Ensure recurring monitor operations are cancelled when clone instances are de-allocated
   + pengine: fixes segfault in pengine when fencing remote node
   + pengine: properly handle blocked clone actions
   + pengine: ensure failed actions that occurred in node shutdown are displayed
   + remote: Correctly display the usage of the ocf:pacemaker:remote resource agent
   + remote: do not fail operations because of a migration
   + remote: enable reloads for select remote connection options
   + resources: allow for top output with or without percent sign in HealthCPU
   + resources: Prevent an error message on stopping "Dummy" resource
   + systemd: Prevent segfault when logging failed operations
   + systemd: Reconnect to System DBus if the connection is closed
   + systemd: set systemd resources' timeout values higher than systemd's own default
   + tools: Do not send command lines to syslog
   + tools: update SNMP MIB
   + upstart: Ensure pending structs are correctly unreferenced
 
 
 * Wed Jun 24 2015 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.13-1
 - Update source tarball to revision: 2a1847e
 - Changesets: 750
 - Diff:       156 files changed, 11323 insertions(+), 3725 deletions(-)
 
 - Features added since Pacemaker-1.1.12
   + Allow fail-counts to be removed en masse when the new attrd is in operation
   + attrd supports private attributes (not written to CIB)
   + crmd: Ensure a watchdog device is in use if stonith-watchdog-timeout is configured
   + crmd: If configured, trigger the watchdog immediately if we lose quorum and no-quorum-policy=suicide
   + crm_diff: Support generating a difference without versions details if --no-version/-u is supplied
   + crm_resource: Implement an intelligent restart capability
   + Fencing: Advertise the watchdog device for fencing operations
   + Fencing: Allow the cluster to recover resources if the watchdog is in use
   + fencing: cl#5134 - Support random fencing delay to avoid double fencing
   + mcp: Allow orphan children to initiate node panic via SIGQUIT
   + mcp: Turn on sbd integration if pacemakerd finds it running
   + mcp: Two new error codes that result in machine reset or power off
   + Officially support the resource-discovery attribute for location constraints
   + PE: Allow natural ordering of colocation sets
   + PE: Support non-actionable degraded mode for OCF
   + pengine: cl#5207 - Display "UNCLEAN" for resources running on unclean offline nodes
   + remote: pcmk remote client tool for use with container wrapper script
   + Support machine panics for some kinds of errors (via sbd if available)
   + tools: add crm_resource --wait option
   + tools: attrd_updater supports --query and --all options
   + tools: attrd_updater: Allow attributes to be set for other nodes
 
 - Changes since Pacemaker-1.1.12
   + pengine: exclusive discovery implies rsc is only allowed on exclusive subset of nodes
   + acl: Correctly implement the 'reference' acl directive
   + acl: Do not delay evaluation of added nodes in some situations
   + attrd: b22b1fe did uuid test too early
   + attrd: Clean out the node cache when requested by the admin
   + attrd: fixes double free in attrd legacy
   + attrd: properly write attributes for peers once uuid is discovered
   + attrd: refresh should force an immediate write-out of all attributes
   + attrd: Simplify how node deletions happen
   + Bug rhbz#1067544 - Tools: Correctly handle --ban, --move and --locate for master/slave groups
   + Bug rhbz#1181824 - Ensure the DC can be reliably fenced
   + cib: Ability to upgrade cib validation schema in legacy mode
   + cib: Always generate digests for cib diffs in legacy mode
   + cib: assignment where comparison intended
   + cib: Avoid nodeid conflicts we don't care about
   + cib: Correctly add "update-origin", "update-client" and "update-user" attributes for cib
   + cib: Correctly set up signal handlers
   + cib: Correctly track node state
   + cib: Do not update on disk backups if we're just querying them
   + cib: Enable cib legacy mode for plugin-based clusters
   + cib: Ensure file-based backends treat '-o section' consistently with the native backend
   + cib: Ensure upgrade operations from a non-DC get an acknowledgement
   + cib: No need to enforce cib digests for v2 diffs in legacy mode
   + cib: Revert d153b86 to instantly get cib synchronized in legacy mode
   + cib: tls sock cleanup for remote cib connections
   + cli: Ensure subsequent unknown long options are correctly detected
   + cluster: Invoke crm_remove_conflicting_peer() only when the new node's uname is being assigned in the node cache
   + common: Increment current and age for lib common as a result of APIs being added
   + corosync:  Bug cl#5232 - Somewhat gracefully handle nodes with invalid UUIDs
   + corosync: Avoid unnecessary repeated CMAP API calls
   + crmd/pengine: handle on-fail=ignore properly
   + crmd: Add "on_node" attribute for *_last_failure_0 lrm resource operations
   + crmd: All peers need to track node shutdown requests
   + crmd: Cached copies of transient attributes cease to be valid once a node leaves the membership
   + crmd: Correctly add the local option that validates against schema for pengine to calculate
   + crmd: Disable debug logging that results in significant overhead
   + crmd: do not remove connection resources during re-probe
   + crmd: don't update fail count twice for same failure
   + crmd: Ensure remote connection resources timeout properly during 'migrate_from' action
   + crmd: Ensure throttle_mode() does something on Linux
   + crmd: Fixes crash when remote connection migration fails
   + crmd: gracefully handle remote node disconnects during op execution
   + crmd: Handle remote connection failures while executing ops on remote connection
   + crmd: include remote nodes when forcing cluster wide resource reprobe
   + crmd: never stop recurring monitor ops for pcmk remote during incomplete migration
   + crmd: Prevent the old version of DC from being fenced when it shuts down for rolling-upgrade
   + crmd: Prevent use-of-NULL during reprobe
   + crmd: properly update job limit for baremetal remote-nodes
   + crmd: Remote-node throttle jobs count towards cluster-node hosting connection rsc
   + crmd: Reset stonith failcount to recover transitioner when the node rejoins
   + crmd: resolves memory leak in crmd.
   + crmd: respect start-failure-is-fatal even for artificially injected events
   + crmd: Wait for all pending operations to complete before poking the policy engine
   + crmd: When container's host is fenced, cancel in-flight operations
   + crm_attribute: Correctly update config options when -o crm_config is specified
   + crm_failcount: Better error reporting when no resource is specified
   + crm_mon: add exit reason to resource failure output
   + crm_mon: Fill CRM_notify_node in traps with node's uname rather than node's id if possible
   + crm_mon: Repair notification delivery when the v2 patch format is in use
   + crm_node: Correctly remove nodes from the CIB by nodeid
   + crm_report: More patterns for finding logs on non-DC nodes
   + crm_resource: Allow resource restart operations to be node specific
   + crm_resource: avoid deletion of lrm cache on node with resource discovery disabled.
   + crm_resource: Calculate how long to wait for a restart based on the resource timeouts
   + crm_resource: Clean up memory in --restart error paths
   + crm_resource: Display the locations of all anonymous clone children when supplying the children's common ID
   + crm_resource: Ensure --restart sets/clears meta attributes
   + crm_resource: Ensure fail-counts are purged when we redetect the state of all resources
   + crm_resource: Implement --timeout for resource restart operations
   + crm_resource: Include group members when calculating the next timeout
   + crm_resource: Memory leak in error paths
   + crm_resource: Prevent use-after-free
   + crm_resource: Repair regression test outputs
   + crm_resource: Use-after-free when restarting a resource
   + dbus: ref count leaks
   + dbus: Ensure both the read and write queues get dispatched
   + dbus: Fail gracefully if malloc fails
   + dbus: handle dispatch queue when multiple replies need to be processed
   + dbus: Notice when dbus connections get disabled
   + dbus: Remove double-free introduced while trying to make coverity shut up
   + ensure if B is colocated with A, B can never run without A
   + fence_legacy: Avoid passing 'port' to cluster-glue agents
   + fencing: Allow nodes to be purged from the member cache
   + fencing: Correctly make args for fencing agents
   + fencing: Correctly wait for self-fencing to occur when the watchdog is in use
   + fencing: Ensure the hostlist parameter is set for watchdog agents
   + fencing: Force 'stonith-ng' as the system name
   + fencing: Gracefully handle invalid metadata from agents
   + fencing: If configured, wait stonith-watchdog-timer seconds for self-fencing to complete
   + fencing: Reject actions for devices that haven't been explicitly registered yet
   + ipc: properly allocate server enforced buffer size on client
   + ipc: use server enforced buffer during ipc client send
   + lrmd, services: interpret LSB status codes properly
   + lrmd: add back support for class heartbeat agents
   + lrmd: cancel pending async connection during disconnect
   + lrmd: enable ipc proxy for docker-wrapper privileged mode
   + lrmd: fix rescheduling of systemd monitor op during start
   + lrmd: Handle systemd reporting 'done' before a resource is actually stopped
   + lrmd: Hint to child processes that using sd_notify is not required
   + lrmd: Log with the correct personality
   + lrmd: Prevent glib assert triggered by timers being removed from mainloop more than once
   + lrmd: report original timeout when systemd operation completes
   + lrmd: store failed operation exit reason in cib
   + mainloop: resolves race condition mainloop poll involving modification of ipc connections
   + make targeted reprobe for remote node work, crm_resource -C -N <remote node>
   + mcp: Allow a configurable delay when debugging shutdown issues
   + mcp: Avoid requiring 'export' for SYS-V sysconfig options
   + Membership: Detect and resolve nodes that change their ID
   + pacemakerd: resolves memory leak of xml structure in pacemakerd
   + pengine: ability to launch resources in isolated containers
   + pengine: add #kind=remote for baremetal remote-nodes
   + pengine: allow baremetal remote-nodes to recover without requiring fencing when cluster-node fails
   + pengine: allow remote-nodes to be placed in maintenance mode
   + pengine: Avoid trailing whitespaces when printing resource state
   + pengine: cl#5130 - Choose nodes capable of running all the colocated utilization resources
   + pengine: cl#5130 - Only check the capacities of the nodes that are allowed to run the resource
   + pengine: Correctly compare feature set to determine how to unpack meta attributes
   + pengine: disable migrations for resources with isolation containers
   + pengine: disable reloading of resources within isolated container wrappers
   + pengine: Do not aggregate children in a pending state into the started/stopped/etc lists
   + pengine: Do not record duplicate copies of the failed actions
   + pengine: Do not reschedule monitors that are no longer needed while resource definitions have changed
   + pengine: Fence baremetal remote when recurring monitor op fails
   + pengine: Fix colocation with unmanaged resources
   + pengine: Fix the behaviors of multi-state resources with asymmetrical ordering
   + pengine: fixes pengine crash with orphaned remote node connection resource
   + pengine: fixes segfault caused by malformed log warning
   + pengine: handle cloned isolated resources in a sane way
   + pengine: handle isolated resource scenario, cloned group of isolated resources
   + pengine: Handle ordering between stateful and migratable resources
   + pengine: imply stop in container node resources when host node is fenced
   + pengine: only fence baremetal remote when connection fails or cannot be recovered
   + pengine: only kill process group on timeout when on-fail does not equal block.
   + pengine: per-node control over resource discovery
   + pengine: prefer migration target for remote node connections
   + pengine: prevent disabling rsc discovery per node in certain situations
   + pengine: Prevent use-after-free in sort_rsc_process_order()
   + pengine: properly handle ordering during remote connection partial migration
   + pengine: properly recover remote-nodes when cluster-node proxy goes offline
   + pengine: remove unnecessary whitespace from notify environment variables
   + pengine: require-all feature for ordered clones
   + pengine: Resolve memory leaks
   + pengine: resource discovery mode for location constraints
   + pengine: restart master instances on instance attribute changes
   + pengine: Turn off legacy unpacking of resource options into the meta hashtable
   + pengine: Watchdog integration is sufficient for fencing
   + Perform systemd reloads asynchronously
   + ping: Correctly advertise multiplier default
   + Prefer to inherit the  watchdog timeout from SBD
   + properly record stop args after reload
   + provide fake meta data for ra class heartbeat
   + remote: report timestamps for remote connection resource operations
   + remote: Treat recv msg timeout as a disconnect
   + service: Prevent potential use-of-NULL in metadata lookups
   + solaris: Allow compilation when dirent.d_type is not available
   + solaris: Correctly replace the linux swab functions
   + solaris: Disable throttling since /proc doesn't exist
   + stonith-ng: Correctly observe the watchdog completion timeout
   + stonith-ng: Correctly track node state
   + stonith-ng: Reset mainloop source IDs after removing them
   + systemd: Correctly handle long running stop actions
   + systemd: Ensure failed monitor operations always return
   + systemd: Ensure we don't call dbus_message_unref() with NULL
   + systemd: fix crash caused when canceling in-flight operation
   + systemd: Kindly ask dbus NOT to kill the process if the dbus connection fails
   + systemd: Perform actions asynchronously
   + systemd: Perform monitor operations without blocking
   + systemd: Tell systemd not to take DBus down from underneath us
   + systemd: Trick systemd into not stopping our services before us during shutdown
   + tools: Improve crm_mon output with certain option combinations
   + upstart: Monitor actions always return 'ok' or 'not running'
   + upstart: Perform more parts of monitor operations without blocking
   + xml: add 'require-all' to xml schema for constraints
   + xml: cl#5231 - Unset the deleted attributes in the resulting diffs
   + xml: Clone the latest constraint schema in preparation for changes
   + xml: Correctly create v1 patchsets when deleting attributes
   + xml: Do not change the ordering of properties when applying v1 cib diffs
   + xml: Do not dump deleted attributes
   + xml: Do not prune leaves from v1 cib diffs that are being created with digests
   + xml: Ensure ACLs are reapplied before calculating what a replace operation changed
   + xml: Fix upgrade-1.3.xsl to correctly transform ACL rules with "attribute"
   + xml: Prevent assert errors in crm_element_value() on applying a patch without version information
   + xml: Prevent potential use-of-NULL
 
 
 * Tue Jul 22 2014 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.12-1
 - Update source tarball to revision: 93a037d
 - Changesets: 795
 - Diff:       195 files changed, 13772 insertions(+), 6176 deletions(-)
 
 - Features added since Pacemaker-1.1.11
   + Changes to the ACL schema to support nodes and unix groups
   + cib: Check ACLs prior to making the update instead of parsing the diff afterwards
   + cib: Default ACL support to on
   + cib: Enable the more efficient xml patchset format
   + cib: Implement zero-copy status update
   + cib: Send all r/w operations via the cluster connection and have all nodes process them
   + crmd: Set "cluster-name" property to corosync's "cluster_name" by default for corosync-2
   + crm_mon: Display brief output if "-b/--brief" is supplied or 'b' is toggled
   + crm_report: Allow ssh alternatives to be used
   + crm_ticket: Support multiple modifications for a ticket in an atomic operation
   + extra: Add logrotate configuration file for /var/log/pacemaker.log
   + Fencing: Add the ability to call stonith_api_time() from stonith_admin
   + logging: daemons always get a log file, unless explicitly set to configured 'none'
   + logging: allows the user to specify a log level that is output to syslog
   + PE: Automatically re-unfence a node if the fencing device definition changes
   + pengine: cl#5174 - Allow resource sets and templates for location constraints
   + pengine: Support cib object tags
   + pengine: Support cluster-specific instance attributes based on rules
   + pengine: Support id-ref in nvpair with optional "name"
   + pengine: Support per-resource maintenance mode
   + pengine: Support site-specific instance attributes based on rules
   + tools: Allow crm_shadow to create older configuration versions
   + tools: Display pending state in crm_mon/crm_resource/crm_simulate if --pending/-j is supplied (cl#5178)
   + xml: Add the ability to have lightweight schema revisions
   + xml: Enable resource sets in location constraints for 1.2 schema
   + xml: Support resources that require unfencing
 
 - Changes since Pacemaker-1.1.11
   + acl: Authenticate pacemaker-remote requests with the node name as the client
   + acl: Read access must be explicitly granted
   + attrd: Ensure attribute dampening is always observed
   + attrd: Remove offline nodes from node cache for "peer-remove" requests
   + Bug cl#5055 - Improved migration support.
   + Bug cl#5184 - Ensure pending probes that ultimately fail are correctly updated
   + Bug cl#5196 - pengine: Check values after expanding templates
   + Bug cl#5212 - Do not promote instances when quorum is lost and no-quorum-policy=freeze
   + Bug cl#5213 - Ensure role colocation with -INFINITY is enforced
   + Bug cl#5213 - Limit the scope of the previous commit to the masters role
   + Bug cl#5219 - pengine: Allow unrelated resources with a common colocation target to remain promoted
   + Bug cl#5222 - cib: Repair rolling update capability
   + Bug cl#5222 - Enable legacy mode whenever a broadcast update is detected
   + Bug rhbz#1036631 - Stop members of cloned groups when dependencies are stopped
   + Bug rhbz#1054307 - cname pattern match should be more restrictive in init script
   + Bug rhbz#1057697 - Use native DBus library for systemd/upstart support to avoid problematic use of threads
   + Bug rhbz#1097457 - Limit the scope of the previous fix and include a helpful comment
   + Bug rhbz#1097457 - Prevent invalid transition when resource are ordered to start after the container they're started in
   + cib: allow setting permanent remote-node attributes
   + cib: Auto-detect which patchset format to use
   + cib: Determine the best value of validate-with if one is not supplied
   + cib: Do not disable cib disk writes if on-disk cib is corrupt
   + cib: Ensure 'cibadmin -R/--replace' commands get replies
   + cib: Erasing the cib is an admin action, bump the admin_epoch instead
   + cib: Fix remote cib based on TLS
   + cib: Ignore patch failures if we already have their contents
   + cib: Validate that everyone still sees the same configuration once all updates have completed
   + cibadmin: Allow privileged clients to perform tasks as unprivileged users
   + cibadmin: Remove dangerous commands that exposed unnecessary implementation internal details
   + cluster: Fix segfault on removing a node
   + cluster: Prevent search of unames from attempting to create node entries for unknown nodes
   + cluster: Remove unknown offline nodes with conflicting unames from node cache
   + controld: Do not consider the dlm up until the address list is present
   + controld: handling startup fencing within the controld agent, not the dlm
   + controld: Return OCF_ERR_INSTALLED instead of OCF_NOT_INSTALLED
   + crmd: Ack pending operations that were cancelled due to rsc deletion
   + crmd: Actions can only be executed if their prerequisites completed successfully
   + crmd: avoid double free caused by nested hash table removal
   + crmd: Avoid spamming the cib by triggering a transition only once per non-status change
   + crmd: Correctly react to successful unfencing operations
   + crmd: Correctly recognise operation cancellations we initiated
   + crmd: Do not erase the status section for unfenced nodes
   + crmd: Do not overwrite existing node state when fencing completes
   + crmd: Do not start timers for already completed operations
   + crmd: Ensure crm_config options are re-read on updates
   + crmd: Fenced nodes that return prior to an election do not need to have their status section reset
   + crmd: make lrm_state hash table not case sensitive
   + crmd: make node_state erase correctly
   + crmd: Only write fence_averride if open() returns a positive file descriptor
   + crmd: Prevent manual fencing confirmations from attempting to create node entries for unknown nodes
   + crmd: Prevent SIGPIPE when notifying CMAN about fencing operations
   + crmd: Remove state of unknown nodes with conflicting unames from CIB
   + crmd: Remove unknown nodes with conflicting unames from CIB
   + crmd: Report unsuccessful unfencing operations
   + crm_diff: Allow the generation of xml patchsets without digests
   + crm_mon: Allow the file created by --as-html to be world readable
   + crm_mon: Ensure resource attributes have been unpacked before displaying connectivity data
   + crm_node: Only remove the named resource from the cib
   + crm_report: Gracefully handle ridiculously large logfiles
   + crm_report: Only gather dlm data if dlm_controld is running
   + crm_resource: Gracefully handle -EACCESS when querying the cib
   + crm_verify: Perform a full set of calculations whenever the status section is present
   + fencing: Advertise support for reboot/on/off in the metadata for legacy agents
   + fencing: Automatically switch from 'list' to 'status' to 'static-list' if those actions are not advertised in the metadata
   + fencing: Cache metadata lookups to avoid repeated blocking during device registration
   + fencing: Correctly record which peer performed the fencing operation
   + fencing: default to 'off' when agent does not advertise 'reboot' in metadata
   + fencing: Do not unregister/register all stonith devices on every resource agent change
   + fencing: Execute all required fencing devices regardless of what topology level they are at
   + fencing: Fence using all required devices
   + fencing: Pass the correct options when looking up the history by node name
   + fencing: Update stonith device list only if stonith is enabled
   + get_cluster_type: failing concurrent tool invocations on heartbeat
   + ignore SIGPIPE when gnutls is in use
   + iso8601: Different logic is needed when logging and calculating durations
   + iso8601: Fix memory leak in duration calculation
   + Logging: Bootstrap daemon logging before processing arguments but configure it afterwards
   + lrmd: Cancel recurring operations before stop action is executed
   + lrmd: Expose logging variables expected by OCF agents
   + lrmd: Handle systemd reporting 'done' before a resource is actually stopped/started
   + lrmd: Merge duplicate recurring monitor operations
   + lrmd: Prevent OCF agents from logging to random files due to "value" of setenv() being NULL
   + lrmd: Provide stderr output from agents if available, otherwise fall back to stdout
   + mainloop: Better handle the killing of processes in the act of exiting
   + mainloop: Canceling in-flight operations should not fail if child process has already exited.
   + mainloop: Fixes use after free in process monitor code
   + mcp: Tell systemd not to respawn us if we exit with rc=100
   + membership: Avoid duplicate peer entries in the peer cache
   + pengine: Allow container nodes to migrate with connection resource
   + pengine: avoid assert by searching for stop action on correct node during LogActions
   + pengine: Block restart of resources if any dependent resource in a group is unmanaged
   + pengine: cl#5186 - Avoid running rsc on two nodes when node is fenced during migration
   + pengine: cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node
   + pengine: cl#5200 - Before migrating utilization-using resources to a node, take off the load that will no longer run there if it's not introducing transition loop
   + pengine: Correctly handle origin offsets in the future
   + pengine: Correctly observe requires=nothing
   + pengine: Default sequential to TRUE for resource sets for consistency with colocation sets
   + pengine: Delay unfencing until after we know the state of all resources that require unfencing
   + pengine: Do not initiate fencing for unclean nodes when fencing is disabled
   + pengine: Ensure instance numbers are preserved for cloned templates
   + pengine: Ensure unfencing only happens once, even if the transition is interrupted
   + pengine: Fencing devices default to only requiring quorum in order to start
   + pengine: fixes invalid transition caused by clones with more than 10 instances
   + pengine: Force record pending for migrate_to actions
   + pengine: handles edge case where container order constraints are not honored during migration
   + pengine: Ignore failure-timeout only if the failed operation has on-fail="block"
   + pengine: Mark unrunnable stop actions as "blocked" and show the correct current locations
   + pengine: Memory leaks
   + pengine: properly handle fencing of container remote-nodes when the container is orphaned
   + pengine: properly place resource within a container when container is a remote-node.
   + pengine: Unfencing is based on device probes, there is no need to unfence when normal resources are found active
   + pengine: Use "#cluster-name" in rules for setting cluster-specific instance attributes
   + pengine: Use "#site-name" in rules for setting site-specific instance attributes
   + remote: Allow baremetal remote-node connection resources to migrate
   + remote: clear remote-node status correctly
   + remote: Enable migration support for baremetal connection resources by default
   + remote: Handle request/response ipc proxy correctly
   + services: Correctly reset the nice value for lrmd's children
   + services: Do not allow duplicate recurring op entries
   + services: Do not block synced service executions
   + services: Fixes segfault associated with cancelling in-flight recurring operations.
   + services: Remove cancelled recurring ops from internal lists as early as possible
   + services: Remove file descriptors from mainloop as soon as we have drained them
   + services: Reset the scheduling policy and priority for lrmd's children without relying on SCHED_RESET_ON_FORK
   + services_action_cancel: Interpret return code from mainloop_child_kill() correctly
   + stonith_admin: Ensure pointers passed to sscanf() are properly initialized
   + stonith_api_time_helper now returns when the most recent fencing operation completed
   + systemd: Prevent use-of-NULL when determining if an agent exists
   + systemd: Try to handle dbus actions that complete prior to configuring a callback
   + Tools: Non-daemons shouldn't abort just because xml parsing failed
   + Upstart: Allow compilation with glib versions older than 2.28
   + Upstart: Do not attempt upstart jobs if we cannot connect to dbus
   + When data was old, it fixed so that the newest cib might not be acquired.
   + xml: Check all available schemas when doing upgrades
   + xml: Correctly determine the lowest allowed schema version
   + xml: Correctly enforce ACLs after a replace operation
   + xml: Correctly infer attribute changes after a replace operation
   + xml: Create the correct diff when only part of a document is changed
   + xml: Detect attribute ordering changes
   + xml: Detect content that is added and removed in the same update
   + xml: Do not prune meaningful leaves from v1 patchsets
   + xml: Empty patchsets are considered to have applied cleanly
   + xml: Ensure patches always have version details set
   + xml: Find the minimal set of changes when part of a document is replaced
   + xml: If validate-with is missing, we find the most recent schema that accepts it and go from there
   + xml: Introduce a 'move' primitive for v2 patch sets
   + xml: Preserve the attribute order in the patch for subsequent digest validation
   + xml: Resolve memory leak when logging xml blobs
   + xml: Update xml validation to allow '<node type=remote />'
 
 
 * Thu Feb 13 2014 David Vossel <davidvossel@gmail.com> Pacemaker-1.1.11-1
 - Update source tarball to revision: 33f9d09
 - Changesets: 462
 - Diff:       147 files changed, 6810 insertions(+), 4057 deletions(-)
 
 - Features added since Pacemaker-1.1.10
 
   + attrd: A truly atomic version of attrd for use where CPG is used for cluster communication
   + cib: Allow values to be added/updated and removed in a single update
   + cib: Support XML comments in diffs
   + Core: Allow blackbox logging to be disabled with SIGUSR2
   + crmd: Do not block on proxied calls from pacemaker_remoted
   + crmd: Enable cluster-wide throttling when the cib heavily exceeds its target load
   + crmd: Make the per-node action limit directly configurable in the CIB
   + crmd: Slow down recovery on nodes with IO load
   + crmd: Track CPU usage on cluster nodes and slow down recovery on nodes with high CPU/IO load
   + crm_mon: add --hide-headers option to hide all headers
   + crm_node: Display partition output in sorted order
   + crm_report: Collect logs directly from journald if available
   + Fencing: On timeout, clean up the agent's entire process group
   + Fencing: Support agents that need the host to be unfenced at startup
   + ipc: Raise the default buffer size to 128k
   + PE: Add a special attribute for distinguishing between real nodes and containers in constraint rules
   + PE: Allow location constraints to take a regex pattern to match against resource IDs
   + pengine: Distinguish between the agent being missing and something the agent needs being missing
   + remote: Properly version the remote connection protocol
 
 - Changes since Pacemaker-1.1.10
 
   + Bug rhbz#1011618 - Consistently use 'Slave' as the role for unpromoted master/slave resources
   + Bug rhbz#1057697 - Use native DBus library for systemd and upstart support to avoid problematic use of threads
   + attrd: Any variable called 'cluster' makes the daemon crash before reaching main()
   + attrd: Avoid infinite write loop for unknown peers
   + attrd: Drop all attributes for peers that left the cluster
   + attrd: Give remote-nodes ability to set attributes with attrd
   + attrd: Prevent inflation of attribute dampen intervals
   + attrd: Support SI units for attribute dampening
   + Bug cl#5171 - pengine: Don't prevent clones from running due to dependent resources
   + Bug cl#5179 - Corosync: Attempt to retrieve a peer's node name if it is not already known
   + Bug cl#5181 - corosync: Ensure node IDs are written to the CIB as unsigned integers
   + Bug rhbz#902407 - crm_resource: Handle --ban for master/slave resources as advertised
   + cib: Correctly check for archived configuration files
   + cib: Correctly log short-form xml diffs
   + cib: Fix remote cib based on TLS
   + cibadmin: Report errors during sign-off
   + cli: Do not enable blackbox for cli tools
   + cluster: Fix segfault on removing a node
   + cman: Do not start pacemaker if cman startup fails
   + cman: Start clvmd and friends from the init script if enabled
   + Command-line tools should stop after an assertion failure
   + controld: Use the correct variant of dlm_controld for corosync-2 clusters
   + cpg: Correctly set the group name length
   + cpg: Ensure the CPG group is always null-terminated
   + cpg: Only process one message at a time to allow other priority jobs to be performed
   + crmd: Correctly observe the configured batch-limit
   + crmd: Correctly update expected state when the previous DC shuts down
   + crmd: Correctly update the history cache when recurring ops change their return code
   + crmd: Don't add node_state to cib, if we have not seen or fenced this node yet
   + crmd: don't segfault on shutdown when using heartbeat
   + crmd: Prevent recurring monitors being cancelled due to notify operations
   + crmd: Reliably detect and act on reprobe operations from the policy engine
   + crmd: When a peer expectedly shuts down, record the new join and expected states into the cib
   + crmd: When the DC gracefully shuts down, record the new expected state into the cib
   + crm_attribute: Do not swallow hostname lookup failures
   + crm_mon: Do not display duplicates of failed actions
   + crm_mon: Reduce flickering in interactive mode
   + crm_resource: Observe --master modifier for --move
   + crm_resource: Provide a meaningful error if --master is used for primitives and groups
   + fencing: Allow fencing for node after topology entries are deleted
   + fencing: Apply correct score to the resource of group
   + fencing: Ignore changes to non-fencing resources
   + fencing: Observe pcmk_host_list during automatic unfencing
   + fencing: Put all fencing agent processes into their own process group
   + fencing: Wait until all possible replies are received before continuing with unverified devices
   + ipc: Compress msgs based on client's actual max send size
   + ipc: Have the ipc server enforce a minimum buffer size all clients must use.
   + iso8601: Prevent dates from jumping backwards a day in some timezones
   + lrmd: Correctly calculate metadata for the 'service' class
   + lrmd: Correctly cancel monitor actions for lsb/systemd/service resources on cleaning up
   + mcp: Remove LSB hints that instruct chkconfig to start pacemaker at boot time
   + mcp: Some distros complain when LSB scripts do not include Default-Start/Stop directives
   + pengine: Allow fencing of baremetal remote nodes
   + pengine: cl#5186 - Avoid running rsc on two nodes when node is fenced during migration
   + pengine: Correctly account for the location preferences of things colocated with a group
   + pengine: Correctly handle demotion of grouped masters that are partially demoted
   + pengine: Disable container node probes due to constraint conflicts
   + pengine: Do not allow colocation with blocked clone instances
   + pengine: Do not re-allocate clone instances that are blocked in the Stopped state
   + pengine: Do not restart resources that depend on unmanaged resources
   + pengine: Force record pending for migrate_to actions
   + pengine: Location constraints with role=Started should prevent masters from running at all
   + pengine: Order demote/promote of resources on remote nodes to happen only once the connection is up
   + pengine: Properly handle orphaned multistate resources living on remote-nodes
   + pengine: Properly shutdown orphaned remote connection resources
   + pengine: Recover unexpectedly running container nodes.
   + remote: Add support for ipv6 into pacemaker_remote daemon
   + remote: Handle endian changes between client and server and improve forward compatibility
   + services: Fixes segfault associated with cancelling in-flight recurring operations.
   + services: Reset the scheduling policy and priority for lrmd's children without relying on SCHED_RESET_ON_FORK
 
 * Fri Jul 26 2013 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.10-1
 - Update source tarball to revision: ab2e209
 - Changesets: 602
 - Diff:       143 files changed, 8162 insertions(+), 5159 deletions(-)
 
 - Features added since Pacemaker-1.1.9
   + Core: Convert all exit codes to positive errno values
   + crm_error: Add the ability to list and print error symbols
   + crm_resource: Allow individual resources to be reprobed
   + crm_resource: Allow options to be set recursively
   + crm_resource: Implement --ban for moving resources away from nodes and --clear (replaces --unmove)
   + crm_resource: Support OCF tracing when using --force-(check|start|stop)
   + PE: Allow active nodes in our current membership to be fenced without quorum
   + PE: Suppress meaningless IDs when displaying anonymous clone status
   + Turn off auto-respawning of systemd services when the cluster starts them
   + Bug cl#5128 - pengine: Support maintenance mode for a single node
 
 - Changes since Pacemaker-1.1.9
   + crmd: cib: stonithd: Memory leaks resolved and improved use of glib reference counting
   + attrd: Fixes deleted attributes during dc election
   + Bug cl#5153 - Correctly display clone failcounts in crm_mon
   + Bug cl#5133 - pengine: Correctly observe on-fail=block for failed demote operation
   + Bug cl#5148 - legacy: Correctly remove a node that used to have a different nodeid
   + Bug cl#5151 - Ensure node names are consistently compared without case
   + Bug cl#5152 - crmd: Correctly clean up fenced nodes during membership changes
   + Bug cl#5154 - Do not expire failures when on-fail=block is present
   + Bug cl#5155 - pengine: Block the stop of resources if any depending resource is unmanaged
   + Bug cl#5157 - Allow migration in the absence of some colocation constraints
   + Bug cl#5161 - crmd: Prevent memory leak in operation cache
   + Bug cl#5164 - crmd: Fixes crash when using pacemaker-remote
   + Bug cl#5164 - pengine: Fixes segfault when calculating transition with remote-nodes.
   + Bug cl#5167 - crm_mon: Only print "stopped" node list for incomplete clone sets
   + Bug cl#5168 - Prevent clones from being bounced around the cluster due to location constraints
   + Bug cl#5170 - Correctly support on-fail=block for clones
   + cib: Correctly read back archived configurations if the primary is corrupted
   + cib: The result is not valid when diffs fail to apply cleanly for CLI tools
   + cib: Restore the ability to embed comments in the configuration
   + cluster: Detect and warn about node names with capitals
   + cman: Do not pretend we know the state of nodes we've never seen
   + cman: Do not unconditionally start cman if it is already running
   + cman: Support non-blocking CPG calls
   + Core: Ensure the blackbox is saved on abnormal program termination
   + corosync: Detect the loss of members for which we only know the nodeid
   + corosync: Do not pretend we know the state of nodes we've never seen
   + corosync: Ensure removed peers are erased from all caches
   + corosync: Nodes that can persist in sending CPG messages must be alive after all
   + crmd: Do not get stuck in S_POLICY_ENGINE if a node we couldn't fence returns
   + crmd: Do not update fail-count and last-failure for old failures
   + crmd: Ensure all membership operations can complete while trying to cancel a transition
   + crmd: Ensure operations for cleaned up resources don't block recovery
   + crmd: Ensure we return to a stable state if there have been too many fencing failures
   + crmd: Initiate node shutdown if another node claims to have successfully fenced us
   + crmd: Prevent messages for remote crmd clients from being relayed to wrong daemons
   + crmd: Properly handle recurring monitor operations for remote-node agent
   + crmd: Store last-run and last-rc-change for all operations
   + crm_mon: Ensure stale pid files are updated when a new process is started
   + crm_report: Correctly collect logs when 'uname -n' reports fully qualified names
   + fencing: Fail the operation once all peers have been exhausted
   + fencing: Restore the ability to manually confirm that fencing completed
   + ipc: Allow unprivileged clients to clean up after server failures
   + ipc: Restore the ability for members of the haclient group to connect to the cluster
   + legacy: Support "crm_node --remove" with a node name for corosync plugin (bnc#805278)
   + lrmd: Default to the upstream location for resource agent scratch directory
   + lrmd: Pass errors from lsb metadata generation back to the caller
   + pengine: Correctly handle resources that recover before we operate on them
   + pengine: Delete the old resource state on every node whenever the resource type is changed
   + pengine: Detect constraints with inappropriate actions (ie. promote for a clone)
   + pengine: Ensure per-node resource parameters are used during probes
   + pengine: If fencing is unavailable or disabled, block further recovery for resources that fail to stop
   + pengine: Implement the rest of get_timet_now() and rename to get_effective_time
   + pengine: Re-initiate _active_ recurring monitors that previously failed but have timed out
   + remote: Workaround for inconsistent tls handshake behavior between gnutls versions
   + systemd: Ensure we get shut down correctly by systemd
   + systemd: Reload systemd after adding/removing override files for cluster services
   + xml: Check for and replace non-printing characters with their octal equivalent while exporting xml text
   + xml: Prevent lockups by setting a more reliable buffer allocation strategy
 
 * Fri Mar 08 2013 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.9-1
 - Update source tarball to revision: 7e42d77
 - Statistics:
   Changesets: 731
   Diff:       1301 files changed, 92909 insertions(+), 57455 deletions(-)
 
 - Features added in Pacemaker-1.1.9
   + corosync: Allow cman and corosync 2.0 nodes to use a name other than uname()
   + corosync: Use queues to avoid blocking when sending CPG messages
   + ipc: Compress messages that exceed the configured IPC message limit
   + ipc: Use queues to prevent slow clients from blocking the server
   + ipc: Use shared memory by default
   + lrmd: Support nagios remote monitoring
   + lrmd: Pacemaker Remote Daemon for extending pacemaker functionality outside corosync cluster.
   + pengine: Check for master/slave resources that are not OCF agents
   + pengine: Support a 'requires' resource meta-attribute for controlling whether it needs quorum, fencing or nothing
   + pengine: Support for resource container
   + pengine: Support resources that require unfencing before start
 
 - Changes since Pacemaker-1.1.8
   + attrd: Correctly handle deletion of non-existent attributes
   + Bug cl#5135 - Improved detection of the active cluster type
   + Bug rhbz#913093 - Use crm_node instead of uname
   + cib: Avoid use-after-free by correctly supporting cib_no_children for non-xpath queries
   + cib: Correctly process XML diffs involving element removal
   + cib: Performance improvements for non-DC nodes
   + cib: Prevent error message by correctly handling peer replies
   + cib: Prevent ordering changes when applying xml diffs
   + cib: Remove text nodes from cib replace operations
   + cluster: Detect node name collisions in corosync
   + cluster: Preserve corosync membership state when matching node name/id entries
   + cman: Force fenced to terminate on shutdown
   + cman: Ignore qdisk 'nodes'
   + core: Drop per-user core directories
   + corosync: Avoid errors when closing failed connections
   + corosync: Ensure peer state is preserved when matching names to nodeids
   + corosync: Clean up CMAP connections after querying node name
   + corosync: Correctly detect corosync 2.0 clusters even if we don't have permission to access it
   + crmd: Bug cl#5144 - Do not update the expected status of failed nodes
   + crmd: Correctly determine if cluster disconnection was abnormal
   + crmd: Correctly relay messages for remote clients (bnc#805626, bnc#804704)
   + crmd: Correctly stall the FSA when waiting for additional inputs
   + crmd: Detect and recover when we are evicted from CPG
   + crmd: Differentiate between a node that is up and coming up in peer_update_callback()
   + crmd: Have cib operation timeouts scale with node count
   + crmd: Improved continue/wait logic in do_dc_join_finalize()
   + crmd: Prevent election storms caused by getrusage() values being too close
   + crmd: Prevent timeouts when performing pacemaker level membership negotiation
   + crmd: Prevent use-after-free of fsa_message_queue during exit
   + crmd: Store all current actions when stalling the FSA
   + crm_mon: Do not try to render a blank cib and indicate the previous output is now stale
   + crm_mon: Fixes crm_mon crash when using snmp traps.
   + crm_mon: Look for the correct error codes when applying configuration updates
   + crm_report: Ensure policy engine logs are found
   + crm_report: Fix node list detection
   + crm_resource: Have crm_resource generate a valid transition key when sending resource commands to the crmd
   + date/time: Bug cl#5118 - Correctly convert seconds-since-epoch to the current time
   + fencing: Attempt to provide more information than just 'generic error' for failed actions
   + fencing: Correctly record completed but previously unknown fencing operations
   + fencing: Correctly terminate when all device options have been exhausted
   + fencing: cov#739453 - String not null terminated
   + fencing: Do not merge new fencing requests with stale ones from dead nodes
   + fencing: Do not start fencing until entire device topology is found or query results timeout.
   + fencing: Do not wait for the query timeout if all replies have arrived
   + fencing: Fix passing of parameters from CMAN containing '='
   + fencing: Fix non-comparison when sorting devices by priority
   + fencing: On failure, only try a topology device once from the remote level.
   + fencing: Only try peers for non-topology based operations once
   + fencing: Retry stonith device for duration of action's timeout period.
   + heartbeat: Remove incorrect assert during cluster connect
   + ipc: Bug cl#5110 - Prevent 100% CPU usage when looking for synchronous replies
   + ipc: Use 50k as the default compression threshold
   + legacy: Prevent assertion failure on routing ais messages (bnc#805626)
   + legacy: Re-enable logging from the pacemaker plugin
   + legacy: Relax the 'active' check for plugin based clusters to avoid false negatives
   + legacy: Skip peer process check if the process list is empty in crm_is_corosync_peer_active()
   + mcp: Only define HA_DEBUGLOG to avoid agent calls to ocf_log printing everything twice
   + mcp: Re-attach to existing pacemaker components when mcp fails
   + pengine: Any location constraint for the slave role applies to all roles
   + pengine: Avoid leaking memory when cleaning up failcounts and using containers
   + pengine: Bug cl#5101 - Ensure stop order is preserved for partially active groups
   + pengine: Bug cl#5140 - Allow set members to be stopped when the subsequent set has require-all=false
   + pengine: Bug cl#5143 - Prevent shuffling of anonymous master/slave instances
   + pengine: Bug rhbz#880249 - Ensure orphan masters are demoted before being stopped
   + pengine: Bug rhbz#880249 - Teach the PE how to recover masters into primitives
   + pengine: cl#5025 - Automatically clear failcount for start/monitor failures after resource parameters change
   + pengine: cl#5099 - Probe operation uses the timeout value from the minimum interval monitor by default (bnc#776386)
   + pengine: cl#5111 - When clone/master child rsc has on-fail=stop, ensure all children stop on failure.
   + pengine: cl#5142 - Do not delete orphaned children of an anonymous clone
   + pengine: Correctly unpack active anonymous clones
   + pengine: Ensure previous migrations are closed out before attempting another one
   + pengine: Introducing the whitebox container resources feature
   + pengine: Prevent double-free for cloned primitive from template
   + pengine: Process rsc_ticket dependencies earlier for correctly allocating resources (bnc#802307)
   + pengine: Remove special cases for fencing resources
   + pengine: rhbz#902459 - Remove rsc node status for orphan resources
   + systemd: Gracefully handle unexpected DBus return types
   + Replace the use of the insecure mktemp(3) with mkstemp(3)
 
 * Thu Sep 20 2012 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.8-1
 
 - Update source tarball to revision: 1a5341f
 - Statistics:
   Changesets: 1019
   Diff:       2107 files changed, 117258 insertions(+), 73606 deletions(-)
 
 - All APIs have been cleaned up and reduced to essentials
 - Pacemaker now includes a replacement lrmd that supports systemd and upstart agents
 - Config and state files (cib.xml, PE inputs and core files) have moved to new locations
 - The crm shell has become a separate project and is no longer included with Pacemaker
 - All daemons/tools now have a unified set of error codes based on errno.h (see crm_error)
 
 - Changes since Pacemaker-1.1.7
   + Core: Bug cl#5032 - Rewrite the iso8601 date handling code
   + Core: Correctly extract the version details from a diff
   + Core: Log blackbox contents, if enabled, when an error occurs
   + Core: Only LOG_NOTICE and higher are sent to syslog
   + Core: Replace use of IPC from clplumbing with IPC from libqb
   + Core: SIGUSR1 now enables blackbox logging, SIGTRAP to write out
   + Core: Support a blackbox for additional logging detail after crashes/errors
   + Promote support for advanced fencing logic to the stable schema
   + Promote support for node starting scores to the stable schema
   + Promote support for service and systemd to the stable schema
 
   + attrd: Differentiate between updating all our attributes and everybody updating all theirs too
   + attrd: Have single-shot clients wait for an ack before disconnecting
   + cib: cl#5026 - Synced cib updates should not return until the cpg broadcast is complete.
   + corosync: Detect when the first corosync has not yet formed and handle it gracefully
   + corosync: Obtain a full list of configured nodes, including their names, when we connect to the quorum API
   + corosync: Obtain a node name from DNS if one was not already known
   + corosync: Populate the cib nodelist from corosync if available
   + corosync: Use the CFG API and DNS to determine node names if not configured in corosync.conf
   + crmd: Block after 10 failed fencing attempts for a node
   + crmd: cl#5051 - Fixes file leak in pe ipc connection initialization.
   + crmd: cl#5053 - Fixes fail-count not being updated properly.
   + crmd: cl#5057 - Restart sub-systems correctly (bnc#755671)
   + crmd: cl#5068 - Fixes crm_node -R option so it works with corosync 2.0
   + crmd: Correctly re-establish failed attrd connections
   + crmd: Detect when the quorum API isn't configured for corosync 2.0
   + crmd: Do not overwrite any configured node type (eg. quorum node)
   + crmd: Enable use of new lrmd daemon and client library in crmd.
   + crmd: Overhaul the way node state is recorded and updated in the CIB
   + fencing: Bug rhbz#853537 - Prevent use-of-NULL when the cib libraries are not available
   + fencing: cl#5073 - Add 'off' as a valid value for stonith-action option.
   + fencing: cl#5092 - Always timeout stonith operations if timeout period expires.
   + fencing: cl#5093 - Stonith per device timeout option
   + fencing: Clean up if we detect a failed connection
   + fencing: Delegate complex self fencing requests - we won't be around to see it to completion
   + fencing: Ensure all peers are notified of complex fencing op completion
   + fencing: Fix passing of fence_legacy parameters containing '='
   + fencing: Gracefully handle metadata requests for unknown agents
   + fencing: Return cached dynamic target list for busy devices.
   + fencing: rhbz#801355 - Abort transition on DC when external fencing operation is detected
   + fencing: rhbz#801355 - Merge fence requests for identical operations already in progress.
   + fencing: rhbz#801355 - Report fencing operations external of pacemaker to cib
   + fencing: Specify the action to perform using action= instead of the older option=
   + fencing: Stop building fake metadata for broken agents
   + fencing: Tolerate agents that report empty metadata in the admin tool
   + mcp: Correctly retry the connection to corosync on failure
   + mcp: Do not shut down IPC until the last client exits
   + mcp: Prevent use-after-free when running against corosync 1.x
   + pengine: Bug cl#5059 - Use the correct action's status when calculating required actions for interleaved clones
   + pengine: Bypass online/offline checking resource detection for ping/quorum nodes
   + pengine: cl#5044 - migrate_to no longer requires load_stopped for avoiding possible transition loop
   + pengine: cl#5069 - Honor 'on-fail=ignore' even when operation is disabled.
   + pengine: cl#5070 - Allow influence of promotion score when multistate rsc is left hand of colocation
   + pengine: cl#5072 - Fixes monitor op stopping after rsc promotion.
   + pengine: cl#5072 - Fixes pengine regression test failures
   + pengine: Correctly set the status for nodes not intended to run Pacemaker
   + pengine: Do not append instance numbers to anonymous clones
   + pengine: Fix failcount expiration
   + pengine: Fix memory leaks found by valgrind
   + pengine: Fix use-after-free and use-of-NULL errors detected by coverity
   + pengine: Fixes use of colocation scores other than +/- INFINITY
   + pengine: Improve detection of rejoining nodes
   + pengine: Prevent use-of-NULL when tracing is enabled
   + pengine: Stonith resources are allowed to start even if their probes haven't completed on partially active nodes
   + services: New class called 'service' which expands to the correct (LSB/systemd/upstart) standard
   + services: Support Asynchronous systemd/upstart actions
   + Tools: crm_shadow - Bug cl#5062 - Correctly set argv[0] when forking a shell process
   + Tools: crm_report: Always include system logs (if we can find them)
 
 * Wed Mar 28 2012 Andrew Beekhof <andrew@beekhof.net> Pacemaker-1.1.7-1
 - Update source tarball to revision: bc7ff2c
 - Statistics:
   Changesets: 513
   Diff:       1171 files changed, 90472 insertions(+), 19368 deletions(-)
 
 - Changes since Pacemaker-1.1.6.1
   + ais: Prepare for corosync versions using IPC from libqb
   + cib: Correctly shutdown in the presence of peers without relying on timers
   + cib: Don't halt disk writes if the previous digest is missing
   + cib: Determine when there are no peers to respond to our shutdown request and exit
   + cib: Ensure no additional messages are processed after we begin terminating
   + Cluster: Hook up the callbacks to the corosync quorum notifications
   + Core: basename() may modify its input, do not pass in a constant
   + Core: Bug cl#5016 - Prevent failures in recurring ops from being lost
   + Core: Bug rhbz#800054 - Correctly retrieve heartbeat uuids
   + Core: Correctly determine when an XML file should be decompressed
   + Core: Correctly track the length of a string without reading from uninitialized memory (valgrind)
   + Core: Ensure signals are handled eventually in the absence of timer sources or IPC messages
   + Core: Prevent use-of-NULL in crm_update_peer()
   + Core: Strip text nodes from on disk xml files
   + Core: Support libqb for logging
   + corosync: Consistently set the correct uuid with get_node_uuid()
   + Corosync: Correctly disconnect from corosync variants
   + Corosync: Correctly extract the node id from membership updates
   + corosync: Correctly infer lost members from the quorum API
   + Corosync: Default to using the nodeid as the node's uuid (instead of uname)
   + corosync: Ensure we catch nodes that leave the membership, even if the ringid doesn't change
   + corosync: Hook up CPG membership
   + corosync: Relax a development assert and gracefully handle the error condition
   + corosync: Remove deprecated member of the CFG API
   + corosync: Treat CS_ERR_QUEUE_FULL the same as CS_ERR_TRY_AGAIN
   + corosync: Unset the process list when nodes disappear on us
   + crmd: Also purge fencing results when we enter S_NOT_DC
   + crmd: Bug cl#5015 - Remove the failed operation as well as the resulting fail-count and last-failure attributes
   + crmd: Correctly determine when a node can suicide with fencing
   + crmd: Election - perform the age comparison only once
   + crmd: Fast-track shutdown if we couldn't request it via attrd
   + crmd: Leave it up to the PE to decide which ops can/cannot be reload
   + crmd: Prevent use-after-free when calling delete_resource due to CRM_OP_REPROBE
   + crmd: Supply format arguments in the correct order
   + fencing: Add missing format parameter
   + fencing: Add the fencing topology section to the 1.1 configuration schema
   + fencing: fence_legacy - Drop spurious host argument from status query
   + fencing: fence_legacy - Ensure port is available as an environment variable when calling monitor
   + fencing: fence_pcmk - don't block if nothing is specified on stdin
   + fencing: Fix log format error
   + fencing: Fix segfault caused by passing garbage to dlsym()
   + fencing: Fix use-of-NULL in process_remote_stonith_query()
   + fencing: Fix use-of-NULL when listing installed devices
   + fencing: Implement support for advanced fencing topologies: eg. kdump || (network && disk) || power
   + fencing: More gracefully handle failed 'list' operations for devices that only support a single connection
   + fencing: Prevent duplicate free when listing devices
   + fencing: Prevent uninitialized pointers being passed to free
   + fencing: Prevent use-after-free, we may need the query result for subsequent operations
   + fencing: Provide enough data to construct an entry in the node's fencing history
   + fencing: Standardize on /one/ method for clients to request members be fenced
   + fencing: Suppress errors when listing all registered devices
   + mcp: corosync_cfg_state_track was removed from the corosync API, luckily we didn't use it for anything
   + mcp: Do not specify a WorkingDirectory in the systemd unit file - startup fails if it's not available
   + mcp: Set the HA_quorum_type env variable consistently with our corosync plugin
   + mcp: Shut down if one of our child processes can/should not be respawned
   + pengine: Bug cl#5000 - Ensure ordering is preserved when depending on partial sets
   + pengine: Bug cl#5028 - Unmanaged services should block shutdown unless in maintenance mode
   + pengine: Bug cl#5038 - Prevent restart of anonymous clones when clone-max decreases
   + pengine: Bug cl#5007 - Fixes use of colocation constraints with multi-state resources
   + pengine: Bug cl#5014 - Prevent asymmetrical order constraints from causing resource stops
   + pengine: Bug cl#5000 - Implements ability to create rsc_order constraint sets such that A can start after B or C has started.
   + pengine: Correctly migrate a resource that has just migrated
   + pengine: Correct return from error path
   + pengine: Detect reloads of previously migrated resources
   + pengine: Ensure post-migration stop actions occur before node shutdown
   + pengine: Log as loudly as possible when we cannot shut down a cluster node
   + pengine: Reload of a resource no longer causes a restart of dependent resources
   + pengine: Support limiting the number of concurrent live migrations
   + pengine: Support referencing templates in constraints
   + pengine: Support of referencing resource templates in resource sets
   + pengine: Support to make tickets standby for relinquishing tickets gracefully
   + stonith: A "start" operation of a stonith resource does a "monitor" on the device beyond registering it
   + stonith: Bug rhbz#745526 - Ensure stonith_admin actually gets called by fence_pcmk
   + Stonith: Ensure all nodes receive and deliver notifications of the manual override
   + stonith: Fix the stonith timeout issue (cl#5009, bnc#727498)
   + Stonith: Implement a manual override for when nodes are known to be safely off
   + Tools: Bug cl#5003 - Prevent use-after-free in crm_simulate
   + Tools: crm_mon - Support to display tickets (based on Yuusuke Iida's work)
   + Tools: crm_simulate - Support to grant/revoke/standby/activate tickets from the new ticket state section
   + Tools: Implement crm_node functionality for native corosync
   + Fix a number of potential problems reported by coverity
 
 * Wed Aug 31 2011 Andrew Beekhof <andrew@beekhof.net> 1.1.6-1
 - Update source tarball to revision: 676e5f25aa46 tip
 - Statistics:
   Changesets: 376
   Diff:       1761 files changed, 36259 insertions(+), 140578 deletions(-)
 
 - Changes since Pacemaker-1.1.5
   + ais: check for retryable errors when dispatching AIS messages
   + ais: Correctly disconnect from Corosync and Cman based clusters
   + ais: Followup to previous patch - Ensure we drain the corosync queue of messages when Glib tells us there is input
   + ais: Handle IPC error before checking for NULL data (bnc#702907)
   + cib: Check the validation version before adding the originator details of a CIB change
   + cib: Remove disconnected remote connections from mainloop
   + cman: Correctly override existing fenced operations
   + cman: Dequeue all the cman emitted events and not only the first one leaving the others in the event's queue.
   + cman: Don't call fenced_join and fenced_leave when notifying cman of a fencing event.
   + cman: We need to run the crmd as root for CMAN so that we can ACK fencing operations
   + Core: Cancelled and pending operations do not count as failed
   + Core: Ensure there is sufficient space for EOS when building short-form option strings
   + Core: Fix variable expansion in pkg-config files
   + Core: Partial revert of accidental commit in previous patch
   + Core: Use dlopen to load heartbeat libraries on-demand
   + crmd: Bug lf#2509 - Watch for config option changes from the CIB even if we're not the DC
   + crmd: Bug lf#2528 - Introduce a slight delay when creating a transition to allow attrd time to perform its updates
   + crmd: Bug lf#2559 - Fail actions that were scheduled for a failed/fenced node
   + crmd: Bug lf#2584 - Allow nodes to fence themselves if they're the last one standing
   + crmd: Bug lf#2632 - Correctly handle nodes that return faster than stonith
   + crmd: Cancel timers for actions that were pending on dead nodes
   + crmd: Catch fence operations that claim to succeed but did not really
   + crmd: Do not wait for actions that were pending on dead nodes
   + crmd: Ensure we do not attempt to perform action on failed nodes
   + crmd: Prevent use-of-NULL by g_hash_table_iter_next()
   + crmd: Recurring actions shouldn't cause the last non-recurring action to be forgotten
   + crmd: Store only the last and last failed operation in the CIB
   + mcp: dirname() modifies the input path - pass in a copy of the logfile path
   + mcp: Enable stack detection logic instead of forcing 'corosync'
   + mcp: Fix spelling mistake in systemd service script that prevents shutdown
   + mcp: Shut down if corosync becomes unavailable
   + mcp: systemd control file is now functional
   + pengine: Before migrating a utilization-using resource to a node, take off the load which will no longer run there (lf#2599, bnc#695440)
   + pengine: Before migrating a utilization-using resource to a node, take off the load which will no longer run there (regression tests) (lf#2599, bnc#695440)
   + pengine: Bug lf#2574 - Prevent shuffling by choosing the correct clone instance to stop
   + pengine: Bug lf#2575 - Use uname for migration variables, id is a UUID on heartbeat
   + pengine: Bug lf#2581 - Avoid group restart when clone (re)starts on an unrelated node
   + pengine: Bug lf#2613, lf#2619 - Group migration after failures and non-default utilization policies
   + pengine: Bug suse#707150 - Prevent services being active if dependencies on clones are not satisfied
   + pengine: Correctly recognise which recurring operations are currently active
   + pengine: Demote from Master does not clear previous errors
   + pengine: Ensure restarts due to definition changes cause the start action to be re-issued not probes
   + pengine: Ensure role is preserved for unmanaged resources
   + pengine: Ensure unmanaged resources have the correct role set so the correct monitor operation is chosen
   + pengine: Fix memory leak for re-allocated resources reported by valgrind
   + pengine: Implement cluster ticket and deadman
   + pengine: Implement resource template
   + pengine: Correctly determine the state of multi-state resources with a partial operation history
   + pengine: Only allocate master/slave resources once
   + pengine: Partial revert of 'Minor code cleanup CS: cf6bca32376c On: 2011-08-15'
   + pengine: Resolve memory leak reported by valgrind
   + pengine: Restore the ability to save inputs to disk
   + Shell: implement -w,--wait option to wait for the transition to finish
   + Shell: repair template list command
   + Shell: set of commands to examine logs, reports, etc
   + Stonith: Consolidate pcmk_host_map into run_stonith_agent so that it is applied consistently
   + Stonith: Deprecate pcmk_arg_map for the saner pcmk_host_argument
   + Stonith: Fix use-of-NULL by g_hash_table_lookup
   + Stonith: Improved pcmk_host_map parsing
   + Stonith: Prevent use-of-NULL by g_hash_table_lookup
   + Stonith: Prevent use-of-NULL when no Linux-HA stonith agents are present
   + stonith: Add missing entries to stonith_error2string()
   + Stonith: Correctly finish sending agent options if the initial write is interrupted
   + stonith: Correctly handle synchronous calls
   + stonith: Coverity - Correctly construct result list for the query API call
   + stonith: Coverity - Remove badly constructed memory allocation from the query API call
   + stonith: Ensure completed operations are recorded as such in the history
   + Stonith: Ensure device parameters are passed to the daemon during registration
   + stonith: Fix use-of-NULL in stonith_api_device_list()
   + stonith: stonith_admin - Prevent use of uninitialized pointer by --history command
   + Tools: Bug lf#2528 - Make progress when attrd_updater is called repeatedly within the dampen interval but with the same value
   + Tools: crm_report - Correctly extract data from the local node
   + Tools: crm_report - Remove newlines when detecting the node list
   + Tools: crm_report - Repair the ability to extract data from the local machine
   + Tools: crm_report - Report on all detected backtraces
 
 * Fri Feb 11 2011 Andrew Beekhof <andrew@beekhof.net> 1.1.5-1
 - Update source tarball to revision: baad6636a053
 - Statistics:
   Changesets: 184
   Diff:       605 files changed, 46103 insertions(+), 26417 deletions(-)
 
 - Changes since Pacemaker-1.1.4
   + Add the ability to delegate sub-sections of the cluster to non-root users via ACLs
 	  Needs to be enabled at compile time, not enabled by default.
   + ais: Bug lf#2550 - Report failed processes immediately
   + Core: Prevent recently introduced use-after-free in replace_xml_child()
   + Core: Reinstate the logic that skips past non-XML_ELEMENT_NODE children
   + Core: Remove extra calls to xmlCleanupParser resulting in use-after-free
   + Core: Repair reference to child-of-child after removal of xml_child_iter_filter from get_message_xml()
   + crmd: Bug lf#2545 - Ensure notify variables are accurate for stop operations
   + crmd: Cancel recurring operations while we're still connected to the lrmd
   + crmd: Reschedule the PE_START action if it's not already running when we try to use it
   + crmd: Update failcount for failed promote and demote operations
   + pengine: Bug lf#2445 - Avoid relying on stickiness for stable clone placement
   + pengine: Bug lf#2445 - Do not override configured clone stickiness values
   + pengine: Bug lf#2493 - Don't imply colocation requirements when applying ordering constraints with clones
   + pengine: Bug lf#2495 - Prevent segfault by validating the contents of ordering sets
   + pengine: Bug lf#2508 - Correctly reconstruct the status of anonymous cloned groups
   + pengine: Bug lf#2518 - Avoid spamming the logs with errors for orphan resources
   + pengine: Bug lf#2544 - Prevent unstable clone placement by factoring in the current node's score before all others
   + pengine: Bug lf#2554 - target-role alone is not sufficient to promote resources
   + pengine: Correct target_rc for probes of inactive resources (fix regression introduced by cs:ac3f03006e95)
   + pengine: Ensure that fencing has completed for stop actions on stonith-dependent resources (lf#2551)
   + pengine: Only update the node's promotion score if the resource is active there
   + pengine: Only use the promotion score from the current clone instance
   + pengine: Prevent use-of-NULL resulting from variable shadowing spotted by Coverity
   + pengine: Prevent use-of-NULL when there is status for an undefined node
   + pengine: Prevent use-after-free resulting from unintended recursion when choosing a node to promote master/slave resources
   + Shell: don't create empty optional sections (bnc#665131)
   + Stonith: Teach stonith_admin to automagically obtain the current node attributes for the target from the CIB
   + tools: Bug lf#2527 - Prevent use-of-NULL in crm_simulate
   + Tools: Prevent crm_resource commands from being lost due to the use of cib_scope_local
 
 * Wed Oct 20 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.4-1
 - Update source tarball to revision: 75406c3eb2c1 tip
 - Statistics:
   Changesets: 169
   Diff:       772 files changed, 56172 insertions(+), 39309 deletions(-)
 
 - Changes since Pacemaker-1.1.3
   + Italian translation of Clusters from Scratch
   + Significant performance enhancements to the Policy Engine and CIB
   + cib: Bug lf#2506 - Don't remove clients when notifications fail, they might just be too big
   + cib: Drop invalid/failed connections from the client hashtable
   + cib: Ensure all diffs sent to peers have sufficient ordering information
   + cib: Ensure non-change diffs can preserve the ordering on the other side
   + cib: Fix the feature set check
   + cib: Include version information on our synthesised diffs when nothing changed
   + cib: Optimize the way we detect group/set ordering changes - 15% speedup
   + cib: Prevent false detection of config updates with the new diff format
   + cib: Reduce unnecessary copying when comparing xml objects
   + cib: Repair the processing of updates sent from peer nodes
   + cib: Revert part of a recent commit that purged still valid connections
   + cib: The feature set version check is only valid if the current value is non-NULL
   + Core: Actually removing diff markers is necessary
   + Core: Bug lf#2506 - Drop the compression limit because Heartbeat's IPC code sucks
   + Core: Cache Relax-NG schemas - profiling indicates many cycles are wasted needlessly re-parsing them
   + Core: Correctly compare against crm_log_level in the logging macros
   + Core: Correctly extract the version details from a diff
   + Core: Correctly hook up the RNG schema cache
   + Core: Correctly use lazy_xml_sort() for v2 digests
   + Core: Don't compress large payload elements unless we're approaching message limits
   + Core: Don't insert empty ID tags when applying diffs
   + Core: Enable the improved v2 digests
   + Core: Ensure ordering is preserved when applying diffs
   + Core: Fix the CRM_CHECK macro
   + Core: Modify the v2 digest algorithm so that some fields are sorted
   + Core: Prevent use-after-free when creating a CIB update for a timed out action
   + Core: Prevent use-of-NULL when cleaning up RelaxNG data structures
   + Core: Provide significant performance improvements by implementing versioned diffs and digests
   + crmd: All pending operations should be recorded, even recurring ones with high start delays
   + crmd: Don't abort transitions when probes are completed on a node
   + crmd: Don't hide stop events that time out - allowing faster recovery in the presence of overloaded hosts
   + crmd: Ensure the CIB is always writable on the DC by removing a timing hole
   + crmd: Include the correct transition details for timed out operations
   + crmd: Prevent use of NULL by making copies of the operation's hash table
   + crmd: There's no need to check the cib version from the 'added' part of diff updates
   + crmd: Use the supplied timeout for stop actions
   + mcp: Ensure valgrind is able to log its output somewhere
   + mcp: Use 99/01 for the start/stop sequence to avoid problems with services (such as libvirtd) started by init - Patch from Vladislav Bogdanov
   + pengine: Ensure fencing of the DC precedes the STONITH_DONE operation
   + pengine: Fix memory leak introduced as part of the conversion to GHashTables
   + pengine: Fix memory leak when processing completed migration actions
   + pengine: Fix typo leading to use-of-NULL in the new ordering code
   + pengine: Free memory in recently introduced helper function
   + pengine: lf#2478 - Implement improved handling and recovery of atomic resource migrations
   + pengine: Obtain massive speedup by prepending to the list of ordering constraints (which can grow quite large)
   + pengine: Optimize the logic for deciding which non-grouped anonymous clone instances to probe for
   + pengine: Prevent clones from being stopped because resources colocated with them cannot be active
   + pengine: Try to ensure atomic migration ops occur within a single transition
   + pengine: Use hashtables instead of linked lists for performance sensitive datastructures
   + pengine: Use the original digest algorithm for parameter lists
   + stonith: cleanup children on timeout in fence_legacy
   + Stonith: Fix two memory leaks
   + Tools: crm_shadow - Avoid replacing the entire configuration (including status)
 
 * Tue Sep 21 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.3-1
 - Update source tarball to revision: e3bb31c56244 tip
 - Statistics:
   Changesets: 352
   Diff:       481 files changed, 14130 insertions(+), 11156 deletions(-)
 
 - Changes since Pacemaker-1.1.2.1
   + ais: Bug lf#2401 - Improved processing when the peer crmd processes join/leave
   + ais: Correct the logic for connecting to plugin based clusters
   + ais: Do not supply a process list in mcp-mode
   + ais: Drop support for whitetank in the 1.1 release series
   + ais: Get an initial dump of the node membership when connecting to quorum-based clusters
   + ais: Guard against saturated cpg connections
   + ais: Handle CS_ERR_TRY_AGAIN in more cases
   + ais: Move the code for finding uid before the fork so that the child does no logging
   + ais: Never allow quorum plugins to affect connection to the pacemaker plugin
   + ais: Sign everyone up for peer process updates, not just the crmd
   + ais: The cluster type needs to be set before initializing classic openais connections
   + cib: Also free query result for xpath operations that return more than one hit
   + cib: Attempt to resolve memory corruption when forking a child to write the cib to disk
   + cib: Correctly free memory when writing out the cib to disk
   + cib: Fix the application of unversioned diffs
   + cib: Remove old developmental error logging
   + cib: Restructure the 'valid peer' check for deciding which instructions to ignore
   + cman: Correctly process membership/quorum changes from the pcmk plugin. Allow other message types through untouched
   + cman: Filter directed messages not intended for us
   + cman: Grab the initial membership when we connect
   + cman: Keep the list of peer processes up-to-date
   + cman: Make sure our common hooks are called after a cman membership update
   + cman: Make sure we can compile without cman present
   + cman: Populate sender details for cpg messages
   + cman: Update the ringid for cman based clusters
   + Core: Correctly unpack HA_Messages containing multiple entries with the same name
   + Core: crm_count_member() should only track nodes that have the full stack up
   + Core: New developmental logging system inspired by the kernel and a PoC from Lars Ellenberg
   + crmd: All nodes should see status updates, not just the DC
   + crmd: Allow non-DC nodes to clear failcounts too
   + crmd: Base DC election on process relative uptime
   + crmd: Bug lf#2439 - cancel_op() can also return HA_RSCBUSY
   + crmd: Bug lf#2439 - Handle asynchronous notification of resource deletion events
   + crmd: Bug lf#2458 - Ensure stop actions always have the relevant resource attributes
   + crmd: Disable age as a criterion for cman based clusters, it's not reliable enough
   + crmd: Ensure we activate the DC timer if we detect an alternate DC
   + crmd: Factor the nanosecond component of process uptime in elections
   + crmd: Fix assertion failure when performing async resource failures
   + crmd: Fix handling of async resource deletion results
   + crmd: Include the action for crm graph operations
   + crmd: Make sure the membership cache is accurate after a successful fencing operation
   + crmd: Make sure we always poke the FSA after a transition to clear any TE_HALT actions
   + crmd: Offer crm-level membership once the peer starts the crmd process
   + crmd: Only need to request quorum update for plugin based clusters
   + crmd: Prevent assertion failure for stop actions resulting from cs: 3c0bc17c6daf
   + crmd: Prevent everyone from losing DC elections by correctly initializing all relevant variables
   + crmd: Prevent segmentation fault
   + crmd: several fixes for async resource delete (thanks to beekhof)
   + crmd: Use the correct define/size for lrm resource IDs
   + Introduce two new cluster types 'cman' and 'corosync', replaces 'quorum_provider' concept
   + mcp: Add missing headers when built without heartbeat support
   + mcp: Correctly initialize the string containing the list of active daemons
   + mcp: Fix macro expansion in init script
   + mcp: Fix the expansion of the pid file in the init script
   + mcp: Handle CS_ERR_TRY_AGAIN when connecting to libcfg
   + mcp: Make sure we can compile the mcp without cman present
   + mcp: New master control process for (re)spawning pacemaker daemons
   + mcp: Read config early so we can re-initialize logging asap if daemonizing
   + mcp: Rename the mcp binary to pacemakerd and create a 'pacemaker' init script
   + mcp: Resend our process list after every CPG change
   + mcp: Tell chkconfig we need to shut down early on
   + pengine: Avoid creating invalid ordering constraints for probes that are not needed
   + pengine: Bug lf#1959 - Failed unmanaged resources should not prevent other services from shutting down
   + pengine: Bug lf#2422 - Ordering dependencies on partially active groups not observed properly
   + pengine: Bug lf#2424 - Use notify operation definition if it exists in the configuration
   + pengine: Bug lf#2433 - No services should be stopped until probes finish
   + pengine: Bug lf#2453 - Enforce clone ordering in the absence of colocation constraints
   + pengine: Bug lf#2476 - Repair on-fail=block for groups and primitive resources
   + pengine: Correctly detect when there is a real failcount that expired and needs to be cleared
   + pengine: Correctly handle pseudo action creation
   + pengine: Correctly order clone startup after group/clone start
   + pengine: Correct use-after-free introduced in the prior patch
   + pengine: Do not demote resources because something that requires it can not run
   + pengine: Fix colocation for interleaved clones
   + pengine: Fix colocation with partially active groups
   + pengine: Fix potential use-after-free defect from coverity
   + pengine: Fix previous merge
   + pengine: Fix use-after-free in order_actions() reported by valgrind
   + pengine: Make the current data set a global variable so it does not need to be passed around everywhere
   + pengine: Prevent endless loop when looking for operation definitions in the configuration
   + pengine: Prevent segfault by ensuring the arguments to do_calculations() are initialized
   + pengine: Rewrite the ordering constraint logic for simplicity, clarity and maintainability
   + pengine: Wait until stonith is available, do not fall back to shutdown for nodes requesting termination
   + Resolve coverity RESOURCE_LEAK defects
   + Shell: Complete the transition to using crm_attribute instead of crm_failcount and crm_standby
   + stonith: Advertise stonith-ng options in the metadata
   + stonith: Bug lf#2461 - Prevent segfault by not looking up operations if the hashtable has not been initialized yet
   + stonith: Bug lf#2473 - Add the timeout at the top level where the daemon is looking for it
   + Stonith: Bug lf#2473 - Ensure stonith operations complete within the timeout and are terminated if they run too long
   + stonith: Bug lf#2473 - Ensure timeouts are included for fencing operations
   + stonith: Bug lf#2473 - Gracefully handle remote operations that arrive late (after we have done notifications)
   + stonith: Correctly parse pcmk_host_list parameters that appear on a single line
   + stonith: Map poweron/poweroff back to on/off expected by the stonith tool from cluster-glue
   + stonith: pass the configuration to the stonith program via environment variables (bnc#620781)
   + Stonith: Use the timeout specified by the user
   + Support starting plugin-based Pacemaker clusters with the MCP as well
   + Tools: Bug lf#2456 - Fix assertion failure in crm_resource
   + tools: crm_node - Repair the ability to connect to openais based clusters
   + tools: crm_node - Use the correct short option for --cman
   + tools: crm_report - corosync.conf won't necessarily contain the text 'pacemaker' anymore
   + Tools: crm_simulate - Fix use-after-free when terminating
   + tools: crm_simulate - Resolve coverity USE_AFTER_FREE defect
   + Tools: Drop the 'pingd' daemon and resource agent in favor of ocf:pacemaker:ping
   + Tools: Fix recently introduced use-of-NULL
   + Tools: Fix use-after-free defects from coverity
 
 * Wed May 12 2010 Andrew Beekhof <andrew@beekhof.net> 1.1.2-1
 - Update source tarball to revision: c25c972a25cc tip
 - Statistics:
   Changesets: 339
   Diff:       708 files changed, 37918 insertions(+), 10584 deletions(-)
 - Changes since Pacemaker-1.1.1
   + ais: Do not count votes from offline nodes and calculate current votes before sending quorum data
   + ais: Ensure the list of active processes sent to clients is always up-to-date
   + ais: Look for the correct conf variable for turning on file logging
   + ais: Need to find a better and thread-safe way to set core_uses_pid. Disable for now.
   + ais: Use the threadsafe version of getpwnam
   + Core: Bump the feature set due to the new failcount expiry feature
   + Core: fix memory leaks exposed by valgrind
   + Core: Bug lf#2414 - Prevent use-after-free reported by valgrind when doing xpath based deletions
   + crmd: Bug lf#2414 - Prevent use-after-free of the PE connection after it dies
   + crmd: Bug lf#2414 - Prevent use-after-free of the stonith-ng connection
   + crmd: Bug lf#2401 - Improved detection of partially active peers
   + crmd: Bug lf#2379 - Ensure the cluster terminates when the PE is not available
   + crmd: Do not allow the target_rc to be misused by resource agents
   + crmd: Do not ignore action timeouts based on FSA state
   + crmd: Ensure we don't get stuck in S_PENDING if we lose an election to someone that never talks to us again
   + crmd: Fix memory leaks exposed by valgrind
   + crmd: Remove race condition that could lead to multiple instances of a clone being active on a machine
   + crmd: Send erase_status_tag() calls to the local CIB when the DC is fenced, since there is no DC to accept them
   + crmd: Use global fencing notifications to prevent secondary fencing operations of the DC
   + pengine: Bug lf#2317 - Avoid needless restart of primitive depending on a clone
   + pengine: Bug lf#2361 - Ensure clones observe mandatory ordering constraints if the LHS is unrunnable
   + pengine: Bug lf#2383 - Combine failcounts for all instances of an anonymous clone on a host
   + pengine: Bug lf#2384 - Fix intra-set colocation and ordering
   + pengine: Bug lf#2403 - Enforce mandatory promotion (colocation) constraints
   + pengine: Bug lf#2412 - Correctly find clone instances by their prefix
   + pengine: Do not be so quick to pull the trigger on nodes that are coming up
   + pengine: Fix memory leaks exposed by valgrind
   + pengine: Rewrite native_merge_weights() to avoid use-after-free
   + Shell: Bug bnc#590035 - always reload status if working with the cluster
   + Shell: Bug bnc#592762 - Default to using the status section from the live CIB
   + Shell: Bug lf#2315 - edit multiple meta_attributes sets in resource management
   + Shell: Bug lf#2221 - enable comments
   + Shell: Bug bnc#580492 - implement new cibstatus interface and commands
   + Shell: Bug bnc#585471 - new cibstatus import command
   + Shell: check timeouts also against the default-action-timeout property
   + Shell: new configure filter command
   + Tools: crm_mon - fix memory leaks exposed by valgrind
 
 * Tue Feb 16 2010 Andrew Beekhof <andrew@beekhof.net> - 1.1.1-1
 - First public release of Pacemaker 1.1
 - Package reference documentation in a doc subpackage
 - Move cts into a subpackage so that it can be easily consumed by others
 - Update source tarball to revision: 17d9cd4ee29f
   + New stonith daemon that supports global notifications
   + Service placement influenced by the physical resources
   + A new tool for simulating failures and the cluster’s reaction to them
   + Ability to serialize an otherwise unrelated set of resource actions (eg. Xen migrations)
 
 * Mon Jan 18 2010 Andrew Beekhof <andrew@beekhof.net> - 1.0.7-1
 - Update source tarball to revision: 2eed906f43e9 (stable-1.0) tip
 - Statistics:
       Changesets:      193
       Diff:            220 files changed, 15933 insertions(+), 8782 deletions(-)
 - Changes since 1.0.5-4
   + pengine: Bug 2213 - Ensure groups process location constraints so that clone-node-max works for cloned groups
   + pengine: Bug lf#2153 - non-clones should not restart when clones stop/start on other nodes
   + pengine: Bug lf#2209 - Clone ordering should be able to prevent startup of dependent clones
   + pengine: Bug lf#2216 - Correctly identify the state of anonymous clones when deciding when to probe
   + pengine: Bug lf#2225 - Operations that require fencing should wait for 'stonith_complete' not 'all_stopped'.
   + pengine: Bug lf#2225 - Prevent clone peers from stopping while another instance is (potentially) being fenced
   + pengine: Correctly anti-colocate with a group
   + pengine: Correctly unpack ordering constraints for resource sets to avoid graph loops
   + Tools: crm: load help from crm_cli.txt
   + Tools: crm: resource sets (bnc#550923)
   + Tools: crm: support for comments (LF 2221)
   + Tools: crm: support for description attribute in resources/operations (bnc#548690)
   + Tools: hb2openais: add EVMS2 CSM processing (and other changes) (bnc#548093)
   + Tools: hb2openais: do not allow empty rules, clones, or groups (LF 2215)
   + Tools: hb2openais: refuse to convert pure EVMS volumes
   + cib: Ensure the loop for login message terminates
   + cib: Finally fix reliability of receiving large messages over remote plaintext connections
   + cib: Fix remote notifications
   + cib: For remote connections, default to CRM_DAEMON_USER since that's the only one that the cib can validate the password for using PAM
   + cib: Remote plaintext - Retry sending parts of the message that did not fit the first time
   + crmd: Ensure batch-limit is correctly enforced
   + crmd: Ensure we have the latest status after a transition abort
   + (bnc#547579,547582): Tools: crm: status section editing support
   + shell: Add allow-migrate as allowed meta-attribute (bnc#539968)
   + Medium: Build: Do not automatically add -L/lib, it could cause 64-bit arches to break
   + Medium: pengine: Bug lf#2206 - rsc_order constraints always use score at the top level
   + Medium: pengine: Only complain about target-role=master for non m/s resources
   + Medium: pengine: Prevent non-multistate resources from being promoted through target-role
   + Medium: pengine: Provide a default action for resource-set ordering
   + Medium: pengine: Silently fix requires=fencing for stonith resources so that it can be set in op_defaults
   + Medium: Tools: Bug lf#2286 - Allow the shell to accept template parameters on the command line
   + Medium: Tools: Bug lf#2307 - Provide a way to determine the nodeid of past cluster members
   + Medium: Tools: crm: add update method to template apply (LF 2289)
   + Medium: Tools: crm: direct RA interface for ocf class resource agents (LF 2270)
   + Medium: Tools: crm: direct RA interface for stonith class resource agents (LF 2270)
   + Medium: Tools: crm: do not add score which does not exist
   + Medium: Tools: crm: do not consider warnings as errors (LF 2274)
   + Medium: Tools: crm: do not remove sets which contain id-ref attribute (LF 2304)
   + Medium: Tools: crm: drop empty attributes elements
   + Medium: Tools: crm: exclude locations when testing for pathological constraints (LF 2300)
   + Medium: Tools: crm: fix exit code on single shot commands
   + Medium: Tools: crm: fix node delete (LF 2305)
   + Medium: Tools: crm: implement -F (--force) option
   + Medium: Tools: crm: rename status to cibstatus (LF 2236)
   + Medium: Tools: crm: revisit configure commit
   + Medium: Tools: crm: stay in crm if user specified level only (LF 2286)
   + Medium: Tools: crm: verify changes on exit from the configure level
   + Medium: ais: Some clients such as gfs_controld want a cluster name, allow one to be specified in corosync.conf
   + Medium: cib: Clean up logic for receiving remote messages
   + Medium: cib: Create valid notification control messages
   + Medium: cib: Indicate where the remote connection came from
   + Medium: cib: Send password prompt to stderr so that stdout can be redirected
   + Medium: cts: Fix rsh handling when stdout is not required
   + Medium: doc: Fill in the section on removing a node from an AIS-based cluster
   + Medium: doc: Update the docs to reflect the 0.6/1.0 rolling upgrade problem
   + Medium: doc: Use Publican for docbook based documentation
   + Medium: fencing: stonithd: add metadata for stonithd instance attributes (and support in the shell)
   + Medium: fencing: stonithd: ignore case when comparing host names (LF 2292)
   + Medium: tools: Make crm_mon functional with remote connections
   + Medium: xml: Add stopped as a supported role for operations
   + Medium: xml: Bug bnc#552713 - Treat node unames as text fields not IDs
   + Medium: xml: Bug lf#2215 - Create an always-true expression for empty rules when upgrading from 0.6
 
 * Thu Oct 29 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-4
 - Include the fixes from CoroSync integration testing
 - Move the resource templates - they are not documentation
 - Ensure documentation is placed in a standard location
 - Exclude documentation that is included elsewhere in the package
 
 - Update the tarball from upstream to version ee19d8e83c2a
   + cib: Correctly clean up when both plaintext and tls remote ports are requested
   + pengine: Bug bnc#515172 - Provide better defaults for lt(e) and gt(e) comparisons
   + pengine: Bug lf#2197 - Allow master instances placement to be influenced by colocation constraints
   + pengine: Make sure promote/demote pseudo actions are created correctly
   + pengine: Prevent target-role from promoting more than master-max instances
   + ais: Bug lf#2199 - Prevent expected-quorum-votes from being populated with garbage
   + ais: Prevent deadlock - don't try to release IPC message if the connection failed
   + cib: For validation errors, send back the full CIB so the client can display the errors
   + cib: Prevent use-after-free for remote plaintext connections
   + crmd: Bug lf#2201 - Prevent use-of-NULL when running heartbeat
 
 * Wed Oct 13 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-3
 - Update the tarball from upstream to version 38cd629e5c3c
   + Core: Bug lf#2169 - Allow dtd/schema validation to be disabled
   + pengine: Bug lf#2106 - Not all anonymous clone children are restarted after configuration change
   + pengine: Bug lf#2170 - stop-all-resources option had no effect
   + pengine: Bug lf#2171 - Prevent groups from starting if they depend on a complex resource which can not
   + pengine: Disable resource management if stonith-enabled=true and no stonith resources are defined
   + pengine: do not include master score if it would prevent allocation
   + ais: Avoid excessive load by checking for dead children every 1s (instead of 100ms)
   + ais: Bug rh#525589 - Prevent shutdown deadlocks when running on CoroSync
   + ais: Gracefully handle changes to the AIS nodeid
   + crmd: Bug bnc#527530 - Wait for the transition to complete before leaving S_TRANSITION_ENGINE
   + crmd: Prevent use-after-free with LOG_DEBUG_3
   + Medium: xml: Mask the "symmetrical" attribute on rsc_colocation constraints (bnc#540672)
   + Medium (bnc#520707): Tools: crm: new templates ocfs2 and clvm
   + Medium: Build: Invert the disable ais/heartbeat logic so that --without (ais|heartbeat) is available to rpmbuild
   + Medium: pengine: Bug lf#2178 - Indicate unmanaged clones
   + Medium: pengine: Bug lf#2180 - Include node information for all failed ops
   + Medium: pengine: Bug lf#2189 - Incorrect error message when unpacking simple ordering constraint
   + Medium: pengine: Correctly log resources that would like to start but can not
   + Medium: pengine: Stop ptest from logging to syslog
   + Medium: ais: Include version details in plugin name
   + Medium: crmd: Requery the resource metadata after every start operation
 
 * Fri Aug 21 2009 Tomas Mraz <tmraz@redhat.com> - 1.0.5-2.1
 - rebuilt with new openssl
 
 * Wed Aug 19 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-2
 - Add versioned perl dependency as specified by
     https://fedoraproject.org/wiki/Packaging/Perl#Packages_that_link_to_libperl
 - No longer remove RPATH data, it prevents us finding libperl.so and no other
   libraries were being hardcoded
 - Compile in support for heartbeat
 - Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements
   depending on which stacks are supported
 
 * Mon Aug 17 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-1
 - Add dependency on resource-agents
 - Use the version of the configure macro that supplies --prefix, --libdir, etc
 - Update the tarball from upstream to version 462f1569a437 (Pacemaker 1.0.5 final)
   + Tools: crm_resource - Advertise --move instead of --migrate
   + Medium: Extra: New node connectivity RA that uses system ping and attrd_updater
-  + Medium: crmd: Note that dc-deadtime can be used to mask the brokeness of some switches
+  + Medium: crmd: Note that dc-deadtime can be used to mask the brokenness of some switches
 
 * Tue Aug 11 2009 Ville Skyttä <ville.skytta@iki.fi> - 1.0.5-0.7.c9120a53a6ae.hg
 - Use bzipped upstream tarball.
 
 * Wed Jul  29 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.6.c9120a53a6ae.hg
 - Add back missing build auto* dependencies
 - Minor cleanups to the install directive
 
 * Tue Jul  28 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.5.c9120a53a6ae.hg
 - Add a leading zero to the revision when alphatag is used
 
 * Tue Jul  28 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.5-0.4.c9120a53a6ae.hg
 - Incorporate the feedback from the cluster-glue review
 - Realistically, the version is a 1.0.5 pre-release
 - Use the global directive instead of define for variables
 - Use the haclient/hacluster group/user instead of daemon
 - Use the _configure macro
 - Fix install dependencies
 
 * Fri Jul  24 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.4-3
 - Initial Fedora checkin
 - Include an AUTHORS and license file in each package
 - Change the library package name to pacemaker-libs to be more
   Fedora compliant
 - Remove execute permissions from xml related files
 - Reference the new cluster-glue devel package name
 - Update the tarball from upstream to version c9120a53a6ae
   + pengine: Only prevent migration if the clone dependency is stopping/starting on the target node
   + pengine: Bug 2160 - Don't shuffle clones due to colocation
   + pengine: New implementation of the resource migration (not stop/start) logic
   + Medium: Tools: crm_resource - Prevent use-of-NULL by requiring a resource name for the -A and -a options
   + Medium: pengine: Prevent use-of-NULL in find_first_action()
 
 * Tue Jul 14 2009 Andrew Beekhof <andrew@beekhof.net> - 1.0.4-2
 - Reference authors from the project AUTHORS file instead of listing in description
 - Change Source0 to reference the Mercurial repo
 - Cleaned up the summaries and descriptions
 - Incorporate the results of Fedora package self-review
 
 * Thu Jun 04 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.4-1
 - Update source tarball to revision: 1d87d3e0fc7f (stable-1.0)
 - Statistics:
     Changesets:      209
     Diff:            266 files changed, 12010 insertions(+), 8276 deletions(-)
 - Changes since Pacemaker-1.0.3
   + (bnc#488291): ais: do not rely on byte endianness on ptr cast
   + (bnc#507255): Tools: crm: delete rsc/op_defaults (these meta_attributes are killing me)
   + (bnc#507255): Tools: crm: import properly rsc/op_defaults
   + (LF 2114): Tools: crm: add support for operation instance attributes
   + ais: Bug lf#2126 - Messages replies cannot be routed to transient clients
   + ais: Fix compilation for the latest Corosync API (v1719)
   + attrd: Do not perform all updates as complete refreshes
   + cib: Fix huge memory leak affecting heartbeat-based clusters
   + Core: Allow xpath queries to match attributes
   + Core: Generate the help text directly from a tool options struct
   + Core: Handle differences in 0.6 messaging format
   + crmd: Bug lf#2120 - All transient node attribute updates need to go via attrd
   + crmd: Correctly calculate how long an FSA action took to avoid spamming the logs with errors
   + crmd: Fix another large memory leak affecting Heartbeat based clusters
-  + lha: Restore compatability with older versions
+  + lha: Restore compatibility with older versions
   + pengine: Bug bnc#495687 - Filesystem is not notified of successful STONITH under some conditions
   + pengine: Make running a cluster with STONITH enabled but no STONITH resources an error and provide details on resolutions
-  + pengine: Prevent use-ofNULL when using resource ordering sets
+  + pengine: Prevent use-of-NULL when using resource ordering sets
   + pengine: Provide inter-notification ordering guarantees
-  + pengine: Rewrite the notification code to be understanable and extendable
+  + pengine: Rewrite the notification code to be understandable and extendable
   + Tools: attrd - Prevent race condition resulting in the cluster forgetting the node wishes to shut down
   + Tools: crm: regression tests
   + Tools: crm_mon - Fix smtp notifications
   + Tools: crm_resource - Repair the ability to query meta attributes
   + Low Build: Bug lf#2105 - Debian package should contain pacemaker doc and crm templates
   + Medium (bnc#507255): Tools: crm: handle empty rsc/op_defaults properly
   + Medium (bnc#507255): Tools: crm: use the right obj_type when creating objects from xml nodes
   + Medium (LF 2107): Tools: crm: revisit exit codes in configure
   + Medium: cib: Do not bother validating updates that only affect the status section
   + Medium: Core: Include supported stacks in version information
   + Medium: crmd: Record in the CIB, the cluster infrastructure being used
   + Medium: cts: Do not combine crm_standby arguments - the wrapper can not process them
-  + Medium: cts: Fix the CIBAusdit class
+  + Medium: cts: Fix the CIBAudit class
   + Medium: Extra: Refresh showscores script from Dominik
   + Medium: pengine: Build a statically linked version of ptest
   + Medium: pengine: Correctly log the actions for resources that are being recovered
-  + Medium: pengine: Correctly log the occurance of promotion events
+  + Medium: pengine: Correctly log the occurrence of promotion events
-  + Medium: pengine: Implememt node health based on a patch from Mark Hamzy
+  + Medium: pengine: Implement node health based on a patch from Mark Hamzy
   + Medium: Tools: Add examples to help text outputs
   + Medium: Tools: crm: catch syntax errors for configure load
   + Medium: Tools: crm: implement erasing nodes in configure erase
   + Medium: Tools: crm: work with parents only when managing xml objects
   + Medium: Tools: crm_mon - Add option to run custom notification program on resource operations (Patch by Dominik Klein)
   + Medium: Tools: crm_resource - Allow --cleanup to function on complex resources and cluster-wide
   + Medium: Tools: haresource2cib.py - Patch from horms to fix conversion error
   + Medium: Tools: Include stack information in crm_mon output
   + Medium: Tools: Two new options (--stack,--constraints) to crm_resource for querying how a resource is configured
 
 * Wed Apr 08 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.3-1
 - Update source tarball to revision: b133b3f19797 (stable-1.0) tip
 - Statistics:
     Changesets:      383
     Diff:            329 files changed, 15471 insertions(+), 15119 deletions(-)
 - Changes since Pacemaker-1.0.2
   + Added tag SLE11-HAE-GMC for changeset 9196be9830c2
   + ais plugin: Fix quorum calculation (bnc#487003)
   + ais: Another memory fix leak in error path
   + ais: Bug bnc#482847, bnc#482905 - Force a clean exit of OpenAIS once Pacemaker has finished unloading
   + ais: Bug bnc#486858 - Fix update_member() to prevent spamming clients with membership events containing no changes
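-  + ais: Centralize all quorum calculations in the ais plugin and allow expected votes to be configured int he cib
+  + ais: Centralize all quorum calculations in the ais plugin and allow expected votes to be configured in the cib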
   + ais: Correctly handle a return value of zero from openais_dispatch_recv()
   + ais: Disable logging to a file
   + ais: Fix memory leak in error path
   + ais: IPC messages are only in scope until a response is sent
   + All signal handlers used with CL_SIGNAL() need to be as minimal as possible
   + cib: Bug bnc#482885 - Simplify CIB disk-writes to prevent data loss.  Required a change to the backup filename format
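-  + cib: crmd: Revert part of 9782ab035003.  Complex shutdown routines need G_main_add_SignalHandler to avoid race coditions
+  + cib: crmd: Revert part of 9782ab035003.  Complex shutdown routines need G_main_add_SignalHandler to avoid race conditions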
   + crm: Avoid infinite loop during crm configure edit (bnc#480327)
   + crmd: Avoid a race condition by waiting for the attrd update to trigger a transition automatically
   + crmd: Bug bnc#480977 - Prevent extra, partial, shutdown when a node restarts too quickly
   + crmd: Bug bnc#480977 - Prevent extra, partial, shutdown when a node restarts too quickly (verified)
-  + crmd: Bug bnc#489063 - Ensure the DC is always unset after we 'loose' an election
+  + crmd: Bug bnc#489063 - Ensure the DC is always unset after we 'lose' an election
   + crmd: Bug BSC#479543 - Correctly find the migration source for timed out migrate_from actions
   + crmd: Call crm_peer_init() before we start the FSA - prevents a race condition when used with Heartbeat
   + crmd: Erasing the status section should not be forced to the local node
-  + crmd: Fix memory leak in cib notication processing code
+  + crmd: Fix memory leak in cib notification processing code
   + crmd: Fix memory leak in transition graph processing
   + crmd: Fix memory leaks found by valgrind
   + crmd: More memory leaks fixes found by valgrind
   + fencing: stonithd: is_heartbeat_cluster is a no-no if there is no heartbeat support
   + pengine: Bug bnc#466788 - Exclude nodes that can not run resources
   + pengine: Bug bnc#466788 - Make colocation based on node attributes work
   + pengine: Bug BNC#478687 - Do not crash when clone-max is 0
   + pengine: Bug bnc#488721 - Fix id-ref expansion for clones, the doc-root for clone children is not the cib root
   + pengine: Bug bnc#490418 - Correctly determine node state for nodes wishing to be terminated
   + pengine: Bug LF#2087 - Correctly parse the state of anonymous clones that have multiple instances on a given node
   + pengine: Bug lf#2089 - Meta attributes are not inherited by clone children
   + pengine: Bug lf#2091 - Correctly restart modified resources that were found active by a probe
   + pengine: Bug lf#2094 - Fix probe ordering for cloned groups
   + pengine: Bug LF:2075 - Fix large pingd memory leaks
   + pengine: Correctly attach orphaned clone children to their parent
   + pengine: Correctly handle terminate node attributes that are set to the output from time()
   + pengine: Ensure orphaned clone members are hooked up to the parent when clone-max=0
   + pengine: Fix memory leak in LogActions
   + pengine: Fix the determination of whether a group is active
   + pengine: Look up the correct promotion preference for anonymous masters
   + pengine: Simplify handling of start failures by changing the default migration-threshold to INFINITY
   + pengine: The ordered option for clones no longer causes extra start/stop operations
   + RA: Bug bnc#490641 - Shut down dlm_controld with -TERM instead of -KILL
   + RA: pingd: Set default ping interval to 1 instead of 0 seconds
   + Resources: pingd - Correctly tell the ping daemon to shut down
   + Tools: Bug bnc#483365 - Ensure the command from cluster_test includes a value for --log-facility
   + Tools: cli: fix and improve delete command
   + Tools: crm: add and implement templates
   + Tools: crm: add support for command aliases and some common commands (i.e. cd,exit)
   + Tools: crm: create top configuration nodes if they are missing
   + Tools: crm: fix parsing attributes for rules (broken by the previous changeset)
   + Tools: crm: new ra set of commands
   + Tools: crm: resource agents information management
   + Tools: crm: rsc/op_defaults
   + Tools: crm: support for no value attribute in nvpairs
   + Tools: crm: the new configure monitor command
   + Tools: crm: the new configure node command
   + Tools: crm_mon - Prevent use-of-NULL when summarizing an orphan
   + Tools: hb2openais: create clvmd clone for respawn evmsd in ha.cf
   + Tools: hb2openais: fix a serious recursion bug in xml node processing
   + Tools: hb2openais: fix ocfs2 processing
   + Tools: pingd - prevent double free of getaddrinfo() output in error path
   + Tools: The default re-ping interval for pingd should be 1s not 1ms
   + Medium (bnc#479049): Tools: crm: add validation of resource type for the configure primitive command
   + Medium (bnc#479050): Tools: crm: add help for RA parameters in tab completion
   + Medium (bnc#479050): Tools: crm: add tab completion for primitive params/meta/op
   + Medium (bnc#479050): Tools: crm: reimplement cluster properties completion
   + Medium (bnc#486968): Tools: crm: listnodes function requires no parameters (do not mix completion with other stuff)
   + Medium: ais: Remove the ugly hack for dampening AIS membership changes
   + Medium: cib: Fix memory leaks by using mainloop_add_signal
   + Medium: cib: Move more logging to the debug level (was info)
   + Medium: cib: Overhaul the processing of synchronous replies
   + Medium: Core: Add library functions for instructing the cluster to terminate nodes
   + Medium: crmd: Add new expected-quorum-votes option
-  + Medium: crmd: Allow up to 5 retires when an attrd update fails
+  + Medium: crmd: Allow up to 5 retries when an attrd update fails
   + Medium: crmd: Automatically detect and use new values for crm_config options
   + Medium: crmd: Bug bnc#490426 - Escalated shutdowns stall when there are pending resource operations
   + Medium: crmd: Clean up and optimize the DC election algorithm
   + Medium: crmd: Fix memory leak in shutdown
   + Medium: crmd: Fix memory leaks spotted by Valgrind
-  + Medium: crmd: Ingore join messages from hosts other than our DC
+  + Medium: crmd: Ignore join messages from hosts other than our DC
   + Medium: crmd: Limit the scope of resource updates to the status section
   + Medium: crmd: Prevent the crmd from being respawned if its told to shut down when it did not ask to be
   + Medium: crmd: Re-check the election status after membership events
   + Medium: crmd: Send resource updates via the local CIB during elections
   + Medium: pengine: Bug bnc#491441 - crm_mon does not display operations returning 'uninstalled' correctly
   + Medium: pengine: Bug lf#2101 - For location constraints, role=Slave is equivalent to role=Started
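-  + Medium: pengine: Clean up the API - removed ->children() and renamed ->find_child() to fine_rsc()
+  + Medium: pengine: Clean up the API - removed ->children() and renamed ->find_child() to find_rsc()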
   + Medium: pengine: Compress the display of healthy anonymous clones
   + Medium: pengine: Correctly log the actions for resources that are being recovered
-  + Medium: pengine: Determin a promotion score for complex resources
+  + Medium: pengine: Determine a promotion score for complex resources
   + Medium: pengine: Ensure clones always have a value for globally-unique
   + Medium: pengine: Prevent orphan clones from being allocated
   + Medium: RA: controld: Return proper exit code for stop op.
   + Medium: Tools: Bug bnc#482558 - Fix logging test in cluster_test
   + Medium: Tools: Bug bnc#482828 - Fix quoting in cluster_test logging setup
   + Medium: Tools: Bug bnc#482840 - Include directory path to CTSlab.py
   + Medium: Tools: crm: add more user input checks
-  + Medium: Tools: crm: do not check resource status of we are working with a shadow
+  + Medium: Tools: crm: do not check resource status if we are working with a shadow
   + Medium: Tools: crm: fix id-refs and allow reference to top objects (i.e. primitive)
   + Medium: Tools: crm: ignore comments in the CIB
   + Medium: Tools: crm: multiple column output would not work with small lists
   + Medium: Tools: crm: refuse to delete running resources
   + Medium: Tools: crm: rudimentary if-else for templates
   + Medium: Tools: crm: Start/stop clones via target-role.
   + Medium: Tools: crm_mon - Compress the node status for healthy and offline nodes
   + Medium: Tools: crm_shadow - Return 0/cib_ok when --create-empty succeeds
   + Medium: Tools: crm_shadow - Support -e, the short form of --create-empty
   + Medium: Tools: Make attrd quieter
   + Medium: Tools: pingd - Avoid using various clplumbing functions as they seem to leak
   + Medium: Tools: Reduce pingd logging
 
 * Mon Feb 16 2009 Andrew Beekhof <abeekhof@suse.de> - 1.0.2-1
 - Update source tarball to revision: d232d19daeb9 (stable-1.0) tip
 - Statistics:
     Changesets:      441
     Diff:            639 files changed, 20871 insertions(+), 21594 deletions(-)
 - Changes since Pacemaker-1.0.1
   + (bnc#450815): Tools: crm cli: do not generate id for the operations tag
   + ais: Add support for the new AIS IPC layer
   + ais: Always set header.error to the correct default: SA_AIS_OK
   + ais: Bug BNC#456243 - Ensure the membership cache always contains an entry for the local node
   + ais: Bug BNC:456208 - Prevent deadlocks by not logging in the child process before exec()
-  + ais: By default, disable supprt for the WIP openais IPC patch
+  + ais: By default, disable support for the WIP openais IPC patch
   + ais: Detect and handle situations where ais and the crm disagree on the node name
   + ais: Ensure crm_peer_seq is updated after a membership update
   + ais: Make sure all IPC header fields are set to sane defaults
   + ais: Repair and streamline service load now that whitetank startup functions correctly
   + build: create and install doc files
   + cib: Allow clients without mainloop to connect to the cib
   + cib: CID:18 - Fix use-of-NULL in cib_perform_op
   + cib: CID:18 - Repair errors introduced in b5a18704477b - Fix use-of-NULL in cib_perform_op
   + cib: Ensure diffs contain the correct values of admin_epoch
   + cib: Fix four moderately sized memory leaks detected by Valgrind
   + Core: CID:10 - Prevent indexing into an array of schemas with a negative value
   + Core: CID:13 - Fix memory leak in log_data_element
   + Core: CID:15 - Fix memory leak in crm_get_peer
   + Core: CID:6 - Fix use-of-NULL in copy_ha_msg_input
   + Core: Fix crash in the membership code preventing node shutdown
-  + Core: Fix more memory leaks foudn by valgrind
+  + Core: Fix more memory leaks found by valgrind
   + Core: Prevent unterminated strings after decompression
   + crmd: Bug BNC:467995 - Delay marking STONITH operations complete until STONITH tells us so
   + crmd: Bug LF:1962 - Do not NACK peers because they are not (yet) in our membership.  Just ignore them.
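-  + crmd: Bug LF:2010 - Ensure fencing cib updates create the node_state entry if needed to preent re-fencing during cluster startup
+  + crmd: Bug LF:2010 - Ensure fencing cib updates create the node_state entry if needed to prevent re-fencing during cluster startup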
   + crmd: Correctly handle reconnections to attrd
-  + crmd: Ensure updates for lost migrate operations indicate which node it tried to migrating to
+  + crmd: Ensure updates for lost migrate operations indicate which node it tried to migrate to
   + crmd: If there are no nodes to finalize, start an election.
   + crmd: If there are no nodes to welcome, start an election.
   + crmd: Prevent node attribute loss by detecting attrd disconnections immediately
-  + crmd: Prevent node re-probe loops by ensuring manditory actions always complete
+  + crmd: Prevent node re-probe loops by ensuring mandatory actions always complete
   + pengine: Bug 2005 - Fix startup ordering of cloned stonith groups
   + pengine: Bug 2006 - Correctly reprobe cloned groups
   + pengine: Bug BNC:465484 - Fix the no-quorum-policy=suicide option
   + pengine: Bug LF:1996 - Correctly process disabled monitor operations
   + pengine: CID:19 - Fix use-of-NULL in determine_online_status
   + pengine: Clones now default to globally-unique=false
   + pengine: Correctly calculate the number of available nodes for the clone to use
   + pengine: Only shoot online nodes with no-quorum-policy=suicide
   + pengine: Prevent on-fail settings being ignored after a resource is successfully stopped
   + pengine: Prevent use-of-NULL for failed migrate actions in process_rsc_state()
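-  + pengine: Remove an optimization for the terminate node attribute that caused the cluster to block indefinitly
+  + pengine: Remove an optimization for the terminate node attribute that caused the cluster to block indefinitely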
-  + pengine: Repar the ability to colocate based on node attributes other than uname
+  + pengine: Repair the ability to colocate based on node attributes other than uname
   + pengine: Start the correct monitor operation for unmanaged masters
   + stonith: CID:3 - Fix another case of exceptionally poor error handling by the original stonith developers
   + stonith: CID:5 - Checking for NULL and then dereferencing it anyway is an interesting approach to error handling
   + stonithd: Sending IPC to the cluster is a privileged operation
   + stonithd: wrong checks for shmid (0 is a valid id)
   + Tools: attrd - Correctly determine when an attribute has stopped changing and should be committed to the CIB
   + Tools: Bug 2003 - pingd does not correctly detect failures when the interface is down
   + Tools: Bug 2003 - pingd does not correctly handle node-down events on multi-NIC systems
   + Tools: Bug 2021 - pingd does not detect sequence wrapping correctly, incorrectly reports nodes offline
   + Tools: Bug BNC:468066 - Do not use the result of uname() when its no longer in scope
   + Tools: Bug BNC:473265 - crm_resource -L dumps core
   + Tools: Bug LF:2001 - Transient node attributes should be set via attrd
   + Tools: Bug LF:2036 - crm_resource cannot set/get parameters for cloned resources
   + Tools: Bug LF:2046 - Node attribute updates are lost because attrd can take too long to start
   + Tools: Cause the correct clone instance to be failed with crm_resource -F
   + Tools: cluster_test - Allow the user to select a stack and fix CTS invocation
   + Tools: crm cli: allow rename only if the resource is stopped
   + Tools: crm cli: catch system errors on file operations
   + Tools: crm cli: completion for ids in configure
   + Tools: crm cli: drop '-rsc' from attributes for order constraint
   + Tools: crm cli: exit with an appropriate exit code
   + Tools: crm cli: fix wrong order of action and resource in order constraint
-  + Tools: crm cli: fox wrong exit code
+  + Tools: crm cli: fix wrong exit code
   + Tools: crm cli: improve handling of cib attributes
   + Tools: crm cli: new command: configure rename
   + Tools: crm cli: new command: configure upgrade
   + Tools: crm cli: new command: node delete
   + Tools: crm cli: prevent key errors on missing cib attributes
   + Tools: crm cli: print long help for help topics
   + Tools: crm cli: return on syntax error when parsing score
   + Tools: crm cli: rsc_location can be without nvpairs
   + Tools: crm cli: short node preference location constraint
   + Tools: crm cli: sometimes, on errors, level would change on single shot use
   + Tools: crm cli: syntax: drop a bunch of commas (remains of help tables conversion)
   + Tools: crm cli: verify user input for sanity
   + Tools: crm: find expressions within rules (do not always skip xml nodes due to used id)
   + Tools: crm_master should not define a set id now that attrd is used.  Defining one can break lookups
   + Tools: crm_mon Use the OID assigned to the project by IANA for SNMP traps
   + Medium (bnc#445622): Tools: crm cli: improve the node show command and drop node status
   + Medium (LF 2009): stonithd: improve timeouts for remote fencing
   + Medium: ais: Allow dead peers to be removed from membership calculations
   + Medium: ais: Pass node deletion events on to clients
   + Medium: ais: Sanitize ipc usage
-  + Medium: ais: Supply the node uname in addtion to the id
+  + Medium: ais: Supply the node uname in addition to the id
   + Medium: Build: Clean up configure to ensure NON_FATAL_CFLAGS is consistent with CFLAGS (ie. includes -g)
   + Medium: Build: Install cluster_test
   + Medium: Build: Use more restrictive CFLAGS and fix the resulting errors
   + Medium: cib: CID:20 - Fix potential use-after-free in cib_native_signon
   + Medium: Core: Bug BNC:474727 - Set a maximum time to wait for IPC messages
   + Medium: Core: CID:12 - Fix memory leak in decode_transition_magic error path
   + Medium: Core: CID:14 - Fix memory leak in calculate_xml_digest error path
   + Medium: Core: CID:16 - Fix memory leak in date_to_string error path
   + Medium: Core: Try to track down the cause of XML parsing errors
   + Medium: crmd: Bug BNC:472473 - Do not wait excessive amounts of time for lost actions
   + Medium: crmd: Bug BNC:472473 - Reduce the transition timeout to action_timeout+network_delay
   + Medium: crmd: Do not fast-track the processing of LRM refreshes when there are pending actions.
   + Medium: crmd: do_dc_join_filter_offer - Check the 'join' message is for the current instance before deciding to NACK peers
   + Medium: crmd: Find option values without having to do a config upgrade
   + Medium: crmd: Implement shutdown using a transient node attribute
   + Medium: crmd: Update the crmd options to use dashes instead of underscores
   + Medium: cts: Add 'cluster reattach' to the suite of automated regression tests
   + Medium: cts: cluster_test - Make some usability enhancements
   + Medium: CTS: cluster_test - suggest a valid port number
   + Medium: CTS: Fix python import order
   + Medium: cts: Implement an automated SplitBrain test
   + Medium: CTS: Remove references to deleted classes
   + Medium: Extra: Resources - Use HA_VARRUN instead of HA_RSCTMP for state files as Heartbeat removes HA_RSCTMP at startup
   + Medium: HB: Bug 1933 - Fake crmd_client_status_callback() calls because HB does not provide them for already running processes
   + Medium: pengine: CID:17 - Fix memory leak in find_actions_by_task error path
   + Medium: pengine: CID:7,8 - Prevent hypothetical use-of-NULL in LogActions
   + Medium: pengine: Defer logging the actions performed on a resource until we have processed ordering constraints
   + Medium: pengine: Remove the symmetrical attribute of colocation constraints
   + Medium: Resources: pingd - fix the meta defaults
   + Medium: Resources: Stateful - Add missing meta defaults
-  + Medium: stonithd: exit if we the pid file cannot be locked
+  + Medium: stonithd: exit if the pid file cannot be locked
   + Medium: Tools: Allow attrd clients to specify the ID the attribute should be created with
   + Medium: Tools: attrd - Allow attribute updates to be performed from a hosts peer
   + Medium: Tools: Bug LF:1994 - Clean up crm_verify return codes
   + Medium: Tools: Change the pingd defaults to ping hosts once every second (instead of 5 times every 10 seconds)
   + Medium: Tools: cibmin - Detect resource operations with a view to providing email/snmp/cim notification
   + Medium: Tools: crm cli: add back symmetrical for order constraints
   + Medium: Tools: crm cli: generate role in location when converting from xml
   + Medium: Tools: crm cli: handle shlex exceptions
   + Medium: Tools: crm cli: keep order of help topics
   + Medium: Tools: crm cli: refine completion for ids in configure
   + Medium: Tools: crm cli: replace inf with INFINITY
   + Medium: Tools: crm cli: streamline cib load and parsing
   + Medium: Tools: crm cli: supply provider only for ocf class primitives
   + Medium: Tools: crm_mon - Add support for sending mail notifications of resource events
   + Medium: Tools: crm_mon - Include the DC version in status summary
   + Medium: Tools: crm_mon - Sanitize startup and option processing
   + Medium: Tools: crm_mon - switch to event-driven updates and add support for sending snmp traps
   + Medium: Tools: crm_shadow - Replace the --locate option with the saner --edit
   + Medium: Tools: hb2openais: do not remove Evmsd resources, but replace them with clvmd
   + Medium: Tools: hb2openais: replace crmadmin with crm_mon
   + Medium: Tools: hb2openais: replace the lsb class with ocf for o2cb
   + Medium: Tools: hb2openais: reuse code
   + Medium: Tools: LF:2029 - Display an error if crm_resource is used to reset the operation history of non-primitive resources
   + Medium: Tools: Make pingd resilient to attrd failures
   + Medium: Tools: pingd - fix the command line switches
   + Medium: Tools: Rename ccm_tool to crm_node
 
 * Tue Nov 18 2008 Andrew Beekhof <abeekhof@suse.de> - 1.0.1-1
 - Update source tarball to revision: 6fc5ce8302ab (stable-1.0) tip
 - Statistics:
     Changesets:      170
     Diff:            816 files changed, 7633 insertions(+), 6286 deletions(-)
-- Changes since Pacemaker-1.0.1
+- Changes since Pacemaker-1.0.0
   + ais: Allow the crmd to get callbacks whenever a node state changes
   + ais: Create an option for starting the mgmtd daemon automatically
   + ais: Ensure HA_RSCTMP exists for use by resource agents
   + ais: Hook up the openais.conf config logging options
   + ais: Zero out the PID of disconnecting clients
   + cib: Ensure global updates cause a disk write when appropriate
-  + Core: Add an extra snaity check to getXpathResults() to prevent segfaults
+  + Core: Add an extra sanity check to getXpathResults() to prevent segfaults
   + Core: Do not redefine __FUNCTION__ unnecessarily
   + Core: Repair the ability to have comments in the configuration
   + crmd: Bug:1975 - crmd should wait indefinitely for stonith operations to complete
   + crmd: Ensure PE processing does not occur for all error cases in do_pe_invoke_callback
   + crmd: Requests to the CIB should cause any prior PE calculations to be ignored
   + heartbeat: Wait for membership 'up' events before removing stale node status data
   + pengine: Bug LF:1988 - Ensure recurring operations always have the correct target-rc set
   + pengine: Bug LF:1988 - For unmanaged resources we need to skip the usual can_run_resources() checks
   + pengine: Ensure the terminate node attribute is handled correctly
   + pengine: Fix optional colocation
-  + pengine: Improve up the detection of 'new' nodes joining the cluster
+  + pengine: Improve the detection of 'new' nodes joining the cluster
   + pengine: Prevent assert failures in master_color() by ensuring unmanaged masters are always reallocated to their current location
   + Tools: crm cli: parser: return False on syntax error and None for comments
   + Tools: crm cli: unify template and edit commands
   + Tools: crm_shadow - Show more line number information after validation failures
   + Tools: hb2openais: add option to upgrade the CIB to v3.0
   + Tools: hb2openais: add U option to getopts and update usage
   + Tools: hb2openais: backup improved and multiple fixes
   + Tools: hb2openais: fix class/provider reversal
   + Tools: hb2openais: fix testing
   + Tools: hb2openais: move the CIB update to the end
   + Tools: hb2openais: update logging and set logfile appropriately
   + Tools: LF:1969 - Attrd never sets any properties in the cib
   + Tools: Make attrd functional on OpenAIS
   + Medium: ais: Hook up the options for specifying the expected number of nodes and total quorum votes
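-  + Medium: ais: Look for pacemaker options inside the service block with 'name: pacemaker' instead of creating an addtional configuration block
+  + Medium: ais: Look for pacemaker options inside the service block with 'name: pacemaker' instead of creating an additional configuration block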
   + Medium: ais: Provide better feedback when nodes change nodeids (in openais.conf)
   + Medium: cib: Always store cib contents on disk with num_updates=0
   + Medium: cib: Ensure remote access ports are cleaned up on shutdown
   + Medium: crmd: Detect deleted resource operations automatically
   + Medium: crmd: Erase a nodes resource operations and transient attributes after a successful STONITH
   + Medium: crmd: Find a more appropriate place to update quorum and refresh attrd attributes
   + Medium: crmd: Fix the handling of unexpected PE exits to ensure the current CIB is stored
   + Medium: crmd: Fix the recording of pending operations in the CIB
   + Medium: crmd: Initiate an attrd refresh _after_ the status section has been fully repopulated
   + Medium: crmd: Only the DC should update quorum in an openais cluster
   + Medium: Ensure meta attributes are used consistently
   + Medium: pengine: Allow group and clone level resource attributes
   + Medium: pengine: Bug N:437719 - Ensure scores from colocated resources count when allocating groups
   + Medium: pengine: Prevent lsb scripts from being used in globally unique clones
   + Medium: pengine: Make a best-effort guess at a migration threshold for people with 0.6 configs
   + Medium: Resources: controld - ensure we are part of a clone with globally_unique=false
   + Medium: Tools: attrd - Automatically refresh all attributes after a CIB replace operation
   + Medium: Tools: Bug LF:1985 - crm_mon - Correctly process failed cib queries to allow reconnection after cluster restarts
   + Medium: Tools: Bug LF:1987 - crm_verify incorrectly warns of configuration upgrades for the most recent version
   + Medium: Tools: crm (bnc#441028): check for key error in attributes management
   + Medium: Tools: crm_mon - display the meaning of the operation rc code instead of the status
   + Medium: Tools: crm_mon - Fix the display of timing data
   + Medium: Tools: crm_verify - check that we are being asked to validate a complete config
   + Medium: xml: Relax the restriction on the contents of rsc_location.node
 
 * Thu Oct 16 2008 Andrew Beekhof <abeekhof@suse.de> - 1.0.0-1
 - Update source tarball to revision: 388654dfef8f tip
 - Statistics:
     Changesets:      261
     Diff:            3021 files changed, 244985 insertions(+), 111596 deletions(-)
 - Changes since f805e1b30103
   + add the crm cli program
   + ais: Move the service id definition to a common location and make sure it is always used
   + build: rename hb2openais.sh to .in and replace paths with vars
   + cib: Implement --create for crm_shadow
   + cib: Remove dead files
   + Core: Allow the expected number of quorum votes to be configurable
   + Core: cl_malloc and friends were removed from Heartbeat
   + Core: Only call xmlCleanupParser() if we parsed anything.  Doing so unconditionally seems to cause a segfault
   + hb2openais.sh: improve pingd handling; several bugs fixed
   + hb2openais: fix clone creation; replace EVMS strings
   + new hb2openais.sh conversion script
   + pengine: Bug LF:1950 - Ensure the current values for all notification variables are always set (even if empty)
   + pengine: Bug LF:1955 - Ensure unmanaged masters are unconditionally repromoted to ensure they are monitored correctly.
   + pengine: Bug LF:1955 - Fix another case of filtering causing unmanaged master failures
   + pengine: Bug LF:1955 - Unmanaged mode prevents master resources from being allocated correctly
   + pengine: Bug N:420538 - Anti-colocation caused a positive node preference
   + pengine: Correctly handle unmanaged resources to prevent them from being started elsewhere
   + pengine: crm_resource - Fix the --migrate command
   + pengine: Make stonith-enabled default to true and warn if no STONITH resources are found
   + pengine: Make sure orphaned clone children are created correctly
   + pengine: Monitors for unmanaged resources do not need to wait for start/promote/demote actions to complete
   + stonithd (LF 1951): fix remote stonith operations
   + stonithd: fix handling of timeouts
   + stonithd: fix logic for stonith resource priorities
   + stonithd: implement the fence-timeout instance attribute
   + stonithd: initialize value before reading fence-timeout
   + stonithd: set timeouts for fencing ops to the timeout of the start op
   + stonithd: stonith rsc priorities (new feature)
   + Tools: Add hb2openais - a tool for upgrading a Heartbeat cluster to use OpenAIS instead
   + Tools: crm_verify - clean up the upgrade logic to prevent crash on invalid configurations
   + Tools: Make pingd functional on Linux
   + Update version numbers for 1.0 candidates
   + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid
   + Medium: ais: Use the agreed service number
   + Medium: Build: Reliably detect heartbeat libraries during configure
   + Medium: Build: Supply prototypes for libreplace functions when needed
   + Medium: Build: Teach configure how to find corosync
   + Medium: Core: Provide better feedback if Pacemaker is started by a stack it does not support
   + Medium: crmd: Avoid calling GHashTable functions with NULL
   + Medium: crmd: Delay raising I_ERROR when the PE exits until we have had a chance to save the current CIB
   + Medium: crmd: Hook up the stonith-timeout option to stonithd
   + Medium: crmd: Prevent potential use-of-NULL in global_timer_callback
   + Medium: crmd: Rationalize the logging of graph aborts
   + Medium: pengine: Add a stonith_timeout option and remove new options that are better set in rsc_defaults
   + Medium: pengine: Allow external entities to ask for a node to be shot by creating a terminate=true transient node attribute
   + Medium: pengine: Bug LF:1950 - Notifications do not contain all documented resource state fields
   + Medium: pengine: Bug N:417585 - Do not restart group children whose individual score drops below zero
   + Medium: pengine: Detect clients that disconnect before receiving their reply
   + Medium: pengine: Implement a true maintenance mode
   + Medium: pengine: Implement on-fail=standby for NTT.  Derived from a patch by Satomi TANIGUCHI
   + Medium: pengine: Print the correct message when stonith is disabled
   + Medium: pengine: ptest - check the input is valid before proceeding
   + Medium: pengine: Revert group stickiness to the 'old way'
   + Medium: pengine: Use the correct attribute for action 'requires' (was prereq)
   + Medium: stonithd: Fix compilation without full heartbeat install
   + Medium: stonithd: exit with better code on empty host list
   + Medium: tools: Add a new regression test for CLI tools
   + Medium: tools: crm_resource - return with non-zero when a resource migration command is invalid
   + Medium: tools: crm_shadow - Allow the admin to start with an empty CIB (and no cluster connection)
   + Medium: xml: pacemaker-0.7 is now an alias for the 1.0 schema
 
 * Mon Sep 22 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.3-1
 - Update source tarball to revision: 33e677ab7764+ tip
 - Statistics:
     Changesets:      133
     Diff:            89 files changed, 7492 insertions(+), 1125 deletions(-)
 - Changes since f805e1b30103
   + Tools: add the crm cli program
   + Core: cl_malloc and friends were removed from Heartbeat
   + Core: Only call xmlCleanupParser() if we parsed anything.  Doing so unconditionally seems to cause a segfault
   + new hb2openais.sh conversion script
   + pengine: Bug LF:1950 - Ensure the current values for all notification variables are always set (even if empty)
   + pengine: Bug LF:1955 - Ensure unmanaged masters are unconditionally repromoted to ensure they are monitored correctly.
   + pengine: Bug LF:1955 - Fix another case of filtering causing unmanaged master failures
   + pengine: Bug LF:1955 - Unmanaged mode prevents master resources from being allocated correctly
   + pengine: Bug N:420538 - Anti-colocation caused a positive node preference
   + pengine: Correctly handle unmanaged resources to prevent them from being started elsewhere
   + pengine: crm_resource - Fix the --migrate command
   + pengine: Make stonith-enabled default to true and warn if no STONITH resources are found
   + pengine: Make sure orphaned clone children are created correctly
   + pengine: Monitors for unmanaged resources do not need to wait for start/promote/demote actions to complete
   + stonithd (LF 1951): fix remote stonith operations
   + Tools: crm_verify - clean up the upgrade logic to prevent crash on invalid configurations
   + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid
   + Medium: ais: Use the agreed service number
   + Medium: pengine: Allow external entities to ask for a node to be shot by creating a terminate=true transient node attribute
   + Medium: pengine: Bug LF:1950 - Notifications do not contain all documented resource state fields
   + Medium: pengine: Bug N:417585 - Do not restart group children whose individual score drops below zero
   + Medium: pengine: Implement a true maintenance mode
   + Medium: pengine: Print the correct message when stonith is disabled
   + Medium: stonithd: exit with better code on empty host list
   + Medium: xml: pacemaker-0.7 is now an alias for the 1.0 schema
 
 * Wed Aug 20 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.1-1
 - Update source tarball to revision: f805e1b30103+ tip
 - Statistics:
     Changesets:      184
     Diff:            513 files changed, 43408 insertions(+), 43783 deletions(-)
 - Changes since 0.7.0-19
   + Fix compilation when GNUTLS isn't found
   + admin: Fix use-after-free in crm_mon
   + Build: Remove testing code that prevented heartbeat-only builds
   + cib: Use single quotes so that the xpath queries for nvpairs will succeed
   + crmd: Always connect to stonithd when the TE starts and ensure we notice if it dies
   + crmd: Correctly handle a dead PE process
   + crmd: Make sure async-failures cause the failcount to be incremented
   + pengine: Bug LF:1941 - Handle failed clone instance probes when clone-max < #nodes
   + pengine: Parse resource ordering sets correctly
   + pengine: Prevent use-of-NULL - order->rsc_rh will not always be non-NULL
   + pengine: Unpack colocation sets correctly
   + Tools: crm_mon - Prevent use-of-NULL for orphaned resources
   + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid
   + Medium: ais: Allow transient clients to receive membership updates
   + Medium: ais: Avoid double-free in error path
   + Medium: ais: Include in the membership nodes for which we have not determined their hostname
   + Medium: ais: Spawn the PE from the ais plugin instead of the crmd
   + Medium: cib: By default, new configurations use the latest schema
   + Medium: cib: Clean up the CIB if it was already disconnected
   + Medium: cib: Only increment num_updates if something actually changed
   + Medium: cib: Prevent use-after-free in client after abnormal termination of the CIB
   + Medium: Core: Fix memory leak in xpath searches
   + Medium: Core: Get more details regarding parser errors
   + Medium: Core: Repair expand_plus_plus - do not call char2score on unexpanded values
   + Medium: Core: Switch to the libxml2 parser - it is significantly faster
   + Medium: Core: Use a libxml2 library function for xml -> text conversion
   + Medium: crmd: Asynchronous failure actions have no parameters
   + Medium: crmd: Avoid calling glib functions with NULL
   + Medium: crmd: Do not allow an election to promote a node from S_STARTING
   + Medium: crmd: Do not vote if we have not completed the local startup
   + Medium: crmd: Fix te_update_diff() now that get_object_root() functions differently
   + Medium: crmd: Fix the lrmd xpath expressions to not contain quotes
   + Medium: crmd: If we get a join offer during an election, better restart the election
   + Medium: crmd: No further processing is needed when using the LRMs API call for failing resources
   + Medium: crmd: Only update have-quorum if the value changed
   + Medium: crmd: Repair the input validation logic in do_te_invoke
   + Medium: cts: CIBs can no longer contain comments
   + Medium: cts: Enable a bunch of tests that were incorrectly disabled
   + Medium: cts: The libxml2 parser wont allow v1 resources to use integers as parameter names
   + Medium: Do not use the cluster UID and GID directly.  Look them up based on the configured value of HA_CCMUSER
   + Medium: Fix compilation when heartbeat is not supported
   + Medium: pengine: Allow groups to be involved in optional ordering constraints
   + Medium: pengine: Allow sets of operations to be reused by multiple resources
   + Medium: pengine: Bug LF:1941 - Mark extra clone instances as orphans and do not show inactive ones
   + Medium: pengine: Determine the correct migration-threshold during resource expansion
   + Medium: pengine: Implement no-quorum-policy=suicide (FATE #303619)
   + Medium: pengine: Clean up resources after stopping old copies of the PE
   + Medium: pengine: Teach the PE how to stop old copies of itself
   + Medium: Tools: Backport hb_report updates
   + Medium: Tools: cib_shadow - On create, spawn a new shell with CIB_shadow and PS1 set accordingly
   + Medium: Tools: Rename cib_shadow to crm_shadow
 
 * Fri Jul 18 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.0-19
 - Update source tarball to revision: 007c3a1c50f5 (unstable) tip
 - Statistics:
     Changesets:      108
     Diff:            216 files changed, 4632 insertions(+), 4173 deletions(-)
 - Changes added since unstable-0.7
   + admin: Fix use-after-free in crm_mon
   + ais: Change the tag for the ais plugin to "pacemaker" (used in openais.conf)
   + ais: Log terminated processes as an error
   + cib: Performance - Reorganize things to avoid calculating the XML diff twice
   + pengine: Bug LF:1941 - Handle failed clone instance probes when clone-max < #nodes
   + pengine: Fix memory leak in action2xml
   + pengine: Make OCF_ERR_ARGS a node-level error rather than a cluster-level one
   + pengine: Properly handle clones that are not installed on all nodes
   + Medium: admin: cibadmin - Show any validation errors if the upgrade failed
   + Medium: admin: cib_shadow - Implement --locate to display the underlying filename
   + Medium: admin: cib_shadow - Implement a --diff option
   + Medium: admin: cib_shadow - Implement a --switch option
   + Medium: admin: crm_resource - create more compact constraints that do not use lifetime (which is deprecated)
   + Medium: ais: Approximate born_on for OpenAIS based clusters
   + Medium: cib: Remove do_id_check, it is a poor substitute for ID validation by a schema
   + Medium: cib: Skip construction of pre-notify messages if no-one wants one
   + Medium: Core: Attempt to streamline some key functions to increase performance
   + Medium: Core: Clean up XML parser after validation
   + Medium: crmd: Detect and optimize the CRMs behavior when processing diffs of an LRM refresh
   + Medium: Fix memory leaks when resetting the name of an XML object
   + Medium: pengine: Prefer the current location if it is one of a group of nodes with the same (highest) score
 
 * Wed Jun 25 2008 Andrew Beekhof <abeekhof@suse.de> - 0.7.0-1
 - Update source tarball to revision: bde0c7db74fb tip
 - Statistics:
     Changesets:      439
     Diff:            676 files changed, 41310 insertions(+), 52071 deletions(-)
 - Changes added since stable-0.6
   + A new tool for setting up and invoking CTS
   + Admin: All tools now use --node (-N) for specifying node unames
   + Admin: All tools now use --xml-file (-x) and --xml-text (-X) for specifying where to find XML blobs
   + cib: Cleanup the API - remove redundant input fields
   + cib: Implement CIB_shadow - a facility for making and testing changes before uploading them to the cluster
   + cib: Make registering per-op callbacks an API call and renamed (for clarity) the API call for requesting notifications
   + Core: Add a facility for automatically upgrading old configurations
   + Core: Adopt libxml2 as the XML processing library - all external clients need to be recompiled
   + Core: Allow sending TLS messages larger than the MTU
   + Core: Fix parsing of time-only ISO dates
   + Core: Smarter handling of XML values containing quotes
   + Core: XML memory corruption - catch, and handle, cases where we are overwriting an attribute value with itself
   + Core: The xml ID type does not allow UUIDs that start with a number
   + Core: Implement XPath based versions of query/delete/replace/modify
   + Core: Remove some HA2.0.(3,4) compatibility code
   + crmd: Overhaul the detection of nodes that are starting vs. failed
   + pengine: Bug LF:1459 - Allow failures to expire
   + pengine: Have the PE do non-persistent configuration upgrades before performing calculations
   + pengine: Replace failure-stickiness with a simple 'migration-threshold'
   + tengine: Simplify the design by folding the tengine process into the crmd
   + Medium: Admin: Bug LF:1438 - Allow the list of all/active resource operations to be queried by crm_resource
   + Medium: Admin: Bug LF:1708 - crm_resource should print a warning if an attribute is already set as a meta attribute
   + Medium: Admin: Bug LF:1883 - crm_mon should display fail-count and operation history
   + Medium: Admin: Bug LF:1883 - crm_mon should display operation timing data
   + Medium: Admin: Bug N:371785 - crm_resource -C does not also clean up fail-count attributes
   + Medium: Admin: crm_mon - include timing data for failed actions
   + Medium: ais: Read options from the environment since objdb is not completely usable yet
   + Medium: cib: Add sections for op_defaults and rsc_defaults
   + Medium: cib: Better matching notification callbacks (for detecting duplicates and removal)
   + Medium: cib: Bug LF:1348 - Allow rules and attribute sets to be referenced for use in other objects
   + Medium: cib: BUG LF:1918 - By default, all cib calls now timeout after 30s
   + Medium: cib: Detect updates that decrease the version tuple
   + Medium: cib: Implement a client-side operation timeout - Requires LHA update
   + Medium: cib: Implement callbacks and async notifications for remote connections
   + Medium: cib: Make cib->cmds->update() an alias for modify at the API level (also implemented in cibadmin)
   + Medium: cib: Mark the CIB as disconnected if the IPC connection is terminated
   + Medium: cib: New call option 'cib_can_create' which can be passed to modify actions - allows the object to be created if it does not exist yet
   + Medium: cib: Reimplement get|set|delete attributes using XPath
   + Medium: cib: Remove some useless parts of the API
   + Medium: cib: Remove the 'attributes' scaffolding from the new format
   + Medium: cib: Implement the ability for clients to connect to remote servers
   + Medium: Core: Add support for validating xml against RelaxNG schemas
   + Medium: Core: Allow more than one item to be modified/deleted in XPath based operations
   + Medium: Core: Fix the sort_pairs function for creating sorted xml objects
   + Medium: Core: iso8601 - Implement subtract_duration and fix subtract_time
   + Medium: Core: Reduce the amount of xml copying occurring
   + Medium: Core: Support value='value+=N' XML updates (in addition to value='value++')
   + Medium: crmd: Add support for lrm_ops->fail_rsc if its available
   + Medium: crmd: HB - watch link status for node leaving events
   + Medium: crmd: Bug LF:1924 - Improved handling of lrmd disconnects and shutdowns
   + Medium: crmd: Do not wait for actions with a start_delay over 5 minutes. Confirm them immediately
   + Medium: pengine: Bug LF:1328 - Do not fence nodes in clusters without managed resources
   + Medium: pengine: Bug LF:1461 - Give transient node attributes (in <status/>) preference over persistent ones (in <nodes/>)
   + Medium: pengine: Bug LF:1884, Bug LF:1885 - Implement N:M ordering and colocation constraints
   + Medium: pengine: Bug LF:1886 - Create a resource and operation 'defaults' config section
   + Medium: pengine: Bug LF:1892 - Allow recurring actions to be triggered at known times
   + Medium: pengine: Bug LF:1926 - Probes should complete before stop actions are invoked
   + Medium: pengine: Fix the standby when its set as a transient attribute
   + Medium: pengine: Implement a global 'stop-all-resources' option
   + Medium: pengine: Implement cibpipe, a tool for performing/simulating config changes "offline"
   + Medium: pengine: We do not allow colocation with specific clone instances
   + Medium: Tools: pingd - Implement a stack-independent version of pingd
   + Medium: xml: Ship an xslt for upgrading from 0.6 to 0.7
 
 * Thu Jun 19 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.5-1
 - Update source tarball to revision: b9fe723d1ac5 tip
 - Statistics:
     Changesets:      48
     Diff:            37 files changed, 1204 insertions(+), 234 deletions(-)
 - Changes since Pacemaker-0.6.4
   + Admin: Repair the ability to delete failcounts
   + ais: Audit IPC handling between the AIS plugin and CRM processes
   + ais: Have the plugin create needed /var/lib directories
   + ais: Make sure the sync and async connections are assigned correctly (not swapped)
   + cib: Correctly detect configuration changes - num_updates does not count
   + pengine: Apply stickiness values to the whole group, not the individual resources
   + pengine: Bug N:385265 - Ensure groups are migrated instead of remaining partially active on the current node
-  + pengine: Bug N:396293 - Enforce manditory group restarts due to ordering constraints
+  + pengine: Bug N:396293 - Enforce mandatory group restarts due to ordering constraints
   + pengine: Correctly recover master instances found active on more than one node
   + pengine: Fix memory leaks reported by Valgrind
   + Medium: Admin: crm_mon - Misc improvements from Satomi Taniguchi
   + Medium: Bug LF:1900 - Resource stickiness should not allow placement in asynchronous clusters
   + Medium: crmd: Ensure joins are completed promptly when a node taking part dies
   + Medium: pengine: Avoid clone instance shuffling in more cases
   + Medium: pengine: Bug LF:1906 - Remove an optimization in native_merge_weights() causing group scores to behave erratically
   + Medium: pengine: Make use of target_rc data to correctly process resource operations
   + Medium: pengine: Prevent a possible use of NULL in sort_clone_instance()
   + Medium: tengine: Include target rc in the transition key - used to correctly determine operation failure
 
 * Thu May 22 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.4-1
 - Update source tarball to revision: 226d8e356924 tip
 - Statistics:
     Changesets:       55
     Diff:             199 files changed, 7103 insertions(+), 12378 deletions(-)
 - Changes since Pacemaker-0.6.3
   + crmd: Bug LF:1881 LF:1882 - Overhaul the logic for operation cancelation and deletion
   + crmd: Bug LF:1894 - Make sure cancelled recurring operations are cleaned out from the CIB
   + pengine: Bug N:387749 - Colocation with clones causes unnecessary clone instance shuffling
   + pengine: Ensure 'master' monitor actions are cancelled _before_ we demote the resource
   + pengine: Fix assert failure leading to core dump - make sure variable is properly initialized
   + pengine: Make sure 'slave' monitoring happens after the resource has been demoted
   + pengine: Prevent failure stickiness underflows (where too many failures become a _positive_ preference)
   + Medium: Admin: crm_mon - Only complain if the output file could not be opened
   + Medium: Common: filter_action_parameters - enable legacy handling only for older versions
   + Medium: pengine: Bug N:385265 - The failure stickiness of group children is ignored until it reaches -INFINITY
   + Medium: pengine: Implement master and clone colocation by excluding nodes rather than setting ones score to INFINITY (similar to cs: 756afc42dc51)
   + Medium: tengine: Bug LF:1875 - Correctly find actions to cancel when their node leaves the cluster
 
 * Wed Apr 23 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.3-1
 - Update source tarball to revision: fd8904c9bc67 tip
 - Statistics:
     Changesets:      117
     Diff:            354 files changed, 19094 insertions(+), 11338 deletions(-)
 - Changes since Pacemaker-0.6.2
   + Admin: Bug LF:1848 - crm_resource - Pass set name and id to delete_resource_attr() in the correct order
   + Build: SNMP has been moved to the management/pygui project
   + crmd: Bug LF1837 - Unmanaged resources prevent crmd from shutting down
   + crmd: Prevent use-after-free in lrm interface code (Patch based on work by Keisuke MORI)
   + pengine: Allow the cluster to make progress by not retrying failed demote actions
   + pengine: Anti-colocation with slave should not prevent master colocation
   + pengine: Bug LF 1768 - Wait more often for STONITH ops to complete before starting resources
   + pengine: Bug LF1836 - Allow is-managed-default=false to be overridden by individual resources
   + pengine: Bug LF185 - Prevent pointless master/slave instance shuffling by ignoring the master-pref of stopped instances
   + pengine: Bug N-191176 - Implement interleaved ordering for clone-to-clone scenarios
   + pengine: Bug N-347004 - Ensure clone notifications are always sent when an instance is stopped/started
   + pengine: Bug N-347004 - Include notification ordering is correct for interleaved clones
   + pengine: Bug PM-11 - Directly link probe_complete to starting clone instances
   + pengine: Bug PM1 - Fix setting failcounts when applied to complex resources
   + pengine: Bug PM12, LF1648 - Extensive revision of group ordering
   + pengine: Bug PM7 - Ensure masters are always demoted before they are stopped
   + pengine: Create probes after allocation to allow smarter handling of anonymous clones
   + pengine: Do not prioritize clone instances that must be moved
   + pengine: Fix error in previous commit that allowed more than the required number of masters to be promoted
   + pengine: Group start ordering fixes
   + pengine: Implement promote/demote ordering for cloned groups
   + tengine: Repair failcount updates
   + tengine: Use the correct offset when updating failcount
   + Medium: Admin: Add a summary output that can be easily parsed by CTS for audit purposes
   + Medium: Build: Make configure fail if bz2 or libxml2 are not present
   + Medium: Build: Re-instate a better default for LCRSODIR
   + Medium: CIB: Bug LF-1861 - Filter irrelevant error status from synchronous CIB clients
   + Medium: Core: Bug 1849 - Invalid conversion of ordinal leap year to gregorian date
   + Medium: Core: Drop compatibility code for 2.0.4 and 2.0.5 clusters
   + Medium: crmd: Bug LF-1860 - Automatically cancel recurring ops before demote and promote operations (not only stops)
   + Medium: crmd: Save the current CIB contents if we detect the PE crashed
   + Medium: pengine: Bug LF:1866 - Fix version check when applying compatibility handling for failed start operations
   + Medium: pengine: Bug LF:1866 - Restore the ability to have start failures not be fatal
   + Medium: pengine: Bug PM1 - Failcount applies to all instances of non-unique clone
   + Medium: pengine: Correctly set the state of partially active master/slave groups
   + Medium: pengine: Do not claim to be stopping an already stopped orphan
   + Medium: pengine: Ensure implies_left ordering constraints are always effective
   + Medium: pengine: Indicate each resources 'promotion' score
   + Medium: pengine: Prevent a possible use-of-NULL
   + Medium: pengine: Reprocess the current action if it changed (so that any prior dependencies are updated)
   + Medium: tengine: Bug LF-1859 - Wait for fail-count updates to complete before terminating the transition
   + Medium: tengine: Bug LF:1859 - Do not abort graphs due to our own failcount updates
   + Medium: tengine: Bug LF:1859 - Prevent the TE from interrupting itself
 
 * Thu Feb 14 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.2-1
 - Update source tarball to revision: 28b1a8c1868b tip
 - Statistics:
     Changesets:    11
     Diff:          7 files changed, 58 insertions(+), 18 deletions(-)
 - Changes since Pacemaker-0.6.1
   + haresources2cib.py: set default-action-timeout to the default (20s)
   + haresources2cib.py: update ra parameters lists
   + Medium: SNMP: Allow the snmp subagent to be built (patch from MATSUDA, Daiki)
   + Medium: Tools: Make sure the autoconf variables in haresources2cib are expanded
 
 * Tue Feb 12 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.1-1
 - Update source tarball to revision: e7152d1be933 tip
 - Statistics:
     Changesets:    25
     Diff:          37 files changed, 1323 insertions(+), 227 deletions(-)
 - Changes since Pacemaker-0.6.0
   + CIB: Ensure changes to top-level attributes (like admin_epoch) cause a disk write
   + CIB: Ensure the archived file hits the disk before returning
   + CIB: Repair the ability to do 'atomic increment' updates (value="value++")
   + crmd: Bug #7 - Connecting to the crmd immediately after startup causes use-of-NULL
   + Medium: CIB: Mask cib_diff_resync results from the caller - they do not need to know
   + Medium: crmd: Delay starting the IPC server until we are fully functional
   + Medium: CTS: Fix the startup patterns
   + Medium: pengine: Bug 1820 - Allow the first resource in a group to be migrated
   + Medium: pengine: Bug 1820 - Check the colocation dependencies of resources to be migrated
 
 * Mon Jan 14 2008 Andrew Beekhof <abeekhof@suse.de> - 0.6.0-1
 - This is the first release of the Pacemaker Cluster Resource Manager formerly part of Heartbeat.
 - For those looking for the GUI, mgmtd, CIM or TSA components, they are now found in
   the new pacemaker-pygui project.  Build dependencies prevent them from being
   included in Heartbeat (since the built-in CRM is no longer supported) and,
   being non-core components, are not included with Pacemaker.
 - Update source tarball to revision: c94b92d550cf
 - Statistics:
     Changesets:      347
     Diff:            2272 files changed, 132508 insertions(+), 305991 deletions(-)
 - Test hardware:
     + 6-node vmware cluster (sles10-sp1/256Mb/vmware stonith) on a single host (opensuse10.3/2Gb/2.66Ghz Quad Core2)
     + 7-node EMC Centera cluster (sles10/512Mb/2Ghz Xeon/ssh stonith)
 - Notes: Heartbeat Stack
     + All testing was performed with STONITH enabled
     + The CRM was enabled using the "crm respawn" directive
 - Notes: OpenAIS Stack
     + This release contains a preview of support for the OpenAIS cluster stack
     + The current release of the OpenAIS project is missing two important
     patches that we require.  OpenAIS packages containing these patches are
     available for most major distributions at:
     http://download.opensuse.org/repositories/server:/ha-clustering
     + The OpenAIS stack is not currently recommended for use in clusters that
     have shared data as STONITH support is not yet implemented
     + pingd is not yet available for use with the OpenAIS stack
     + 3 significant OpenAIS issues were found during testing of 4 and 6 node
     clusters.  We are actively working together with the OpenAIS project to
     get these resolved.
 - Pending bugs encountered during testing:
     + OpenAIS   #1736 - Openais membership took 20s to stabilize
     + Heartbeat #1750 - ipc_bufpool_update: magic number in head does not match
     + OpenAIS   #1793 - Assertion failure in memb_state_gather_enter()
     + OpenAIS   #1796 - Cluster message corruption
 - Changes since Heartbeat-2.1.2-24
   + Add OpenAIS support
   + Admin: crm_uuid - Look in the right place for Heartbeat UUID files
   + admin: Exit and indicate a problem if the crmd exits while crmadmin is performing a query
   + cib: Fix CIB_OP_UPDATE calls that modify the whole CIB
   + cib: Fix compilation when supporting the heartbeat stack
   + cib: Fix memory leaks caused by the switch to get_message_xml()
   + cib: HA_VALGRIND_ENABLED needs to be set _and_ set to 1|yes|true
   + cib: Use get_message_xml() in preference to cl_get_struct()
   + cib: Use the return value from call to write() in cib_send_plaintext()
   + Core: ccm nodes can legitimately have a node id of 0
   + Core: Fix peer-process tracking for the Heartbeat stack
   + Core: Heartbeat does not send status notifications for nodes that were already part of the cluster.  Fake them instead
   + CRM: Add children to HA_Messages such that the field name matches F_XML_TAGNAME
   + crm: Adopt a more flexible approach to enabling Valgrind
   + crm: Fix compilation when bzip2 is not installed
   + CRM: Future-proof get_message_xml()
   + crmd: Filter election responses based on time not FSA state
   + crmd: Handle all possible peer states in crmd_ha_status_callback()
   + crmd: Make sure the current date/time is set - prevents use-of-NULL when evaluating rules
   + crmd: Relax an assertion regarding ccm membership instances
   + crmd: Use (node->processes&crm_proc_ais) to accurately update the CIB after replace operations
   + crmd: Heartbeat: Accurately record peer client status
   + pengine: Bug 1777 - Allow colocation with a resource in the Stopped state
   + pengine: Bug 1822 - Prevent use-of-NULL in PromoteRsc()
   + pengine: Implement three recovery policies based on op_status and op_rc
   + pengine: Parse fail-count correctly (it may be set to INFINITY)
   + pengine: Prevent graph-loop when stonith agents need to be moved around before a STONITH op
   + pengine: Prevent graph-loops when two operations have the same name+interval
   + tengine: Cancel active timers when destroying graphs
   + tengine: Ensure failcount is set correctly for failed stops/starts
   + tengine: Update failcount for operations that time out
   + Medium: admin: Prevent hang in crm_mon -1 when there is no cib connection - Patch from Junko IKEDA
   + Medium: cib: Require --force|-f when performing potentially dangerous commands with cibadmin
   + Medium: cib: Tweak the shutdown code
   + Medium: Common: Only count peer processes of active nodes
   + Medium: Core: Create generic cluster sign-in method
   + Medium: core: Fix compilation when Heartbeat support is disabled
   + Medium: Core: General cleanup for supporting two stacks
   + Medium: Core: iso8601 - Support parsing of time-only strings
   + Medium: core: Isolate more code that is only needed when SUPPORT_HEARTBEAT is enabled
   + Medium: crm: Improved logging of errors in the XML parser
   + Medium: crmd: Fix potential use-of-NULL in string comparison
   + Medium: crmd: Reimplement synchronizing of CIB queries and updates when invoking the PE
   + Medium: crm_mon: Indicate when a node is both in standby mode and offline
   + Medium: pengine: Bug 1822 - Do not try to promote groups if not all of it is active
   + Medium: pengine: on_fail=nothing is an alias for 'ignore' not 'restart'
   + Medium: pengine: Prevent a potential use-of-NULL in cron_range_satisfied()
   + snmp subagent: fix a problem on displaying an unmanaged group
   + snmp subagent: use the syslog setting
   + snmp: v2 support (thanks to Keisuke MORI)
   + snmp_subagent - made it not complain about some things if shutting down
diff --git a/README.markdown b/README.markdown
index 1b596a2ec1..f92b5e6fa7 100644
--- a/README.markdown
+++ b/README.markdown
@@ -1,96 +1,100 @@
 # Pacemaker
 
 ## What is Pacemaker?
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Linux-HA (Heartbeat) and/or Corosync.
 
 It supports "n-node" clusters with significant capabilities for
 managing resources and dependencies.
 
 It will run scripts at initialization, when machines go up or down,
 when related resources fail and can be configured to periodically check
 resource health.
 
 ## For more information look at:
 * [Website](http://www.clusterlabs.org)
 * [Issues/Bugs](http://bugs.clusterlabs.org)
 * Mailing lists per audience: [users](http://oss.clusterlabs.org/mailman/listinfo/users), [developers](http://oss.clusterlabs.org/mailman/listinfo/developers), or possibly [the original one](http://oss.clusterlabs.org/mailman/listinfo/pacemaker) ([deprecated](http://oss.clusterlabs.org/pipermail/pacemaker/2015-February/023521.html))
 * [Documentation](http://www.clusterlabs.org/doc)
 
 ## User interfaces / shells
 
 There are multiple user interfaces for Pacemaker, including command line
 tools, graphical user interfaces and web frontends. The _crm shell_
 used to be included in the Pacemaker source tree, but is now
 maintained as a separate project.
 
 This is not meant to be an exhaustive list:
 
 * _crmsh_: https://crmsh.github.io/
 * _pcs_: https://github.com/feist/pcs/
 * _LCMC_: http://lcmc.sourceforge.net/
 * _hawk_: https://github.com/ClusterLabs/hawk
 
 ## Build Dependencies
 * automake
 * autoconf
 * libtool-ltdl-devel
 * libuuid-devel
 * pkgconfig
 * python (or python-devel if that's preferred as a build dependency)
 * glib2-devel
 * libxml2-devel
 * libxslt-devel 
 * bzip2-devel
 * gnutls-devel
 * pam-devel
 * libqb-devel
 
 ## Cluster Stack Dependencies (Pick at least one)
 * clusterlib-devel (CMAN)
 * corosynclib-devel (Corosync)
 * heartbeat-devel (Heartbeat)
 
 ## Optional Build Dependencies
 * ncurses-devel
 * openssl-devel
 * libselinux-devel
 * systemd-devel
 * dbus-devel
 * cluster-glue-libs-devel (LHA style fencing agents)
 * libesmtp-devel (Email alerts)
 * lm_sensors-devel (SNMP alerts)
 * net-snmp-devel (SNMP alerts)
 * asciidoc (documentation)
 * help2man (documentation)
 * publican (documentation)
 * inkscape (documentation)
 * docbook-style-xsl (documentation)
 
 ## Optional testing dependencies
 * valgrind (if running CTS valgrind tests)
 * systemd-python (if using CTS on cluster nodes running systemd)
+* rsync (if running CTS container tests)
+* libvirt-daemon-driver-lxc (if running CTS container tests)
+* libvirt-daemon-lxc (if running CTS container tests)
+* libvirt-login-shell (if running CTS container tests)
 
 ## Source Control (GIT)
 
     git clone git://github.com/ClusterLabs/pacemaker.git
 
 [See Github](https://github.com/ClusterLabs/pacemaker)
 
 ## Installing from source
 
     $ ./autogen.sh
     $ ./configure
     $ make
     $ sudo make install
 
 ## How you can help
 If you find this project useful, you may want to consider supporting its future development.
 There are a number of ways to support the project.
 
 * Test and report issues.
 * Tick something off our [todo list](https://github.com/ClusterLabs/pacemaker/blob/master/TODO.markdown).
 * Help others on the [mailing list](http://oss.clusterlabs.org/mailman/listinfo/users).
 * Contribute documentation, examples and test cases.
 * Contribute patches.
 * Spread the word.
diff --git a/configure.ac b/configure.ac
index cc9dfe75c1..4c0b7a10d4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,1940 +1,1945 @@
 dnl
 dnl autoconf for Pacemaker
 dnl
 dnl License: GNU General Public License (GPL)
 
 dnl ===============================================
 dnl Bootstrap
 dnl ===============================================
 AC_PREREQ(2.59)
 
 dnl Suggested structure:
 dnl     information on the package
 dnl     checks for programs
 dnl     checks for libraries
 dnl     checks for header files
 dnl     checks for types
 dnl     checks for structures
 dnl     checks for compiler characteristics
 dnl     checks for library functions
 dnl     checks for system services
 
 m4_include([version.m4])
-AC_INIT([pacemaker], VERSION_NUMBER, pacemaker@oss.clusterlabs.org,pacemaker,http://clusterlabs.org)
+AC_INIT([pacemaker], VERSION_NUMBER, [pacemaker@oss.clusterlabs.org],
+        [pacemaker], PCMK_URL)
+dnl Workaround autoconf < 2.64
+if test x"${PACKAGE_URL}" = x""; then
+	AC_SUBST([PACKAGE_URL], PCMK_URL)
+fi
 
 PCMK_FEATURES=""
 HB_PKG=heartbeat
 
 AC_CONFIG_AUX_DIR(.)
 AC_CANONICAL_HOST
 
 dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below)
 dnl
 dnl Internal header: include/config.h
 dnl   - Contains ALL defines
 dnl   - include/config.h.in is generated automatically by autoheader
 dnl   - NOT to be included in any header files except lha_internal.h
 dnl     (which is also not to be included in any other header files)
 dnl
 dnl External header: include/crm_config.h
 dnl   - Contains a subset of defines checked here
 dnl   - Manually edit include/crm_config.h.in to have configure include
 dnl     new defines
 dnl   - Should not include HAVE_* defines
 dnl   - Safe to include anywhere
 AM_CONFIG_HEADER(include/config.h include/crm_config.h)
 ALL_LINGUAS="en fr"
 
 AC_ARG_WITH(version,
     [  --with-version=version   Override package version (if you're a packager needing to pretend) ],
     [ PACKAGE_VERSION="$withval" ])
 
 AC_ARG_WITH(pkg-name,
     [  --with-pkg-name=name     Override package name (if you're a packager needing to pretend) ],
     [ PACKAGE_NAME="$withval" ])
 
 dnl Older distros may need: AM_INIT_AUTOMAKE($PACKAGE_NAME, $PACKAGE_VERSION)
 AM_INIT_AUTOMAKE([foreign])
 AC_DEFINE_UNQUOTED(PACEMAKER_VERSION, "$PACKAGE_VERSION", Current pacemaker version)
 
 PACKAGE_SERIES=`echo $PACKAGE_VERSION | awk -F. '{ print $1"."$2 }'`
 AC_SUBST(PACKAGE_SERIES)
 AC_SUBST(PACKAGE_VERSION)
 
 dnl automake >= 1.11 offers --enable-silent-rules for suppressing the output from
 dnl normal compilation.  When a failure occurs, it will then display the full
 dnl command line
 dnl Wrap in m4_ifdef to avoid breaking on older platforms
 m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
 
 dnl Example 2.4. Silent Custom Rule to Generate a File
 dnl %-bar.pc: %.pc
 dnl	$(AM_V_GEN)$(LN_S) $(notdir $^) $@
 
 CC_IN_CONFIGURE=yes
 export CC_IN_CONFIGURE
 
 LDD=ldd
 BUILD_ATOMIC_ATTRD=1
 
 dnl ========================================================================
 dnl Compiler characteristics
 dnl ========================================================================
 
 AC_PROG_CC dnl Can force other with environment variable "CC".
 AM_PROG_CC_C_O
 AC_PROG_CC_STDC
 gl_EARLY
 gl_INIT
 
 AC_LIBTOOL_DLOPEN               dnl Enable dlopen support...
 AC_LIBLTDL_CONVENIENCE          dnl make libltdl a convenience lib
 AC_PROG_LIBTOOL
 
 AC_PROG_YACC
 AM_PROG_LEX
 
 AC_C_STRINGIZE
 AC_TYPE_SIZE_T
 AC_CHECK_SIZEOF(char)
 AC_CHECK_SIZEOF(short)
 AC_CHECK_SIZEOF(int)
 AC_CHECK_SIZEOF(long)
 AC_CHECK_SIZEOF(long long)
 AC_STRUCT_TIMEZONE
 
 dnl ===============================================
 dnl Helpers
 dnl ===============================================
 dnl cc_supports_flag FLAG...
 dnl   Check whether $CC accepts the given compiler flag(s).
 dnl   CFLAGS is overridden, for the duration of this function only, with
 dnl   "-Werror" followed by the arguments, and an empty program is compiled.
 dnl   Reports "yes" and returns 0 if the compile succeeds; reports "no" and
 dnl   returns 1 otherwise.  The status is also left in the shell variable RC.
 dnl   NOTE(review): "local" is not specified by POSIX sh - confirm that all
 dnl   shells expected to run configure provide it.
 cc_supports_flag() {
          local CFLAGS="-Werror $@"
          AC_MSG_CHECKING(whether $CC supports "$@")
          AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ]], [[ ]])], [RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)])
          return $RC
 }
 
 try_extract_header_define() {
 	  AC_MSG_CHECKING(if $2 in $1 exists)
 	  Cfile=$srcdir/extract_define.$2.${$}
 	  printf "#include <stdio.h>\n" > ${Cfile}.c
 	  printf "#include <%s>\n" $1 >> ${Cfile}.c
 	  printf "int main(int argc, char **argv) {\n" >> ${Cfile}.c
 	  printf "#ifdef %s\n" $2 >> ${Cfile}.c
 	  printf "printf(\"%%s\", %s);\n" $2 >> ${Cfile}.c
 	  printf "#endif \n return 0; }\n" >> ${Cfile}.c
 	  $CC $CFLAGS ${Cfile}.c -o ${Cfile} 2>/dev/null
 	  value=
 	  if test -x ${Cfile}; then
 	      value=`${Cfile} 2>/dev/null`
 	  fi
 	  if  test x"${value}" == x""; then
 	      value=$3
 	      AC_MSG_RESULT(default: $value)
 	  else
 	      AC_MSG_RESULT($value)
 	  fi
 	  printf $value
 	  rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno
 	}
 
 dnl extract_header_define HEADER NAME
 dnl   Print on stdout the value that the macro NAME has in HEADER.
 dnl   A small C program that prints NAME with "%s" is written next to
 dnl   $srcdir, built with $CC $CFLAGS, executed, and removed again.
 dnl   Unlike try_extract_header_define there is no fallback value: if the
 dnl   probe cannot be built or run, nothing is printed.
 extract_header_define() {
 	  AC_MSG_CHECKING(for $2 in $1)
 	  Cfile=$srcdir/extract_define.$2.${$}
 	  printf "#include <stdio.h>\n" > ${Cfile}.c
 	  printf "#include <%s>\n" "$1" >> ${Cfile}.c
 	  printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" "$2" >> ${Cfile}.c
 	  $CC $CFLAGS ${Cfile}.c -o ${Cfile}
 	  value=`${Cfile}`
 	  AC_MSG_RESULT($value)
 	  dnl Never use the extracted value as the format string; it may
 	  dnl contain percent signs, backslashes or blanks.
 	  printf "%s" "$value"
 	  rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno
 	}
 
 dnl ===============================================
 dnl Configure Options
 dnl ===============================================
 
 dnl Some systems, like Solaris require a custom package name
 AC_ARG_WITH(pkgname,
     [  --with-pkgname=name     name for pkg (typically for Solaris) ],
     [ PKGNAME="$withval" ],
     [ PKGNAME="LXHAhb" ],
   )
 AC_SUBST(PKGNAME)
 
 AC_ARG_ENABLE([ansi],
 [  --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers.
      [default=no]])
 
 AC_ARG_ENABLE([fatal-warnings],
 [  --enable-fatal-warnings very pedantic and fatal warnings for gcc
      [default=yes]])
 
 AC_ARG_ENABLE([quiet],
 [  --enable-quiet
      Supress make output unless there is an error
      [default=no]])
 
 AC_ARG_ENABLE([thread-safe],
 [  --enable-thread-safe Enable some client libraries to be thread safe.
      [default=no]])
 
 AC_ARG_ENABLE([bundled-ltdl],
 [  --enable-bundled-ltdl  Configure, build and install the standalone ltdl library bundled with ${PACKAGE} [default=no]])
 LTDL_LIBS=""
 
 AC_ARG_ENABLE([no-stack],
     [  --enable-no-stack
        Only build the Policy Engine and pieces needed to support it [default=no]])
 
 dnl Init-system integration switches.  The help text describes what the
 dnl --enable form does; use --disable-upstart / --disable-systemd to leave
 dnl the corresponding support out of the build.
 AC_ARG_ENABLE([upstart],
     [  --enable-upstart
        Build support for the Upstart init system [default=yes]])
 
 AC_ARG_ENABLE([systemd],
     [  --enable-systemd
        Build support for the Systemd init system [default=yes]])
 
 AC_ARG_WITH(ais,
     [  --with-ais
        Support the Corosync messaging and membership layer ],
     [ SUPPORT_CS=$withval ],
     [ SUPPORT_CS=try ],
 )
 
 AC_ARG_WITH(corosync,
     [  --with-corosync
        Support the Corosync messaging and membership layer ],
     [ SUPPORT_CS=$withval ]
 dnl	initialized in AC_ARG_WITH(ais...) already,
 dnl	don't reset to try if it was given as --without-ais
 )
 
 AC_ARG_WITH(heartbeat,
     [  --with-heartbeat
        Support the Heartbeat messaging and membership layer ],
     [ SUPPORT_HEARTBEAT=$withval ],
     [ SUPPORT_HEARTBEAT=try ],
 )
 
 AC_ARG_WITH(cman,
     [  --with-cman
        Support the consumption of membership and quorum from cman ],
     [ SUPPORT_CMAN=$withval ],
     [ SUPPORT_CMAN=try ],
 )
 
 AC_ARG_WITH(cpg,
     [  --with-cs-quorum
        Support the consumption of membership and quorum from corosync ],
     [ SUPPORT_CS_QUORUM=$withval ],
     [ SUPPORT_CS_QUORUM=try ],
 )
 
 AC_ARG_WITH(nagios,
     [  --with-nagios
        Support nagios remote monitoring ],
     [ SUPPORT_NAGIOS=$withval ],
     [ SUPPORT_NAGIOS=try ],
 )
 
 AC_ARG_WITH(nagios-plugin-dir,
     [  --with-nagios-plugin-dir=DIR
        Directory for nagios plugins [${NAGIOS_PLUGIN_DIR}]],
     [ NAGIOS_PLUGIN_DIR="$withval" ]
 )
 
 AC_ARG_WITH(nagios-metadata-dir,
     [  --with-nagios-metadata-dir=DIR
        Directory for nagios plugins metadata [${NAGIOS_METADATA_DIR}]],
     [ NAGIOS_METADATA_DIR="$withval" ]
 )
 
 AC_ARG_WITH(snmp,
     [  --with-snmp
        Support the SNMP protocol ],
     [ SUPPORT_SNMP=$withval ],
     [ SUPPORT_SNMP=try ],
 )
 
 AC_ARG_WITH(esmtp,
     [  --with-esmtp
        Support the sending mail notifications with the esmtp library ],
     [ SUPPORT_ESMTP=$withval ],
     [ SUPPORT_ESMTP=try ],
 )
 
 AC_ARG_WITH(acl,
     [  --with-acl
        Support CIB ACL ],
     [ SUPPORT_ACL=$withval ],
     [ SUPPORT_ACL=yes ],
 )
 
 AC_ARG_WITH(cibsecrets,
     [  --with-cibsecrets
        Support CIB secrets ],
     [ SUPPORT_CIBSECRETS=$withval ],
     [ SUPPORT_CIBSECRETS=no ],
 )
 
 CSPREFIX=""
 AC_ARG_WITH(ais-prefix,
     [  --with-ais-prefix=DIR  Prefix used when Corosync was installed [$prefix]],
     [ CSPREFIX=$withval ],
     [ CSPREFIX=$prefix ])
 
 LCRSODIR=""
 AC_ARG_WITH(lcrso-dir,
     [  --with-lcrso-dir=DIR   Corosync lcrso files. ],
     [ LCRSODIR="$withval" ])
 
 INITDIR=""
 AC_ARG_WITH(initdir,
     [  --with-initdir=DIR      directory for init (rc) scripts [${INITDIR}]],
     [ INITDIR="$withval" ])
 
 SUPPORT_PROFILING=0
 AC_ARG_WITH(profiling,
     [  --with-profiling
        Disable optimizations for effective profiling ],
     [ SUPPORT_PROFILING=$withval ])
 
 AC_ARG_WITH(coverage,
     [  --with-coverage
        Disable optimizations for effective profiling ],
     [ SUPPORT_COVERAGE=$withval ])
 
 PUBLICAN_BRAND="common"
 AC_ARG_WITH(brand,
     [  --with-brand=brand  Brand to use for generated documentation [$PUBLICAN_BRAND]],
     [ PUBLICAN_BRAND="$withval" ])
 AC_SUBST(PUBLICAN_BRAND)
 
 ASCIIDOC_CLI_TYPE="pcs"
 AC_ARG_WITH(doc-cli,
     [  --with-doc-cli=cli_type  CLI type to use for generated documentation. [$ASCIIDOC_CLI_TYPE]],
     [ ASCIIDOC_CLI_TYPE="$withval" ])
 AC_SUBST(ASCIIDOC_CLI_TYPE)
 
 dnl ===============================================
 dnl General Processing
 dnl ===============================================
 
 AC_SUBST(HB_PKG)
 
 INIT_EXT=""
 echo Our Host OS: $host_os/$host
 
 
 AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
 case $prefix in
   NONE)
 	prefix=/usr
 	dnl Fix default variables - "prefix" variable if not specified
 	if test "$localstatedir" = "\${prefix}/var"; then
 		localstatedir="/var"
 	fi
 	if test "$sysconfdir" = "\${prefix}/etc"; then
 		sysconfdir="/etc"
 	fi
 	;;
 esac
 
 
 AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
 case $exec_prefix in
   dnl For consistency with Heartbeat, map NONE->$prefix
   NONE)	  exec_prefix=$prefix;;
   prefix) exec_prefix=$prefix;;
 esac
 
 AC_MSG_NOTICE(Sanitizing ais_prefix: ${CSPREFIX})
 case $CSPREFIX in
   dnl For consistency with Heartbeat, map NONE->$prefix
   NONE)	  CSPREFIX=$prefix;;
   prefix) CSPREFIX=$prefix;;
 esac
 
 AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR})
 case $INITDIR in
   prefix) INITDIR=$prefix;;
   "")
     AC_MSG_CHECKING(which init (rc) directory to use)
       for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d	\
 	   /usr/local/etc/rc.d /etc/rc.d
       do
         if
           test -d $initdir
         then
           INITDIR=$initdir
           break
         fi
       done
       AC_MSG_RESULT($INITDIR);;
 esac
 AC_SUBST(INITDIR)
 
 AC_MSG_NOTICE(Sanitizing libdir: ${libdir})
 case $libdir in
   dnl For consistency with Heartbeat, map NONE->$prefix
   prefix|NONE)
     AC_MSG_CHECKING(which lib directory to use)
     for aDir in lib64 lib
     do
       trydir="${exec_prefix}/${aDir}"
       if
         test -d ${trydir}
       then
         libdir=${trydir}
         break
       fi
     done
     AC_MSG_RESULT($libdir);
     ;;
 esac
 
 dnl Expand autoconf variables so that we dont end up with '${prefix}'
 dnl in #defines and python scripts
 dnl NOTE: Autoconf deliberately leaves them unexpanded to allow
 dnl    make exec_prefix=/foo install
 dnl No longer being able to do this seems like no great loss to me...
 
 eval prefix="`eval echo ${prefix}`"
 eval exec_prefix="`eval echo ${exec_prefix}`"
 eval bindir="`eval echo ${bindir}`"
 eval sbindir="`eval echo ${sbindir}`"
 eval libexecdir="`eval echo ${libexecdir}`"
 eval datadir="`eval echo ${datadir}`"
 eval sysconfdir="`eval echo ${sysconfdir}`"
 eval sharedstatedir="`eval echo ${sharedstatedir}`"
 eval localstatedir="`eval echo ${localstatedir}`"
 eval libdir="`eval echo ${libdir}`"
 eval includedir="`eval echo ${includedir}`"
 eval oldincludedir="`eval echo ${oldincludedir}`"
 eval infodir="`eval echo ${infodir}`"
 eval mandir="`eval echo ${mandir}`"
 
 dnl Home-grown variables
 eval INITDIR="${INITDIR}"
 eval docdir="`eval echo ${docdir}`"
 if test x"${docdir}" = x""; then
    docdir=${datadir}/doc/${PACKAGE}-${VERSION}
    #docdir=${datadir}/doc/packages/${PACKAGE}
 fi
 AC_SUBST(docdir)
 
 for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \
     sharedstatedir localstatedir libdir includedir oldincludedir infodir \
     mandir INITDIR docdir
 do
   dirname=`eval echo '${'${j}'}'`
   if
     test ! -d "$dirname"
   then
     AC_MSG_WARN([$j directory ($dirname) does not exist!])
   fi
 done
 
 dnl This OS-based decision-making is poor autotools practice;
 dnl feature-based mechanisms are strongly preferred.
 dnl
 dnl So keep this section to a bare minimum; regard as a "necessary evil".
 
 dnl Per-platform defines and search paths, selected by $host_os.
 dnl Each branch defines an ON_<platform> preprocessor symbol; the BSD and
 dnl Darwin branches additionally extend the library/include search paths.
 case "$host_os" in
 *bsd*)
 		AC_DEFINE_UNQUOTED(ON_BSD, 1, Compiling for BSD platform)
 		dnl Append rather than overwrite, so that LIBS supplied by
 		dnl the user or by earlier checks is preserved, as in the
 		dnl Darwin branch below.
 		LIBS="$LIBS -L/usr/local/lib"
 		CPPFLAGS="$CPPFLAGS -I/usr/local/include"
 		INIT_EXT=".sh"
 		;;
 *solaris*)
 		AC_DEFINE_UNQUOTED(ON_SOLARIS, 1, Compiling for Solaris platform)
 		;;
 *linux*)
 		AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform)
 		;;
 darwin*)
 		AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform)
 		LIBS="$LIBS -L${prefix}/lib"
 		CFLAGS="$CFLAGS -I${prefix}/include"
 		;;
 esac
 
 dnl Eventually remove this
 if test "$cross_compiling" != "yes"; then
    CPPFLAGS="$CPPFLAGS -I${prefix}/include/heartbeat"
 fi
 
 AC_SUBST(INIT_EXT)
 AC_MSG_NOTICE(Host CPU: $host_cpu)
 
 case "$host_cpu" in
   ppc64|powerpc64)
     case $CFLAGS in
      *powerpc64*)			;;
      *)	if test "$GCC" = yes; then
 	  CFLAGS="$CFLAGS -m64"
 	fi				;;
     esac
 esac
 
 dnl Determine the printf conversion to use for uint64_t.
 dnl A snippet that formats a uint64_t with %lu is compiled with
 dnl "-Wall -Werror"; if it compiles, U64T is set to "%lu", otherwise to
 dnl "%llu".  The previous CFLAGS are saved before the probe and restored
 dnl afterwards.  The result is exported to C code as the U64T define.
 AC_MSG_CHECKING(which format is needed to print uint64_t)
 
 ac_save_CFLAGS=$CFLAGS
 CFLAGS="-Wall -Werror"
 
 AC_COMPILE_IFELSE(
     [AC_LANG_PROGRAM(
       [
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
       ],
       [
 int max = 512;
 uint64_t bignum = 42;
 char *buffer = malloc(max);
 const char *random = "random";
 snprintf(buffer, max-1, "<quorum id=%lu quorate=%s/>", bignum, random);
 fprintf(stderr, "Result: %s\n", buffer);
       ]
     )],
     [U64T="%lu"],
     [U64T="%llu"]
 )
 CFLAGS=$ac_save_CFLAGS
 
 AC_MSG_RESULT($U64T)
 AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t)
 
 dnl ===============================================
 dnl Program Paths
 dnl ===============================================
 
 PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
 export PATH
 
 
 dnl Replacing AC_PROG_LIBTOOL with AC_CHECK_PROG because LIBTOOL
 dnl was NOT being expanded all the time thus causing things to fail.
 AC_CHECK_PROGS(LIBTOOL, glibtool libtool libtool15 libtool13)
 
 AM_PATH_PYTHON
 AC_CHECK_PROGS(MAKE, gmake make)
 AC_PATH_PROGS(HTML2TXT, lynx w3m)
 AC_PATH_PROGS(HELP2MAN, help2man)
 AC_PATH_PROGS(POD2MAN, pod2man, pod2man)
 AC_PATH_PROGS(ASCIIDOC, asciidoc)
 AC_PATH_PROGS(PUBLICAN, publican)
 AC_PATH_PROGS(INKSCAPE, inkscape)
 AC_PATH_PROGS(XSLTPROC, xsltproc)
 AC_PATH_PROGS(XMLCATALOG, xmlcatalog)
 AC_PATH_PROGS(FOP, fop)
 AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh)
 AC_PATH_PROGS(SCP, scp, /usr/bin/scp)
 AC_PATH_PROGS(TAR, tar)
 AC_PATH_PROGS(MD5, md5)
 AC_PATH_PROGS(TEST, test)
 AC_PATH_PROGS(PKGCONFIG, pkg-config)
 AC_PATH_PROGS(XML2CONFIG, xml2-config)
 AC_PATH_PROGS(VALGRIND_BIN, valgrind, /usr/bin/valgrind)
 AC_DEFINE_UNQUOTED(VALGRIND_BIN, "$VALGRIND_BIN", Valgrind command)
 
 dnl Disable these until we decide if the stonith config file should be supported
 dnl AC_PATH_PROGS(BISON, bison)
 dnl AC_PATH_PROGS(FLEX, flex)
 dnl AC_PATH_PROGS(HAVE_YACC, $YACC)
 
 if test x"${LIBTOOL}" = x""; then
    AC_MSG_ERROR(You need (g)libtool installed in order to build ${PACKAGE})
 fi
 if test x"${MAKE}" = x""; then
    AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE})
 fi
 
 AM_CONDITIONAL(BUILD_HELP, test x"${HELP2MAN}" != x"")
 if test x"${HELP2MAN}" != x""; then
    PCMK_FEATURES="$PCMK_FEATURES generated-manpages"
 fi
 
 dnl Locate the DocBook "manpages/docbook.xsl" stylesheet.
 dnl Only attempted when xsltproc was found.  MANPAGE_XSLT is left empty
 dnl when no stylesheet can be located.
 MANPAGE_XSLT=""
 if test x"${XSLTPROC}" != x""; then
   AC_MSG_CHECKING(docbook to manpage transform)
   # first try to figure out correct template using xmlcatalog query,
   # resort to extensive (semi-deterministic) file search if that fails
   DOCBOOK_XSL_URI='http://docbook.sourceforge.net/release/xsl/current'
   DOCBOOK_XSL_PATH='manpages/docbook.xsl'
   # xmlcatalog is optional; without it go straight to the file search
   # instead of trying to execute an empty command name
   if test x"${XMLCATALOG}" != x""; then
     MANPAGE_XSLT=$(${XMLCATALOG} "" ${DOCBOOK_XSL_URI}/${DOCBOOK_XSL_PATH} \
                    | sed -n 's|^file://||p;q')
   fi
   if test x"${MANPAGE_XSLT}" = x""; then
     # sorted with LC_ALL=C so the first match does not depend on the locale
     DIRS=$(find "${datadir}" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \
            -type d | LC_ALL=C sort)
     XSLT=$(basename ${DOCBOOK_XSL_PATH})
     for d in ${DIRS}; do
       if test -f "${d}/${XSLT}"; then
          MANPAGE_XSLT="${d}/${XSLT}"
          break
       fi
     done
   fi
   # report the result only when the matching "checking" line was printed
   AC_MSG_RESULT($MANPAGE_XSLT)
 fi
 AC_SUBST(MANPAGE_XSLT)
 
 AM_CONDITIONAL(BUILD_XML_HELP, test x"${MANPAGE_XSLT}" != x"")
 if test x"${MANPAGE_XSLT}" != x""; then
    PCMK_FEATURES="$PCMK_FEATURES agent-manpages"
 fi
 
 AM_CONDITIONAL(BUILD_ASCIIDOC, test x"${ASCIIDOC}" != x"")
 if test x"${ASCIIDOC}" != x""; then
    PCMK_FEATURES="$PCMK_FEATURES ascii-docs"
 fi
 
 SUPPORT_STONITH_CONFIG=0
 if test x"${HAVE_YACC}" != x"" -a x"${FLEX}" != x"" -a x"${BISON}" != x""; then
    SUPPORT_STONITH_CONFIG=1
    PCMK_FEATURES="$PCMK_FEATURES st-conf"
 fi
 
 AM_CONDITIONAL(BUILD_STONITH_CONFIG, test $SUPPORT_STONITH_CONFIG = 1)
 AC_DEFINE_UNQUOTED(SUPPORT_STONITH_CONFIG, $SUPPORT_STONITH_CONFIG, Support a stand-alone stonith config file in addition to the CIB)
 
 AM_CONDITIONAL(BUILD_DOCBOOK, test x"${PUBLICAN}" != x"" -a x"${INKSCAPE}" != x"")
 if test x"${PUBLICAN}" != x"" -a x"${INKSCAPE}" != x""; then
    AC_MSG_NOTICE(Enabling publican)
    PCMK_FEATURES="$PCMK_FEATURES publican-docs"
 fi
 
 dnl ========================================================================
 dnl checks for library functions to replace them
 dnl
 dnl     NoSuchFunctionName:
 dnl             is a dummy function which no system supplies.  It is here to make
 dnl             the system compile semi-correctly on OpenBSD which doesn't know
 dnl             how to create an empty archive
 dnl
 dnl     scandir: Only on BSD.
 dnl             System-V systems may have it, but hidden and/or deprecated.
 dnl             A replacement function is supplied for it.
 dnl
 dnl     setenv: is some bsdish function that should also be avoided (use
 dnl             putenv instead)
 dnl             On the other hand, putenv doesn't provide the right API for the
 dnl             code and has memory leaks designed in (sigh...)  Fortunately this
 dnl             A replacement function is supplied for it.
 dnl
 dnl     strerror: returns a string that corresponds to an errno.
 dnl             A replacement function is supplied for it.
 dnl
 dnl	strnlen: is a gnu function similar to strlen, but safer.
 dnl		We wrote a tolerably-fast replacement function for it.
 dnl
 dnl	strndup: is a gnu function similar to strdup, but safer.
 dnl		We wrote a tolerably-fast replacement function for it.
 
 AC_REPLACE_FUNCS(alphasort NoSuchFunctionName scandir setenv strerror strchrnul unsetenv strnlen strndup)
 
 dnl ===============================================
 dnl Libraries
 dnl ===============================================
 AC_CHECK_LIB(socket, socket)			dnl -lsocket
 AC_CHECK_LIB(c, dlopen)				dnl if dlopen is in libc...
 AC_CHECK_LIB(dl, dlopen)			dnl -ldl (for Linux)
 AC_CHECK_LIB(rt, sched_getscheduler)            dnl -lrt (for Tru64)
 AC_CHECK_LIB(gnugetopt, getopt_long)		dnl -lgnugetopt ( if available )
 AC_CHECK_LIB(pam, pam_start)			dnl -lpam (if available)
 
 AC_CHECK_FUNCS([sched_setscheduler])
 
 AC_CHECK_LIB(uuid, uuid_parse)			dnl load the library if necessary
 AC_CHECK_FUNCS(uuid_unparse)			dnl OSX ships uuid_* as standard functions
 
 AC_CHECK_HEADERS(uuid/uuid.h)
 
 if test "x$ac_cv_func_uuid_unparse" != xyes; then
    AC_MSG_ERROR(You do not have the libuuid development package installed)
 fi
 
 if test x"${PKGCONFIG}" = x""; then
    AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE})
 fi
 
 if test "x${enable_thread_safe}" = "xyes"; then
         GPKGNAME="gthread-2.0"
 else
         GPKGNAME="glib-2.0"
 fi
 
 if
    $PKGCONFIG --exists $GPKGNAME
 then
 	GLIBCONFIG="$PKGCONFIG $GPKGNAME"
 else
 	set -x
         echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH
 	$PKGCONFIG --exists $GPKGNAME; echo $?
 	$PKGCONFIG --cflags $GPKGNAME; echo $?
 	$PKGCONFIG $GPKGNAME; echo $?
 	set +x
 
 	AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE})
 fi
 AC_MSG_RESULT(using $GLIBCONFIG)
 
 #
 #	Where is dlopen?
 #
 if test "$ac_cv_lib_c_dlopen" = yes; then
 	LIBADD_DL=""
 elif test "$ac_cv_lib_dl_dlopen" = yes; then
 	LIBADD_DL=-ldl
 else
         LIBADD_DL=${lt_cv_dlopen_libs}
 fi
 dnl
 dnl Check for location of gettext
 dnl
 dnl On at least Solaris 2.x, where it is in libc, specifying lintl causes
 dnl grief. Ensure minimal result, not the sum of all possibilities.
 dnl And do libc first.
 dnl Known examples:
 dnl    c:      Linux, Solaris 2.6+
 dnl    intl:   BSD, AIX
 
 AC_CHECK_LIB(c, gettext)
 if test x$ac_cv_lib_c_gettext != xyes; then
    AC_CHECK_LIB(intl, gettext)
 fi
 
 if test x$ac_cv_lib_c_gettext != xyes -a x$ac_cv_lib_intl_gettext != xyes; then
    AC_MSG_ERROR(You need gettext installed in order to build ${PACKAGE})
 fi
 
 if test "X$GLIBCONFIG" != X; then
 	AC_MSG_CHECKING(for special glib includes: )
 	GLIBHEAD=`$GLIBCONFIG --cflags`
 	AC_MSG_RESULT($GLIBHEAD)
 	CPPFLAGS="$CPPFLAGS $GLIBHEAD"
 
 	AC_MSG_CHECKING(for glib library flags)
 	GLIBLIB=`$GLIBCONFIG --libs`
 	AC_MSG_RESULT($GLIBLIB)
 	LIBS="$LIBS $GLIBLIB"
 fi
 
 dnl FreeBSD needs -lcompat for ftime() used by lrmd.c
 AC_CHECK_LIB([compat], [ftime], [COMPAT_LIBS='-lcompat'])
 AC_SUBST(COMPAT_LIBS)
 
 dnl ========================================================================
 dnl Headers
 dnl ========================================================================
 
 AC_HEADER_STDC
 AC_CHECK_HEADERS(arpa/inet.h)
 AC_CHECK_HEADERS(asm/types.h)
 AC_CHECK_HEADERS(assert.h)
 AC_CHECK_HEADERS(auth-client.h)
 AC_CHECK_HEADERS(ctype.h)
 AC_CHECK_HEADERS(dirent.h)
 AC_CHECK_HEADERS(errno.h)
 AC_CHECK_HEADERS(fcntl.h)
 AC_CHECK_HEADERS(getopt.h)
 AC_CHECK_HEADERS(glib.h)
 AC_CHECK_HEADERS(grp.h)
 AC_CHECK_HEADERS(limits.h)
 AC_CHECK_HEADERS(linux/errqueue.h)
 AC_CHECK_HEADERS(linux/swab.h)
 AC_CHECK_HEADERS(malloc.h)
 AC_CHECK_HEADERS(netdb.h)
 AC_CHECK_HEADERS(netinet/in.h)
 AC_CHECK_HEADERS(netinet/ip.h)
 AC_CHECK_HEADERS(pam/pam_appl.h)
 AC_CHECK_HEADERS(pthread.h)
 AC_CHECK_HEADERS(pwd.h)
 AC_CHECK_HEADERS(security/pam_appl.h)
 AC_CHECK_HEADERS(sgtty.h)
 AC_CHECK_HEADERS(signal.h)
 AC_CHECK_HEADERS(stdarg.h)
 AC_CHECK_HEADERS(stddef.h)
 AC_CHECK_HEADERS(stdio.h)
 AC_CHECK_HEADERS(stdlib.h)
 AC_CHECK_HEADERS(string.h)
 AC_CHECK_HEADERS(strings.h)
 AC_CHECK_HEADERS(sys/dir.h)
 AC_CHECK_HEADERS(sys/ioctl.h)
 AC_CHECK_HEADERS(sys/param.h)
 AC_CHECK_HEADERS(sys/poll.h)
 AC_CHECK_HEADERS(sys/reboot.h)
 AC_CHECK_HEADERS(sys/resource.h)
 AC_CHECK_HEADERS(sys/select.h)
 AC_CHECK_HEADERS(sys/socket.h)
 AC_CHECK_HEADERS(sys/signalfd.h)
 AC_CHECK_HEADERS(sys/sockio.h)
 AC_CHECK_HEADERS(sys/stat.h)
 AC_CHECK_HEADERS(sys/time.h)
 AC_CHECK_HEADERS(sys/timeb.h)
 AC_CHECK_HEADERS(sys/types.h)
 AC_CHECK_HEADERS(sys/uio.h)
 AC_CHECK_HEADERS(sys/un.h)
 AC_CHECK_HEADERS(sys/utsname.h)
 AC_CHECK_HEADERS(sys/wait.h)
 AC_CHECK_HEADERS(time.h)
 AC_CHECK_HEADERS(unistd.h)
 AC_CHECK_HEADERS(winsock.h)
 
 dnl These headers need prerequisites before the tests will pass
 dnl AC_CHECK_HEADERS(net/if.h)
 dnl AC_CHECK_HEADERS(netinet/icmp6.h)
 dnl AC_CHECK_HEADERS(netinet/ip6.h)
 dnl AC_CHECK_HEADERS(netinet/ip_icmp.h)
 
 AC_MSG_CHECKING(for special libxml2 includes)
 if test "x$XML2CONFIG" = "x"; then
    AC_MSG_ERROR(libxml2 config not found)
 else
    XML2HEAD="`$XML2CONFIG --cflags`"
    AC_MSG_RESULT($XML2HEAD)
    AC_CHECK_LIB(xml2, xmlReadMemory)
    AC_CHECK_LIB(xslt, xsltApplyStylesheet)
 fi
 
 CPPFLAGS="$CPPFLAGS $XML2HEAD"
 
 AC_CHECK_HEADERS(libxml/xpath.h)
 AC_CHECK_HEADERS(libxslt/xslt.h)
 if test "$ac_cv_header_libxml_xpath_h" != "yes"; then
    AC_MSG_ERROR(The libxml developement headers were not found)
 fi
 if test "$ac_cv_header_libxslt_xslt_h" != "yes"; then
    AC_MSG_ERROR(The libxslt developement headers were not found)
 fi
 
 dnl ========================================================================
 dnl Structures
 dnl ========================================================================
 
 AC_CHECK_MEMBERS([struct tm.tm_gmtoff],,,[[#include <time.h>]])
 AC_CHECK_MEMBERS([lrm_op_t.rsc_deleted],,,[[#include <lrm/lrm_api.h>]])
 AC_CHECK_MEMBER([struct dirent.d_type],
     AC_DEFINE(HAVE_STRUCT_DIRENT_D_TYPE,1,[Define this if struct dirent has d_type]),,
     [#include <dirent.h>])
 
 dnl ========================================================================
 dnl Functions
 dnl ========================================================================
 
 AC_CHECK_FUNCS(g_log_set_default_handler)
 AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT,  1, [Have getopt function]))
 AC_CHECK_FUNCS(nanosleep, AC_DEFINE(HAVE_DECL_NANOSLEEP,  1, [Have nanosleep function]))
 
 dnl ========================================================================
 dnl   ltdl
 dnl ========================================================================
 
 AC_CHECK_LIB(ltdl, lt_dlopen, [LTDL_foo=1])
 if test "x${enable_bundled_ltdl}" = "xyes"; then
    if test $ac_cv_lib_ltdl_lt_dlopen = yes; then
       AC_MSG_NOTICE([Disabling usage of installed ltdl])
    fi
    ac_cv_lib_ltdl_lt_dlopen=no
 fi
 
 LIBLTDL_DIR=""
 if test $ac_cv_lib_ltdl_lt_dlopen != yes ; then
    AC_MSG_NOTICE([Installing local ltdl])
    LIBLTDL_DIR=libltdl
    ( cd $srcdir ; $TAR -xvf libltdl.tar )
    if test "$?" -ne 0; then
      AC_MSG_ERROR([$TAR of libltdl.tar in $srcdir failed])
    fi
    AC_CONFIG_SUBDIRS(libltdl)
 else
    LIBS="$LIBS -lltdl"
    AC_MSG_NOTICE([Using installed ltdl])
    INCLTDL=""
    LIBLTDL=""
 fi
 
 AC_SUBST(INCLTDL)
 AC_SUBST(LIBLTDL)
 AC_SUBST(LIBLTDL_DIR)
 
 dnl ========================================================================
 dnl   bzip2
 dnl ========================================================================
 AC_CHECK_HEADERS(bzlib.h)
 AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress)
 
 if test x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress != xyes ; then
    AC_MSG_ERROR(BZ2 libraries not found)
 fi
 
 if test x$ac_cv_header_bzlib_h != xyes; then
    AC_MSG_ERROR(BZ2 Development headers not found)
 fi
 
 dnl ========================================================================
 dnl sighandler_t is missing from Illumos, Solaris11 systems
 dnl ========================================================================
 
 AC_MSG_CHECKING([for sighandler_t])
 AC_TRY_COMPILE([#include <signal.h>],[sighandler_t *f;],
 has_sighandler_t=yes,has_sighandler_t=no)
 AC_MSG_RESULT($has_sighandler_t)
 if test "$has_sighandler_t" = "yes" ; then
     AC_DEFINE( HAVE_SIGHANDLER_T, 1, [Define if sighandler_t available] )
 fi
 
 dnl ========================================================================
 dnl   ncurses
 dnl ========================================================================
 dnl
 dnl A few OSes (e.g. Linux) deliver a default "ncurses" alongside "curses".
 dnl Many non-Linux deliver "curses"; sites may add "ncurses".
 dnl
 dnl However, the source-code recommendation for both is to #include "curses.h"
 dnl (i.e. "ncurses" still wants the include to be simple, no-'n', "curses.h").
 dnl
 dnl ncurse takes precedence.
 dnl
 AC_CHECK_HEADERS(curses.h)
 AC_CHECK_HEADERS(curses/curses.h)
 AC_CHECK_HEADERS(ncurses.h)
 AC_CHECK_HEADERS(ncurses/ncurses.h)
 
 dnl Although n-library is preferred, only look for it if the n-header was found.
 CURSESLIBS=''
 if test "$ac_cv_header_ncurses_h" = "yes"; then
   AC_CHECK_LIB(ncurses, printw,
     [AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)]
   )
   CURSESLIBS=`$PKGCONFIG --libs ncurses` || CURSESLIBS='-lncurses'
 fi
 
 if test "$ac_cv_header_ncurses_ncurses_h" = "yes"; then
   AC_CHECK_LIB(ncurses, printw,
     [AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)]
   )
   CURSESLIBS=`$PKGCONFIG --libs ncurses` || CURSESLIBS='-lncurses'
 fi
 
 dnl Only look for non-n-library if there was no n-library.
 if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_h" = "yes"; then
   AC_CHECK_LIB(curses, printw,
     [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)]
   )
 fi
 
 dnl Only look for non-n-library if there was no n-library.
 if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_curses_h" = "yes"; then
   AC_CHECK_LIB(curses, printw,
     [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)]
   )
 fi
 
 if test "x$CURSESLIBS" != "x"; then
    PCMK_FEATURES="$PCMK_FEATURES ncurses"
 fi
 
 dnl Check for printw() prototype compatibility
 if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then
     dnl Link a tiny program that passes a "const char *" to printw() with
     dnl -Wcast-qual -Werror.  LIBS and CFLAGS are swapped out for the probe
     dnl and restored afterwards.
     ac_save_LIBS=$LIBS
     LIBS="$CURSESLIBS"
     ac_save_CFLAGS=$CFLAGS
     CFLAGS="-Wcast-qual -Werror"
     # avoid broken test because of hardened build environment in Fedora 23+
     # - https://fedoraproject.org/wiki/Changes/Harden_All_Packages
     # - https://bugzilla.redhat.com/1297985
     if cc_supports_flag -fPIC; then
         CFLAGS="$CFLAGS -fPIC"
     fi
 
     dnl The include chain covers every header probed above, including
     dnl curses/curses.h: that header alone is enough to select -lcurses, so
     dnl without it the probe would compile with no printw() prototype and
     dnl fail under -Werror.
     AC_MSG_CHECKING(whether printw() requires argument of "const char *")
     AC_LINK_IFELSE(
         [AC_LANG_PROGRAM(
           [
 #if defined(HAVE_NCURSES_H)
 #  include <ncurses.h>
 #elif defined(HAVE_NCURSES_NCURSES_H)
 #  include <ncurses/ncurses.h>
 #elif defined(HAVE_CURSES_H)
 #  include <curses.h>
 #elif defined(HAVE_CURSES_CURSES_H)
 #  include <curses/curses.h>
 #endif
           ],
           [printw((const char *)"Test");]
         )],
         [ac_cv_compatible_printw=yes],
         [ac_cv_compatible_printw=no]
     )
 
     LIBS=$ac_save_LIBS
     CFLAGS=$ac_save_CFLAGS
 
     AC_MSG_RESULT([$ac_cv_compatible_printw])
 
     dnl NOTE(review): only HAVE_INCOMPATIBLE_PRINTW is defined here; CURSESLIBS
     dnl is left set, so the actual disabling presumably happens in the C
     dnl sources -- confirm.
     if test "$ac_cv_compatible_printw" = no; then
         AC_MSG_WARN([The printw() function of your ncurses or curses library is old, we will disable usage of the library. If you want to use this library anyway, please update to newer version of the library, ncurses 5.4 or later is recommended. You can get the library from http://www.gnu.org/software/ncurses/.])
         AC_MSG_NOTICE([Disabling curses])
         AC_DEFINE(HAVE_INCOMPATIBLE_PRINTW, 1, [Do we have incompatible printw() in curses library?])
     fi
 fi
 
 AC_SUBST(CURSESLIBS)
 
 dnl ========================================================================
 dnl    Profiling and GProf
 dnl ========================================================================
 
 AC_MSG_NOTICE(Old CFLAGS: $CFLAGS)
 case $SUPPORT_COVERAGE in
      1|yes|true)
 	SUPPORT_PROFILING=1
 	PCMK_FEATURES="$PCMK_FEATURES coverage"
 	CFLAGS="$CFLAGS -fprofile-arcs -ftest-coverage"
 	dnl During linking, make sure to specify -lgcov or -coverage
 
         dnl Enable gprof
 	#LIBS="$LIBS -pg"
 	#CFLAGS="$CFLAGS -pg"
 	;;
 esac
 
 case $SUPPORT_PROFILING in
      1|yes|true)
 	SUPPORT_PROFILING=1
 
 	dnl Disable various compiler optimizations
 	CFLAGS="$CFLAGS -fno-omit-frame-pointer -fno-inline -fno-builtin "
 	dnl CFLAGS="$CFLAGS -fno-inline-functions -fno-default-inline -fno-inline-functions-called-once -fno-optimize-sibling-calls"
 
 	dnl Turn off optimization so tools can get accurate line numbers
 	CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g' -e 's/-D_FORTIFY_SOURCE=.\ //g'`
 	CFLAGS="$CFLAGS -O0 -g3 -gdwarf-2"
 
 	dnl Update features
 	PCMK_FEATURES="$PCMK_FEATURES profile"
 	;;
      *) SUPPORT_PROFILING=0;;
 esac
 AC_MSG_NOTICE(New CFLAGS: $CFLAGS)
 AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for profiling)
 
 dnl ========================================================================
 dnl    Cluster infrastructure - Heartbeat / LibQB
 dnl ========================================================================
 
 dnl Compatibility checks
 AC_CHECK_MEMBERS([struct lrm_ops.fail_rsc],,,[[#include <lrm/lrm_api.h>]])
 
 if test x${enable_no_stack} = xyes; then
     SUPPORT_HEARTBEAT=no
     SUPPORT_CS=no
 fi
 
 dnl Probe for libqb with its pkg-config flags applied temporarily; CPPFLAGS
 dnl and LIBS are restored once the header and library checks are done.
 SAVE_CPPFLAGS="$CPPFLAGS"
 SAVE_LIBS="$LIBS"
 PKG_CHECK_MODULES(libqb, libqb >= 0.13, HAVE_libqb=1, HAVE_libqb=0)
 CPPFLAGS="$CPPFLAGS $libqb_CFLAGS"
 LIBS="$LIBS $libqb_LIBS"
 AC_CHECK_HEADERS(qb/qbipc_common.h)
 AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
 CPPFLAGS="$SAVE_CPPFLAGS"
 LIBS="$SAVE_LIBS"
 
 LIBQB_LOG=1
 PCMK_FEATURES="$PCMK_FEATURES libqb-logging libqb-ipc"
 
 dnl Add libqb to LIBS for good *before* probing for its functions:
 dnl AC_CHECK_FUNCS links a test program, and LIBS was just restored to its
 dnl pre-libqb value above.
 LIBS="$LIBS $libqb_LIBS"
 
 AC_CHECK_FUNCS(qb_ipcs_connection_get_buffer_size, AC_DEFINE(HAVE_IPCS_GET_BUFFER_SIZE,  1, [Have qb_ipcc_get_buffer_size function]))
 
 AC_CHECK_HEADERS(heartbeat/hb_config.h)
 AC_CHECK_HEADERS(heartbeat/glue_config.h)
 AC_CHECK_HEADERS(stonith/stonith.h)
 AC_CHECK_HEADERS(agent_config.h)
 
 GLUE_HEADER=none
 HAVE_GLUE=0
 if test "$ac_cv_header_heartbeat_glue_config_h" = "yes";  then
    GLUE_HEADER=glue_config.h
    HAVE_GLUE=1
 
 elif test "$ac_cv_header_heartbeat_hb_config_h" = "yes"; then
    GLUE_HEADER=hb_config.h
    HAVE_GLUE=1
 else
    AC_MSG_WARN(cluster-glue development headers were not found)
 fi
 
 if test "$ac_cv_header_stonith_stonith_h" = "yes";  then
    PCMK_FEATURES="$PCMK_FEATURES lha-fencing"
 fi
 
 if test $HAVE_GLUE = 1; then
    dnl On Debian, AC_CHECK_LIB fails if a library has any unresolved symbols
    dnl So check for all the dependencies (so they're added to LIBS) before checking for -lplumb
    AC_CHECK_LIB(pils, PILLoadPlugin)
    AC_CHECK_LIB(plumb, G_main_add_IPC_Channel)
 fi
 
 dnl ===============================================
 dnl Variables needed for substitution
 dnl ===============================================
 CRM_DTD_DIRECTORY="${datadir}/pacemaker"
 AC_DEFINE_UNQUOTED(CRM_DTD_DIRECTORY,"$CRM_DTD_DIRECTORY", Location for the Pacemaker Relax-NG Schema)
 AC_SUBST(CRM_DTD_DIRECTORY)
 
 CRM_CORE_DIR=`try_extract_header_define $GLUE_HEADER HA_COREDIR ${localstatedir}/lib/pacemaker/cores`
 AC_DEFINE_UNQUOTED(CRM_CORE_DIR,"$CRM_CORE_DIR", Location to store core files produced by Pacemaker daemons)
 AC_SUBST(CRM_CORE_DIR)
 
 CRM_DAEMON_USER=`try_extract_header_define $GLUE_HEADER HA_CCMUSER hacluster`
 AC_DEFINE_UNQUOTED(CRM_DAEMON_USER,"$CRM_DAEMON_USER", User to run Pacemaker daemons as)
 AC_SUBST(CRM_DAEMON_USER)
 
 CRM_DAEMON_GROUP=`try_extract_header_define $GLUE_HEADER HA_APIGROUP haclient`
 AC_DEFINE_UNQUOTED(CRM_DAEMON_GROUP,"$CRM_DAEMON_GROUP", Group to run Pacemaker daemons as)
 AC_SUBST(CRM_DAEMON_GROUP)
 
 CRM_STATE_DIR=${localstatedir}/run/crm
 AC_DEFINE_UNQUOTED(CRM_STATE_DIR,"$CRM_STATE_DIR", Where to keep state files and sockets)
 AC_SUBST(CRM_STATE_DIR)
 
 CRM_BLACKBOX_DIR=${localstatedir}/lib/pacemaker/blackbox
 AC_DEFINE_UNQUOTED(CRM_BLACKBOX_DIR,"$CRM_BLACKBOX_DIR", Where to keep blackbox dumps)
 AC_SUBST(CRM_BLACKBOX_DIR)
 
 PE_STATE_DIR="${localstatedir}/lib/pacemaker/pengine"
 AC_DEFINE_UNQUOTED(PE_STATE_DIR,"$PE_STATE_DIR", Where to keep PEngine outputs)
 AC_SUBST(PE_STATE_DIR)
 
 CRM_CONFIG_DIR="${localstatedir}/lib/pacemaker/cib"
 AC_DEFINE_UNQUOTED(CRM_CONFIG_DIR,"$CRM_CONFIG_DIR", Where to keep configuration files)
 AC_SUBST(CRM_CONFIG_DIR)
 
 CRM_CONFIG_CTS="${localstatedir}/lib/pacemaker/cts"
 AC_DEFINE_UNQUOTED(CRM_CONFIG_CTS,"$CRM_CONFIG_CTS", Where to keep cts stateful data)
 AC_SUBST(CRM_CONFIG_CTS)
 
 CRM_LEGACY_CONFIG_DIR="${localstatedir}/lib/heartbeat/crm"
 AC_DEFINE_UNQUOTED(CRM_LEGACY_CONFIG_DIR,"$CRM_LEGACY_CONFIG_DIR", Where Pacemaker used to keep configuration files)
 AC_SUBST(CRM_LEGACY_CONFIG_DIR)
 
 CRM_DAEMON_DIR="${libexecdir}/pacemaker"
 AC_DEFINE_UNQUOTED(CRM_DAEMON_DIR,"$CRM_DAEMON_DIR", Location for Pacemaker daemons)
 AC_SUBST(CRM_DAEMON_DIR)
 
 HB_DAEMON_DIR=`try_extract_header_define $GLUE_HEADER HA_LIBHBDIR $libdir/heartbeat`
 AC_DEFINE_UNQUOTED(HB_DAEMON_DIR,"$HB_DAEMON_DIR", Location Heartbeat expects Pacemaker daemons to be in)
 AC_SUBST(HB_DAEMON_DIR)
 
 dnl Needed so that the Corosync plugin can clear out the directory as Heartbeat does
 HA_STATE_DIR=`try_extract_header_define $GLUE_HEADER HA_VARRUNDIR ${localstatedir}/run`
 AC_DEFINE_UNQUOTED(HA_STATE_DIR,"$HA_STATE_DIR", Where Heartbeat keeps state files and sockets)
 AC_SUBST(HA_STATE_DIR)
 
 CRM_RSCTMP_DIR=`try_extract_header_define agent_config.h HA_RSCTMPDIR $HA_STATE_DIR/resource-agents`
 AC_MSG_CHECKING(Scratch dir for resource agents)
 AC_MSG_RESULT($CRM_RSCTMP_DIR)
 AC_DEFINE_UNQUOTED(CRM_RSCTMP_DIR,"$CRM_RSCTMP_DIR", Where resource agents should keep state files)
 AC_SUBST(CRM_RSCTMP_DIR)
 
 dnl Needed for the location of hostcache in CTS.py
 HA_VARLIBHBDIR=`try_extract_header_define $GLUE_HEADER HA_VARLIBHBDIR ${localstatedir}/lib/heartbeat`
 AC_SUBST(HA_VARLIBHBDIR)
 
 AC_DEFINE_UNQUOTED(UUID_FILE,"$localstatedir/lib/heartbeat/hb_uuid", Location of Heartbeat's UUID file)
 
 OCF_ROOT_DIR=`try_extract_header_define $GLUE_HEADER OCF_ROOT_DIR /usr/lib/ocf`
 if test "X$OCF_ROOT_DIR" = X; then
   AC_MSG_ERROR(Could not locate OCF directory)
 fi
 AC_SUBST(OCF_ROOT_DIR)
 
 OCF_RA_DIR=`try_extract_header_define $GLUE_HEADER OCF_RA_DIR $OCF_ROOT_DIR/resource.d`
 AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs)
 AC_SUBST(OCF_RA_DIR)
 
 RH_STONITH_DIR="$sbindir"
 AC_DEFINE_UNQUOTED(RH_STONITH_DIR,"$RH_STONITH_DIR", Location for Red Hat Stonith agents)
 
 RH_STONITH_PREFIX="fence_"
 AC_DEFINE_UNQUOTED(RH_STONITH_PREFIX,"$RH_STONITH_PREFIX", Prefix for Red Hat Stonith agents)
 
 AC_PATH_PROGS(GIT, git false)
 AC_MSG_CHECKING(build version)
 
 dnl Pick BUILD_VERSION from, in order of preference:
 dnl   1. the keyword on the next line, when something has replaced it with a
 dnl      hash (presumably git-archive keyword expansion -- confirm); left
 dnl      untouched, the shell evaluates it to the literal ":%h$"
 dnl   2. the current commit, when git is usable and .git exists
 dnl   3. the build directory name with any leading "...pacemaker-" removed
 dnl Expansions are quoted so a build directory containing spaces or glob
 dnl characters cannot break the tests or the basename/sed calls.
 BUILD_VERSION=$Format:%h$
 if test "$BUILD_VERSION" != ":%h$"; then
    AC_MSG_RESULT(archive hash: $BUILD_VERSION)
 
 elif test -x "$GIT" && test -d .git; then
    BUILD_VERSION=`$GIT log --pretty="format:%h" -n 1`
    AC_MSG_RESULT(git hash: $BUILD_VERSION)
 
 else
    # The current directory name makes a reasonable default
    # Most generated archives will include the hash or tag
    BASE=`basename "$PWD"`
    BUILD_VERSION=`echo "$BASE" | sed 's:.*[[Pp]]acemaker-::'`
    AC_MSG_RESULT(directory based hash: $BUILD_VERSION)
 fi
 
 AC_DEFINE_UNQUOTED(BUILD_VERSION, "$BUILD_VERSION", Build version)
 AC_SUBST(BUILD_VERSION)
 
 
 dnl D-Bus detection.  HAVE_upstart/HAVE_systemd start at 0 and are only
 dnl raised by the later checks that depend on D-Bus.
 HAVE_dbus=1
 HAVE_upstart=0
 HAVE_systemd=0
 PKG_CHECK_MODULES(DBUS, dbus-1, ,HAVE_dbus=0)
 
 AC_DEFINE_UNQUOTED(SUPPORT_DBUS, $HAVE_dbus, Support dbus)
 AM_CONDITIONAL(BUILD_DBUS, test $HAVE_dbus = 1)
 
 dnl Only query pkg-config when dbus-1 was found, and keep compiler flags
 dnl out of DBUS_LIBS: it is a linker-flag variable.
 if test $HAVE_dbus = 1; then
    CFLAGS="$CFLAGS `$PKGCONFIG --cflags dbus-1`"
    DBUS_LIBS=`$PKGCONFIG --libs dbus-1`
 else
    DBUS_LIBS=""
 fi
 AC_SUBST(DBUS_LIBS)
 
 AC_CHECK_TYPES([DBusBasicValue],,,[[#include <dbus/dbus.h>]])
 
 if test $HAVE_dbus = 1 -a "x${enable_upstart}" != xno; then
    HAVE_upstart=1
    PCMK_FEATURES="$PCMK_FEATURES upstart"
 fi
 
 AC_DEFINE_UNQUOTED(SUPPORT_UPSTART, $HAVE_upstart, Support upstart based system services)
 AM_CONDITIONAL(BUILD_UPSTART, test $HAVE_upstart = 1)
 AC_SUBST(SUPPORT_UPSTART)
 
 if
     $PKGCONFIG --exists systemd
 then
     systemdunitdir=`$PKGCONFIG --variable=systemdsystemunitdir systemd`
     AC_SUBST(systemdunitdir)
 else
     enable_systemd=no
 fi
 
 if test $HAVE_dbus = 1 -a "x${enable_systemd}" != xno; then
    if test -n "$systemdunitdir" -a "x$systemdunitdir" != xno; then
       HAVE_systemd=1
       PCMK_FEATURES="$PCMK_FEATURES systemd"
    fi
 fi
 
 AC_DEFINE_UNQUOTED(SUPPORT_SYSTEMD, $HAVE_systemd, Support systemd based system services)
 AM_CONDITIONAL(BUILD_SYSTEMD, test $HAVE_systemd = 1)
 AC_SUBST(SUPPORT_SYSTEMD)
 
 dnl Collapse the requested nagios setting to 1/0 and advertise it when enabled.
 case $SUPPORT_NAGIOS in
     1|yes|true|try)
         SUPPORT_NAGIOS=1
         PCMK_FEATURES="$PCMK_FEATURES nagios"
         ;;
     *)
         SUPPORT_NAGIOS=0
         ;;
 esac
 
 AC_DEFINE_UNQUOTED(SUPPORT_NAGIOS, $SUPPORT_NAGIOS, Support nagios plugins)
 AM_CONDITIONAL(BUILD_NAGIOS, test $SUPPORT_NAGIOS = 1)
 
 dnl Plugin directory: keep a caller-supplied value, otherwise use the default.
 if test -z "$NAGIOS_PLUGIN_DIR"; then
     NAGIOS_PLUGIN_DIR="${libexecdir}/nagios/plugins"
 fi
 
 AC_DEFINE_UNQUOTED(NAGIOS_PLUGIN_DIR, "$NAGIOS_PLUGIN_DIR", Directory for nagios plugins)
 AC_SUBST(NAGIOS_PLUGIN_DIR)
 
 dnl Metadata directory: same rule as the plugin directory.
 if test -z "$NAGIOS_METADATA_DIR"; then
     NAGIOS_METADATA_DIR="${datadir}/nagios/plugins-metadata"
 fi
 
 AC_DEFINE_UNQUOTED(NAGIOS_METADATA_DIR, "$NAGIOS_METADATA_DIR", Directory for nagios plugins metadata)
 AC_SUBST(NAGIOS_METADATA_DIR)
 
 STACKS=""
 CLUSTERLIBS=""
 
 dnl ========================================================================
 dnl    Cluster stack - Heartbeat
 dnl ========================================================================
 
 case $SUPPORT_HEARTBEAT in
 1|yes|true|try)
    AC_MSG_CHECKING(for heartbeat support)
    AC_CHECK_LIB(hbclient, ll_cluster_new, [SUPPORT_HEARTBEAT=1],
 		[if test $SUPPORT_HEARTBEAT != try; then
 			AC_MSG_FAILURE(Unable to support Heartbeat: client libraries not found)
 		fi])
 
    if test $SUPPORT_HEARTBEAT = 1 ; then
 	STACKS="$STACKS heartbeat"
 	dnl objdump -x ${libdir}/libccmclient.so | grep SONAME | awk '{print $2}'
 	AC_DEFINE_UNQUOTED(CCM_LIBRARY, "libccmclient.so.1", Library to load for ccm support)
 	AC_DEFINE_UNQUOTED(HEARTBEAT_LIBRARY, "libhbclient.so.1", Library to load for heartbeat support)
 	BUILD_ATOMIC_ATTRD=0
    else
 	SUPPORT_HEARTBEAT=0
    fi
    ;;
 *) SUPPORT_HEARTBEAT=0;;
 esac
 
 AM_CONDITIONAL(BUILD_HEARTBEAT_SUPPORT, test $SUPPORT_HEARTBEAT = 1)
 AC_DEFINE_UNQUOTED(SUPPORT_HEARTBEAT, $SUPPORT_HEARTBEAT, Support the Heartbeat messaging and membership layer)
 AC_SUBST(SUPPORT_HEARTBEAT)
 
 dnl ========================================================================
 dnl    Cluster stack - Corosync
 dnl ========================================================================
 
 dnl Normalize the values
 case $SUPPORT_CS in
      1|yes|true)
 		SUPPORT_CS=yes
 		missingisfatal=1;;
      try)	missingisfatal=0;;
      *) SUPPORT_CS=no;;
 esac
 
 AC_MSG_CHECKING(for native corosync)
 COROSYNC_LIBS=""
 CS_USES_LIBQB=0
 
 PCMK_SERVICE_ID=9
 LCRSODIR="$libdir"
 
 dnl Detect the corosync libraries.  On success SUPPORT_CS becomes 1; if no
 dnl IPC library is found it keeps its yes/try value and aisreason is set, so
 dnl the code further down can decide between a warning and a failure.
 if test $SUPPORT_CS = no; then
     AC_MSG_RESULT(no (disabled))
     SUPPORT_CS=0
 else
     dnl Quoted so the comma is part of the message, not an argument separator
     AC_MSG_RESULT([$SUPPORT_CS, with '$CSPREFIX'])
     PKG_CHECK_MODULES(cpg,    libcpg) dnl Fatal
     PKG_CHECK_MODULES(cfg,    libcfg) dnl Fatal
     PKG_CHECK_MODULES(cmap,   libcmap,   HAVE_cmap=1,   HAVE_cmap=0)
     PKG_CHECK_MODULES(cman,   libcman,   HAVE_cman=1,   HAVE_cman=0)
     PKG_CHECK_MODULES(confdb, libconfdb, HAVE_confdb=1, HAVE_confdb=0)
     PKG_CHECK_MODULES(fenced, libfenced, HAVE_fenced=1, HAVE_fenced=0)
     PKG_CHECK_MODULES(quorum, libquorum, HAVE_quorum=1, HAVE_quorum=0)
     PKG_CHECK_MODULES(oldipc, libcoroipcc, HAVE_oldipc=1, HAVE_oldipc=0)
 
     dnl PKG_CHECK_MODULES(prefix, ...) provides prefix_CFLAGS and prefix_LIBS;
     dnl there is no prefix_FLAGS variable.
     if test $HAVE_oldipc = 1; then
         SUPPORT_CS=1
         CFLAGS="$CFLAGS $oldipc_CFLAGS $cpg_CFLAGS $cfg_CFLAGS"
         COROSYNC_LIBS="$COROSYNC_LIBS $oldipc_LIBS $cpg_LIBS $cfg_LIBS"
 
     elif test $HAVE_libqb = 1; then
         SUPPORT_CS=1
         CS_USES_LIBQB=1
         CFLAGS="$CFLAGS $libqb_CFLAGS $cpg_CFLAGS $cfg_CFLAGS"
         COROSYNC_LIBS="$COROSYNC_LIBS $libqb_LIBS $cpg_LIBS $cfg_LIBS"
         AC_CHECK_LIB(corosync_common, cs_strerror)
 
     else
         aisreason="corosync/libqb IPC libraries not found by pkg_config"
     fi
 
     AC_DEFINE_UNQUOTED(HAVE_CONFDB, $HAVE_confdb, Have the old herarchial Corosync config API)
     AC_DEFINE_UNQUOTED(HAVE_CMAP, $HAVE_cmap, Have the new non-herarchial Corosync config API)
 fi
 
 
 if test $SUPPORT_CS = 1 -a x$HAVE_oldipc = x0 ; then
     dnl Support for plugins was removed about the time the IPC was
     dnl moved to libqb.
     dnl The only option now is the built-in quorum API
     CFLAGS="$CFLAGS $cmap_CFLAGS $quorum_CFLAGS"
     COROSYNC_LIBS="$COROSYNC_LIBS $cmap_LIBS $quorum_LIBS"
 
     STACKS="$STACKS corosync-native"
     AC_DEFINE_UNQUOTED(SUPPORT_CS_QUORUM, 1, Support the consumption of membership and quorum from corosync)
 fi
 
 dnl Plugin (and optionally cman) support is enabled only when corosync
 dnl support is on and the confdb API was found.
 SUPPORT_PLUGIN=0
 if test $SUPPORT_CS = 1 -a x$HAVE_confdb = x1; then
     dnl Need confdb to support cman and the plugins
     SUPPORT_PLUGIN=1
     BUILD_ATOMIC_ATTRD=0
     LCRSODIR=`$PKGCONFIG corosync --variable=lcrsodir`
     STACKS="$STACKS corosync-plugin"
     COROSYNC_LIBS="$COROSYNC_LIBS $confdb_LIBS"
 
     if test $SUPPORT_CMAN != no; then
         if test $HAVE_cman = 1 -a $HAVE_fenced = 1; then
             SUPPORT_CMAN=1
             STACKS="$STACKS cman"
             dnl pkg-config results live in <prefix>_CFLAGS, not <prefix>_FLAGS
             CFLAGS="$CFLAGS $cman_CFLAGS $fenced_CFLAGS"
             COROSYNC_LIBS="$COROSYNC_LIBS $cman_LIBS $fenced_LIBS"
         fi
     fi
 fi
 
 dnl Normalize SUPPORT_CS and SUPPORT_CMAN for use with #if directives
 if test $SUPPORT_CMAN != 1; then
     SUPPORT_CMAN=0
 fi
 
 if test $SUPPORT_CS = 1; then
     CLUSTERLIBS="$CLUSTERLIBS $COROSYNC_LIBS"
 
 elif test $SUPPORT_CS != 0; then
     SUPPORT_CS=0
     if test $missingisfatal = 0; then
         AC_MSG_WARN(Unable to support Corosync: $aisreason)
     else
         AC_MSG_FAILURE(Unable to support Corosync: $aisreason)
     fi
 fi
 
 AC_DEFINE_UNQUOTED(SUPPORT_COROSYNC, $SUPPORT_CS,    Support the Corosync messaging and membership layer)
 AC_DEFINE_UNQUOTED(SUPPORT_CMAN,     $SUPPORT_CMAN,  Support the consumption of membership and quorum from cman)
 AC_DEFINE_UNQUOTED(CS_USES_LIBQB,    $CS_USES_LIBQB, Does corosync use libqb for its ipc)
 AC_DEFINE_UNQUOTED(PCMK_SERVICE_ID,  $PCMK_SERVICE_ID, Corosync service number)
 AC_DEFINE_UNQUOTED(SUPPORT_PLUGIN,   $SUPPORT_PLUGIN, Support the Pacemaker plugin for Corosync)
 
 AM_CONDITIONAL(BUILD_CS_SUPPORT, test $SUPPORT_CS = 1)
 AM_CONDITIONAL(BUILD_CS_PLUGIN, test $SUPPORT_PLUGIN = 1)
 AM_CONDITIONAL(BUILD_CMAN, test $SUPPORT_CMAN = 1)
 
 AM_CONDITIONAL(BUILD_ATOMIC_ATTRD, test $BUILD_ATOMIC_ATTRD = 1)
 AC_DEFINE_UNQUOTED(HAVE_ATOMIC_ATTRD, $BUILD_ATOMIC_ATTRD, Support the new atomic attrd)
 
 AC_SUBST(SUPPORT_CMAN)
 AC_SUBST(SUPPORT_CS)
 AC_SUBST(SUPPORT_PLUGIN)
 
 dnl
 dnl    Cluster stack - Sanity
 dnl
 
 if test x${enable_no_stack} = xyes; then
     AC_MSG_NOTICE(No cluster stack supported.  Just building the Policy Engine)
     PCMK_FEATURES="$PCMK_FEATURES no-cluster-stack"
 else
     AC_MSG_CHECKING(for supported stacks)
     if test x"$STACKS" = x; then
       AC_MSG_FAILURE(You must support at least one cluster stack (heartbeat or corosync) )
     fi
     AC_MSG_RESULT($STACKS)
     PCMK_FEATURES="$PCMK_FEATURES $STACKS"
 fi
 
 if test ${BUILD_ATOMIC_ATTRD} = 1; then
     PCMK_FEATURES="$PCMK_FEATURES atomic-attrd"
 fi
 AC_SUBST(CLUSTERLIBS)
 AC_SUBST(LCRSODIR)
 
 dnl ========================================================================
 dnl    SNMP
 dnl ========================================================================
 
 case $SUPPORT_SNMP in
      1|yes|true) missingisfatal=1;;
      try)        missingisfatal=0;;
      *)		 SUPPORT_SNMP=no;;
 esac
 
 SNMPLIBS=""
 
 AC_MSG_CHECKING(for snmp support)
 if test $SUPPORT_SNMP = no; then
    AC_MSG_RESULT(no (disabled))
    SUPPORT_SNMP=0
 else
     SNMPCONFIG=""
     AC_MSG_RESULT($SUPPORT_SNMP)
     AC_CHECK_HEADERS(net-snmp/net-snmp-config.h)
 
     if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then
  	SUPPORT_SNMP="no"
     fi
 
     if test $SUPPORT_SNMP != no; then
 	AC_PATH_PROGS(SNMPCONFIG, net-snmp-config)
 	if test "X${SNMPCONFIG}" = "X"; then
 		AC_MSG_RESULT(You need the net_snmp development package to continue.)
 		SUPPORT_SNMP=no
 	fi
     fi
 
     if test $SUPPORT_SNMP != no; then
 	AC_MSG_CHECKING(for special snmp libraries)
 	SNMPLIBS=`$SNMPCONFIG --agent-libs`
 	AC_MSG_RESULT($SNMPLIBS)
     fi
 
     if test $SUPPORT_SNMP != no; then
         savedLibs=$LIBS
         LIBS="$LIBS $SNMPLIBS"
 
         dnl    On many systems libcrypto is needed when linking against libsnmp.
         dnl    Check to see if it exists, and if so use it.
 	dnl AC_CHECK_LIB(crypto, CRYPTO_free, CRYPTOLIB="-lcrypto",)
 	dnl AC_SUBST(CRYPTOLIB)
 
         AC_CHECK_FUNCS(netsnmp_transport_open_client)
         if test $ac_cv_func_netsnmp_transport_open_client != yes; then
             AC_CHECK_FUNCS(netsnmp_tdomain_transport)
             if test $ac_cv_func_netsnmp_tdomain_transport != yes; then
                 SUPPORT_SNMP=no
 	    else
                 AC_DEFINE_UNQUOTED(NETSNMPV53, 1, [Use the older 5.3 version of the net-snmp API])
             fi
         fi
         LIBS=$savedLibs
     fi
 
     if test $SUPPORT_SNMP = no; then
    	SNMPLIBS=""
    	SUPPORT_SNMP=0
      	if test $missingisfatal = 0; then
 	    AC_MSG_WARN(Unable to support SNMP)
         else
 	    AC_MSG_FAILURE(Unable to support SNMP)
         fi
     else
    	SUPPORT_SNMP=1
     fi
 fi
 
 if test $SUPPORT_SNMP = 1; then
    PCMK_FEATURES="$PCMK_FEATURES snmp"
 fi
 
 AC_SUBST(SNMPLIBS)
 AM_CONDITIONAL(ENABLE_SNMP, test "$SUPPORT_SNMP" = "1")
 AC_DEFINE_UNQUOTED(ENABLE_SNMP, $SUPPORT_SNMP, Build in support for sending SNMP traps)
 
 dnl ========================================================================
 dnl    ESMTP
 dnl ========================================================================
 
 dnl Normalize the request: an explicit "yes" makes a missing libesmtp fatal,
 dnl "try" downgrades it to a warning, and anything else disables the feature.
 case $SUPPORT_ESMTP in
      1|yes|true) missingisfatal=1;;
      try)        missingisfatal=0;;
      *)          SUPPORT_ESMTP=no;;
 esac
 
 dnl Must be the same variable that is filled in and substituted below
 ESMTPLIBS=""
 
 AC_MSG_CHECKING(for esmtp support)
 if test $SUPPORT_ESMTP = no; then
    AC_MSG_RESULT(no (disabled))
    SUPPORT_ESMTP=0
 else
    ESMTPCONFIG=""
    AC_MSG_RESULT($SUPPORT_ESMTP)
    AC_CHECK_HEADERS(libesmtp.h)
 
    dnl A missing header must turn SUPPORT_ESMTP off (the variable every later
    dnl step tests), mirroring the SNMP section.
    if test "x${ac_cv_header_libesmtp_h}" != "xyes"; then
        SUPPORT_ESMTP="no"
    fi
 
    if test $SUPPORT_ESMTP != no; then
        AC_PATH_PROGS(ESMTPCONFIG, libesmtp-config)
        if test "X${ESMTPCONFIG}" = "X"; then
            AC_MSG_RESULT(You need the libesmtp development package to continue.)
            SUPPORT_ESMTP=no
        fi
    fi
 
    if test $SUPPORT_ESMTP != no; then
        AC_MSG_CHECKING(for special esmtp libraries)
        dnl libesmtp-config output is flattened onto a single line
        ESMTPLIBS=`$ESMTPCONFIG --libs | tr '\n' ' '`
        AC_MSG_RESULT($ESMTPLIBS)
    fi
 
    if test $SUPPORT_ESMTP = no; then
        SUPPORT_ESMTP=0
        if test $missingisfatal = 0; then
            AC_MSG_WARN(Unable to support ESMTP)
        else
            AC_MSG_FAILURE(Unable to support ESMTP)
        fi
    else
        SUPPORT_ESMTP=1
        PCMK_FEATURES="$PCMK_FEATURES libesmtp"
    fi
 fi
 
 AC_SUBST(ESMTPLIBS)
 AM_CONDITIONAL(ENABLE_ESMTP, test "$SUPPORT_ESMTP" = "1")
 AC_DEFINE_UNQUOTED(ENABLE_ESMTP, $SUPPORT_ESMTP, Build in support for sending mail notifications with ESMTP)
 
 dnl ========================================================================
 dnl    ACL
 dnl ========================================================================
 
 case $SUPPORT_ACL in
      1|yes|true) missingisfatal=1;;
      try)        missingisfatal=0;;
      *)		 SUPPORT_ACL=no;;
 esac
 
 AC_MSG_CHECKING(for acl support)
 if test $SUPPORT_ACL = no; then
     AC_MSG_RESULT(no (disabled))
     SUPPORT_ACL=0
 else
     AC_MSG_RESULT($SUPPORT_ACL)
 
     SUPPORT_ACL=1
     AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
     if test $ac_cv_lib_qb_qb_ipcs_connection_auth_set != yes; then
         SUPPORT_ACL=0
     fi
 
     if test $SUPPORT_ACL = 0; then
         if test $missingisfatal = 0; then
             AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0)
         else
             AC_MSG_FAILURE(Unable to support ACL. You need to use libqb > 0.13.0)
         fi
     fi
 fi
 
 if test $SUPPORT_ACL = 1; then
     PCMK_FEATURES="$PCMK_FEATURES acls"
 fi
 
 AM_CONDITIONAL(ENABLE_ACL, test "$SUPPORT_ACL" = "1")
 AC_DEFINE_UNQUOTED(ENABLE_ACL, $SUPPORT_ACL, Build in support for CIB ACL)
 
 dnl ========================================================================
 dnl    CIB secrets
 dnl ========================================================================
 
 case $SUPPORT_CIBSECRETS in
      1|yes|true|try)
         SUPPORT_CIBSECRETS=1;;
      *)
         SUPPORT_CIBSECRETS=0;;
 esac
 
 AC_DEFINE_UNQUOTED(SUPPORT_CIBSECRETS, $SUPPORT_CIBSECRETS, Support CIB secrets)
 AM_CONDITIONAL(BUILD_CIBSECRETS, test $SUPPORT_CIBSECRETS = 1)
 
 if test $SUPPORT_CIBSECRETS = 1; then
     PCMK_FEATURES="$PCMK_FEATURES cibsecrets"
 
     LRM_CIBSECRETS_DIR="${localstatedir}/lib/pacemaker/lrm/secrets"
     AC_DEFINE_UNQUOTED(LRM_CIBSECRETS_DIR,"$LRM_CIBSECRETS_DIR", Location for CIB secrets)
     AC_SUBST(LRM_CIBSECRETS_DIR)
 
     LRM_LEGACY_CIBSECRETS_DIR="${localstatedir}/lib/heartbeat/lrm/secrets"
     AC_DEFINE_UNQUOTED(LRM_LEGACY_CIBSECRETS_DIR,"$LRM_LEGACY_CIBSECRETS_DIR", Legacy location for CIB secrets)
     AC_SUBST(LRM_LEGACY_CIBSECRETS_DIR)
 fi
 
 dnl ========================================================================
 dnl    GnuTLS
 dnl ========================================================================
 
 AC_CHECK_HEADERS(gnutls/gnutls.h)
 AC_CHECK_HEADERS(security/pam_appl.h pam/pam_appl.h)
 
 dnl GNUTLS library: Attempt to determine by 'libgnutls-config' program.
 dnl If no 'libgnutls-config', try traditional autoconf means.
 AC_PATH_PROGS(LIBGNUTLS_CONFIG, libgnutls-config)
 
 if test -n "$LIBGNUTLS_CONFIG"; then
 	AC_MSG_CHECKING(for gnutls header flags)
 	GNUTLSHEAD="`$LIBGNUTLS_CONFIG --cflags`";
 	AC_MSG_RESULT($GNUTLSHEAD)
 	AC_MSG_CHECKING(for gnutls library flags)
 	GNUTLSLIBS="`$LIBGNUTLS_CONFIG --libs`";
 	AC_MSG_RESULT($GNUTLSLIBS)
 fi
 AC_CHECK_LIB(gnutls, gnutls_init)
 AC_CHECK_FUNCS(gnutls_priority_set_direct)
 
 AC_SUBST(GNUTLSHEAD)
 AC_SUBST(GNUTLSLIBS)
 
 
 dnl ========================================================================
 dnl    System Health
 dnl ========================================================================
 
 dnl Check if servicelog development package is installed
 SERVICELOG=servicelog-1
 SERVICELOG_EXISTS="no"
 AC_MSG_CHECKING(for $SERVICELOG packages)
 if
     $PKGCONFIG --exists $SERVICELOG
 then
     PKG_CHECK_MODULES([SERVICELOG], [servicelog-1])
     SERVICELOG_EXISTS="yes"
 fi
 AC_MSG_RESULT($SERVICELOG_EXISTS)
 AM_CONDITIONAL(BUILD_SERVICELOG, test "$SERVICELOG_EXISTS" = "yes")
 
 dnl Check if OpenIPMI packages and servicelog are installed
 OPENIPMI="OpenIPMI OpenIPMIposix"
 OPENIPMI_SERVICELOG_EXISTS="no"
 AC_MSG_CHECKING(for $SERVICELOG $OPENIPMI packages)
 if
     $PKGCONFIG --exists $OPENIPMI $SERVICELOG
 then
     PKG_CHECK_MODULES([OPENIPMI_SERVICELOG],[OpenIPMI OpenIPMIposix])
     OPENIPMI_SERVICELOG_EXISTS="yes"
 fi
 AC_MSG_RESULT($OPENIPMI_SERVICELOG_EXISTS)
 AM_CONDITIONAL(BUILD_OPENIPMI_SERVICELOG, test "$OPENIPMI_SERVICELOG_EXISTS" = "yes")
 
 dnl ========================================================================
 dnl Compiler flags
 dnl ========================================================================
 
 dnl Make sure that CFLAGS is not exported. If the user did
 dnl not have CFLAGS in their environment then this should have
 dnl no effect. However if CFLAGS was exported from the user's
 dnl environment, then the new CFLAGS will also be exported
 dnl to sub processes.
 
 CC_ERRORS=""
 CC_EXTRAS=""
 
 if export | fgrep " CFLAGS=" > /dev/null; then
 	SAVED_CFLAGS="$CFLAGS"
 	unset CFLAGS
 	CFLAGS="$SAVED_CFLAGS"
 	unset SAVED_CFLAGS
 fi
 
 dnl Stays empty for non-GCC compilers, which makes the GCC_4 conditional false
 GCC_MAJOR=""
 
 if test "$GCC" != yes; then
     CFLAGS="$CFLAGS -g"
     enable_fatal_warnings=no
 else
     CFLAGS="$CFLAGS -ggdb"
 
     # We had to eliminate -Wnested-externs because of libtool changes
     # NOTE(review): -Wnested-externs is still present in the list below;
     # confirm whether the comment or the list is stale.
     EXTRA_FLAGS="-fgnu89-inline
         -Wall
         -Waggregate-return
         -Wbad-function-cast
         -Wcast-align
         -Wdeclaration-after-statement
         -Wendif-labels
         -Wfloat-equal
         -Wformat=2
         -Wformat-security
         -Wformat-nonliteral
         -Wmissing-prototypes
         -Wmissing-declarations
         -Wnested-externs
         -Wno-long-long
         -Wno-strict-aliasing
         -Wpointer-arith
         -Wstrict-prototypes
         -Wwrite-strings
         -Wunused-but-set-variable
         -Wunsigned-char"
 
     # Additional warnings it might be nice to enable one day
     #		-Wshadow
     #		-Wunreachable-code
     case "$host_os" in
         *solaris*) ;;
         *) EXTRA_FLAGS="$EXTRA_FLAGS
             -fstack-protector-all"
             ;;
     esac
 
     dnl Keep only the candidate flags the compiler accepts
     for j in $EXTRA_FLAGS
     do
         if
             cc_supports_flag $j
         then
             CC_EXTRAS="$CC_EXTRAS $j"
         fi
     done
 
     dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x
     dnl NOTE(review): this queries the "gcc" found in PATH rather than $CC.
     GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[.].*//'`
 
     dnl System specific options
 
     case "$host_os" in
         *linux*|*bsd*)
             if test "${enable_fatal_warnings}" = "unknown"; then
                 enable_fatal_warnings=yes
             fi
             ;;
     esac
 
     if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then
         enable_fatal_warnings=yes
     else
         enable_fatal_warnings=no
     fi
 
     if test "x${enable_ansi}" = xyes && cc_supports_flag -std=iso9899:199409 ; then
         AC_MSG_NOTICE(Enabling ANSI Compatibility)
         CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY"
     fi
 
     AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS})
 fi
 
 dnl AM_CONDITIONAL has to be executed on every configure run, so it lives
 dnl outside the GCC-only branch; otherwise a non-GCC compiler would leave
 dnl GCC_4 undefined and configure would abort.
 AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4)
 
 CFLAGS="$CFLAGS $CC_EXTRAS"
 
 NON_FATAL_CFLAGS="$CFLAGS"
 AC_SUBST(NON_FATAL_CFLAGS)
 
 dnl
 dnl We reset CFLAGS to include our warnings *after* all function
 dnl checking goes on, so that our warning flags don't keep the
 dnl AC_*FUNCS() calls above from working.  In particular, -Werror will
 dnl *always* cause us troubles if we set it before here.
 dnl
 dnl
 if test "x${enable_fatal_warnings}" = xyes ; then
    AC_MSG_NOTICE(Enabling Fatal Warnings)
    CFLAGS="$CFLAGS -Werror"
 fi
 AC_SUBST(CFLAGS)
 
 dnl This is useful for use in Makefiles that need to remove one specific flag
 CFLAGS_COPY="$CFLAGS"
 AC_SUBST(CFLAGS_COPY)
 
 AC_SUBST(LIBADD_DL)	dnl extra flags for dynamic linking libraries
 AC_SUBST(LIBADD_INTL)	dnl extra flags for GNU gettext stuff...
 
 AC_SUBST(LOCALE)
 
 dnl Options for cleaning up the compiler output
 QUIET_LIBTOOL_OPTS=""
 QUIET_MAKE_OPTS=""
 if test "x${enable_quiet}" = "xyes"; then
    QUIET_LIBTOOL_OPTS="--quiet"
    QUIET_MAKE_OPTS="--quiet"
 fi
 
 AC_MSG_RESULT(Supress make details: ${enable_quiet})
 
 dnl Put the above variables to use
 LIBTOOL="${LIBTOOL} --tag=CC \$(QUIET_LIBTOOL_OPTS)"
 MAKE="${MAKE} \$(QUIET_MAKE_OPTS)"
 
 AC_SUBST(CC)
 AC_SUBST(MAKE)
 AC_SUBST(LIBTOOL)
 AC_SUBST(QUIET_MAKE_OPTS)
 AC_SUBST(QUIET_LIBTOOL_OPTS)
 AC_DEFINE_UNQUOTED(CRM_FEATURES, "$PCMK_FEATURES", Set of enabled features)
 AC_SUBST(PCMK_FEATURES)
 
 dnl The Makefiles and shell scripts we output
 AC_CONFIG_FILES(Makefile				        \
 Doxyfile							\
 coverage.sh							\
 cts/Makefile					        	\
 	cts/CTSvars.py						\
 	cts/LSBDummy						\
 	cts/HBDummy						\
 	cts/benchmark/Makefile					\
 	cts/benchmark/clubench					\
 	cts/lxc_autogen.sh					\
 cib/Makefile							\
 attrd/Makefile							\
 crmd/Makefile							\
 pengine/Makefile						\
 	pengine/regression.core.sh				\
 doc/Makefile							\
 	doc/Pacemaker_Explained/publican.cfg			\
 	doc/Clusters_from_Scratch/publican.cfg			\
 	doc/Pacemaker_Remote/publican.cfg			\
 include/Makefile						\
 	include/crm/Makefile					\
 		include/crm/cib/Makefile			\
 		include/crm/common/Makefile			\
 		include/crm/cluster/Makefile			\
 		include/crm/fencing/Makefile			\
 		include/crm/pengine/Makefile			\
 replace/Makefile						\
 lib/Makefile							\
 	lib/pacemaker.pc					\
 	lib/pacemaker-cib.pc					\
 	lib/pacemaker-lrmd.pc					\
 	lib/pacemaker-service.pc				\
 	lib/pacemaker-pengine.pc				\
 	lib/pacemaker-fencing.pc				\
 	lib/pacemaker-cluster.pc				\
 	lib/ais/Makefile					\
 	lib/common/Makefile					\
 	lib/cluster/Makefile					\
 	lib/cib/Makefile					\
 	lib/pengine/Makefile					\
 	lib/transition/Makefile					\
 	lib/fencing/Makefile					\
 	lib/lrmd/Makefile					\
 	lib/services/Makefile					\
 mcp/Makefile							\
 	mcp/pacemaker						\
 	mcp/pacemaker.service					\
 	mcp/pacemaker.upstart					\
 	mcp/pacemaker.combined.upstart				\
 fencing/Makefile                                                \
         fencing/regression.py                                   \
 lrmd/Makefile                                                   \
         lrmd/regression.py                                      \
         lrmd/pacemaker_remote.service				\
         lrmd/pacemaker_remote					\
 extra/Makefile							\
 	extra/resources/Makefile				\
 	extra/logrotate/Makefile				\
 	extra/logrotate/pacemaker				\
 tools/Makefile							\
 	tools/crm_report					\
         tools/report.common                                     \
 	tools/cibsecret						\
 	tools/crm_mon.service					\
 	tools/crm_mon.upstart					\
 xml/Makefile							\
 lib/gnu/Makefile						\
 		)
 
 dnl Now process the entire list of files added by previous
 dnl  calls to AC_CONFIG_FILES()
 AC_OUTPUT()
 
 dnl *****************
 dnl Configure summary
 dnl *****************
 
 AC_MSG_RESULT([])
 AC_MSG_RESULT([$PACKAGE configuration:])
 AC_MSG_RESULT([  Version                  = ${VERSION} (Build: $BUILD_VERSION)])
 AC_MSG_RESULT([  Features                 =${PCMK_FEATURES}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([  Prefix                   = ${prefix}])
 AC_MSG_RESULT([  Executables              = ${sbindir}])
 AC_MSG_RESULT([  Man pages                = ${mandir}])
 AC_MSG_RESULT([  Libraries                = ${libdir}])
 AC_MSG_RESULT([  Header files             = ${includedir}])
 AC_MSG_RESULT([  Arch-independent files   = ${datadir}])
 AC_MSG_RESULT([  State information        = ${localstatedir}])
 AC_MSG_RESULT([  System configuration     = ${sysconfdir}])
 AC_MSG_RESULT([  Corosync Plugins         = ${LCRSODIR}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([  Use system LTDL          = ${ac_cv_lib_ltdl_lt_dlopen}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([  HA group name            = ${CRM_DAEMON_GROUP}])
 AC_MSG_RESULT([  HA user name             = ${CRM_DAEMON_USER}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([  CFLAGS                   = ${CFLAGS}])
 AC_MSG_RESULT([  Libraries                = ${LIBS}])
 AC_MSG_RESULT([  Stack Libraries          = ${CLUSTERLIBS}])
diff --git a/crmd/callbacks.c b/crmd/callbacks.c
index b21232b616..34abe81142 100644
--- a/crmd/callbacks.c
+++ b/crmd/callbacks.c
@@ -1,285 +1,313 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 #include <string.h>
 #include <crmd_fsa.h>
 
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crm/cluster.h>
 #include <crm/cib.h>
 
 #include <crmd.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 #include <tengine.h>
 #include <membership.h>
 
 void crmd_ha_connection_destroy(gpointer user_data);
 
 /* From join_dc... */
 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
 
 void
 crmd_ha_connection_destroy(gpointer user_data)
 {
     crm_trace("Invoked");
     if (is_set(fsa_input_register, R_HA_DISCONNECTED)) {
         /* we signed out, so this is expected */
         crm_info("Heartbeat disconnection complete");
         return;
     }
 
     crm_crit("Lost connection to heartbeat service!");
     register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL);
     trigger_fsa(fsa_source);
 }
 
 void
 crmd_ha_msg_filter(xmlNode * msg)
 {
     if (AM_I_DC) {
         const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
 
         if (safe_str_eq(sys_from, CRM_SYSTEM_DC)) {
             const char *from = crm_element_value(msg, F_ORIG);
 
             if (safe_str_neq(from, fsa_our_uname)) {
                 int level = LOG_INFO;
                 const char *op = crm_element_value(msg, F_CRM_TASK);
 
                 /* make sure the election happens NOW */
                 if (fsa_state != S_ELECTION) {
                     ha_msg_input_t new_input;
 
                     level = LOG_WARNING;
                     new_input.msg = msg;
                     register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
                                            __FUNCTION__);
                 }
 
                 do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
                 goto done;
             }
         }
 
     } else {
         const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
 
         if (safe_str_eq(sys_to, CRM_SYSTEM_DC)) {
             return;
         }
     }
 
     /* crm_log_xml_trace("HA[inbound]", msg); */
     route_message(C_HA_MESSAGE, msg);
 
   done:
     trigger_fsa(fsa_source);
 }
 
 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")
 
 void
 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
 {
     uint32_t old = 0;
     uint32_t changed = 0;
     bool appeared = FALSE;
+    bool is_remote = is_set(node->flags, crm_remote_node);
     const char *status = NULL;
 
     /* Crmd waits to receive some information from the membership layer before
      * declaring itself operational. If this is being called for a cluster node,
      * indicate that we have it.
      */
-    if (!is_set(node->flags, crm_remote_node)) {
+    if (!is_remote) {
         set_bit(fsa_input_register, R_PEER_DATA);
     }
 
     if (node->uname == NULL) {
         return;
     }
 
     switch (type) {
         case crm_status_uname:
             /* If we've never seen the node, then it also wont be in the status section */
             crm_info("%s is now %s", node->uname, state_text(node->state));
             return;
+
         case crm_status_rstate:
-            crm_info("Remote node %s is now %s (was %s)",
-                     node->uname, state_text(node->state), state_text(data));
-            /* Keep going */
         case crm_status_nstate:
-            crm_info("%s is now %s (was %s)",
-                     node->uname, state_text(node->state), state_text(data));
+            /* This callback should not be called unless the state actually
+             * changed, but here's a failsafe just in case.
+             */
+            CRM_CHECK(safe_str_neq(data, node->state), return);
 
-            if (safe_str_eq(data, node->state)) {
-                /* State did not change */
-                return;
+            crm_info("%s node %s is now %s (was %s)",
+                     (is_remote? "Remote" : "Cluster"),
+                     node->uname, state_text(node->state), state_text(data));
 
-            } else if(safe_str_eq(CRM_NODE_MEMBER, node->state)) {
+            if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
                 appeared = TRUE;
-                if (!is_set(node->flags, crm_remote_node)) {
+                if (!is_remote) {
                     remove_stonith_cleanup(node->uname);
                 }
             }
 
             crmd_notify_node_event(node);
             break;
 
         case crm_status_processes:
             if (data) {
                 old = *(const uint32_t *)data;
                 changed = node->processes ^ old;
             }
 
             status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS;
             crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
                      node->uname, peer2text(proc_flags), status,
                      AM_I_DC ? "true" : crm_str(fsa_our_dc), changed);
 
             if ((changed & proc_flags) == 0) {
                 /* Peer process did not change */
                 crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags);
                 return;
             } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
                 crm_trace("Not connected");
                 return;
             } else if (fsa_state == S_STOPPING) {
                 crm_trace("Stopping");
                 return;
             }
 
             appeared = (node->processes & proc_flags) != 0;
             if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) {
                 /* Did we get evicted? */
                 crm_notice("Our peer connection failed");
                 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
 
             } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
                 /* Did the DC leave us? */
                 crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
                 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
 
                 if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
                     erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
                 }
 
             } else if(AM_I_DC && appeared == FALSE) {
                 crm_info("Peer %s left us", node->uname);
                 erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
                 /* crm_update_peer_join(__FUNCTION__, node, crm_join_none); */
             }
             break;
     }
 
     if (AM_I_DC) {
         xmlNode *update = NULL;
         int flags = node_update_peer;
-        gboolean alive = crm_is_peer_active(node);
+        gboolean alive = is_remote? appeared : crm_is_peer_active(node);
         crm_action_t *down = match_down_event(0, node->uuid, NULL, appeared);
 
         crm_trace("Alive=%d, appear=%d, down=%p", alive, appeared, down);
 
         if (alive && type == crm_status_processes) {
             register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
         }
 
         if (down) {
             const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
 
             if (safe_str_eq(task, CRM_OP_FENCE)) {
 
                 /* tengine_stonith_callback() confirms fence actions */
                 crm_trace("Updating CIB %s stonithd reported fencing of %s complete",
                           (down->confirmed? "after" : "before"), node->uname);
 
             } else if (alive == FALSE) {
                 crm_notice("%s of %s (op %d) is complete", task, node->uname, down->id);
                 /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */
                 stop_te_timer(down->timer);
 
-                flags |= node_update_join | node_update_expected;
-                crmd_peer_down(node, FALSE);
-                check_join_state(fsa_state, __FUNCTION__);
+                if (!is_remote) {
+                    flags |= node_update_join | node_update_expected;
+                    crmd_peer_down(node, FALSE);
+                    check_join_state(fsa_state, __FUNCTION__);
+                }
 
                 update_graph(transition_graph, down);
                 trigger_graph();
 
             } else {
-                crm_trace("Other %p", down);
+                crm_trace("Node %s came up, was expected %s (op %d)",
+                          node->uname, task, down->id);
             }
 
         } else if (appeared == FALSE) {
+            /* match_down_event() doesn't match resource stop events for
+             * pacemaker_remote nodes, so normal pacemaker_remote node stops
+             * will come here and get ugly log messages, but otherwise be OK.
+             * We can't skip this entirely for pacemaker_remote nodes,
+             * because recurring monitor failures will also end up here
+             * when the cluster recovers the connection resource.
+             */
             crm_notice("Stonith/shutdown of %s not matched", node->uname);
 
-            crm_update_peer_join(__FUNCTION__, node, crm_join_none);
-            check_join_state(fsa_state, __FUNCTION__);
+            if (!is_remote) {
+                crm_update_peer_join(__FUNCTION__, node, crm_join_none);
+                check_join_state(fsa_state, __FUNCTION__);
+            }
 
             abort_transition(INFINITY, tg_restart, "Node failure", NULL);
             fail_incompletable_actions(transition_graph, node->uuid);
 
         } else {
-            crm_trace("Other %p", down);
+            crm_trace("Node %s came up, was not expected to be down",
+                      node->uname);
+        }
+
+        if (is_remote) {
+            /* A pacemaker_remote node won't have its cluster status updated
+             * in the CIB by membership-layer callbacks, so do it here.
+             */
+            flags |= node_update_cluster;
+
+            /* Trigger resource placement on newly integrated nodes */
+            if (appeared) {
+                abort_transition(INFINITY, tg_restart,
+                                 "pacemaker_remote node integrated", NULL);
+            }
         }
 
+        /* Update the CIB node state */
         update = do_update_node_cib(node, flags, NULL, __FUNCTION__);
         fsa_cib_anon_update(XML_CIB_TAG_STATUS, update,
                             cib_scope_local | cib_quorum_override | cib_can_create);
         free_xml(update);
     }
 
     trigger_fsa(fsa_source);
 }
 
 void
 crmd_cib_connection_destroy(gpointer user_data)
 {
     CRM_CHECK(user_data == fsa_cib_conn,;);
 
     crm_trace("Invoked");
     trigger_fsa(fsa_source);
     fsa_cib_conn->state = cib_disconnected;
 
     if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
         crm_info("Connection to the CIB terminated...");
         return;
     }
 
     /* eventually this will trigger a reconnect, not a shutdown */
     crm_err("Connection to the CIB terminated...");
     register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
     clear_bit(fsa_input_register, R_CIB_CONNECTED);
 
     return;
 }
 
 gboolean
 crm_fsa_trigger(gpointer user_data)
 {
     crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue));
     s_crmd_fsa(C_FSA_INTERNAL);
     crm_trace("Exited  (queue len: %d)", g_list_length(fsa_message_queue));
     return TRUE;
 }
diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h
index ef02c40d4e..0e7ff481f3 100644
--- a/crmd/crmd_lrm.h
+++ b/crmd/crmd_lrm.h
@@ -1,164 +1,163 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crmd_messages.h>
 
 extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level);
 extern void lrm_clear_last_failure(const char *rsc_id, const char *node_name);
 void lrm_op_callback(lrmd_event_data_t * op);
 
 typedef struct resource_history_s {
     char *id;
     uint32_t last_callid;
     lrmd_rsc_info_t rsc;
     lrmd_event_data_t *last;
     lrmd_event_data_t *failed;
     GList *recurring_op_list;
 
     /* Resources must be stopped using the same
      * parameters they were started with.  This hashtable
      * holds the parameters that should be used for the next stop
      * cmd on this resource. */
     GHashTable *stop_params;
 } rsc_history_t;
 
 void history_free(gpointer data);
 
 /* TDOD - Replace this with lrmd_event_data_t */
 struct recurring_op_s {
     int call_id;
     int interval;
     gboolean remove;
     gboolean cancelled;
     unsigned int start_time;
     char *rsc_id;
     char *op_type;
     char *op_key;
     char *user_data;
     GHashTable *params;
 };
 
 typedef struct lrm_state_s {
     const char *node_name;
     /* reserved for lrm_state.c usage only */
     void *conn;
     /* reserved for remote_lrmd_ra.c usage only */
     void *remote_ra_data;
 
     GHashTable *resource_history;
     GHashTable *pending_ops;
     GHashTable *deletion_ops;
     GHashTable *rsc_info_cache;
 
     int num_lrm_register_fails;
 } lrm_state_t;
 
 struct pending_deletion_op_s {
     char *rsc;
     ha_msg_input_t *input;
 };
 
 /*!
  * \brief Is this the local ipc connection to the lrmd
  */
 gboolean
 lrm_state_is_local(lrm_state_t *lrm_state);
 
 /*!
  * \brief Clear all state information from a single state entry.
  * \note This does not close the lrmd connection
  */
 void lrm_state_reset_tables(lrm_state_t * lrm_state);
 GList *lrm_state_get_list(void);
 
 /*!
  * \brief Initiate internal state tables
  */
 gboolean lrm_state_init_local(void);
 
 /*!
  * \brief Destroy all state entries and internal state tables
  */
 void lrm_state_destroy_all(void);
 
 /*!
  * \brief Create lrmd connection entry.
  */
 lrm_state_t *lrm_state_create(const char *node_name);
 
 /*!
  * \brief Destroy lrmd connection keyed of node name
  */
 void lrm_state_destroy(const char *node_name);
 
 /*!
  * \brief Find lrm_state data by node name
  */
 lrm_state_t *lrm_state_find(const char *node_name);
 
 /*!
  * \brief Either find or create a new entry
  */
 lrm_state_t *lrm_state_find_or_create(const char *node_name);
 
 /*!
  * The functions below are wrappers for the lrmd api calls the crmd
  * uses.  These wrapper functions allow us to treat the crmd's remote
  * lrmd connection resources the same as regular resources.  Internally
  * Regular resources go to the lrmd, and remote connection resources are
  * handled locally in the crmd.
  */
 void lrm_state_disconnect(lrm_state_t * lrm_state);
 int lrm_state_ipc_connect(lrm_state_t * lrm_state);
 int lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
                                    int timeout);
 int lrm_state_is_connected(lrm_state_t * lrm_state);
 int lrm_state_poke_connection(lrm_state_t * lrm_state);
 
 int lrm_state_get_metadata(lrm_state_t * lrm_state,
                            const char *class,
                            const char *provider,
                            const char *agent, char **output, enum lrmd_call_options options);
 int lrm_state_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval);
 int lrm_state_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
                    int timeout, /* ms */
                    int start_delay,     /* ms */
                    lrmd_key_value_t * params);
 lrmd_rsc_info_t *lrm_state_get_rsc_info(lrm_state_t * lrm_state,
                                         const char *rsc_id, enum lrmd_call_options options);
 int lrm_state_register_rsc(lrm_state_t * lrm_state,
                            const char *rsc_id,
                            const char *class,
                            const char *provider, const char *agent, enum lrmd_call_options options);
 int lrm_state_unregister_rsc(lrm_state_t * lrm_state,
                              const char *rsc_id, enum lrmd_call_options options);
 
 /*! These functions are used to manage the remote lrmd connection resources */
 void remote_lrm_op_callback(lrmd_event_data_t * op);
 gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id);
 lrmd_rsc_info_t *remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id);
 int remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval);
 int remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */
                    int timeout, /* ms */
                    int start_delay,     /* ms */
                    lrmd_key_value_t * params);
 void remote_ra_cleanup(lrm_state_t * lrm_state);
-
-xmlNode *simple_remote_node_status(const char *node_name, xmlNode *parent, const char *source);
+void remote_ra_fail(const char *node_name);
 
 gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending);
diff --git a/crmd/crmd_messages.h b/crmd/crmd_messages.h
index 10787f0b3a..44ce1d6036 100644
--- a/crmd/crmd_messages.h
+++ b/crmd/crmd_messages.h
@@ -1,114 +1,116 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef XML_CRM_MESSAGES__H
 #  define XML_CRM_MESSAGES__H
 
 #  include <crm/crm.h>
 #  include <crm/common/ipcs.h>
 #  include <crm/common/xml.h>
 #  include <crm/cluster/internal.h>
 #  include <crmd_fsa.h>
 
 typedef struct ha_msg_input_s {
     xmlNode *msg;
     xmlNode *xml;
 
 } ha_msg_input_t;
 
 extern ha_msg_input_t *new_ha_msg_input(xmlNode * orig);
 extern void delete_ha_msg_input(ha_msg_input_t * orig);
 
 extern void *fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type,
                                 const char *caller);
 
 #  define fsa_typed_data(x) fsa_typed_data_adv(msg_data, x, __FUNCTION__)
 
 extern void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
                                    fsa_data_t * cur_data, void *new_data, const char *raised_from);
 
 #  define register_fsa_error(cause, input, new_data) register_fsa_error_adv(cause, input, msg_data, new_data, __FUNCTION__)
 
 extern int register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
                                   void *data, long long with_actions,
                                   gboolean prepend, const char *raised_from);
 
 extern void fsa_dump_queue(int log_level);
 extern void route_message(enum crmd_fsa_cause cause, xmlNode * input);
 
 #  define crmd_fsa_stall(suppress) do {                                 \
     if(suppress == FALSE && msg_data != NULL) {                         \
         register_fsa_input_adv(                                         \
             ((fsa_data_t*)msg_data)->fsa_cause, I_WAIT_FOR_EVENT,       \
             ((fsa_data_t*)msg_data)->data, action, TRUE, __FUNCTION__); \
     } else {                                                            \
         register_fsa_input_adv(                                         \
             C_FSA_INTERNAL, I_WAIT_FOR_EVENT,                           \
             NULL, action, TRUE, __FUNCTION__);                          \
     }                                                                   \
     } while(0)
 
 #  define register_fsa_input(cause, input, data) register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __FUNCTION__)
 
 #  define register_fsa_action(action) {					\
 		fsa_actions |= action;					\
 		if(fsa_source) {					\
 			mainloop_set_trigger(fsa_source);			\
 		}							\
 		crm_debug("%s added action %s to the FSA",		\
 			  __FUNCTION__, fsa_action2string(action));	\
 	}
 
 #  define register_fsa_input_before(cause, input, data) register_fsa_input_adv(cause, input, data, A_NOTHING, TRUE, __FUNCTION__)
 
 #  define register_fsa_input_later(cause, input, data) register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __FUNCTION__)
 
 void delete_fsa_input(fsa_data_t * fsa_data);
 
 GListPtr put_message(fsa_data_t * new_message);
 fsa_data_t *get_message(void);
 gboolean is_message(void);
 gboolean have_wait_message(void);
 
 extern gboolean relay_message(xmlNode * relay_message, gboolean originated_locally);
 
 extern void process_message(xmlNode * msg, gboolean originated_locally, const char *src_node_name);
 
 extern gboolean crm_dc_process_message(xmlNode * whole_message,
                                        xmlNode * action,
                                        const char *host_from,
                                        const char *sys_from,
                                        const char *sys_to, const char *op, gboolean dc_mode);
 
 extern gboolean send_msg_via_ipc(xmlNode * msg, const char *sys);
 
 extern gboolean add_pending_outgoing_reply(const char *originating_node_name,
                                            const char *crm_msg_reference,
                                            const char *sys_to, const char *sys_from);
 
 gboolean crmd_is_proxy_session(const char *session);
 void crmd_proxy_send(const char *session, xmlNode *msg);
 
 extern gboolean crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session);
 
 extern gboolean send_request(xmlNode * msg, char **msg_reference);
 
 extern enum crmd_fsa_input handle_message(xmlNode * stored_msg, enum crmd_fsa_cause cause);
 
 extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
 
+void send_remote_state_message(const char *node_name, gboolean node_up);
+
 #endif
diff --git a/crmd/lrm.c b/crmd/lrm.c
index a73b2a92e0..5f63676e83 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -1,2462 +1,2386 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
 #include <crm/crm.h>
 #include <crm/services.h>
 
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 
 #define START_DELAY_THRESHOLD 5 * 60 * 1000
 #define MAX_LRM_REG_FAILS 30
 
 struct delete_event_s {
     int rc;
     const char *rsc;
     lrm_state_t *lrm_state;
 };
 
 static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
 static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
 static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
 static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                              const char *user_name);
 
 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
                                        const char *rsc_id, const char *operation);
 static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
                           xmlNode * msg, xmlNode * request);
 
 void send_direct_ack(const char *to_host, const char *to_sys,
                      lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
 
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
                                          int log_level);
 static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
 
 static void
 lrm_connection_destroy(void)
 {
     if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
         crm_crit("LRM Connection failed");
         register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
         clear_bit(fsa_input_register, R_LRM_CONNECTED);
 
     } else {
         crm_info("LRM Connection disconnected");
     }
 
 }
 
 static char *
 make_stop_id(const char *rsc, int call_id)
 {
     char *op_id = NULL;
 
     op_id = calloc(1, strlen(rsc) + 34);
     if (op_id != NULL) {
         snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id);
     }
     return op_id;
 }
 
 static void
 copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
 {
     if (strstr(key, CRM_META "_") == NULL) {
         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
     }
 }
 
 static void
 copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
 {
     if (strstr(key, CRM_META "_") != NULL) {
         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
     }
 }
 
 /*
  * \internal
  * \brief Remove a recurring operation from a resource's history
  *
  * \param[in,out] history  Resource history to modify
  * \param[in]     op       Operation to remove
  *
  * \return TRUE if the operation was found and removed, FALSE otherwise
  */
 static gboolean
 history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
 {
     GList *iter;
 
     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
         lrmd_event_data_t *existing = iter->data;
 
         if ((op->interval == existing->interval)
             && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
             && safe_str_eq(op->op_type, existing->op_type)) {
 
             history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
             lrmd_free_event(existing);
             return TRUE;
         }
     }
     return FALSE;
 }
 
 /*
  * \internal
  * \brief Free all recurring operations in resource history
  *
  * \param[in,out] history  Resource history to modify
  */
 static void
 history_free_recurring_ops(rsc_history_t *history)
 {
     GList *iter;
 
     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
         lrmd_free_event(iter->data);
     }
     g_list_free(history->recurring_op_list);
     history->recurring_op_list = NULL;
 }
 
 /*
  * \internal
  * \brief Free resource history
  *
  * \param[in,out] history  Resource history to free
  */
 void
 history_free(gpointer data)
 {
     rsc_history_t *history = (rsc_history_t*)data;
 
     if (history->stop_params) {
         g_hash_table_destroy(history->stop_params);
     }
 
     /* Don't need to free history->rsc.id because it's set to history->id */
     free(history->rsc.type);
     free(history->rsc.class);
     free(history->rsc.provider);
 
     lrmd_free_event(history->failed);
     lrmd_free_event(history->last);
     free(history->id);
     history_free_recurring_ops(history);
     free(history);
 }
 
 static void
 update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
     int target_rc = 0;
     rsc_history_t *entry = NULL;
 
     if (op->rsc_deleted) {
         crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
         delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
         return;
     }
 
     if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
         return;
     }
 
     crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
 
     entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
     if (entry == NULL && rsc) {
         entry = calloc(1, sizeof(rsc_history_t));
         entry->id = strdup(op->rsc_id);
         g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
 
         entry->rsc.id = entry->id;
         entry->rsc.type = strdup(rsc->type);
         entry->rsc.class = strdup(rsc->class);
         if (rsc->provider) {
             entry->rsc.provider = strdup(rsc->provider);
         } else {
             entry->rsc.provider = NULL;
         }
 
     } else if (entry == NULL) {
         crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
         return;
     }
 
     entry->last_callid = op->call_id;
     target_rc = rsc_op_expected_rc(op);
     if (op->op_status == PCMK_LRM_OP_CANCELLED) {
         if (op->interval > 0) {
             crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type,
                       op->interval);
             history_remove_recurring_op(entry, op);
             return;
         } else {
             crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval,
                       op->rc, op->op_status);
         }
 
     } else if (did_rsc_op_fail(op, target_rc)) {
         /* We must store failed monitors here
          * - otherwise the block below will cause them to be forgetten them when a stop happens
          */
         if (entry->failed) {
             lrmd_free_event(entry->failed);
         }
         entry->failed = lrmd_copy_event(op);
 
     } else if (op->interval == 0) {
         if (entry->last) {
             lrmd_free_event(entry->last);
         }
         entry->last = lrmd_copy_event(op);
 
         if (op->params &&
             (safe_str_eq(CRMD_ACTION_START, op->op_type) ||
              safe_str_eq("reload", op->op_type) ||
              safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {
 
             if (entry->stop_params) {
                 g_hash_table_destroy(entry->stop_params);
             }
             entry->stop_params = g_hash_table_new_full(crm_str_hash,
                                                        g_str_equal, g_hash_destroy_str,
                                                        g_hash_destroy_str);
 
             g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
         }
     }
 
     if (op->interval > 0) {
         /* Ensure there are no duplicates */
         history_remove_recurring_op(entry, op);
 
         crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
         entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
 
     } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
         crm_trace("Dropping %d recurring ops because of: %s_%s_%d",
                   g_list_length(entry->recurring_op_list), op->rsc_id,
                   op->op_type, op->interval);
         history_free_recurring_ops(entry);
     }
 }
 
 /*
  * Callback for events delivered by an lrmd connection.
  *
  * A disconnect event on the local connection tears the connection down via
  * lrm_connection_destroy().  Of the remaining events only completed
  * executions are processed; they are handed to process_lrm_event() together
  * with the lrm state of the node the event came from.
  */
 void
 lrm_op_callback(lrmd_event_data_t * op)
 {
     const char *origin = NULL;
     lrm_state_t *state = NULL;
 
     CRM_CHECK(op != NULL, return);
 
     /* Events without a remote node name belong to the local connection */
     origin = fsa_our_uname;
     if (op->remote_nodename) {
         origin = op->remote_nodename;
     }
 
     if (op->type == lrmd_event_disconnect && safe_str_eq(origin, fsa_our_uname)) {
         /* The local lrmd ipc connection went away: let the crmd set the
          * right bits */
         lrm_connection_destroy();
         return;
     }
 
     if (op->type != lrmd_event_exec_complete) {
         /* Only execution results need processing */
         return;
     }
 
     state = lrm_state_find(origin);
     CRM_ASSERT(state != NULL);
 
     process_lrm_event(state, op, NULL);
 }
 
 /*	 A_LRM_CONNECT	*/
 /*
  * Connect to and/or disconnect from the local lrmd, as selected by the
  * A_LRM_CONNECT and A_LRM_DISCONNECT bits of action.
  *
  * When both bits are set the disconnect is handled first.  Any other bit set
  * in action is reported as an error once the requested work is done.
  */
 void
 do_lrm_control(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     /* This only pertains to local lrmd connections.  Remote connections are
      * handled as resources within the pengine, so connecting to and
      * disconnecting from remote lrmd instances is handled differently. */
     lrm_state_t *local_state = NULL;
 
     if (fsa_our_uname == NULL) {
         return; /* Nothing to do */
     }
 
     local_state = lrm_state_find_or_create(fsa_our_uname);
     if (local_state == NULL) {
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
         return;
     }
 
     if (action & A_LRM_DISCONNECT) {
         /* The check is always performed, even when its result is ignored */
         gboolean all_stopped = lrm_state_verify_stopped(local_state, cur_state, LOG_INFO);
 
         if (all_stopped == FALSE && action == A_LRM_DISCONNECT) {
             /* Disconnect was the only request: wait and retry later */
             crmd_fsa_stall(FALSE);
             return;
         }
 
         clear_bit(fsa_input_register, R_LRM_CONNECTED);
         crm_info("Disconnecting from the LRM");
         lrm_state_disconnect(local_state);
         lrm_state_reset_tables(local_state);
         crm_notice("Disconnected from the LRM");
     }
 
     if (action & A_LRM_CONNECT) {
         int connect_rc = pcmk_ok;
 
         crm_debug("Connecting to the LRM");
         connect_rc = lrm_state_ipc_connect(local_state);
 
         if (connect_rc != pcmk_ok) {
             if (local_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
                 /* Still below the failure limit: retry after the wait timer */
                 crm_warn("Failed to sign on to the LRM %d"
                          " (%d max) times", local_state->num_lrm_register_fails,
                          MAX_LRM_REG_FAILS);
 
                 crm_timer_start(wait_timer);
                 crmd_fsa_stall(FALSE);
                 return;
             }
 
             crm_err("Failed to sign on to the LRM %d" " (max) times",
                     local_state->num_lrm_register_fails);
             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
             return;
         }
 
         set_bit(fsa_input_register, R_LRM_CONNECTED);
         crm_info("LRM connection established");
     }
 
     if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
         crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
     }
 }
 
 /*
  * Check whether anything is still pending or active for lrm_state before the
  * lrmd connection is dropped or the crmd exits.
  *
  * While the connection is up, every entry of pending_ops is offered to
  * stop_recurring_actions() and removed when that callback says so.  The
  * remaining non-recurring (interval == 0) pending operations are counted:
  *  - if there are any, they are logged at log_level and the function
  *    returns at once; the result is FALSE only when cur_state is not
  *    S_TERMINATE and R_SENT_RSC_STOP is set, otherwise TRUE
  *  - if there are none, resources that is_rsc_active() still reports as
  *    active are logged (with their pending operations) and TRUE is returned
  *
  * log_level is raised to LOG_ERR when cur_state is S_TERMINATE.
  */
 static gboolean
 lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
 {
     int counter = 0;
     gboolean rc = TRUE;
     const char *when = "lrm disconnect";   /* used in log messages only */
 
     GHashTableIter gIter;
     const char *key = NULL;
     rsc_history_t *entry = NULL;
     struct recurring_op_s *pending = NULL;
 
     crm_debug("Checking for active resources before exit");
 
     if (cur_state == S_TERMINATE) {
         log_level = LOG_ERR;
         when = "shutdown";
 
     } else if (is_set(fsa_input_register, R_SHUTDOWN)) {
         when = "shutdown... waiting";
     }
 
     /* Recurring operations can only be stopped while the connection is up */
     if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
         guint removed = g_hash_table_foreach_remove(
             lrm_state->pending_ops, stop_recurring_actions, lrm_state);
 
         crm_notice("Stopped %u recurring operations at %s (%u ops remaining)",
                    removed, when, g_hash_table_size(lrm_state->pending_ops));
     }
 
     if (lrm_state->pending_ops) {
         g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
             /* Ignore recurring actions in the shutdown calculations */
             if (pending->interval == 0) {
                 counter++;
             }
         }
     }
 
     if (counter > 0) {
         do_crm_log(log_level, "%d pending LRM operations at %s", counter, when);
 
         if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
             /* Not going to wait for these: list every pending entry
              * (recurring ones included) and leave rc as TRUE */
             g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
             while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
                 do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
             }
 
         } else {
             /* Tell the caller that operations are still outstanding */
             rc = FALSE;
         }
         return rc;
     }
 
     if (lrm_state->resource_history == NULL) {
         return rc;
     }
 
     if (cur_state == S_TERMINATE || is_set(fsa_input_register, R_SHUTDOWN)) {
         /* At this point we're not waiting, we're just shutting down */
         when = "shutdown";
     }
 
     /* Reuse counter for the number of resources that still look active */
     counter = 0;
     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
         if (is_rsc_active(lrm_state, entry->id) == FALSE) {
             continue;
         }
 
         counter++;
         crm_trace("Found %s active", entry->id);
         if (lrm_state->pending_ops) {
             GHashTableIter hIter;
 
             /* Report every pending operation that belongs to this resource */
             g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
             while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
                 if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
                     crm_notice("%sction %s (%s) incomplete at %s",
                                pending->interval == 0 ? "A" : "Recurring a",
                                key, pending->op_key, when);
                 }
             }
         }
     }
 
     if (counter) {
         crm_err("%d resources were active at %s.", counter, when);
     }
 
     return rc;
 }
 
 GHashTable *metadata_hash = NULL;
 
 /*
  * Return the metadata of a resource agent, using metadata_hash as a cache.
  *
  * type, rclass: agent type and class; both must be non-NULL
  * provider:     agent provider; "heartbeat" is used when NULL
  * force:        when true, skip the cache lookup and always ask the lrmd
  *
  * Metadata is always fetched through the local lrmd connection.  A result
  * fetched from the lrmd is stored in metadata_hash under the key
  * "<type>::<class>:<provider>"; the returned string is the one held by the
  * cache and must not be freed by the caller.
  *
  * Returns NULL (after logging a warning) when no metadata could be obtained,
  * or when one of the argument checks or the key allocation fails.
  */
 static char *
 get_rsc_metadata(const char *type, const char *rclass, const char *provider, bool force)
 {
     int rc = pcmk_ok;
     int len = 0;
     char *key = NULL;
     char *metadata = NULL;
 
     /* Always use a local connection for this operation */
     lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname);
 
     CRM_CHECK(type != NULL, return NULL);
     CRM_CHECK(rclass != NULL, return NULL);
     CRM_CHECK(lrm_state != NULL, return NULL);
 
     if (provider == NULL) {
         provider = "heartbeat";
     }
 
     if (metadata_hash == NULL) {
         metadata_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
     }
 
     /* Three separator characters ("::" and ":") plus the terminating NUL */
     len = strlen(type) + strlen(rclass) + strlen(provider) + 4;
     key = malloc(len);
     if(key == NULL) {
         return NULL;
     }
 
     snprintf(key, len, "%s::%s:%s", type, rclass, provider);
     if(force == FALSE) {
         crm_trace("Retreiving cached metadata for %s", key);
         metadata = g_hash_table_lookup(metadata_hash, key);
     }
 
     if(metadata == NULL) {
         rc = lrm_state_get_metadata(lrm_state, rclass, provider, type, &metadata, 0);
         crm_trace("Retrieved live metadata for %s: %s (%d)", key, pcmk_strerror(rc), rc);
         if(rc == pcmk_ok && metadata != NULL) {
             /* The cache takes ownership of both key and metadata */
             g_hash_table_insert(metadata_hash, key, metadata);
             key = NULL;
 
         } else if(rc == pcmk_ok) {
             /* Success without a result: report it, but do not cache a NULL
              * value and keep the key for the warning below */
             CRM_LOG_ASSERT(metadata != NULL);
 
         } else {
             CRM_LOG_ASSERT(metadata == NULL);
             metadata = NULL;
         }
     }
 
     if (metadata == NULL) {
         crm_warn("No metadata found for %s: %s (%d)", key, pcmk_strerror(rc), rc);
     }
 
     free(key);
     return metadata;
 }
 
 /*
  * Walk the <parameter> entries of an agent's metadata and select those whose
  * criteria attribute (for example "unique" or "private") matches target.
  *
  * op:             operation whose rsc_id is used for logging and whose
  *                 params supply the values copied into result
  * metadata:       parsed agent metadata
  * result:         optional XML node that receives name/value pairs taken
  *                 from op->params
  * criteria:       name of the metadata attribute to test
  * target:         truth value the attribute must have for a parameter to be
  *                 accepted
  * invert_for_xml: when true, result receives the parameters that were NOT
  *                 accepted for the list
  *
  * For the "private" criteria, parameters named in secure_terms are treated
  * as private until some parameter in the metadata carries the attribute.
  *
  * Returns a newly allocated string of accepted names, each written as
  * " name ", or NULL when nothing was accepted.  The caller frees it.
  */
 static char *
 build_parameter_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode *result,
                      const char *criteria, bool target, bool invert_for_xml)
 {
     int len = 0;
     int max = 0;
     char *list = NULL;
 
     xmlNode *param = NULL;
     xmlNode *params = NULL;
 
     const char *secure_terms[] = {
         "password",
         "passwd",
         "user",
     };
 
     if(safe_str_eq("private", criteria)) {
         /* It will take time for the agents to be updated
          * Check for some common terms
          */
         max = DIMOF(secure_terms);
     }
 
     params = find_xml_node(metadata, "parameters", TRUE);
     for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) {
         if (crm_str_eq((const char *)param->name, "parameter", TRUE)) {
             bool accept = FALSE;
             const char *name = crm_element_value(param, "name");
             const char *value = crm_element_value(param, criteria);
 
             if(max && value) {
                 /* Turn off the compatibility logic once an agent has been updated to know about 'private' */
                 max = 0;
             }
 
             if (name == NULL) {
                 crm_err("Invalid parameter in %s metadata", op->rsc_id);
                 /* A nameless parameter can neither be listed nor looked up
                  * in op->params; in particular it must not be turned into
                  * an accepted entry by invert_for_xml below */
                 continue;
             }
 
             if(target == crm_is_true(value)) {
                 accept = TRUE;
 
             } else if(max) {
                 int lpc = 0;
                 bool found = FALSE;
 
                 for(lpc = 0; found == FALSE && lpc < max; lpc++) {
                     if(safe_str_eq(secure_terms[lpc], name)) {
                         found = TRUE;
                     }
                 }
 
                 if(found == target) {
                     accept = TRUE;
                 }
             }
 
             if(accept) {
                 int start = len;
 
                 crm_trace("Attr %s is %s%s", name, target?"":"not ", criteria);
 
                 /* Room for " name " on top of what is already there; the
                  * extra byte passed to realloc_safe() holds the NUL */
                 len += strlen(name) + 2;
                 list = realloc_safe(list, len + 1);
                 sprintf(list + start, " %s ", name);
 
             } else {
                 crm_trace("Rejecting %s for %s", name, criteria);
             }
 
             if(invert_for_xml) {
                 crm_trace("Inverting %s match for %s xml", name, criteria);
                 accept = !accept;
             }
 
             if(result && accept) {
                 value = g_hash_table_lookup(op->params, name);
                 if(value != NULL) {
                     crm_trace("Adding attr %s to the xml result", name);
                     crm_xml_add(result, name, value);
                 }
             }
         }
     }
 
     return list;
 }
 
 /*
  * Tell whether the agent metadata lists an <action> element whose "name"
  * attribute equals name.
  */
 static bool
 resource_supports_action(xmlNode *metadata, const char *name)
 {
     xmlNode *candidate = NULL;
     xmlNode *action_list = find_xml_node(metadata, "actions", TRUE);
 
     for (candidate = __xml_first_child(action_list); candidate != NULL;
          candidate = __xml_next(candidate)) {
 
         if (crm_str_eq((const char *)candidate->name, "action", TRUE) == FALSE) {
             /* Only <action> children are of interest */
             continue;
         }
 
         if (safe_str_eq(name, crm_element_value(candidate, "name"))) {
             return TRUE;
         }
     }
 
     return FALSE;
 }
 
 /*
  * Add the "force restart" parameter list and its digest to an operation
  * update.
  *
  * Nothing is added for recurring operations or for agents whose metadata
  * does not list a "reload" action.  Otherwise the names of all parameters
  * marked unique are stored in XML_LRM_ATTR_OP_RESTART (an empty string when
  * there are none) and a digest of their values in
  * XML_LRM_ATTR_RESTART_DIGEST.
  */
 static void
 append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version)
 {
     char *unique_names = NULL;
     char *restart_digest = NULL;
     xmlNode *restart_params = NULL;
 
     CRM_LOG_ASSERT(op->params != NULL);
 
     if (op->interval > 0) {
         /* monitors are not reloadable */
         return;
     }
 
     if (!resource_supports_action(metadata, "reload")) {
         /* Resource does not support reloads */
         return;
     }
 
     /* Any parameters with unique="1" should be added into the "op-force-restart" list. */
     restart_params = create_xml_node(NULL, XML_TAG_PARAMS);
     unique_names = build_parameter_list(op, metadata, restart_params, "unique", TRUE, FALSE);
 
     restart_digest = calculate_operation_digest(restart_params, version);
 
     /* Both attributes are always written so that their presence signals
      * reload support, even if no parameter is marked unique="1" */
     crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, unique_names? unique_names: "");
     crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, restart_digest);
 
     crm_trace("%s: %s, %s", op->rsc_id, restart_digest, unique_names);
     crm_log_xml_trace(restart_params, "restart digest source");
 
     free_xml(restart_params);
     free(restart_digest);
     free(unique_names);
 }
 
 /*
  * Add the list of private parameters and the matching digest to an
  * operation update.
  *
  * XML_LRM_ATTR_OP_SECURE receives the names of the private parameters, while
  * XML_LRM_ATTR_SECURE_DIGEST is computed over the remaining parameters.
  * Neither attribute is written when no private parameter is found.
  */
 static void
 append_secure_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version)
 {
     char *private_names = NULL;
     char *secure_digest = NULL;
     xmlNode *digest_source = NULL;
 
     CRM_LOG_ASSERT(op->params != NULL);
 
     /*
      * To keep XML_LRM_ATTR_OP_SECURE short, it lists the secure parameters
      * whereas the digest source is filled with the insecure ones (hence the
      * inverted XML selection).
      */
     digest_source = create_xml_node(NULL, XML_TAG_PARAMS);
     private_names = build_parameter_list(op, metadata, digest_source, "private", TRUE, TRUE);
 
     if (private_names == NULL) {
         crm_trace("%s: no secure parameters", op->rsc_id);
 
     } else {
         secure_digest = calculate_operation_digest(digest_source, version);
         crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, private_names);
         crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, secure_digest);
 
         crm_trace("%s: %s, %s", op->rsc_id, secure_digest, private_names);
         crm_log_xml_trace(digest_source, "secure digest source");
     }
 
     free_xml(digest_source);
     free(secure_digest);
     free(private_names);
 }
 
 /*
  * Append the XML record of an operation result to parent and, where
  * applicable, decorate it with the restart and secure digests.
  *
  * parent: XML node that receives the operation record
  * rsc:    resource the operation belongs to; may be NULL, in which case no
  *         digests are added
  * op:     operation result; FALSE is returned when it is NULL
  * src:    origin string passed on to create_operation_update()
  *
  * Digests are skipped for stop operations, for operations without
  * parameters, and when the agent metadata is unavailable or is not valid
  * XML.
  *
  * Returns TRUE whenever op is non-NULL, even if no record or digest could be
  * added.
  */
 static gboolean
 build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
                        const char *src)
 {
     int target_rc = 0;
     xmlNode *xml_op = NULL;
     xmlNode *metadata = NULL;
     const char *m_string = NULL;
     const char *caller_version = NULL;
 
     if (op == NULL) {
         return FALSE;
     }
 
     target_rc = rsc_op_expected_rc(op);
 
     /* there is a small risk in formerly mixed clusters that it will
      * be sub-optimal.
      *
      * however with our upgrade policy, the update we send should
      * still be completely supported anyway
      */
     if (op->params != NULL) {
         /* op->params is allowed to be NULL (see the digest check below), so
          * do not hand a NULL table to g_hash_table_lookup() */
         caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
     }
     CRM_LOG_ASSERT(caller_version != NULL);
 
     if(caller_version == NULL) {
         caller_version = CRM_FEATURE_SET;
     }
 
     crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
     xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG);
     if (xml_op == NULL) {
         return TRUE;
     }
 
     if (rsc == NULL || op->params == NULL || crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) {
         /* Stopped resources don't need the digest logic */
         crm_trace("No digests needed for %s %p %p %s", op->rsc_id, op->params, rsc, op->op_type);
         return TRUE;
     }
 
     /* A start operation forces the metadata to be fetched again instead of
      * being taken from the cache */
     m_string = get_rsc_metadata(rsc->type, rsc->class, rsc->provider, safe_str_eq(op->op_type, RSC_START));
     if(m_string == NULL) {
         crm_err("No metadata for %s::%s:%s", rsc->provider, rsc->class, rsc->type);
         return TRUE;
     }
 
     metadata = string2xml(m_string);
     if(metadata == NULL) {
         crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type);
         return TRUE;
     }
 
     crm_trace("Includind additional digests for %s::%s:%s", rsc->provider, rsc->class, rsc->type);
     append_restart_list(op, metadata, xml_op, caller_version);
     append_secure_list(op, metadata, xml_op, caller_version);
 
     free_xml(metadata);
     return TRUE;
 }
 
 /*
  * Decide from the cached history whether a resource should be considered
  * active.
  *
  * A resource is reported inactive when it has no history entry or no last
  * operation recorded, when its last operation was a successful stop or
  * migrate, when the last result was "not running", or when a non-recurring
  * operation reported "not configured".  Everything else counts as active.
  */
 static gboolean
 is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
 {
     lrmd_event_data_t *latest = NULL;
     rsc_history_t *history = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
 
     if (history == NULL || history->last == NULL) {
         return FALSE;
     }
     latest = history->last;
 
     crm_trace("Processing %s: %s.%d=%d",
               rsc_id, latest->op_type, latest->interval, latest->rc);
 
     if (latest->rc == PCMK_OCF_OK) {
         if (safe_str_eq(latest->op_type, CRMD_ACTION_STOP)) {
             return FALSE;
         }
         if (safe_str_eq(latest->op_type, CRMD_ACTION_MIGRATE)) {
             /* a stricter check is too complex...
              * leave that to the PE
              */
             return FALSE;
         }
     }
 
     if (latest->rc == PCMK_OCF_NOT_RUNNING) {
         return FALSE;
     }
 
     if (latest->interval == 0 && latest->rc == PCMK_OCF_NOT_CONFIGURED) {
         /* Badly configured resources can't be reliably stopped */
         return FALSE;
     }
 
     return TRUE;
 }
 
 /*
  * Fill rsc_list with one XML_LRM_TAG_RESOURCE element per entry of the
  * resource history, each carrying the agent description, the container (if
  * the last operation's parameters name one) and the cached failed, last and
  * recurring operations.
  *
  * Always returns FALSE.
  */
 static gboolean
 build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
 {
     GHashTableIter history_iter;
     rsc_history_t *history = NULL;
 
     g_hash_table_iter_init(&history_iter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&history_iter, NULL, (void **)&history)) {
         GList *item = NULL;
         const char *container = NULL;
         xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
 
         crm_xml_add(xml_rsc, XML_ATTR_ID, history->id);
         crm_xml_add(xml_rsc, XML_ATTR_TYPE, history->rsc.type);
         crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, history->rsc.class);
         crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, history->rsc.provider);
 
         if (history->last && history->last->params) {
             container = g_hash_table_lookup(history->last->params,
                                             CRM_META"_"XML_RSC_ATTR_CONTAINER);
         }
         if (container) {
             crm_trace("Resource %s is a part of container resource %s", history->id, container);
             crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
         }
 
         /* Order of the records: failure first, then the last operation,
          * then the recurring ones */
         build_operation_update(xml_rsc, &(history->rsc), history->failed, __FUNCTION__);
         build_operation_update(xml_rsc, &(history->rsc), history->last, __FUNCTION__);
 
         item = history->recurring_op_list;
         while (item != NULL) {
             build_operation_update(xml_rsc, &(history->rsc), item->data, __FUNCTION__);
             item = item->next;
         }
     }
 
     return FALSE;
 }
 
 /*
  * Build the status XML for the node that lrm_state belongs to.
  *
  * The top-level element comes from do_update_node_cib() (called with
  * update_flags); an XML_CIB_TAG_LRM child holding an XML_LRM_TAG_RESOURCES
  * list is added to it and filled by build_active_RAs().
  *
  * As changed by this patch, the node is resolved through
  * crm_get_peer_full(..., CRM_GET_PEER_ANY), its uuid becomes the id of the
  * XML_CIB_TAG_LRM element, and NULL is returned when no peer is found.
  *
  * Returns the top-level element.
  * NOTE(review): presumably the caller is expected to free it - confirm.
  */
 static xmlNode *
 do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
 {
     xmlNode *xml_state = NULL;
     xmlNode *xml_data = NULL;
     xmlNode *rsc_list = NULL;
-    const char *uuid = NULL;
+    crm_node_t *peer = NULL;
 
-    if (lrm_state_is_local(lrm_state)) {
-        crm_node_t *peer = crm_get_peer(0, lrm_state->node_name);
-        xml_state = do_update_node_cib(peer, update_flags, NULL, __FUNCTION__);
-        uuid = fsa_our_uuid;
+    peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
+    CRM_CHECK(peer != NULL, return NULL);
 
-    } else {
-        xml_state = simple_remote_node_status(lrm_state->node_name, NULL, __FUNCTION__);
-        uuid = lrm_state->node_name;
-    }
+    xml_state = do_update_node_cib(peer, update_flags, NULL, __FUNCTION__);
 
     xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
-    crm_xml_add(xml_data, XML_ATTR_ID, uuid);
+    crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
     rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
 
     /* Build a list of active (not always running) resources */
     build_active_RAs(lrm_state, rsc_list);
 
     crm_log_xml_trace(xml_state, "Current state of the LRM");
 
     return xml_state;
 }
 
 /*
  * Build the status XML (node state plus resource history) for node_name.
  *
  * is_replace: not used by this function
  * node_name:  name of the node whose lrm state is queried
  *
  * The join state and expected state of the result are forced to
  * CRMD_JOINSTATE_MEMBER.
  *
  * Returns NULL when no lrm state exists for node_name or when the status
  * XML could not be built.
  */
 xmlNode *
 do_lrm_query(gboolean is_replace, const char *node_name)
 {
     lrm_state_t *lrm_state = lrm_state_find(node_name);
     xmlNode *xml_state;
 
     if (!lrm_state) {
         crm_err("Could not query lrm state for lrmd node %s", node_name);
         return NULL;
     }
     xml_state = do_lrm_query_internal(lrm_state,
                                       node_update_cluster|node_update_peer);
     if (xml_state == NULL) {
         /* The helper gives up when the node cannot be resolved; there is
          * nothing to decorate in that case */
         return NULL;
     }
 
     /* In case this function is called to generate a join confirmation to
      * send to the DC, force the current and expected join state to member.
      * This isn't necessary for newer DCs but is backward compatible.
      */
     crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER);
     crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER);
 
     return xml_state;
 }
 
 /*
  * Acknowledge a resource deletion request to whoever sent it.
  *
  * A delete operation event is constructed from the request, marked as done
  * or failed according to rc, and sent back with send_direct_ack().  When the
  * request did not come from CRM_SYSTEM_TENGINE, "last-lrm-refresh" is
  * updated with the current time in order to trigger a new transition.
  */
 static void
 notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
 {
     const gboolean deleted = (rc == pcmk_ok);
     const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
     const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
     lrmd_event_data_t *ack = NULL;
     char *now_s = NULL;
 
     crm_info("Notifying %s on %s that %s was%s deleted",
              from_sys, from_host, rsc_id, deleted ? "" : " not");
 
     ack = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
     CRM_ASSERT(ack != NULL);
 
     ack->op_status = deleted ? PCMK_LRM_OP_DONE : PCMK_LRM_OP_ERROR;
     ack->rc = deleted ? PCMK_OCF_OK : PCMK_OCF_UNKNOWN_ERROR;
 
     send_direct_ack(from_host, from_sys, NULL, ack, rsc_id);
     lrmd_free_event(ack);
 
     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE) == FALSE) {
         /* Requested by the transition engine: nothing more to do */
         return;
     }
 
     /* this isn't expected - trigger a new transition */
     now_s = crm_itoa(time(NULL));
 
     crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id);
 
     update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                          "last-lrm-refresh", now_s, FALSE, NULL, NULL);
 
     free(now_s);
 }
 
 /*
  * GHRFunc for the table of pending deletion requests: notify the requester
  * and drop the entry when it refers to the resource named in the
  * delete_event_s passed as user_data.
  */
 static gboolean
 lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
 {
     struct pending_deletion_op_s *request = value;
     struct delete_event_s *deleted = user_data;
 
     if (crm_str_eq(deleted->rsc, request->rsc, TRUE) == FALSE) {
         /* Some other resource: keep the entry */
         return FALSE;
     }
 
     notify_deleted(deleted->lrm_state, request->input, deleted->rsc, deleted->rc);
     return TRUE;
 }
 
 /*
  * GHRFunc for the pending operation table: select (and log) every entry that
  * belongs to the resource id passed as user_data.
  */
 static gboolean
 lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
 {
     const char *deleted_rsc = user_data;
     struct recurring_op_s *candidate = value;
 
     if (crm_str_eq(deleted_rsc, candidate->rsc_id, TRUE) == FALSE) {
         return FALSE;
     }
 
     crm_info("Removing op %s:%d for deleted resource %s",
              candidate->op_key, candidate->call_id, deleted_rsc);
     return TRUE;
 }
 
 /*
  * Remove the rsc from the CIB
  *
  * Avoids refreshing the entire LRM section of this host
  */
 #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
 
 /*
  * Delete the status entry of a single resource on lrm_state's node from the
  * CIB, addressing it with an XPath built from rsc_template.
  *
  * call_options: CIB call options; cib_xpath is always added
  * user_name:    passed through to cib_internal_op()
  *
  * Returns -ENXIO when rsc_id is NULL, -ENOMEM when the XPath cannot be
  * allocated, and the result of cib_internal_op() otherwise.
  */
 static int
 delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                   const char *user_name)
 {
     char *rsc_xpath = NULL;
     int max = 0;
     int rc = pcmk_ok;
 
     CRM_CHECK(rsc_id != NULL, return -ENXIO);
 
     /* The template's own "%s" placeholders leave room to spare */
     max = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1;
     rsc_xpath = calloc(1, max);
     if (rsc_xpath == NULL) {
         /* Do not hand a NULL buffer to snprintf() */
         return -ENOMEM;
     }
     snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id);
 
     rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath,
                          NULL, NULL, call_options | cib_xpath, user_name);
 
     free(rsc_xpath);
     return rc;
 }
 
 /*
  * Finish the deletion of a resource.
  *
  * lrm_state: state the resource belongs to
  * input:     request that asked for the deletion; when non-NULL its sender
  *            is notified of the outcome
  * rsc_id:    id of the resource; nothing is done when it is NULL
  * rsc_gIter: when non-NULL, an iterator over the resource history that is
  *            positioned on the entry to remove
  * rc:        outcome of the deletion; the cleanup below only happens for
  *            pcmk_ok
  * user_name: passed through to delete_rsc_status()
  *
  * On success the history entry, the CIB status of the resource and all of
  * its pending operations are removed.  In every case, requests waiting in
  * deletion_ops for this resource are notified and dropped.
  */
 static void
 delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
                  GHashTableIter * rsc_gIter, int rc, const char *user_name)
 {
     struct delete_event_s event;
     char *rsc_id_copy = NULL;
 
     CRM_CHECK(rsc_id != NULL, return);
 
     /* rsc_id may be owned by the history entry that is removed below, so
      * everything from here on works with a private copy */
     rsc_id_copy = strdup(rsc_id);
     CRM_CHECK(rsc_id_copy != NULL, return);
 
     if (rc == pcmk_ok) {
         if (rsc_gIter) {
             g_hash_table_iter_remove(rsc_gIter);
         } else {
             g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
         }
         crm_debug("sync: Sending delete op for %s", rsc_id_copy);
         delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);
 
         g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
     }
 
     if (input) {
         notify_deleted(lrm_state, input, rsc_id_copy, rc);
     }
 
     event.rc = rc;
     event.rsc = rsc_id_copy;
     event.lrm_state = lrm_state;
     g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
 
     free(rsc_id_copy);
 }
 
 /*
  * Remove the op from the CIB
  *
  * Avoids refreshing the entire LRM section of this host
  */
 
 #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']"
 #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']"
 
 /*
  * Ask the CIB to delete the recorded result of an operation.
  *
  * With op given, an XML_LRM_TAG_RSC_OP pattern carrying the call id and the
  * transition key (plus the operation key as id for recurring operations) is
  * deleted from the status section.  Without op, rsc_id and key are required
  * and the entry is addressed by an XPath that also matches on call_id when
  * call_id is greater than zero.  If neither form is possible an error is
  * logged.
  */
 static void
 delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc_id,
                 const char *key, int call_id)
 {
     xmlNode *pattern = NULL;
 
     if (op == NULL && (rsc_id == NULL || key == NULL)) {
         crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key);
         return;
     }
 
     if (op == NULL) {
         /* Length shared by both templates; the rest is added per case */
         int xpath_len = strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key);
         char *xpath = NULL;
 
         if (call_id > 0) {
             /* Extra room for the printed call id */
             xpath_len += strlen(op_call_template) + 10;
             xpath = calloc(1, xpath_len);
             snprintf(xpath, xpath_len, op_call_template, lrm_state->node_name, rsc_id, key,
                      call_id);
 
         } else {
             xpath_len += strlen(op_template) + 1;
             xpath = calloc(1, xpath_len);
             snprintf(xpath, xpath_len, op_template, lrm_state->node_name, rsc_id, key);
         }
 
         crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id);
         fsa_cib_conn->cmds->delete(fsa_cib_conn, xpath, NULL, cib_quorum_override | cib_xpath);
 
         free(xpath);
 
     } else {
         pattern = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
         crm_xml_add_int(pattern, XML_LRM_ATTR_CALLID, op->call_id);
         crm_xml_add(pattern, XML_ATTR_TRANSITION_KEY, op->user_data);
 
         if (op->interval > 0) {
             char *recurring_id = generate_op_key(op->rsc_id, op->op_type, op->interval);
 
             /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
             crm_xml_add(pattern, XML_ATTR_ID, recurring_id);
             free(recurring_id);
         }
 
         crm_debug("async: Sending delete op for %s_%s_%d (call=%d)",
                   op->rsc_id, op->op_type, op->interval, op->call_id);
 
         fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, pattern, cib_quorum_override);
     }
 
     crm_log_xml_trace(pattern, "op:cancel");
     free_xml(pattern);
 }
 
 /*
  * Forget the last failure of a resource.
  *
  * For every known lrm state (or only the one named node_name, when given)
  * the resource's "last_failure" operation entry is deleted from the CIB and
  * the failed operation cached in the resource history is released.
  */
 void
 lrm_clear_last_failure(const char *rsc_id, const char *node_name)
 {
     GList *all_states = lrm_state_get_list();
     GList *item = NULL;
     char *failure_key = generate_op_key(rsc_id, "last_failure", 0);
 
     /* This clears last failure for every lrm state that has this rsc.*/
     for (item = all_states; item != NULL; item = item->next) {
         lrm_state_t *lrm_state = item->data;
 
         if (node_name != NULL && strcmp(node_name, lrm_state->node_name) != 0) {
             /* filter by node_name if node_name is present */
             continue;
         }
 
         delete_op_entry(lrm_state, NULL, rsc_id, failure_key, 0);
 
         if (lrm_state->resource_history != NULL) {
             GHashTableIter history_iter;
             rsc_history_t *history = NULL;
 
             g_hash_table_iter_init(&history_iter, lrm_state->resource_history);
             while (g_hash_table_iter_next(&history_iter, NULL, (void **)&history)) {
                 if (crm_str_eq(rsc_id, history->id, TRUE)) {
                     lrmd_free_event(history->failed);
                     history->failed = NULL;
                 }
             }
         }
     }
 
     free(failure_key);
     g_list_free(all_states);
 }
 
 /*
  * Cancel a pending operation of a resource.
  *
  * key:    key of the entry in pending_ops; when NULL it is derived from
  *         rsc_id and op with make_stop_id()
  * op:     call id of the operation; must not be 0
  * remove: when TRUE, the pending entry is additionally flagged for removal
  *
  * Returns: gboolean - cancellation is in progress
  *
  * FALSE is returned when no pending entry exists, when the entry was already
  * cancelled, or when the lrmd had nothing to cancel.
  */
 static gboolean
 cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
 {
     int rc = pcmk_ok;
     char *derived_key = NULL;
     gboolean in_progress = FALSE;
     struct recurring_op_s *pending = NULL;
 
     CRM_CHECK(op != 0, return FALSE);
     CRM_CHECK(rsc_id != NULL, return FALSE);
 
     if (key == NULL) {
         derived_key = make_stop_id(rsc_id, op);
         key = derived_key;
     }
 
     pending = g_hash_table_lookup(lrm_state->pending_ops, key);
 
     if (pending == NULL) {
         crm_info("No pending op found for %s", key);
 
     } else {
         if (remove && pending->remove == FALSE) {
             pending->remove = TRUE;
             crm_debug("Scheduling %s for removal", key);
         }
 
         if (pending->cancelled) {
             crm_debug("Operation %s already cancelled", key);
 
         } else {
             pending->cancelled = TRUE;
 
             crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
             rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
                                   pending->interval);
 
             if (rc == pcmk_ok) {
                 crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
                 in_progress = TRUE;
 
             } else {
                 crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
                 /* The caller needs to make sure the entry is
                  * removed from the pending_ops list
                  *
                  * Usually by returning TRUE inside the worker function
                  * supplied to g_hash_table_foreach_remove()
                  *
                  * Not removing the entry from pending_ops will block
                  * the node from shutting down
                  */
             }
         }
     }
 
     free(derived_key);
     return in_progress;
 }
 
 /* Arguments and result shared between cancel_op_key() and its
  * cancel_action_by_key() worker */
 struct cancel_data {
     gboolean done;          /* set to TRUE once an entry matching key was seen */
     gboolean remove;        /* passed on to cancel_op() as its remove flag */
     const char *key;        /* operation key that entries are compared against */
     lrmd_rsc_info_t *rsc;   /* resource whose id is passed to cancel_op() */
     lrm_state_t *lrm_state; /* state passed to cancel_op() */
 };
 
 /*
  * GHRFunc for the pending operation table: cancel the entry when its
  * operation key equals the one in the cancel_data passed as user_data.
  *
  * The entry is removed from the table unless cancel_op() reports that the
  * cancellation is in progress.
  */
 static gboolean
 cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
 {
     struct cancel_data *request = user_data;
     struct recurring_op_s *pending = (struct recurring_op_s *)value;
 
     if (crm_str_eq(pending->op_key, request->key, TRUE) == FALSE) {
         /* Not the operation being looked for: keep it */
         return FALSE;
     }
 
     request->done = TRUE;
     return !cancel_op(request->lrm_state, request->rsc->id, key, pending->call_id,
                       request->remove);
 }
 
 /*
  * Cancel every pending operation of rsc whose operation key equals key.
  *
  * Returns TRUE when at least one matching pending operation was found, and
  * FALSE otherwise (including when rsc or key is NULL).
  */
 static gboolean
 cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
 {
     guint dropped = 0;
     struct cancel_data request = {
         .done = FALSE,
         .remove = remove,
         .key = key,
         .rsc = rsc,
         .lrm_state = lrm_state,
     };
 
     CRM_CHECK(rsc != NULL, return FALSE);
     CRM_CHECK(key != NULL, return FALSE);
 
     dropped = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key,
                                           &request);
     crm_trace("Removed %u op cache entries, new size: %u",
               dropped, g_hash_table_size(lrm_state->pending_ops));
 
     return request.done;
 }
 
 /* Look up (and optionally register) a resource in the LRM.
  *
  * The resource is searched by its id and, failing that, by its long id when
  * one is present. If it is still unknown and do_create is set, it is
  * registered using the class/provider/type attributes of the XML and looked
  * up again.
  *
  * \param lrm_state  LRM connection state to query
  * \param resource   XML describing the resource (id, class, provider, type)
  * \param op_msg     not used by this function
  * \param do_create  whether to register the resource when it is not found
  *
  * \return the resource info, or NULL when the id is missing, the resource is
  *         unknown and do_create is FALSE, or registration did not succeed.
  *         Callers visible in this file release it with lrmd_free_rsc_info().
  */
 static lrmd_rsc_info_t *
 get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create)
 {
     lrmd_rsc_info_t *rsc = NULL;
     const char *id = ID(resource);
     const char *type = crm_element_value(resource, XML_ATTR_TYPE);
     const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS);
     const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER);
     const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG);
 
     /* Validate before logging so that a NULL id never reaches a %s conversion */
     CRM_CHECK(id != NULL, return NULL);
     crm_trace("Retrieving %s from the LRM.", id);
 
     rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
 
     if (!rsc && long_id) {
         rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0);
     }
 
     if (!rsc && do_create) {
         CRM_CHECK(class != NULL, return NULL);
         CRM_CHECK(type != NULL, return NULL);
 
         crm_trace("Adding rsc %s before operation", id);
 
         lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring);
 
         /* Success of the registration is judged by whether the lookup now works */
         rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
 
         if (!rsc) {
             /* NOTE(review): msg_data looks unused but register_fsa_error() is
              * presumably a macro that references it - confirm before removing */
             fsa_data_t *msg_data = NULL;
 
             crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name);
             /* only register this as a internal error if this involves the local
              * lrmd. Otherwise we're likely dealing with an unresponsive remote-node
              * which is not a FSA failure. */
             if (lrm_state_is_local(lrm_state) == TRUE) {
                 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
             }
         }
     }
 
     return rsc;
 }
 
 /* Remove a resource from the LRM (when asked to unregister it) and then drop
  * its entry via delete_rsc_entry().
  *
  * When the unregistration reports -EINPROGRESS the deletion is only recorded
  * in lrm_state->deletion_ops (if a request is available) and the function
  * returns without calling delete_rsc_entry().
  *
  * sys, host and user are used for logging; user is additionally forwarded to
  * delete_rsc_entry(). */
 static void
 delete_resource(lrm_state_t * lrm_state,
                 const char *id,
                 lrmd_rsc_info_t * rsc,
                 GHashTableIter * gIter,
                 const char *sys,
                 const char *host,
                 const char *user,
                 ha_msg_input_t * request,
                 gboolean unregister)
 {
     const char *requester = user ? user : "internal";
     int rc = pcmk_ok;
 
     crm_info("Removing resource %s for %s (%s) on %s", id, sys, requester, host);
 
     /* Without a resource, or when unregistering is not wanted, rc stays pcmk_ok */
     if (unregister && rsc) {
         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
     }
 
     switch (rc) {
         case pcmk_ok:
             crm_trace("Resource '%s' deleted", id);
             break;
 
         case -EINPROGRESS:
             crm_info("Deletion of resource '%s' pending", id);
             if (request) {
                 char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
                 struct pending_deletion_op_s *pending = NULL;
 
                 pending = calloc(1, sizeof(struct pending_deletion_op_s));
                 pending->rsc = strdup(rsc->id);
                 pending->input = copy_ha_msg_input(request);
                 g_hash_table_insert(lrm_state->deletion_ops, ref, pending);
             }
             return;
 
         default:
             crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
                      id, sys, requester, host, rc);
             break;
     }
 
     delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
 }
 
 /* Pick a call id for a synthesized (fake) operation result.
  *
  * Returns 999999999 when no history entry for rsc_id is available (including
  * when lrm_state is NULL), otherwise the entry's last call id plus one; a
  * negative result is replaced by 1. */
 static int
 get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
 {
     rsc_history_t *history = NULL;
     int fake_id = 999999999;
 
     if (lrm_state != NULL) {
         history = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
     }
 
     /* The id has to be greater than that of the last successful operation;
      * otherwise the failure could appear to predate the successful start and
      * would not lead to a possible recovery of the resource */
     if (history != NULL) {
         fake_id = history->last_callid + 1;
     }
 
     return (fake_id < 0) ? 1 : fake_id;
 }
 
 /* Clear the resource history of a node so that resource status gets
  * redetected: delete every known resource, erase the node's LRM section via
  * erase_status_tag() and delete the CRM_OP_PROBED attribute via update_attrd().
  *
  * Connection resources of remote nodes are not unregistered; instead the
  * remote node's own LRM state (when found) is reprobed first. */
 static void
 force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node)
 {
     GHashTableIter iter;
     rsc_history_t *history = NULL;
 
     crm_info("clearing resource history on node %s", lrm_state->node_name);
 
     g_hash_table_iter_init(&iter, lrm_state->resource_history);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&history)) {
         /* A remote connection resource must stay registered during a
          * reprobe: unregistering the connection would terminate remote-node
          * membership */
         gboolean keep_registered = FALSE;
 
         if (is_remote_lrmd_ra(NULL, NULL, history->id)) {
             lrm_state_t *remote = lrm_state_find(history->id);
 
             keep_registered = TRUE;
             if (remote != NULL) {
                 /* clear the remote node itself before clearing the
                  * remote node's connection resource */
                 force_reprobe(remote, from_sys, from_host, user_name, TRUE);
             }
         }
 
         delete_resource(lrm_state, history->id, &history->rsc, &iter, from_sys, from_host,
                         user_name, NULL, keep_registered ? FALSE : TRUE);
     }
 
     /* Next, remove the copy held in the CIB */
     erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
 
     /* Last, _delete_ the attrd value: setting it to FALSE instead
      * results in the PE sending us back here again */
     update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
 }
 
 /* Fabricate a result for an action that could not be handed to the lrmd.
  *
  * The action's task is reported with op_status PCMK_LRM_OP_ERROR and the
  * given rc, except for notify actions which are always reported as done/OK.
  * With an lrm_state the fake event goes through process_lrm_event();
  * without one it is recorded directly through do_update_resource().
  * Actions lacking a resource element or a task are only logged. */
 static void
 synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc)
 {
     lrmd_event_data_t *fake_op = NULL;
     const char *task = crm_element_value(action, XML_LRM_ATTR_TASK);
     const char *node = crm_element_value(action, XML_LRM_ATTR_TARGET);
     xmlNode *rsc_xml = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
 
     if (rsc_xml == NULL) {
         /* Do something else?  direct_ack? */
         crm_info("Skipping %s=%d on %s (%p): no resource",
                  crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, node, lrm_state);
         return;
     }
 
     if (task == NULL) {
         /* This probably came from crm_resource -C, nothing to do */
         crm_info("Skipping %s=%d on %s (%p): no operation",
                  crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, node, lrm_state);
         return;
     }
 
     fake_op = construct_op(lrm_state, action, ID(rsc_xml), task);
     CRM_ASSERT(fake_op != NULL);
 
     fake_op->call_id = get_fake_call_id(lrm_state, fake_op->rsc_id);
 
     if (safe_str_eq(task, RSC_NOTIFY)) {
         /* Notifications can't fail yet */
         fake_op->op_status = PCMK_LRM_OP_DONE;
         fake_op->rc = PCMK_OCF_OK;
     } else {
         fake_op->op_status = PCMK_LRM_OP_ERROR;
         fake_op->rc = rc;
     }
 
     fake_op->t_run = time(NULL);
     fake_op->t_rcchange = fake_op->t_run;
 
     crm_info("Faking result %d for %s_%s_%d on %s (%p)", fake_op->rc, fake_op->rsc_id, fake_op->op_type, fake_op->interval, node, lrm_state);
 
     if (lrm_state == NULL) {
         /* No LRM state to process the event with: build a temporary
          * resource description from the XML and update the CIB directly */
         lrmd_rsc_info_t tmp_rsc;
 
         tmp_rsc.id = strdup(fake_op->rsc_id);
         tmp_rsc.type = crm_element_value_copy(rsc_xml, XML_ATTR_TYPE);
         tmp_rsc.class = crm_element_value_copy(rsc_xml, XML_AGENT_ATTR_CLASS);
         tmp_rsc.provider = crm_element_value_copy(rsc_xml, XML_AGENT_ATTR_PROVIDER);
 
         do_update_resource(node, &tmp_rsc, fake_op);
 
         free(tmp_rsc.id);
         free(tmp_rsc.type);
         free(tmp_rsc.class);
         free(tmp_rsc.provider);
 
     } else {
         process_lrm_event(lrm_state, fake_op, NULL);
     }
 
     lrmd_free_event(fake_op);
 }
 
 
 /*
  * A_LRM_INVOKE
  *
  * Dispatch an LRM request taken from the FSA message data (msg_data).
  *
  * The request is routed to the LRM state of the node named by the target
  * attribute of the request XML (the local node when absent), then handled
  * according to the CRM task / operation:
  *  - CRM_OP_LRM_FAIL:    fake a failed "asyncmon" result for the resource
  *  - CRM_OP_LRM_REFRESH: push the LRM state into the CIB status section
  *  - CRM_OP_LRM_QUERY:   reply with the LRM state
  *  - CRM_OP_PROBED:      set the probed attribute through update_attrd()
  *  - CRM_OP_REPROBE:     clear the resource history (force_reprobe())
  *  - cancel / delete / any other resource operation
  *
  * The action, cause, cur_state and current_input parameters are not read
  * here.
  */
 void
 do_lrm_invoke(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     gboolean create_rsc = TRUE;
     lrm_state_t *lrm_state = NULL;
     const char *crm_op = NULL;
     const char *from_sys = NULL;
     const char *from_host = NULL;
     const char *operation = NULL;
     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
     const char *user_name = NULL;
     const char *target_node = NULL;
     gboolean is_remote_node = FALSE;
     gboolean crm_rsc_delete = FALSE;
 
     if (input->xml != NULL) {
         /* Remote node operations are routed here to their remote connections */
         target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
     }
     if (target_node == NULL) {
         target_node = fsa_our_uname;
     } else if (safe_str_neq(target_node, fsa_our_uname)) {
         is_remote_node = TRUE;
     }
 
     lrm_state = lrm_state_find(target_node);
 
     if (lrm_state == NULL && is_remote_node) {
         crm_err("no lrmd connection for remote node %s found on cluster node %s. Can not process request.",
             target_node, fsa_our_uname);
 
         /* The action must be recorded here and in the CIB as failed */
         synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED);
         return;
     }
 
     CRM_ASSERT(lrm_state != NULL);
 
 #if ENABLE_ACL
     user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
     crm_trace("LRM command from user '%s'", user_name);
 #endif
 
     crm_op = crm_element_value(input->msg, F_CRM_TASK);
     from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
         from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
     }
 
     crm_trace("LRM command from: %s", from_sys);
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
         /* remember this delete op came from crm_resource */
         crm_rsc_delete = TRUE;
         operation = CRMD_ACTION_DELETE;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
         operation = CRM_OP_LRM_REFRESH;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
         lrmd_event_data_t *op = NULL;
         lrmd_rsc_info_t *rsc = NULL;
         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
 
         CRM_CHECK(xml_rsc != NULL, return);
 
         /* The lrmd can not fail a resource, it does not understand the
          * concept of success or failure in relation to a resource, it simply
          * executes operations and reports the results. We determine what a failure is.
          * Because of this, if we want to fail a resource we have to fake what we
          * understand a failure to look like.
          *
          * To do this we create a fake lrmd operation event for the resource
          * we want to fail.  We then pass that event to the lrmd client callback
          * so it will be processed as if it actually came from the lrmd. */
         op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon");
         CRM_ASSERT(op != NULL);
 
         free((char *)op->user_data);
         op->user_data = NULL;
         op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
         op->interval = 0;
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
         op->t_run = time(NULL);
         op->t_rcchange = op->t_run;
 
 #if ENABLE_ACL
         if (user_name && is_privileged(user_name) == FALSE) {
             crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
             lrmd_free_event(op);
             return;
         }
 #endif
 
         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
         if (rsc) {
             crm_info("Failing resource %s...", rsc->id);
             process_lrm_event(lrm_state, op, NULL);
             /* The request to fail the resource succeeded, so ACK it as OK */
             op->op_status = PCMK_LRM_OP_DONE;
             op->rc = PCMK_OCF_OK;
             lrmd_free_rsc_info(rsc);
         } else {
             crm_info("Cannot find/create resource in order to fail it...");
             crm_log_xml_warn(input->msg, "bad input");
         }
 
         send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
         lrmd_free_event(op);
         return;
 
     } else if (input->xml != NULL) {
         operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
     }
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
         int rc = pcmk_ok;
         xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);
 
         fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
         crm_info("Forced a local LRM refresh: call=%d", rc);
 
         if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) {
             xmlNode *reply = create_request(
                 CRM_OP_INVOKE_LRM, fragment,
                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
 
             crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
 
             if (relay_message(reply, TRUE) == FALSE) {
                 crm_log_xml_err(reply, "Unable to route reply");
             }
             free_xml(reply);
         }
 
         free_xml(fragment);
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
         xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all);
         xmlNode *reply = create_reply(input->msg, data);
 
         if (relay_message(reply, TRUE) == FALSE) {
             crm_err("Unable to route reply");
             crm_log_xml_err(reply, "reply");
         }
         free_xml(reply);
         free_xml(data);
 
     } else if (safe_str_eq(operation, CRM_OP_PROBED)) {
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node);
 
     } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) {
         crm_notice("Forcing the status of all resources to be redetected");
 
         force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);
 
         /* NOTE(review): the original condition compared from_sys against
          * CRM_SYSTEM_TENGINE twice; the duplicate is dropped here without
          * changing behaviour. The second operand was presumably meant to be
          * a different subsystem name - confirm the intended sender. */
         if(strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0) {
             xmlNode *reply = create_request(
                 CRM_OP_INVOKE_LRM, NULL,
                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
 
             crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
 
             if (relay_message(reply, TRUE) == FALSE) {
                 crm_log_xml_err(reply, "Unable to route reply");
             }
             free_xml(reply);
         }
 
     } else if (operation != NULL) {
         lrmd_rsc_info_t *rsc = NULL;
         xmlNode *params = NULL;
         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
 
         CRM_CHECK(xml_rsc != NULL, return);
 
         /* only the first 16 chars are used by the LRM */
         params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
 
         /* A delete must never register a resource that is not known yet */
         if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
             create_rsc = FALSE;
         }
 
         if(lrm_state_is_connected(lrm_state) == FALSE) {
             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED);
             return;
         }
 
         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
         if (rsc == NULL && create_rsc) {
             crm_err("Invalid resource definition for %s", ID(xml_rsc));
             crm_log_xml_warn(input->msg, "bad input");
 
             /* if the operation couldn't complete because we can't register
              * the resource, return a generic error */
             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED);
 
         } else if (rsc == NULL) {
             lrmd_event_data_t *op = NULL;
 
             crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml));
             delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name);
 
             op = construct_op(lrm_state, input->xml, ID(xml_rsc), operation);
             /* Check the result before it is dereferenced below */
             CRM_ASSERT(op != NULL);
 
             /* Deleting something that does not exist is a success */
             op->op_status = PCMK_LRM_OP_DONE;
             op->rc = PCMK_OCF_OK;
 
             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
             lrmd_free_event(op);
 
         } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
             char *op_key = NULL;
             char *meta_key = NULL;
             int call = 0;
             const char *call_id = NULL;
             const char *op_task = NULL;
             const char *op_interval = NULL;
             gboolean in_progress = FALSE;
 
             CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       lrmd_free_rsc_info(rsc); return);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL);
             op_interval = crm_element_value(params, meta_key);
             free(meta_key);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
             op_task = crm_element_value(params, meta_key);
             free(meta_key);
 
             meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
             call_id = crm_element_value(params, meta_key);
             free(meta_key);
 
             CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       lrmd_free_rsc_info(rsc); return);
             CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command");
                       lrmd_free_rsc_info(rsc); return);
 
             op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0"));
 
             crm_debug("PE requested op %s (call=%s) be cancelled",
                       op_key, call_id ? call_id : "NA");
             call = crm_parse_int(call_id, "0");
             if (call == 0) {
                 /* the normal case when the PE cancels a recurring op */
                 in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
 
             } else {
                 /* the normal case when the PE cancels an orphan op */
                 in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
             }
 
             if (in_progress == FALSE) {
                 lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc->id, op_task);
 
                 crm_info("Nothing known about operation %d for %s", call, op_key);
                 delete_op_entry(lrm_state, NULL, rsc->id, op_key, call);
 
                 CRM_ASSERT(op != NULL);
 
                 op->rc = PCMK_OCF_OK;
                 op->op_status = PCMK_LRM_OP_DONE;
                 send_direct_ack(from_host, from_sys, rsc, op, rsc->id);
                 lrmd_free_event(op);
 
                 /* needed?? surely not otherwise the cancel_op_(_key) wouldn't
                  * have failed in the first place
                  */
                 g_hash_table_remove(lrm_state->pending_ops, op_key);
             }
 
             free(op_key);
 
         } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
             gboolean unregister = TRUE;
 
 #if ENABLE_ACL
             /* Dry-run the CIB deletion first so that a failure (e.g. missing
              * permission) is reported before anything is removed */
             int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name);
             if (cib_rc != pcmk_ok) {
                 lrmd_event_data_t *op = NULL;
 
                 crm_err
                     ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s",
                      rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc,
                      pcmk_strerror(cib_rc));
 
                 op = construct_op(lrm_state, input->xml, rsc->id, operation);
                 CRM_ASSERT(op != NULL);
                 op->op_status = PCMK_LRM_OP_ERROR;
 
                 if (cib_rc == -EACCES) {
                     op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
                 } else {
                     op->rc = PCMK_OCF_UNKNOWN_ERROR;
                 }
                 send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
                 lrmd_free_event(op);
                 lrmd_free_rsc_info(rsc);
                 return;
             }
 #endif
             /* A crm_resource-initiated delete of a remote connection resource
              * leaves the resource registered */
             if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
                 unregister = FALSE;
             }
 
             delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister);
 
         } else {
             do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
         }
 
         lrmd_free_rsc_info(rsc);
 
     } else {
         crm_err("Operation was neither a lrm_query, nor a rsc op.  %s", crm_str(crm_op));
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
     }
 }
 
 /* Build an lrmd event describing `operation` on resource `rsc_id`.
  *
  * \param lrm_state  LRM state used to look up cached stop parameters; may be
  *                   NULL (synthesize_lrmd_failure() passes NULL when no
  *                   connection exists), in which case no cache is consulted
  * \param rsc_op     XML of the requested action, or NULL for a locally
  *                   generated stop
  * \param rsc_id     resource id (must not be NULL)
  * \param operation  operation name (must not be NULL)
  *
  * \return a newly allocated event with op_status PCMK_LRM_OP_PENDING and
  *         rc -1; callers in this file release it with lrmd_free_event().
  *
  * Interval, timeout and start delay come from the meta attributes of rsc_op.
  * user_data is set to a copy of the transition key when rsc_op carries one.
  */
 static lrmd_event_data_t *
 construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
 {
     lrmd_event_data_t *op = NULL;
     const char *op_delay = NULL;
     const char *op_timeout = NULL;
     const char *op_interval = NULL;
     GHashTable *params = NULL;
 
     const char *transition = NULL;
 
     CRM_ASSERT(rsc_id != NULL);
     /* operation is handed to strdup() below, which must not receive NULL */
     CRM_ASSERT(operation != NULL);
 
     op = calloc(1, sizeof(lrmd_event_data_t));
     CRM_ASSERT(op != NULL);
 
     op->type = lrmd_event_exec_complete;
     op->op_type = strdup(operation);
     op->op_status = PCMK_LRM_OP_PENDING;
     op->rc = -1;
     op->rsc_id = strdup(rsc_id);
     op->interval = 0;
     op->timeout = 0;
     op->start_delay = 0;
 
     if (rsc_op == NULL) {
         CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
         op->user_data = NULL;
         /* the stop_all_resources() case
          * by definition there is no DC (or they'd be shutting
          *   us down).
          * So we should put our version here.
          */
         op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                            g_hash_destroy_str, g_hash_destroy_str);
 
         g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
 
         crm_trace("Constructed %s op for %s", operation, rsc_id);
         return op;
     }
 
     params = xml2list(rsc_op);
     g_hash_table_remove(params, CRM_META "_op_target_rc");
 
     op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
     op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
     op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL);
 
     op->interval = crm_parse_int(op_interval, "0");
     op->timeout = crm_parse_int(op_timeout, "0");
     op->start_delay = crm_parse_int(op_delay, "0");
 
     if (safe_str_neq(operation, RSC_STOP)) {
         op->params = params;
 
     } else {
         rsc_history_t *entry = NULL;
 
         /* Without an LRM state there is no history to consult; fall back to
          * the parameters we were given instead of dereferencing NULL */
         if (lrm_state != NULL) {
             entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
         }
 
         /* If we do not have stop parameters cached, use
          * whatever we are given */
         if (!entry || !entry->stop_params) {
             op->params = params;
         } else {
             /* Copy the cached parameter list so that we stop the resource
              * with the old attributes, not the new ones */
             op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                                g_hash_destroy_str, g_hash_destroy_str);
 
             g_hash_table_foreach(params, copy_meta_keys, op->params);
             g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
             g_hash_table_destroy(params);
             params = NULL;
         }
     }
 
     /* sanity */
     if (op->interval < 0) {
         op->interval = 0;
     }
     if (op->timeout <= 0) {
         op->timeout = op->interval;
     }
     if (op->start_delay < 0) {
         op->start_delay = 0;
     }
 
     transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
     /* Without a transition key the op is returned with user_data left NULL */
     CRM_CHECK(transition != NULL, return op);
 
     op->user_data = strdup(transition);
 
     if (op->interval != 0) {
         if (safe_str_eq(operation, CRMD_ACTION_START)
             || safe_str_eq(operation, CRMD_ACTION_STOP)) {
             crm_err("Start and Stop actions cannot have an interval: %d", op->interval);
             op->interval = 0;
         }
     }
 
     crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval);
 
     return op;
 }
 
 /* Acknowledge an operation result directly to the requester.
  *
  * Builds a node status update for the local node containing the operation
  * result for op->rsc_id, wraps it in a CRM_OP_INVOKE_LRM request addressed
  * to to_sys on to_host and relays it.
  *
  * to_sys defaults to CRM_SYSTEM_TENGINE when NULL; rsc_id is only used (and
  * then required) when op->rsc_id is not set yet. */
 void
 send_direct_ack(const char *to_host, const char *to_sys,
                 lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
 {
     crm_node_t *local_peer = NULL;
     xmlNode *update = NULL;
     xmlNode *lrm_xml = NULL;
     xmlNode *resources_xml = NULL;
     xmlNode *rsc_xml = NULL;
     xmlNode *reply = NULL;
 
     CRM_CHECK(op != NULL, return);
 
     if (op->rsc_id == NULL) {
         CRM_ASSERT(rsc_id != NULL);
         op->rsc_id = strdup(rsc_id);
     }
 
     if (to_sys == NULL) {
         to_sys = CRM_SYSTEM_TENGINE;
     }
 
     local_peer = crm_get_peer(0, fsa_our_uname);
     update = do_update_node_cib(local_peer, node_update_none, NULL, __FUNCTION__);
 
     /* <lrm id=...> / <lrm_resources> / <lrm_resource id=...> skeleton */
     lrm_xml = create_xml_node(update, XML_CIB_TAG_LRM);
     crm_xml_add(lrm_xml, XML_ATTR_ID, fsa_our_uuid);
 
     resources_xml = create_xml_node(lrm_xml, XML_LRM_TAG_RESOURCES);
     rsc_xml = create_xml_node(resources_xml, XML_LRM_TAG_RESOURCE);
     crm_xml_add(rsc_xml, XML_ATTR_ID, op->rsc_id);
 
     build_operation_update(rsc_xml, rsc, op, __FUNCTION__);
 
     reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
 
     crm_log_xml_trace(update, "ACK Update");
 
     crm_debug("ACK'ing resource op %s_%s_%d from %s: %s",
               op->rsc_id, op->op_type, op->interval, op->user_data,
               crm_element_value(reply, XML_ATTR_REFERENCE));
 
     if (relay_message(reply, TRUE) == FALSE) {
         crm_log_xml_err(reply, "Unable to route reply");
     }
 
     free_xml(update);
     free_xml(reply);
 }
 
 /* Run lrm_state_verify_stopped() on every known LRM state.
  *
  * Returns TRUE only if the check passed for all of them. The
  * R_SENT_RSC_STOP bit is set in fsa_input_register in every case. */
 gboolean
 verify_stopped(enum crmd_fsa_state cur_state, int log_level)
 {
     gboolean all_stopped = TRUE;
     GList *states = lrm_state_get_list();
     GList *item = states;
 
     /* Deliberately no early exit: every state gets checked even after a
      * failure has been seen */
     while (item != NULL) {
         lrm_state_t *lrm_state = item->data;
 
         if (lrm_state_verify_stopped(lrm_state, cur_state, log_level) == FALSE) {
             all_stopped = FALSE;
         }
         item = item->next;
     }
 
     set_bit(fsa_input_register, R_SENT_RSC_STOP);
     g_list_free(states);
 
     return all_stopped;
 }
 
 /* Context handed to stop_recurring_action_by_rsc() through the user_data
  * pointer of g_hash_table_foreach_remove(); filled in by do_lrm_rsc_op(). */
 struct stop_recurring_action_s {
     /* resource whose recurring operations are to be cancelled */
     lrmd_rsc_info_t *rsc;
     /* LRM state passed on to cancel_op() */
     lrm_state_t *lrm_state;
 };
 
 /* g_hash_table_foreach_remove() worker: cancel a pending operation when it
  * is recurring (non-zero interval) and belongs to the resource named in the
  * stop_recurring_action_s context. Returns TRUE (drop the entry) only when
  * cancel_op() reports FALSE. */
 static gboolean
 stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
 {
     struct recurring_op_s *pending = (struct recurring_op_s *)value;
     struct stop_recurring_action_s *ctx = user_data;
 
     /* One-shot operations are never touched */
     if (pending->interval == 0) {
         return FALSE;
     }
 
     /* Neither are operations of other resources */
     if (!crm_str_eq(pending->rsc_id, ctx->rsc->id, TRUE)) {
         return FALSE;
     }
 
     crm_debug("Cancelling op %d for %s (%s)", pending->call_id, pending->rsc_id, key);
 
     if (cancel_op(ctx->lrm_state, ctx->rsc->id, key, pending->call_id, FALSE)) {
         return FALSE;
     }
     return TRUE;
 }
 
 /* Hash-table removal worker (same signature and return convention as
  * stop_recurring_action_by_rsc()): cancel every recurring (non-zero
  * interval) pending operation of the LRM state passed as user_data.
  * Returns TRUE (drop the entry) only when cancel_op() reports FALSE. */
 static gboolean
 stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
 {
     struct recurring_op_s *pending = (struct recurring_op_s *)value;
     lrm_state_t *state = user_data;
 
     /* One-shot operations are never touched */
     if (pending->interval == 0) {
         return FALSE;
     }
 
     crm_info("Cancelling op %d for %s (%s)", pending->call_id, pending->rsc_id, key);
 
     if (cancel_op(state, pending->rsc_id, key, pending->call_id, FALSE)) {
         return FALSE;
     }
     return TRUE;
 }
 
 /* Execute a resource operation through the LRM.
  *
  * \param lrm_state  LRM state (connection) to run the operation on
  * \param rsc        resource to operate on (must not be NULL)
  * \param operation  operation name (must not be NULL)
  * \param msg        XML of the requested action; may be NULL, in which case
  *                   no transition key is available
  * \param request    not used by this function
  *
  * Steps:
  *  1. build the op with construct_op()
  *  2. for non-recurring ops other than status/notify, cancel the resource's
  *     recurring operations first
  *  3. NACK the request when the FSA state does not allow it (stop and
  *     "fail" are always let through)
  *  4. hand the op to lrm_state_exec(), then either record it in
  *     lrm_state->pending_ops or report the failure to execute
  */
 static void
 do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
               xmlNode * request)
 {
     int call_id = 0;
     char *op_id = NULL;
     lrmd_event_data_t *op = NULL;
     lrmd_key_value_t *params = NULL;
     /* NOTE(review): msg_data looks unused but register_fsa_error() is
      * presumably a macro that references it - confirm before removing */
     fsa_data_t *msg_data = NULL;
     const char *transition = NULL;
     gboolean stop_recurring = FALSE;
 
     CRM_CHECK(rsc != NULL, return);
     CRM_CHECK(operation != NULL, return);
 
     if (msg != NULL) {
         transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
         if (transition == NULL) {
             crm_log_xml_err(msg, "Missing transition number");
         }
     }
 
     op = construct_op(lrm_state, msg, rsc->id, operation);
     CRM_CHECK(op != NULL, return);
 
     if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
         && op->interval == 0
         && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {
 
         /* pcmk remote connections are a special use case.
          * We never ever want to stop monitoring a connection resource until
          * the entire migration has completed. If the connection is ever unexpected
          * severed, even during a migration, this is an event we must detect.*/
         stop_recurring = FALSE;
 
     } else if (op->interval == 0
         && strcmp(operation, CRMD_ACTION_STATUS) != 0
         && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {
 
         /* stop any previous monitor operations before changing the resource state */
         stop_recurring = TRUE;
     }
 
     if (stop_recurring == TRUE) {
         guint removed = 0;
         struct stop_recurring_action_s data;
 
         data.rsc = rsc;
         data.lrm_state = lrm_state;
         removed = g_hash_table_foreach_remove(
             lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);
 
         crm_debug("Stopped %u recurring operations in preparation for %s_%s_%d",
                   removed, rsc->id, operation, op->interval);
     }
 
     /* now do the op */
     crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval);
 
     if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) {
         if (safe_str_neq(operation, "fail")
             && safe_str_neq(operation, CRMD_ACTION_STOP)) {
             crm_info("Discarding attempt to perform action %s on %s in state %s",
                      operation, rsc->id, fsa_state2string(fsa_state));
             op->rc = CRM_DIRECT_NACK_RC;
             op->op_status = PCMK_LRM_OP_ERROR;
             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
             lrmd_free_event(op);
             /* op_id has not been generated yet, so there is nothing else to free */
             return;
         }
     }
 
     op_id = generate_op_key(rsc->id, op->op_type, op->interval);
 
     if (op->interval > 0) {
         /* cancel it so we can then restart it without conflict */
         cancel_op_key(lrm_state, rsc, op_id, FALSE);
     }
 
     if (op->params) {
         char *key = NULL;
         char *value = NULL;
         GHashTableIter iter;
 
         /* Convert the parameter table into the key/value list lrm_state_exec() takes */
         g_hash_table_iter_init(&iter, op->params);
         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
             params = lrmd_key_value_add(params, key, value);
         }
     }
 
     call_id = lrm_state_exec(lrm_state,
                              rsc->id,
                              op->op_type,
                              op->user_data, op->interval, op->timeout, op->start_delay, params);
 
     if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
         crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 
     } else if (call_id <= 0) {
 
         /* Could not execute on a remote node: fake a failed result instead
          * of treating it as an internal error */
         crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id);
         op->call_id = get_fake_call_id(lrm_state, rsc->id);
         op->op_status = PCMK_LRM_OP_DONE;
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
         op->t_run = time(NULL);
         op->t_rcchange = op->t_run;
         process_lrm_event(lrm_state, op, NULL);
 
     } else {
         /* record all operations so we can wait
          * for them to complete during shutdown
          */
         char *call_id_s = make_stop_id(rsc->id, call_id);
         struct recurring_op_s *pending = NULL;
 
         pending = calloc(1, sizeof(struct recurring_op_s));
         crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
 
         pending->call_id = call_id;
         pending->interval = op->interval;
         pending->op_type = strdup(operation);
         pending->op_key = strdup(op_id);
         pending->rsc_id = strdup(rsc->id);
         pending->start_time = time(NULL);
         /* user_data is NULL when there was no message or no transition key;
          * strdup() must not be called with NULL */
         pending->user_data = op->user_data ? strdup(op->user_data) : NULL;
         g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
 
         if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) {
             char *uuid = NULL;
             int dummy = 0, target_rc = 0;
 
             crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
 
             decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
             free(uuid);
 
             op->rc = target_rc;
             op->op_status = PCMK_LRM_OP_DONE;
             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
         }
 
         /* Hand the parameter table over to the pending entry so that
          * lrmd_free_event() below does not see it */
         pending->params = op->params;
         op->params = NULL;
     }
 
     free(op_id);
     lrmd_free_event(op);
     return;
 }
 
 /* Call ID of the most recent resource-status CIB update that returned a
  * positive ID (set in do_update_resource()); reset to 0 by
  * cib_rsc_callback() when the callback for that call ID fires. */
 int last_resource_update = 0;
 
 /*
  * CIB callback for resource status updates.
  *
  * Logs the outcome of update `call_id` (trace for success and the two
  * diff-related return codes, warning for anything else).  If this was the
  * update recorded in last_resource_update, clears that marker and triggers
  * the FSA.  msg, output and user_data are not used.
  */
 static void
 cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     gboolean acceptable = (rc == pcmk_ok)
                           || (rc == -pcmk_err_diff_failed)
                           || (rc == -pcmk_err_diff_resync);
 
     if (acceptable) {
         crm_trace("Resource update %d complete: rc=%d", call_id, rc);
     } else {
         crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
     }
 
     if (call_id != last_resource_update) {
         return;
     }
 
     /* The update we were waiting on has been answered */
     last_resource_update = 0;
     trigger_fsa(fsa_source);
 }
 
-/*
- * \internal
- * \brief Initialize status section for a newly started pacemaker_remote node
- *
- * Clear the XML_NODE_IS_FENCED flag in the CIB status section for a remote node
- * or guest node (intended to be called when the node starts). If the node ever
- * needs to be fenced, this flag will allow various actions to determine whether
- * the fencing has happened yet.
- *
- * \param[in] node_name  Name of new remote node
- * \param[in] call_opt   Call options to pass to CIB update method
- */
-static void
-remote_node_init_status(const char *node_name, int call_opt)
-{
-    int call_id = 0;
-    xmlNode *update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
-    xmlNode *state;
-
-    state = simple_remote_node_status(node_name, update,__FUNCTION__);
-    crm_xml_add(state, XML_NODE_IS_FENCED, "0");
-
-    /* TODO: Consider forcing a synchronous or asynchronous call here.
-     * In practice, it's currently always async, the benefit of which is
-     * quicker startup. The argument for sync is to close the tiny window
-     * in which the remote connection could drop immediately after connecting,
-     * and fencing might not happen because it appears to already have been.
-     */
-    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
-    if (call_id < 0) {
-        /* TODO: Return an error code on failure, and handle it somehow.
-         * If this fails, later actions could mistakenly think the node has
-         * already been fenced, thus preventing actual fencing, or allowing
-         * recurring monitor failures to be cleared too soon.
-         */
-        crm_perror(LOG_WARNING,
-                   "Initializing status for pacemaker_remote node %s in CIB",
-                   node_name);
-    }
-    free_xml(update);
-}
-
-static void
-remote_node_clear_status(const char *node_name, int call_opt)
-{
-    if (node_name == NULL) {
-        return;
-    }
-    remote_node_init_status(node_name, call_opt);
-    erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
-    erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
-}
-
 /*
  * Build a <status> XML update describing the result of `op` for resource
  * op->rsc_id on node `node_name`, and submit it with fsa_cib_update().
  *
  * node_name: node whose status entry is updated; any name other than
  *            fsa_our_uname is treated as a remote node (uuid == uname).
  * rsc:       resource information; when NULL a direct ack is sent instead
  *            and no CIB update is submitted.
  * op:        operation event to record; must not be NULL.
  *
  * Returns 0 if op is NULL; otherwise the final value of rc: whatever
  * fsa_cib_update() stored there (a call number, per the comment at the
  * `done` label), -EINVAL if no uuid could be determined, or pcmk_ok when
  * rsc is NULL.
  */
 static int
 do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
 {
 /*
   <status>
   <nodes_status id=uname>
   <lrm>
   <lrm_resources>
   <lrm_resource id=...>
   </...>
 */
     int rc = pcmk_ok;
     xmlNode *update, *iter = NULL;
     int call_opt = crmd_cib_smart_opt();
     const char *uuid = NULL;
 
     CRM_CHECK(op != NULL, return 0);
 
     /* `update` keeps the root of the tree; `iter` walks down as children are added */
     iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
     update = iter;
     iter = create_xml_node(iter, XML_CIB_TAG_STATE);
 
     if (safe_str_eq(node_name, fsa_our_uname)) {
         uuid = fsa_our_uuid;
 
     } else {
         /* remote nodes uuid and uname are equal */
         uuid = node_name;
         crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
     }
 
     CRM_LOG_ASSERT(uuid != NULL);
     if(uuid == NULL) {
         rc = -EINVAL;
         /* NOTE(review): the `done` label still registers cib_rsc_callback
          * for rc == -EINVAL -- confirm that is intended */
         goto done;
     }
 
     crm_xml_add(iter, XML_ATTR_UUID,  uuid);
     crm_xml_add(iter, XML_ATTR_UNAME, node_name);
     crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
 
     iter = create_xml_node(iter, XML_CIB_TAG_LRM);
     crm_xml_add(iter, XML_ATTR_ID, uuid);
 
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
 
     build_operation_update(iter, rsc, op, __FUNCTION__);
 
     if (rsc) {
         const char *container = NULL;
 
         crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
         crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
         crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
 
         /* Copy the container meta-attribute, if any, onto the resource entry */
         if (op->params) {
             container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
         }
         if (container) {
             crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
             crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
         }
 
-        CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id));
-        CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id));
-
-        /* check to see if we need to initialize remote-node related status sections */
-        if (safe_str_eq(op->op_type, "start") && op->rc == 0 && op->op_status == PCMK_LRM_OP_DONE) {
-            const char *remote_node = g_hash_table_lookup(op->params, CRM_META"_remote_node");
-
-            if (remote_node) {
-                /* A container for a remote-node has started, initialize remote-node's status */
-                crm_info("Initalizing lrm status for container remote-node %s. Container successfully started.", remote_node);
-                remote_node_clear_status(remote_node, call_opt);
-            } else if (container == FALSE && safe_str_eq(rsc->type, "remote") && safe_str_eq(rsc->provider, "pacemaker")) {
-                /* baremetal remote node connection resource has started, initialize remote-node's status */
-                crm_info("Initializing lrm status for baremetal remote-node %s", rsc->id);
-                remote_node_clear_status(rsc->id, call_opt);
-            }
-        }
-
     } else {
         /* No resource info available: ack directly and skip the CIB update */
         crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id);
         send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
         goto cleanup;
     }
 
     crm_log_xml_trace(update, __FUNCTION__);
 
     /* make it an asynchronous call and be done with it
      *
      * Best case:
      *   the resource state will be discovered during
      *   the next signup or election.
      *
      * Bad case:
      *   we are shutting down and there is no DC at the time,
      *   but then why were we shutting down then anyway?
      *   (probably because of an internal error)
      *
      * Worst case:
      *   we get shot for having resources "running" when they really weren't
      *
      * the alternative however means blocking here for too long, which
      * isn't acceptable
      */
     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
 
     /* Remember the call so cib_rsc_callback() can recognise its completion */
     if (rc > 0) {
         last_resource_update = rc;
     }
   done:
     /* the return code is a call number, not an error code */
     crm_trace("Sent resource state update message: %d for %s=%d on %s", rc,
               op->op_type, op->interval, op->rsc_id);
     fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
 
   cleanup:
     free_xml(update);
     return rc;
 }
 
 /*
  * FSA action stub: the body is a CRM_CHECK() on a constant FALSE condition
  * whose failure action is `return`, so none of the arguments are used.
  */
 void
 do_lrm_event(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
 {
     CRM_CHECK(FALSE, return);
 }
 
 /*
  * Handle the result of a resource operation reported for lrm_state's node:
  * record or acknowledge it, drop the matching pending_ops entry where
  * appropriate, log the outcome and update the history cache.
  *
  * lrm_state: state object of the node the operation belongs to.
  * op:        operation event; op and op->rsc_id must be non-NULL.
  * pending:   matching pending_ops entry if the caller already holds it; when
  *            NULL the entry is looked up here and this function also removes
  *            it from lrm_state->pending_ops where applicable.
  *
  * Returns FALSE if op or op->rsc_id is NULL, TRUE otherwise.
  */
 gboolean
 process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending)
 {
     char *op_id = NULL;
     char *op_key = NULL;
 
     int update_id = 0;
     gboolean remove = FALSE;
     gboolean removed = FALSE;
     lrmd_rsc_info_t *rsc = NULL;
 
     CRM_CHECK(op != NULL, return FALSE);
     CRM_CHECK(op->rsc_id != NULL, return FALSE);
 
     op_id = make_stop_id(op->rsc_id, op->call_id);
     op_key = generate_op_key(op->rsc_id, op->op_type, op->interval);
     rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
     if(pending == NULL) {
         /* Caller did not supply the entry, so removal is our job */
         remove = TRUE;
         pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
     }
 
     if (op->op_status == PCMK_LRM_OP_ERROR) {
         switch(op->rc) {
             case PCMK_OCF_NOT_RUNNING:
             case PCMK_OCF_RUNNING_MASTER:
             case PCMK_OCF_DEGRADED:
             case PCMK_OCF_DEGRADED_MASTER:
                 /* Leave it up to the TE/PE to decide if this is an error */
                 op->op_status = PCMK_LRM_OP_DONE;
                 break;
             default:
                 /* Nothing to do */
                 break;
         }
     }
 
     /* Decide how the result is reported: CIB update, direct ack, op-entry
      * deletion, or nothing at all */
     if (op->op_status != PCMK_LRM_OP_CANCELLED) {
         if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
             /* Keep notify ops out of the CIB */
             send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
         } else {
             update_id = do_update_resource(lrm_state->node_name, rsc, op);
         }
     } else if (op->interval == 0) {
         /* This will occur when "crm resource cleanup" is called while actions are in-flight */
         crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id);
         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
 
     } else if (pending == NULL) {
         /* We don't need to do anything for cancelled ops
          * that are not in our pending op list. There are no
          * transition actions waiting on these operations. */
 
     } else if (op->user_data == NULL) {
         /* At this point we have a pending entry, but no transition
          * key present in the user_data field. report this */
         crm_err("Op %s (call=%d): No user data", op_key, op->call_id);
 
     } else if (pending->remove) {
         /* The tengine canceled this op, we have been waiting for the cancel to finish. */
         delete_op_entry(lrm_state, op, op->rsc_id, op_key, op->call_id);
 
     } else if (pending && op->rsc_deleted) {
         /* The tengine initiated this op, but it was cancelled outside of the
          * tengine's control during a resource cleanup/re-probe request. The tengine
          * must be alerted that this operation completed, otherwise the tengine
          * will continue waiting for this update to occur until it is timed out.
          * We don't want this update going to the cib though, so use a direct ack. */
         crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id);
         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
 
     } else {
         /* Before a stop is called, no need to direct ack */
         crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id);
     }
 
     /* Drop the pending entry: non-recurring ops always, recurring ops only
      * once they have been cancelled */
     if(remove == FALSE) {
         /* The caller will do this afterwards, but keep the logging consistent */
         removed = TRUE;
 
     } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) {
         removed = TRUE;
         crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
                   op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops));
 
     } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) {
         removed = TRUE;
         g_hash_table_remove(lrm_state->pending_ops, op_id);
     }
 
     /* Log the outcome at a severity that depends on the final status */
     switch (op->op_status) {
         case PCMK_LRM_OP_CANCELLED:
             crm_info("Operation %s: %s (node=%s, call=%d, confirmed=%s)",
                      op_key, services_lrm_status_str(op->op_status), lrm_state->node_name,
                      op->call_id, removed ? "true" : "false");
             break;
 
         case PCMK_LRM_OP_DONE:
             do_crm_log(op->interval?LOG_INFO:LOG_NOTICE,
                        "Operation %s: %s (node=%s, call=%d, rc=%d, cib-update=%d, confirmed=%s)",
                        op_key, services_ocf_exitcode_str(op->rc), lrm_state->node_name,
                        op->call_id, op->rc, update_id, removed ? "true" : "false");
             break;
 
         case PCMK_LRM_OP_TIMEOUT:
             crm_err("Operation %s: %s (node=%s, call=%d, timeout=%dms)",
                     op_key, services_lrm_status_str(op->op_status), lrm_state->node_name, op->call_id, op->timeout);
             break;
 
         default:
             crm_err("Operation %s (node=%s, call=%d, status=%d, cib-update=%d, confirmed=%s) %s",
                     op_key, lrm_state->node_name, op->call_id, op->op_status, update_id, removed ? "true" : "false",
                     services_lrm_status_str(op->op_status));
     }
 
     /* Agent output is logged at notice level for non-zero rc, debug otherwise */
     if (op->output) {
         char *prefix =
             crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id);
 
         if (op->rc) {
             crm_log_output(LOG_NOTICE, prefix, op->output);
         } else {
             crm_log_output(LOG_DEBUG, prefix, op->output);
         }
         free(prefix);
     }
 
     crmd_notify_resource_op(lrm_state->node_name, op);
 
     if (op->rsc_deleted) {
         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
         delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
     }
 
     /* If a shutdown was escalated while operations were pending,
      * then the FSA will be stalled right now... allow it to continue
      */
     mainloop_set_trigger(fsa_source);
     update_history_cache(lrm_state, rsc, op);
 
     lrmd_free_rsc_info(rsc);
     free(op_key);
     free(op_id);
 
     return TRUE;
 }
diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c
index a3441e8c7f..696f1a8eb2 100644
--- a/crmd/lrm_state.c
+++ b/crmd/lrm_state.c
@@ -1,797 +1,840 @@
 /* 
  * Copyright (C) 2012 David Vossel <davidvossel@gmail.com>
  * 
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  * 
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/iso8601.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 #include <crm/pengine/rules.h>
 
 /* Node name -> lrm_state_t; entries are added by lrm_state_create() */
 GHashTable *lrm_state_table = NULL;
 /* Proxy session ID -> remote_proxy_t; defined in another file */
 extern GHashTable *proxy_table;
 /* Prototypes for functions not defined in this file */
 int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
 void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
 
 /* Value destructor for rsc_info_cache: releases one lrmd_rsc_info_t. */
 static void
 free_rsc_info(gpointer value)
 {
     lrmd_free_rsc_info((lrmd_rsc_info_t *) value);
 }
 
 /*
  * Value destructor for deletion_ops: frees the resource name, the stored
  * input message and the pending_deletion_op_s itself.
  */
 static void
 free_deletion_op(gpointer value)
 {
     struct pending_deletion_op_s *deletion = (struct pending_deletion_op_s *) value;
 
     delete_ha_msg_input(deletion->input);
     free(deletion->rsc);
     free(deletion);
 }
 
 /*
  * Value destructor for pending_ops: frees every string owned by the
  * recurring_op_s, its parameter table (if present) and the entry itself.
  */
 static void
 free_recurring_op(gpointer value)
 {
     struct recurring_op_s *entry = value;
 
     if (entry->params != NULL) {
         g_hash_table_destroy(entry->params);
     }
 
     free(entry->op_key);
     free(entry->op_type);
     free(entry->rsc_id);
     free(entry->user_data);
     free(entry);
 }
 
 /*
  * g_hash_table_foreach_remove() callback used by lrm_state_disconnect():
  * builds a synthetic "connection died" completion event for one pending
  * operation and feeds it to process_lrm_event().
  *
  * key:       pending_ops key (only logged)
  * value:     struct recurring_op_s describing the pending operation
  * user_data: lrm_state_t owning the pending_ops table
  *
  * Always returns TRUE.
  */
 static gboolean
 fail_pending_op(gpointer key, gpointer value, gpointer user_data)
 {
     struct recurring_op_s *pending = (struct recurring_op_s *) value;
     lrm_state_t *state = user_data;
     lrmd_event_data_t fake = { 0, };
 
     crm_trace("Pre-emptively failing %s_%s_%d on %s (call=%s, %s)",
               pending->rsc_id, pending->op_type, pending->interval,
               state->node_name, key, pending->user_data);
 
     /* What failed: borrowed (not copied) from the pending entry */
     fake.type = lrmd_event_exec_complete;
     fake.call_id = pending->call_id;
     fake.rsc_id = pending->rsc_id;
     fake.op_type = pending->op_type;
     fake.interval = pending->interval;
     fake.timeout = 0;
     fake.user_data = pending->user_data;
     fake.params = pending->params;
     fake.remote_nodename = state->node_name;
 
     /* How it failed */
     fake.op_status = PCMK_LRM_OP_ERROR;
     fake.rc = PCMK_OCF_CONNECTION_DIED;
 
     /* When: both timestamps are the time the operation was started */
     fake.t_run = pending->start_time;
     fake.t_rcchange = pending->start_time;
 
     process_lrm_event(state, &fake, pending);
     return TRUE;
 }
 
 /*
  * Return TRUE when lrm_state's node name equals fsa_our_uname (exact,
  * case-sensitive comparison); FALSE otherwise, including when either
  * lrm_state or fsa_our_uname is NULL.
  */
 gboolean
 lrm_state_is_local(lrm_state_t *lrm_state)
 {
     if ((lrm_state != NULL) && (fsa_our_uname != NULL)
         && (strcmp(lrm_state->node_name, fsa_our_uname) == 0)) {
         return TRUE;
     }
 
     return FALSE;
 }
 
 /*
  * Allocate a state object for node_name, create its four per-node hash
  * tables and register it in lrm_state_table under its own copy of the name.
  *
  * Returns the new object, or NULL if node_name is NULL (an error is logged)
  * or the object itself could not be allocated.
  */
 lrm_state_t *
 lrm_state_create(const char *node_name)
 {
     lrm_state_t *fresh = NULL;
 
     if (node_name == NULL) {
         crm_err("No node name given for lrm state object");
         return NULL;
     }
 
     fresh = calloc(1, sizeof(lrm_state_t));
     if (fresh == NULL) {
         return NULL;
     }
 
     fresh->node_name = strdup(node_name);
 
     /* Tables without a key destructor do not own their keys */
     fresh->rsc_info_cache =
         g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc_info);
     fresh->deletion_ops =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free_deletion_op);
     fresh->pending_ops =
         g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op);
     fresh->resource_history =
         g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, history_free);
 
     g_hash_table_insert(lrm_state_table, (char *)fresh->node_name, fresh);
     return fresh;
 }
 
 /*
  * Remove node_name's entry from lrm_state_table.  The table is created with
  * internal_lrm_state_destroy as its value destructor (see
  * lrm_state_init_local()).
  */
 void
 lrm_state_destroy(const char *node_name)
 {
     g_hash_table_remove(lrm_state_table, node_name);
 }
 
 /*
  * g_hash_table_foreach_remove() predicate: selects proxy_table entries whose
  * node name matches the node name passed as user_data.  key is unused.
  */
 static gboolean
 remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
 {
     const char *wanted = user_data;
     remote_proxy_t *candidate = value;
 
     return safe_str_eq(wanted, candidate->node_name) ? TRUE : FALSE;
 }
 
 /*
  * Value destructor for lrm_state_table: tears down one lrm_state_t.
  *
  * Removes the node's entries from proxy_table, runs remote_ra_cleanup(),
  * deletes the lrmd connection object, destroys each per-node hash table that
  * exists, and finally frees the node name and the object.  A NULL argument
  * is ignored.
  */
 static void
 internal_lrm_state_destroy(gpointer data)
 {
     lrm_state_t *lrm_state = data;
 
     if (!lrm_state) {
         return;
     }
 
     /* Note: the member count traced here is that of the whole proxy_table */
     crm_trace("Destroying proxy table %s with %d members", lrm_state->node_name, g_hash_table_size(proxy_table));
     g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name);
     remote_ra_cleanup(lrm_state);
     lrmd_api_delete(lrm_state->conn);
 
     if (lrm_state->rsc_info_cache) {
         crm_trace("Destroying rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache));
         g_hash_table_destroy(lrm_state->rsc_info_cache);
     }
     if (lrm_state->resource_history) {
         crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history));
         g_hash_table_destroy(lrm_state->resource_history);
     }
     if (lrm_state->deletion_ops) {
         crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops));
         g_hash_table_destroy(lrm_state->deletion_ops);
     }
     if (lrm_state->pending_ops) {
         crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops));
         g_hash_table_destroy(lrm_state->pending_ops);
     }
 
     /* node_name is also the lrm_state_table key, so it is freed last */
     free((char *)lrm_state->node_name);
     free(lrm_state);
 }
 
 /*
  * Empty (without destroying) each of lrm_state's per-node tables that
  * exists: history, deletion ops, pending ops and the resource info cache.
  */
 void
 lrm_state_reset_tables(lrm_state_t * lrm_state)
 {
     GHashTable *table = NULL;
 
     table = lrm_state->resource_history;
     if (table != NULL) {
         crm_trace("Re-setting history op cache with %d members",
                   g_hash_table_size(table));
         g_hash_table_remove_all(table);
     }
 
     table = lrm_state->deletion_ops;
     if (table != NULL) {
         crm_trace("Re-setting deletion op cache with %d members",
                   g_hash_table_size(table));
         g_hash_table_remove_all(table);
     }
 
     table = lrm_state->pending_ops;
     if (table != NULL) {
         crm_trace("Re-setting pending op cache with %d members",
                   g_hash_table_size(table));
         g_hash_table_remove_all(table);
     }
 
     table = lrm_state->rsc_info_cache;
     if (table != NULL) {
         crm_trace("Re-setting rsc info cache with %d members",
                   g_hash_table_size(table));
         g_hash_table_remove_all(table);
     }
 }
 
 /*
  * Create the global lrm_state_table and proxy_table.
  *
  * Returns TRUE if lrm_state_table already exists or both tables were
  * created; FALSE if either creation yielded NULL.
  */
 gboolean
 lrm_state_init_local(void)
 {
     /* Already initialised */
     if (lrm_state_table) {
         return TRUE;
     }
 
     lrm_state_table =
         g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, internal_lrm_state_destroy);
     if (!lrm_state_table) {
         return FALSE;
     }
 
     proxy_table =
         g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, remote_proxy_free);
     if (!proxy_table) {
-         g_hash_table_destroy(lrm_state_table);
+        g_hash_table_destroy(lrm_state_table);
+        lrm_state_table = NULL;
         return FALSE;
     }
 
     return TRUE;
 }
 
 /*
  * Destroy the global lrm_state_table and proxy_table (state table first) and
  * reset both pointers to NULL.  Tables that do not exist are skipped.
  */
 void
 lrm_state_destroy_all(void)
 {
     if (lrm_state_table != NULL) {
         crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table));
         g_hash_table_destroy(lrm_state_table);
         lrm_state_table = NULL;
     }
 
     if (proxy_table != NULL) {
         crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table));
         g_hash_table_destroy(proxy_table);
         proxy_table = NULL;
     }
 }
 
 /*
  * Look up the state object registered for node_name.
  * Returns NULL when node_name is NULL or no entry is found.
  */
 lrm_state_t *
 lrm_state_find(const char *node_name)
 {
     return (node_name != NULL) ? g_hash_table_lookup(lrm_state_table, node_name) : NULL;
 }
 
 lrm_state_t *
 lrm_state_find_or_create(const char *node_name)
 {
     lrm_state_t *lrm_state;
 
     lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
     if (!lrm_state) {
         lrm_state = lrm_state_create(node_name);
     }
 
     return lrm_state;
 }
 
 /*
  * Return the values of lrm_state_table as produced by
  * g_hash_table_get_values().
  * NOTE(review): presumably the caller must release the list itself but not
  * the lrm_state_t elements -- confirm against the GLib documentation.
  */
 GList *
 lrm_state_get_list(void)
 {
     return g_hash_table_get_values(lrm_state_table);
 }
 
+/*
+ * Return the first proxy_table entry visited that belongs to node_name and
+ * still has a mainloop source attached (proxy->source is non-NULL), or NULL
+ * if there is none or proxy_table does not exist.
+ */
+static remote_proxy_t *
+find_connected_proxy_by_node(const char * node_name)
+{
+    GHashTableIter iter;
+    remote_proxy_t *candidate = NULL;
+    remote_proxy_t *match = NULL;
+
+    CRM_CHECK(proxy_table != NULL, return NULL);
+
+    g_hash_table_iter_init(&iter, proxy_table);
+
+    while ((match == NULL)
+           && g_hash_table_iter_next(&iter, NULL, (gpointer *) &candidate)) {
+        if ((candidate->source != NULL)
+            && safe_str_eq(node_name, candidate->node_name)) {
+            match = candidate;
+        }
+    }
+
+    return match;
+}
+
+/*
+ * Disconnect every proxy of node_name that still has a mainloop source.
+ *
+ * mainloop_del_ipc_client() eventually calls remote_proxy_disconnected(),
+ * which removes the entry from proxy_table.  That must not happen inside a
+ * g_hash_table_iter_next() loop, so the search is restarted from scratch
+ * after every disconnect.
+ */
+static void
+remote_proxy_disconnect_by_node(const char * node_name)
+{
+    remote_proxy_t *connected = NULL;
+
+    CRM_CHECK(proxy_table != NULL, return);
+
+    for (connected = find_connected_proxy_by_node(node_name);
+         connected != NULL;
+         connected = find_connected_proxy_by_node(node_name)) {
+
+        if (connected->source != NULL) {
+            mainloop_del_ipc_client(connected->source);
+        }
+    }
+}
+
 /*
  * Disconnect and delete lrm_state's lrmd connection; does nothing when there
  * is no connection object.
  *
  * Unless R_SHUTDOWN is set in fsa_input_register, every entry in pending_ops
  * is removed via fail_pending_op(), which synthesizes a failure result for it.
  */
 void
 lrm_state_disconnect(lrm_state_t * lrm_state)
 {
     int removed = 0;
 
     if (!lrm_state->conn) {
         return;
     }
     crm_trace("Disconnecting %s", lrm_state->node_name);
+
+    remote_proxy_disconnect_by_node(lrm_state->node_name);
+
     ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn);
 
     if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
         removed = g_hash_table_foreach_remove(lrm_state->pending_ops, fail_pending_op, lrm_state);
         crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name);
     }
 
     lrmd_api_delete(lrm_state->conn);
     lrm_state->conn = NULL;
 }
 
 /*
  * Return FALSE when lrm_state has no connection object, otherwise whatever
  * the connection's is_connected() method reports.
  */
 int
 lrm_state_is_connected(lrm_state_t * lrm_state)
 {
     lrmd_t *conn = (lrmd_t *) lrm_state->conn;
 
     if (conn == NULL) {
         return FALSE;
     }
     return conn->cmds->is_connected(conn);
 }
 
 /*
  * Return -1 when lrm_state has no connection object, otherwise the result of
  * the connection's poke_connection() method.
  */
 int
 lrm_state_poke_connection(lrm_state_t * lrm_state)
 {
     lrmd_t *conn = (lrmd_t *) lrm_state->conn;
 
     if (conn == NULL) {
         return -1;
     }
     return conn->cmds->poke_connection(conn);
 }
 
 /*
  * Connect lrm_state to the lrmd as CRM_SYSTEM_CRMD, creating the connection
  * object (with lrm_op_callback installed) on first use.
  *
  * num_lrm_register_fails is incremented on failure and reset to 0 on
  * success.  Returns the connect() result.
  */
 int
 lrm_state_ipc_connect(lrm_state_t * lrm_state)
 {
     int rc;
 
     if (lrm_state->conn == NULL) {
         lrm_state->conn = lrmd_api_new();
         ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, lrm_op_callback);
     }
 
     rc = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL);
 
     if (rc == pcmk_ok) {
         lrm_state->num_lrm_register_fails = 0;
     } else {
         lrm_state->num_lrm_register_fails++;
     }
 
     return rc;
 }
 
 /*
  * IPC dispatch callback for a proxied connection: forwards an asynchronous
  * message from the cib (and friends) back to the client via
  * pacemaker_remoted.
  *
  * buffer:   message text, parsed as XML
  * length:   unused
  * userdata: the remote_proxy_t the message arrived on
  *
  * Returns 0 when the proxy's node has no lrm_state, 1 otherwise (including
  * when the buffer cannot be parsed).
  */
 static int
 remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
 {
     remote_proxy_t *proxy = userdata;
     lrm_state_t *lrm_state = lrm_state_find(proxy->node_name);
     xmlNode *parsed = NULL;
 
     if (lrm_state == NULL) {
         return 0;
     }
 
     parsed = string2xml(buffer);
     if (parsed == NULL) {
         crm_warn("Received a NULL msg from IPC service.");
         return 1;
     }
 
     if (crm_ipc_buffer_flags(proxy->ipc) & crm_ipc_proxied_relay_response) {
         /* Reply to an earlier request: relay it with that request's ID */
         crm_trace("Passing response back to %.8s on %s: %.200s - request id: %d", proxy->session_id, proxy->node_name, buffer, proxy->last_request_id);
         remote_proxy_relay_response(lrm_state->conn, proxy->session_id, parsed, proxy->last_request_id);
         proxy->last_request_id = 0;
 
     } else {
         /* Anything else is relayed as an event */
         crm_trace("Passing event back to %.8s on %s: %.200s", proxy->session_id, proxy->node_name, buffer);
         remote_proxy_relay_event(lrm_state->conn, proxy->session_id, parsed);
     }
 
     free_xml(parsed);
     return 1;
 }
 
 /*
  * IPC destroy callback for a proxied connection.
  *
  * Clears the proxy's source/ipc pointers, notifies the remote side that the
  * session is gone (only if the node still has a state object with a
  * connection), and removes the proxy from proxy_table.
  *
  * userdata: the remote_proxy_t whose IPC connection was destroyed
  */
 static void
 remote_proxy_disconnected(void *userdata)
 {
     remote_proxy_t *proxy = userdata;
     lrm_state_t *lrm_state = lrm_state_find(proxy->node_name);
 
     /* lrm_state may be NULL here (it is checked below), so the node name
      * must be taken from the proxy rather than from lrm_state */
     crm_trace("Destroying %s (%p)", proxy->node_name, userdata);
 
     proxy->source = NULL;
     proxy->ipc = NULL;
 
     if (lrm_state && lrm_state->conn) {
         remote_proxy_notify_destroy(lrm_state->conn, proxy->session_id);
     }
     g_hash_table_remove(proxy_table, proxy->session_id);
 }
 
 /*
  * Create a proxy session for a client on a remote node and register it in
  * proxy_table under its session ID.
  *
  * node_name:  node the client is connected to
  * session_id: proxy session identifier, used as the proxy_table key
  * channel:    IPC server the client wants; CRM_SYSTEM_CRMD is served locally
  *             without an IPC connection, anything else is connected through
  *             the mainloop
  *
  * Returns the new proxy, or NULL if it could not be allocated or the IPC
  * connection could not be established.
  */
 static remote_proxy_t *
 remote_proxy_new(const char *node_name, const char *session_id, const char *channel)
 {
     static struct ipc_client_callbacks proxy_callbacks = {
         .dispatch = remote_proxy_dispatch_internal,
         .destroy = remote_proxy_disconnected
     };
     remote_proxy_t *proxy = calloc(1, sizeof(remote_proxy_t));
 
     if (proxy == NULL) {
         /* Out of memory: report failure instead of dereferencing NULL */
         return NULL;
     }
 
     proxy->node_name = strdup(node_name);
     proxy->session_id = strdup(session_id);
 
     if (safe_str_eq(channel, CRM_SYSTEM_CRMD)) {
         proxy->is_local = TRUE;
     } else {
         proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 0, proxy, &proxy_callbacks);
 
         if (proxy->source == NULL) {
             remote_proxy_free(proxy);
             return NULL;
         }
 
         /* Only ask for the IPC handle once the source is known to exist */
         proxy->ipc = mainloop_get_ipc_client(proxy->source);
     }
 
     crm_trace("created proxy session ID %s", proxy->session_id);
     g_hash_table_insert(proxy_table, proxy->session_id, proxy);
 
     return proxy;
 }
 
 gboolean
 crmd_is_proxy_session(const char *session)
 {
     return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE;
 }
 
 /*
  * Relay `msg` as an event to the proxy session `session`.
  *
  * Silently does nothing when the session is unknown or the proxy's node has
  * no state object.
  */
 void
 crmd_proxy_send(const char *session, xmlNode *msg)
 {
     lrm_state_t *owner = NULL;
     remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
 
     if (proxy == NULL) {
         return;
     }
 
     crm_log_xml_trace(msg, "to-proxy");
 
     owner = lrm_state_find(proxy->node_name);
     if (owner == NULL) {
         return;
     }
 
     crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name);
     remote_proxy_relay_event(owner->conn, session, msg);
 }
 
 /*
  * Handle a proxied request addressed to the crmd itself: stamp the message
  * with the session as its sender, route it if crmd_authorize_message()
  * accepts it, and trigger the FSA in either case.
  */
 static void
 crmd_proxy_dispatch(const char *session, xmlNode *msg)
 {
 
     crm_log_xml_trace(msg, "CRMd-PROXY[inbound]");
 
     /* Record the proxy session ID as the message's originating system */
     crm_xml_add(msg, F_CRM_SYS_FROM, session);
     if (crmd_authorize_message(msg, NULL, session)) {
         route_message(C_IPC_MESSAGE, msg);
     }
 
     trigger_fsa(fsa_source);
 }
 
 /*
  * CIB query callback: unpack the cluster options from `output` and pass
  * them to the remote peer with remote_proxy_check().
  *
  * rc:        query result; anything but pcmk_ok is only logged
  * output:    query output the options are unpacked from
  * user_data: the lrmd_t connection of the remote peer
  * msg is unused; call_id is only logged.
  */
 static void
 remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     lrmd_t *lrmd = NULL;
     crm_time_t *now = NULL;
     GHashTable *config_hash = NULL;
 
     if (rc != pcmk_ok) {
         crm_err("Query resulted in an error: %s", pcmk_strerror(rc));
 
         if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
             crm_err("The cluster is mis-configured - shutting down and staying down");
         }
         return;
     }
 
     lrmd = (lrmd_t *) user_data;
     now = crm_time_new(NULL);
     config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                         g_hash_destroy_str, g_hash_destroy_str);
 
     crm_debug("Call %d : Parsing CIB options", call_id);
 
     unpack_instance_attributes(output, output, XML_CIB_TAG_PROPSET, NULL,
                                config_hash, CIB_OPTIONS_FIRST, FALSE, now);
 
     /* Now send it to the remote peer */
     remote_proxy_check(lrmd, config_hash);
 
     g_hash_table_destroy(config_hash);
     crm_time_free(now);
 }
 
 /* Proxy callback for a remote lrmd connection.
  *
  * Called with each proxied IPC message (msg) that arrives over lrmd;
  * userdata is the lrm_state_t of the node the connection belongs to.
  * The operation named by F_LRMD_IPC_OP selects what happens:
  *   - LRMD_IPC_OP_SHUTDOWN_REQ: set the shutdown attribute for the node
  *     through update_attrd() and acknowledge the request
  *   - LRMD_IPC_OP_NEW:          create a proxy session and, on success,
  *     queue a CIB query handled by remote_config_check()
  *   - LRMD_IPC_OP_DESTROY:      end the session
  *   - LRMD_IPC_OP_REQUEST:      relay the enclosed request (directly to the
  *     crmd, or over IPC to the proxied server) and send back any reply
  * A message lacking the operation or the session id is dropped by CRM_CHECK.
  */
 static void
 remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
 {
     lrm_state_t *lrm_state = userdata;
     const char *op = crm_element_value(msg, F_LRMD_IPC_OP);
     const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION);
     int msg_id = 0;
 
     /* sessions are raw ipc connections to IPC,
      * all we do is proxy requests/responses exactly
      * like they are given to us at the ipc level. */
 
     CRM_CHECK(op != NULL, return);
     CRM_CHECK(session != NULL, return);
 
     crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id);
     /* This is msg from remote ipc client going to real ipc server */
 
     if (safe_str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ)) {
         char *now_s = NULL;
         time_t now = time(NULL);
 
         crm_notice("Graceful proxy shutdown of %s", lrm_state->node_name);
 
         /* The shutdown attribute carries the current time as its value */
         now_s = crm_itoa(now);
         update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
         free(now_s);
 
         remote_proxy_ack_shutdown(lrmd);
         return;
 
     } else if (safe_str_eq(op, LRMD_IPC_OP_NEW)) {
         int rc;
         const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER);
 
         CRM_CHECK(channel != NULL, return);
 
         if (remote_proxy_new(lrm_state->node_name, session, channel) == NULL) {
             /* No session was created: tell the remote side and stop here,
              * rather than reporting the session as established and
              * querying the CIB on its behalf. */
             remote_proxy_notify_destroy(lrmd, session);
             return;
         }
         crm_trace("new remote proxy client established to %s, session id %s", channel, session);
 
         /* Look up stonith-watchdog-timeout and send to the remote peer for validation */
         rc = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local);
         fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, rc, 10, FALSE, lrmd, "remote_config_check", remote_config_check, NULL);
 
     } else if (safe_str_eq(op, LRMD_IPC_OP_DESTROY)) {
         remote_proxy_end_session(session);
 
     } else if (safe_str_eq(op, LRMD_IPC_OP_REQUEST)) {
         int flags = 0;
         xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG);
         const char *name = crm_element_value(msg, F_LRMD_IPC_CLIENT);
         remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
 
         CRM_CHECK(request != NULL, return);
 
         if (proxy == NULL) {
             /* proxy connection no longer exists */
             remote_proxy_notify_destroy(lrmd, session);
             return;
         } else if ((proxy->is_local == FALSE) && (crm_ipc_connected(proxy->ipc) == FALSE)) {
             /* the IPC connection behind a non-local session is gone */
             remote_proxy_end_session(session);
             return;
         }
         proxy->last_request_id = 0;
         crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags);
         crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote");
 
 #if ENABLE_ACL
         CRM_ASSERT(lrm_state->node_name);
         crm_acl_get_set_user(request, F_LRMD_IPC_USER, lrm_state->node_name);
 #endif
 
         if (proxy->is_local) {
             /* this is for the crmd, which we are, so don't try
              * and connect/send to ourselves over ipc. instead
              * do it directly. */
             crmd_proxy_dispatch(session, request);
             if (flags & crm_ipc_client_response) {
                 /* the client asked for a response: send a plain ack */
                 xmlNode *op_reply = create_xml_node(NULL, "ack");
 
                 crm_xml_add(op_reply, "function", __FUNCTION__);
                 crm_xml_add_int(op_reply, "line", __LINE__);
                 remote_proxy_relay_response(lrmd, session, op_reply, msg_id);
                 free_xml(op_reply);
             }
 
         } else if(is_set(flags, crm_ipc_proxied)) {
             const char *type = crm_element_value(request, F_TYPE);
             int rc = 0;
 
             /* attrd requests without a host get this proxy's node name */
             if (safe_str_eq(type, T_ATTRD)
                 && crm_element_value(request, F_ATTRD_HOST) == NULL) {
                 crm_xml_add(request, F_ATTRD_HOST, proxy->node_name);
             }
 
             /* No reply is collected here (NULL reply pointer) */
             rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL);
 
             if(rc < 0) {
                 xmlNode *op_reply = create_xml_node(NULL, "nack");
 
                 crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)",
                          op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc);
 
                 /* Send a n'ack so the caller doesn't block */
                 crm_xml_add(op_reply, "function", __FUNCTION__);
                 crm_xml_add_int(op_reply, "line", __LINE__);
                 crm_xml_add_int(op_reply, "rc", rc);
                 remote_proxy_relay_response(lrmd, session, op_reply, msg_id);
                 free_xml(op_reply);
 
             } else {
                 crm_trace("Relayed %s request %d from %s to %s for %s",
                           op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name);
                 proxy->last_request_id = msg_id;
             }
 
         } else {
             int rc = pcmk_ok;
             xmlNode *op_reply = NULL;
             /* For backwards compatibility with pacemaker_remoted <= 1.1.10 */
 
             crm_trace("Relaying %s request %d from %s to %s for %s",
                       op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name);
 
             /* Here the reply is collected into op_reply and relayed below */
             rc = crm_ipc_send(proxy->ipc, request, flags, 10000, &op_reply);
             if(rc < 0) {
                 crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)",
                          op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc);
             } else {
                 crm_trace("Relayed %s request %d from %s to %s for %s",
                           op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name);
             }
 
             if(op_reply) {
                 remote_proxy_relay_response(lrmd, session, op_reply, msg_id);
                 free_xml(op_reply);
             }
         }
     } else {
         crm_err("Unknown proxy operation: %s", op);
     }
 }
 
 /* Begin an asynchronous connection to the remote lrmd at server:port.
  *
  * The connection object is created on first use, at which point the
  * operation and proxy callbacks are installed. The register-failure counter
  * in lrm_state is reset when the connect request succeeds and incremented
  * otherwise. Returns -1 if no connection object could be created, else the
  * result of connect_async().
  */
 int
 lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
                                int timeout_ms)
 {
     int rc = pcmk_ok;
     lrmd_t *api = NULL;
 
     if (lrm_state->conn == NULL) {
         lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server, port);
         if (lrm_state->conn == NULL) {
             return -1;
         }
 
         /* fresh connection object: hook up our callbacks */
         api = (lrmd_t *) lrm_state->conn;
         api->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback);
         lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, remote_proxy_cb);
     }
     api = (lrmd_t *) lrm_state->conn;
 
     crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms);
     rc = api->cmds->connect_async(lrm_state->conn, lrm_state->node_name, timeout_ms);
 
     if (rc == pcmk_ok) {
         lrm_state->num_lrm_register_fails = 0;
     } else {
         lrm_state->num_lrm_register_fails++;
     }
 
     return rc;
 }
 
 /* Fetch resource agent metadata through this lrm_state's connection.
  *
  * Returns -ENOTCONN when there is no connection object, otherwise whatever
  * the connection's get_metadata() returns.
  */
 int
 lrm_state_get_metadata(lrm_state_t * lrm_state,
                        const char *class,
                        const char *provider,
                        const char *agent, char **output, enum lrmd_call_options options)
 {
     lrmd_t *api = NULL;
 
     if (lrm_state->conn == NULL) {
         return -ENOTCONN;
     }
     api = (lrmd_t *) lrm_state->conn;
 
     /* Optimize this... only retrieve metadata from local lrmd connection. Perhaps consider
      * caching result. */
     return api->cmds->get_metadata(lrm_state->conn, class, provider, agent, output, options);
 }
 
 /* Cancel an action on a resource.
  *
  * Remote-connection resources are cancelled through remote_ra_cancel();
  * everything else goes to the connection's cancel(). Returns -ENOTCONN when
  * there is no connection object.
  */
 int
 lrm_state_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval)
 {
     lrmd_t *api = NULL;
 
     if (lrm_state->conn == NULL) {
         return -ENOTCONN;
     }
 
     /* Optimize this, cancel requires a synced request/response to the server.
      * Figure out a way to make this async. */
     if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
         return remote_ra_cancel(lrm_state, rsc_id, action, interval);
     }
 
     api = (lrmd_t *) lrm_state->conn;
     return api->cmds->cancel(lrm_state->conn, rsc_id, action, interval);
 }
 
 /* Return a copy of the resource information for rsc_id.
  *
  * Remote-connection resources are answered by remote_ra_get_rsc_info().
  * For anything else the per-connection cache is consulted first; on a miss
  * the lrmd is asked and a successful answer is stored in the cache. Returns
  * NULL when there is no connection object or the lrmd has no such resource.
  */
 lrmd_rsc_info_t *
 lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
 {
     lrmd_rsc_info_t *info = NULL;
     lrmd_t *api = NULL;
 
     if (lrm_state->conn == NULL) {
         return NULL;
     }
     if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
         return remote_ra_get_rsc_info(lrm_state, rsc_id);
     }
 
     info = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
     if (info != NULL) {
         /* cache hit: no need to contact the lrmd */
         return lrmd_copy_rsc_info(info);
     }
 
     api = (lrmd_t *) lrm_state->conn;
     info = api->cmds->get_rsc_info(lrm_state->conn, rsc_id, options);
     if (info == NULL) {
         return NULL;
     }
 
     /* remember the answer; the cache is keyed by the entry's own id */
     g_hash_table_insert(lrm_state->rsc_info_cache, info->id, info);
     return lrmd_copy_rsc_info(info);
 }
 
 /* Run an action on a resource.
  *
  * Remote-connection resources are executed by remote_ra_exec(); all others
  * are passed to the connection's exec() with lrmd_opt_notify_changes_only.
  * Without a connection object, params is freed and -ENOTCONN is returned.
  */
 int
 lrm_state_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval,     /* ms */
                int timeout,     /* ms */
                int start_delay, /* ms */
                lrmd_key_value_t * params)
 {
     lrmd_t *api = NULL;
 
     if (lrm_state->conn == NULL) {
         /* params is released here on this early exit */
         lrmd_key_value_freeall(params);
         return -ENOTCONN;
     }
 
     if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
         return remote_ra_exec(lrm_state, rsc_id, action, userdata,
                               interval, timeout, start_delay, params);
     }
 
     api = (lrmd_t *) lrm_state->conn;
     return api->cmds->exec(lrm_state->conn, rsc_id, action, userdata,
                            interval, timeout, start_delay,
                            lrmd_opt_notify_changes_only, params);
 }
 
 /* Register a resource.
  *
  * For a remote-connection agent, registering means making sure an
  * lrm_state entry named rsc_id exists (pcmk_ok on success, -1 otherwise).
  * Any other resource is registered through the connection's register_rsc().
  * Returns -ENOTCONN when there is no connection object.
  */
 int
 lrm_state_register_rsc(lrm_state_t * lrm_state,
                        const char *rsc_id,
                        const char *class,
                        const char *provider, const char *agent, enum lrmd_call_options options)
 {
     lrmd_t *api = NULL;
 
     if (lrm_state->conn == NULL) {
         return -ENOTCONN;
     }
 
     /* optimize this... this function is a synced round trip from client to daemon.
      * The crmd/lrm.c code path should be re-factored to allow the register of resources
      * to be performed async. The lrmd client api needs to make an async version
      * of register available. */
     if (is_remote_lrmd_ra(agent, provider, NULL)) {
         if (lrm_state_find_or_create(rsc_id)) {
             return pcmk_ok;
         }
         return -1;
     }
 
     api = (lrmd_t *) lrm_state->conn;
     return api->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider, agent, options);
 }
 
 /* Unregister a resource.
  *
  * A remote-connection resource is unregistered by destroying the lrm_state
  * entry of that name. For any other resource the cached resource info is
  * dropped and the connection's unregister_rsc() is called. Returns
  * -ENOTCONN when there is no connection object.
  */
 int
 lrm_state_unregister_rsc(lrm_state_t * lrm_state,
                          const char *rsc_id, enum lrmd_call_options options)
 {
     lrmd_t *api = NULL;
 
     if (lrm_state->conn == NULL) {
         return -ENOTCONN;
     }
 
     /* optimize this... this function is a synced round trip from client to daemon.
      * The crmd/lrm.c code path that uses this function should always treat it as an
      * async operation. The lrmd client api needs to make an async version unreg available. */
     if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
         lrm_state_destroy(rsc_id);
         return pcmk_ok;
     }
 
     /* forget any cached info before asking the lrmd to drop the resource */
     g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
 
     api = (lrmd_t *) lrm_state->conn;
     return api->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
 }
diff --git a/crmd/membership.c b/crmd/membership.c
index 27ae710cae..ca63cc465a 100644
--- a/crmd/membership.c
+++ b/crmd/membership.c
@@ -1,433 +1,430 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 /* put these first so that uuid_t is defined without conflicts */
 #include <crm_internal.h>
 
 #include <string.h>
 
 #include <crm/crm.h>
 
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 #include <crm/cluster/internal.h>
 #include <crmd_messages.h>
 #include <crmd_fsa.h>
 #include <crmd_lrm.h>
 #include <fsa_proto.h>
 #include <crmd_callbacks.h>
 #include <tengine.h>
 #include <membership.h>
 #include <crmd.h>
 
 gboolean membership_flux_hack = FALSE;
 void post_cache_update(int instance);
 
 int last_peer_update = 0;
 guint highest_born_on = -1;
 
 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
 
 /* GHashTable foreach callback: react to a peer that is no longer active.
  *
  * For an inactive peer the join phase is reset, its vote is removed from
  * the election, and an FSA input is raised if the peer is ourselves
  * (I_ERROR) or our DC (I_ELECTION). The join state is re-checked while
  * integrating or finalizing a join, and transition graph actions that can no
  * longer complete on that node are failed. Active peers are left alone.
  */
 static void
 reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
 {
     crm_node_t *node = value;
 
     if (crm_is_peer_active(node) != FALSE) {
         return;
     }
 
     crm_update_peer_join(__FUNCTION__, node, crm_join_none);
 
     if ((node != NULL) && (node->uname != NULL)) {
         election_remove(fsa_election, node->uname);
 
         if (safe_str_eq(fsa_our_uname, node->uname)) {
             crm_err("We're not part of the cluster anymore");
             register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
 
         } else if ((AM_I_DC == FALSE) && safe_str_eq(node->uname, fsa_our_dc)) {
             crm_warn("Our DC node (%s) left the cluster", node->uname);
             register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
         }
     }
 
     switch (fsa_state) {
         case S_INTEGRATION:
         case S_FINALIZE_JOIN:
             check_join_state(fsa_state, __FUNCTION__);
             break;
         default:
             break;
     }
 
     if ((node != NULL) && (node->uuid != NULL)) {
         fail_incompletable_actions(transition_graph, node->uuid);
     }
 }
 
 gboolean ever_had_quorum = FALSE;
 
 /* Bring local state in line with the peer cache after membership event
  * number `instance`: record the sequence number, process inactive peers,
  * refresh the CIB node entries when we are the DC, schedule an election
  * check and broadcast a no-op so that every peer hears from us.
  */
 void
 post_cache_update(int instance)
 {
     xmlNode *ping = NULL;
     const char *sender_sys = NULL;
 
     crm_peer_seq = instance;
     crm_debug("Updated cache after membership event %d.", instance);
 
     g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
     set_bit(fsa_input_register, R_MEMBERSHIP);
 
     if (AM_I_DC) {
         populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
                            node_update_expected, __FUNCTION__);
     }
 
     /*
      * If we lost nodes, we should re-check the election status
      * Safe to call outside of an election
      */
     register_fsa_action(A_ELECTION_CHECK);
 
     /* Membership changed, remind everyone we're here.
      * This will aid detection of duplicate DCs
      */
     if (AM_I_DC) {
         sender_sys = CRM_SYSTEM_DC;
     } else {
         sender_sys = CRM_SYSTEM_CRMD;
     }
     ping = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, sender_sys, NULL);
     send_cluster_message(NULL, crm_msg_crmd, ping, FALSE);
     free_xml(ping);
 }
 
 /* CIB callback for the node status update submitted by populate_cib_nodes().
  *
  * Clears last_peer_update. On any failure the error is logged (using
  * call_id as the error code when it is itself negative, otherwise rc), the
  * message is dumped at debug level and an FSA error is registered.
  */
 static void
 crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     /* NOTE(review): register_fsa_error() appears to be a macro that picks up msg_data - confirm */
     fsa_data_t *msg_data = NULL;
 
     last_peer_update = 0;
 
     if (rc == pcmk_ok) {
         crm_trace("Node update %d complete", call_id);
         return;
     }
 
     if (call_id < pcmk_ok) {
         crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
     } else {
         crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
     }
     crm_log_xml_debug(msg, "failed");
     register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 }
 
 /* Create an XML_CIB_TAG_STATE element describing node as a child of parent.
  *
  * The flags argument selects which optional attributes are written, and
  * source is stored as the element's XML_ATTR_ORIGIN. Returns the new
  * element, or NULL when the node has no state yet or no UUID could be set
  * on the element (in which case the element is freed again).
  */
 xmlNode *
 do_update_node_cib(crm_node_t * node, int flags, xmlNode * parent, const char *source)
 {
     const char *value = NULL;
     xmlNode *node_state;
 
-    if (is_set(node->flags, crm_remote_node)) {
-        return simple_remote_node_status(node->uname, parent, source);
-    }
-
     if (!node->state) {
         crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
        return NULL;
     }
 
     node_state = create_xml_node(parent, XML_CIB_TAG_STATE);
+
+    if (is_set(node->flags, crm_remote_node)) {
+        crm_xml_add(node_state, XML_NODE_IS_REMOTE, XML_BOOLEAN_TRUE);
+    }
+
     set_uuid(node_state, XML_ATTR_UUID, node);
 
     /* set_uuid() may leave the attribute unset; such an entry is discarded */
     if (crm_element_value(node_state, XML_ATTR_UUID) == NULL) {
         crm_info("Node update for %s cancelled: no id", node->uname);
         free_xml(node_state);
         return NULL;
     }
 
     crm_xml_add(node_state, XML_ATTR_UNAME, node->uname);
 
-    if (flags & node_update_cluster) {
-        if (safe_str_eq(node->state, CRM_NODE_MEMBER)) {
-            value = XML_BOOLEAN_YES;
-        } else if (node->state) {
-            value = XML_BOOLEAN_NO;
-        } else {
-            value = NULL;
-        }
-        crm_xml_add(node_state, XML_NODE_IN_CLUSTER, value);
+    if ((flags & node_update_cluster) && node->state) {
+        crm_xml_add_boolean(node_state, XML_NODE_IN_CLUSTER,
+                            safe_str_eq(node->state, CRM_NODE_MEMBER));
     }
 
-    if (flags & node_update_peer) {
-        value = OFFLINESTATUS;
-        if (node->processes & proc_flags) {
-            value = ONLINESTATUS;
+    if (!is_set(node->flags, crm_remote_node)) {
+        if (flags & node_update_peer) {
+            value = OFFLINESTATUS;
+            if (node->processes & proc_flags) {
+                value = ONLINESTATUS;
+            }
+            crm_xml_add(node_state, XML_NODE_IS_PEER, value);
         }
-        crm_xml_add(node_state, XML_NODE_IS_PEER, value);
-    }
 
-    if (flags & node_update_join) {
-        if(node->join <= crm_join_none) {
-            value = CRMD_JOINSTATE_DOWN;
-        } else {
-            value = CRMD_JOINSTATE_MEMBER;
+        if (flags & node_update_join) {
+            if (node->join <= crm_join_none) {
+                value = CRMD_JOINSTATE_DOWN;
+            } else {
+                value = CRMD_JOINSTATE_MEMBER;
+            }
+            crm_xml_add(node_state, XML_NODE_JOIN_STATE, value);
         }
-        crm_xml_add(node_state, XML_NODE_JOIN_STATE, value);
-    }
 
-    if (flags & node_update_expected) {
-        crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected);
+        if (flags & node_update_expected) {
+            crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected);
+        }
     }
 
     /* record which caller produced this entry */
     crm_xml_add(node_state, XML_ATTR_ORIGIN, source);
 
     return node_state;
 }
 
 /* CIB callback for the deletions issued by
  * search_conflicting_node_callback(): logs the outcome, at debug level on
  * success and at notice level otherwise. user_data is the node UUID string
  * passed at registration.
  */
 static void
 remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
                                  xmlNode * output, void *user_data)
 {
     char *conflicting_uuid = user_data;
 
     do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
                         "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
                         conflicting_uuid, pcmk_strerror(rc), rc);
 }
 
 /* CIB callback for the conflicting-uname query issued by
  * populate_cib_nodes().
  *
  * output is either a single node element or a container of them. Every
  * returned node entry that has both an id and a uname, and that does not
  * match (by uuid and uname) any peer in crm_peer_cache, is deleted from the
  * nodes section together with its entry in the status section. user_data is
  * the UUID of the node the query was issued for; it is only used in log
  * messages.
  */
 static void
 search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
                                  xmlNode * output, void *user_data)
 {
     char *new_node_uuid = user_data;
     xmlNode *candidate = NULL;
 
     if (rc != pcmk_ok) {
         /* -ENXIO is not reported */
         if (rc != -ENXIO) {
             crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
                        new_node_uuid, pcmk_strerror(rc), rc);
         }
         return;
     }
 
     if (output == NULL) {
         return;
     }
 
     /* A lone result is the node itself, several come wrapped in a parent */
     if (safe_str_eq(crm_element_name(output), XML_CIB_TAG_NODE)) {
         candidate = output;
     } else {
         candidate = __xml_first_child(output);
     }
 
     while (candidate != NULL) {
         xmlNode *current = candidate;
         const char *cib_uuid = NULL;
         const char *cib_uname = NULL;
         GHashTableIter iter;
         crm_node_t *peer = NULL;
         gboolean known = FALSE;
         int delete_call_id = 0;
         xmlNode *status_entry = NULL;
 
         /* step now, so that every skip below can simply continue */
         candidate = __xml_next(candidate);
 
         if (safe_str_neq(crm_element_name(current), XML_CIB_TAG_NODE)) {
             continue;
         }
 
         cib_uuid = crm_element_value(current, XML_ATTR_ID);
         cib_uname = crm_element_value(current, XML_ATTR_UNAME);
         if ((cib_uuid == NULL) || (cib_uname == NULL)) {
             continue;
         }
 
         g_hash_table_iter_init(&iter, crm_peer_cache);
         while ((known == FALSE)
                && g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
             if (peer->uuid
                 && safe_str_eq(peer->uuid, cib_uuid)
                 && peer->uname
                 && safe_str_eq(peer->uname, cib_uname)) {
 
                 known = TRUE;
             }
         }
 
         if (known) {
             continue;
         }
 
         crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
                    cib_uuid, cib_uname, new_node_uuid);
 
         /* first the configuration entry ... */
         delete_call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_NODES, current,
                                                     cib_scope_local | cib_quorum_override);
         fsa_register_cib_callback(delete_call_id, FALSE, strdup(cib_uuid),
                                   remove_conflicting_node_callback);
 
         /* ... then the matching status entry */
         status_entry = create_xml_node(NULL, XML_CIB_TAG_STATE);
         crm_xml_add(status_entry, XML_ATTR_ID, cib_uuid);
         crm_xml_add(status_entry, XML_ATTR_UNAME, cib_uname);
 
         delete_call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, status_entry,
                                                     cib_scope_local | cib_quorum_override);
         fsa_register_cib_callback(delete_call_id, FALSE, strdup(cib_uuid),
                                   remove_conflicting_node_callback);
         free_xml(status_entry);
     }
 }
 
 /* CIB callback for the nodes-section update submitted by
  * populate_cib_nodes(). Nothing happens on success; a negative call_id or rc
  * is logged, the message is dumped at debug level and an FSA error is
  * registered.
  */
 static void
 node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     /* NOTE(review): register_fsa_error() appears to be a macro that picks up msg_data - confirm */
     fsa_data_t *msg_data = NULL;
 
     if ((call_id >= pcmk_ok) && (rc >= pcmk_ok)) {
         return;
     }
 
     if (call_id < pcmk_ok) {
         crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
     } else {
         crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
     }
     crm_log_xml_debug(msg, "update:failed");
     register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 }
 
 #define NODE_PATH_MAX 512
 
 /* Write the known cluster nodes, and when we are the DC their status, to
  * the CIB.
  *
  * A nodes list is built and submitted as a CIB update. Unless
  * node_update_quick is set in flags, the cluster layer (heartbeat or
  * corosync, depending on build options) is first asked to fill the list;
  * the list is filled from crm_peer_cache whenever from_hashtable is still
  * TRUE afterwards. For each cached peer with both a uuid and a uname, a CIB
  * query is queued for node entries that share the uname but have a
  * different id.
  *
  * If the nodes update yielded a non-negative call id and we are the DC, a
  * status section holding one do_update_node_cib() entry per cluster peer
  * and per remote peer is submitted too, and its call id is remembered in
  * last_peer_update. flags and source are handed on to do_update_node_cib().
  */
 void
 populate_cib_nodes(enum node_update_flags flags, const char *source)
 {
     int call_id = 0;
     gboolean from_hashtable = TRUE;
     int call_options = cib_scope_local | cib_quorum_override;
     xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
 
 #if SUPPORT_HEARTBEAT
     if (is_not_set(flags, node_update_quick) && is_heartbeat_cluster()) {
         from_hashtable = heartbeat_initialize_nodelist(fsa_cluster_conn, FALSE, node_list);
     }
 #endif
 
 #if SUPPORT_COROSYNC
 #  if !SUPPORT_PLUGIN
     if (is_not_set(flags, node_update_quick) && is_corosync_cluster()) {
         from_hashtable = corosync_initialize_nodelist(NULL, FALSE, node_list);
     }
 #  endif
 #endif
 
     if (from_hashtable) {
         GHashTableIter iter;
         crm_node_t *node = NULL;
 
         g_hash_table_iter_init(&iter, crm_peer_cache);
         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
             xmlNode *new_node = NULL;
 
             crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
             if(node->uuid && node->uname) {
                 char xpath[NODE_PATH_MAX];
 
                 /* We need both to be valid */
                 new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
                 crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
                 crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
 
                 /* Search and remove unknown nodes with the conflicting uname from CIB */
                 /* NOTE(review): uname/uuid are placed into the XPath unescaped - confirm they cannot contain quotes */
                 snprintf(xpath, NODE_PATH_MAX,
                          "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES
                          "/" XML_CIB_TAG_NODE "[@uname='%s'][@id!='%s']",
                          node->uname, node->uuid);
 
                 call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, xpath, NULL,
                                                     cib_scope_local | cib_xpath);
                 fsa_register_cib_callback(call_id, FALSE, strdup(node->uuid),
                                           search_conflicting_node_callback);
             }
         }
     }
 
     crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
 
     /* presumably fsa_cib_update() stores the id of the submitted call in call_id - defined elsewhere, verify */
     fsa_cib_update(XML_CIB_TAG_NODES, node_list, call_options, call_id, NULL);
     fsa_register_cib_callback(call_id, FALSE, NULL, node_list_update_callback);
 
     free_xml(node_list);
 
     if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) {
         /*
          * There is no need to update the local CIB with our values if
          * we've not seen valid membership data
          */
         GHashTableIter iter;
         crm_node_t *node = NULL;
 
         /* node_list is reused here for the status section */
         node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);
 
         g_hash_table_iter_init(&iter, crm_peer_cache);
         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
             do_update_node_cib(node, flags, node_list, source);
         }
 
         /* the remote peer cache may not exist */
         if (crm_remote_peer_cache) {
             g_hash_table_iter_init(&iter, crm_remote_peer_cache);
             while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
                 do_update_node_cib(node, flags, node_list, source);
             }
         }
 
         fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
         fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
         last_peer_update = call_id;
 
         free_xml(node_list);
     }
 }
 
 /* CIB callback for the quorum update submitted by crm_update_quorum().
  * Success is only traced; a failure is logged, the message is dumped at
  * debug level and an FSA error is registered.
  */
 static void
 cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     /* NOTE(review): register_fsa_error() appears to be a macro that picks up msg_data - confirm */
     fsa_data_t *msg_data = NULL;
 
     if (rc != pcmk_ok) {
         crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
         crm_log_xml_debug(msg, "failed");
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
         return;
     }
 
     crm_trace("Quorum update %d complete", call_id);
 }
 
 /* Record the current quorum state.
  *
  * Panics via pcmk_panic() when quorum has been held before, is now lost and
  * no_quorum_suicide_escalation is set. When we are the DC and either the
  * state changed or force_update is set, the have-quorum flag and our UUID
  * as DC are written to the CIB. fsa_has_quorum is updated in all cases.
  */
 void
 crm_update_quorum(gboolean quorum, gboolean force_update)
 {
     ever_had_quorum |= quorum;
 
     if (ever_had_quorum && (quorum == FALSE) && no_quorum_suicide_escalation) {
         pcmk_panic(__FUNCTION__);
     }
 
     if (AM_I_DC) {
         gboolean needs_write = force_update || (fsa_has_quorum != quorum);
 
         if (needs_write) {
             int call_id = 0;
             int call_options = cib_scope_local | cib_quorum_override;
             xmlNode *cib_xml = create_xml_node(NULL, XML_TAG_CIB);
 
             crm_xml_add_int(cib_xml, XML_ATTR_HAVE_QUORUM, quorum);
             crm_xml_add(cib_xml, XML_ATTR_DC_UUID, fsa_our_uuid);
 
             fsa_cib_update(XML_TAG_CIB, cib_xml, call_options, call_id, NULL);
             crm_debug("Updating quorum status to %s (call=%d)", quorum ? "true" : "false", call_id);
             fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete);
             free_xml(cib_xml);
         }
     }
 
     fsa_has_quorum = quorum;
 }
diff --git a/crmd/messages.c b/crmd/messages.c
index 5114519da7..060396ff9d 100644
--- a/crmd/messages.c
+++ b/crmd/messages.c
@@ -1,982 +1,1043 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 #include <string.h>
 #include <time.h>
 #include <crmd_fsa.h>
 
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crm/cluster/internal.h>
 #include <crm/cib.h>
 #include <crm/common/ipcs.h>
 
 #include <crmd.h>
 #include <crmd_messages.h>
 #include <crmd_lrm.h>
 #include <throttle.h>
 
 /* Queue of pending FSA inputs; entries are fsa_data_t objects */
 GListPtr fsa_message_queue = NULL;
 extern void crm_shutdown(int nsig);
 
 extern crm_ipc_t *attrd_ipc;
 /* Handlers for the two message types dispatched by handle_message() */
 void handle_response(xmlNode * stored_msg);
 enum crmd_fsa_input handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause);
 enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg);
 
 /* Trace-log the routing decision taken for a message */
 #define ROUTER_RESULT(x)	crm_trace("Router result: %s", x)
 
 /* debug only, can wrap all it likes */
 /* (incremented for every input queued by register_fsa_input_adv()) */
 int last_data_id = 0;
 
 /*!
  * \brief Queue an FSA error, preserving any actions that were pending
  *
  * \param[in] cause        What caused the error
  * \param[in] input        FSA input to raise for the error
  * \param[in] cur_data     Input currently being processed (may be NULL)
  * \param[in] new_data     Data to attach to the error input (may be NULL)
  * \param[in] raised_from  Name of the caller, for logging
  */
 void
 register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
                        fsa_data_t * cur_data, void *new_data, const char *raised_from)
 {
     if (fsa_actions != A_NOTHING) {
         /* re-queue the outstanding actions so they are not lost */
         enum crmd_fsa_cause saved_cause = C_FSA_INTERNAL;
         void *saved_data = NULL;
 
         if (cur_data) {
             saved_cause = cur_data->fsa_cause;
             saved_data = cur_data->data;
         }
         register_fsa_input_adv(saved_cause, I_NULL, saved_data,
                                fsa_actions, TRUE, __FUNCTION__);
     }
 
     /* start again from an empty action list */
     crm_info("Resetting the current action list");
     fsa_dump_actions(fsa_actions, "Drop");
     fsa_actions = A_NOTHING;
 
     /* finally queue the error itself at the front */
     register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
 }
 
 /*!
  * \brief Add an input (with optional data and actions) to the FSA message queue
  *
  * \param[in] cause         What caused the input; also selects how \p data is copied
  * \param[in] input         FSA input to queue
  * \param[in] data          Data to copy into the queue entry (may be NULL)
  * \param[in] with_actions  Actions to attach to the queue entry
  * \param[in] prepend       If TRUE add to the front of the queue, otherwise the back
  * \param[in] raised_from   Name of the caller, for logging
  *
  * \return ID given to the new queue entry, or 0 if nothing was queued
  */
 int
 register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
                        void *data, long long with_actions,
                        gboolean prepend, const char *raised_from)
 {
     unsigned old_len = g_list_length(fsa_message_queue);
     unsigned new_len = 0;
     fsa_data_t *fsa_data = NULL;
 
     CRM_CHECK(raised_from != NULL, raised_from = "<unknown>");
 
     if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
         /* no point doing anything */
         crm_err("Cannot add entry to queue: no input and no action");
         return 0;
     }
 
     if (input == I_WAIT_FOR_EVENT) {
         do_fsa_stall = TRUE;
         /* old_len is unsigned, so it is printed with %u */
         crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%u",
                   raised_from, fsa_cause2string(cause), data, old_len);
 
         if (old_len > 0) {
             fsa_dump_queue(LOG_TRACE);
             prepend = FALSE;
         }
 
         if (data == NULL) {
             /* nothing to store: just put the actions back and wait */
             fsa_actions |= with_actions;
             fsa_dump_actions(with_actions, "Restored");
             return 0;
         }
 
         /* Store everything in the new event and reset fsa_actions */
         with_actions |= fsa_actions;
         fsa_actions = A_NOTHING;
     }
 
     last_data_id++;
     crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data",
               raised_from, prepend ? "prepended" : "appended", last_data_id,
               fsa_input2string(input), fsa_cause2string(cause), data ? "with" : "without");
 
     fsa_data = calloc(1, sizeof(fsa_data_t));
     /* fail loudly on allocation failure instead of dereferencing NULL below */
     CRM_ASSERT(fsa_data != NULL);
 
     fsa_data->id = last_data_id;
     fsa_data->fsa_input = input;
     fsa_data->fsa_cause = cause;
     fsa_data->origin = raised_from;
     fsa_data->data = NULL;
     fsa_data->data_type = fsa_dt_none;
     fsa_data->actions = with_actions;
 
     if (with_actions != A_NOTHING) {
         crm_trace("Adding actions %.16llx to input", with_actions);
     }
 
     if (data != NULL) {
         /* the cause determines the concrete type behind the void pointer */
         switch (cause) {
             case C_FSA_INTERNAL:
             case C_CRMD_STATUS_CALLBACK:
             case C_IPC_MESSAGE:
             case C_HA_MESSAGE:
                 crm_trace("Copying %s data from %s as a HA msg",
                           fsa_cause2string(cause), raised_from);
                 CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
                           crm_err("Bogus data from %s", raised_from));
                 fsa_data->data = copy_ha_msg_input(data);
                 fsa_data->data_type = fsa_dt_ha_msg;
                 break;
 
             case C_LRM_OP_CALLBACK:
                 crm_trace("Copying %s data from %s as lrmd_event_data_t",
                           fsa_cause2string(cause), raised_from);
                 fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
                 fsa_data->data_type = fsa_dt_lrm;
                 break;
 
             case C_CCM_CALLBACK:
             case C_SUBSYSTEM_CONNECT:
             case C_LRM_MONITOR_CALLBACK:
             case C_TIMER_POPPED:
             case C_SHUTDOWN:
             case C_HEARTBEAT_FAILED:
             case C_HA_DISCONNECT:
             case C_ILLEGAL:
             case C_UNKNOWN:
             case C_STARTUP:
                 crm_err("Copying %s data (from %s)"
                         " not yet implemented", fsa_cause2string(cause), raised_from);
                 crmd_exit(pcmk_err_generic);
                 break;
         }
         crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause));
     }
 
     /* make sure to free it properly later */
     if (prepend) {
         crm_trace("Prepending input");
         fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data);
     } else {
         fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
     }
 
     /* g_list_length() returns an unsigned count; measure it once and reuse it */
     new_len = g_list_length(fsa_message_queue);
     crm_trace("Queue len: %u", new_len);
 
     /* fsa_dump_queue(LOG_DEBUG_2); */
 
     if (old_len == new_len) {
         crm_err("Couldn't add message to the queue");
     }
 
     if (fsa_source && input != I_WAIT_FOR_EVENT) {
         crm_trace("Triggering FSA: %s", __FUNCTION__);
         mainloop_set_trigger(fsa_source);
     }
     return last_data_id;
 }
 
 /*!
  * \brief Log one line per entry currently in the FSA message queue
  *
  * \param[in] log_level  Level at which to log the entries
  */
 void
 fsa_dump_queue(int log_level)
 {
     int position = 0;
     GListPtr item = fsa_message_queue;
 
     while (item != NULL) {
         fsa_data_t *entry = (fsa_data_t *) item->data;
 
         do_crm_log_unlikely(log_level,
                             "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
                             position, entry->id, fsa_input2string(entry->fsa_input),
                             entry->origin, entry->data, entry->data_type,
                             fsa_cause2string(entry->fsa_cause));
         position++;
         item = item->next;
     }
 }
 
 /*!
  * \brief Create a new ha_msg_input_t holding a copy of another one's message
  *
  * \param[in] orig  Input to copy (may be NULL, giving an input with no message)
  *
  * \return Newly allocated input
  */
 ha_msg_input_t *
 copy_ha_msg_input(ha_msg_input_t * orig)
 {
     xmlNodePtr msg_copy = NULL;
     ha_msg_input_t *copy = NULL;
 
     if (orig == NULL) {
         crm_trace("No message to copy");
 
     } else {
         crm_trace("Copy msg");
         msg_copy = copy_xml(orig->msg);
     }
 
     copy = new_ha_msg_input(msg_copy);
 
     /* a source message must never turn into an empty copy */
     if ((orig != NULL) && (orig->msg != NULL)) {
         CRM_CHECK(copy->msg != NULL, crm_err("copy failed"));
     }
     return copy;
 }
 
 /*!
  * \brief Free an FSA queue entry together with the data attached to it
  *
  * \param[in] fsa_data  Entry to free (NULL is ignored)
  */
 void
 delete_fsa_input(fsa_data_t * fsa_data)
 {
     if (fsa_data == NULL) {
         return;
     }
     crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
 
     if (fsa_data->data != NULL) {
         /* the recorded data type selects the matching destructor */
         switch (fsa_data->data_type) {
             case fsa_dt_ha_msg:
                 delete_ha_msg_input(fsa_data->data);
                 break;
 
             case fsa_dt_xml:
                 {
                     xmlNode *xml = fsa_data->data;
 
                     free_xml(xml);
                 }
                 break;
 
             case fsa_dt_lrm:
                 {
                     lrmd_event_data_t *event = (lrmd_event_data_t *) fsa_data->data;
 
                     lrmd_free_event(event);
                 }
                 break;
 
             case fsa_dt_none:
                 /* data is non-NULL here, yet no type was recorded for it */
                 crm_err("Don't know how to free %s data from %s",
                         fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
                 crmd_exit(pcmk_err_generic);
                 break;
         }
         crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
     }
 
     free(fsa_data);
 }
 
 /* returns the next message */
 fsa_data_t *
 get_message(void)
 {
     fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0);
 
     fsa_message_queue = g_list_remove(fsa_message_queue, message);
     crm_trace("Processing input %d", message->id);
     return message;
 }
 
 /* returns the current head of the FIFO queue */
 gboolean
 is_message(void)
 {
     return (g_list_length(fsa_message_queue) > 0);
 }
 
 /*!
  * \brief Get the data attached to an FSA input, checking it has the expected type
  *
  * \param[in] fsa_data  Input whose data is wanted
  * \param[in] a_type    Data type the caller expects
  * \param[in] caller    Name of the caller, for logging
  *
  * \return The input's data, or NULL if there is no input, no data, or the
  *         type does not match (the mismatch also trips CRM_ASSERT)
  */
 void *
 fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
 {
     if (fsa_data == NULL) {
         crm_err("%s: No FSA data available", caller);
         return NULL;
     }
 
     if (fsa_data->data == NULL) {
         crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
         return NULL;
     }
 
     if (fsa_data->data_type != a_type) {
         crm_crit("%s: Message data was the wrong type! %d vs. requested=%d.  Origin: %s",
                  caller, fsa_data->data_type, a_type, fsa_data->origin);
         CRM_ASSERT(fsa_data->data_type == a_type);
         return NULL;
     }
 
     return fsa_data->data;
 }
 
 /*	A_MSG_ROUTE	*/
 /*!
  * \brief FSA action handler: route the message carried by the current input
  *
  * \param[in] action         Unused
  * \param[in] cause          Unused (the cause stored in \p msg_data is used)
  * \param[in] cur_state      Unused
  * \param[in] current_input  Unused
  * \param[in] msg_data       Input being processed; expected to carry an HA message
  */
 void
 do_msg_route(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
 
     if (input == NULL) {
         /* No usable message data (presumably fsa_typed_data() wraps
          * fsa_typed_data_adv(), which logs the reason before returning NULL),
          * so there is nothing to route and nothing safe to dereference.
          */
         return;
     }
 
     route_message(msg_data->fsa_cause, input->msg);
 }
 
 /*!
  * \brief Relay a message elsewhere or, failing that, handle it locally
  *
  * \param[in] cause  Must be C_IPC_MESSAGE or C_HA_MESSAGE
  * \param[in] input  Message XML
  */
 void
 route_message(enum crmd_fsa_cause cause, xmlNode * input)
 {
     enum crmd_fsa_input result = I_NULL;
     ha_msg_input_t fsa_input;
 
     fsa_input.msg = input;
     CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
 
     /* a message that was relayed (or rejected) needs nothing more from us */
     if (relay_message(input, cause == C_IPC_MESSAGE)) {
         return;
     }
 
     /* otherwise it is ours to handle */
     result = handle_message(input, cause);
 
     switch (result) {
         case I_NULL:
             /* fully handled already */
             break;
 
         case I_CIB_OP:
         case I_ROUTER:
         case I_NODE_JOIN:
         case I_JOIN_REQUEST:
         case I_JOIN_RESULT:
             /* these inputs are queued with register_fsa_input() */
             register_fsa_input(cause, result, &fsa_input);
             break;
 
         default:
             /* everything else is deferred via register_fsa_input_later() */
             register_fsa_input_later(cause, result, &fsa_input);
             break;
     }
 }
 
 /*!
  * \brief Decide where a message must go and forward it there when possible
  *
  * Messages addressed to another subsystem or node are passed on via local
  * IPC or the cluster layer; malformed messages and hello messages are dropped.
  *
  * \param[in] msg                 Message XML
  * \param[in] originated_locally  TRUE if the message came from a local client
  *
  * \return TRUE if nothing more needs to be done with the message (it was
  *         relayed, discarded or invalid), FALSE if the caller must process it
  */
 gboolean
 relay_message(xmlNode * msg, gboolean originated_locally)
 {
     int dest = 1;
     int is_for_dc = 0;
     int is_for_dcib = 0;
     int is_for_te = 0;
     int is_for_crm = 0;
     int is_for_cib = 0;
     int is_local = 0;
     gboolean processing_complete = FALSE;
     const char *host_to = crm_element_value(msg, F_CRM_HOST_TO);
     const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
     const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
     const char *type = crm_element_value(msg, F_TYPE);
     const char *msg_error = NULL;
 
     crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE));
 
     /* Sanity checks: anything failing these is consumed here */
     if (msg == NULL) {
         msg_error = "Cannot route empty message";
 
     } else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) {
         /* quietly ignore */
         processing_complete = TRUE;
 
     } else if (safe_str_neq(type, T_CRM)) {
         msg_error = "Bad message type";
 
     } else if (sys_to == NULL) {
         msg_error = "Bad message destination: no subsystem";
     }
 
     if (msg_error != NULL) {
         processing_complete = TRUE;
         crm_err("%s", msg_error);
         crm_log_xml_warn(msg, "bad msg");
     }
 
     if (processing_complete) {
         return TRUE;
     }
 
     /* From here on, assume the message is dealt with unless a branch says otherwise */
     processing_complete = TRUE;
 
     /* Classify the destination subsystem (sys_to is known to be non-NULL here) */
     is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
     is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
     is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
     is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
     is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
 
     /* Without a destination host, DC/TE-bound messages and locally originated
      * crmd messages count as non-local; everything else is local. With a
      * destination host, the message is local only if the host is this node.
      */
     is_local = 0;
     if (host_to == NULL || strlen(host_to) == 0) {
         if (is_for_dc || is_for_te) {
             is_local = 0;
 
         } else if (is_for_crm && originated_locally) {
             is_local = 0;
 
         } else {
             is_local = 1;
         }
 
     } else if (safe_str_eq(fsa_our_uname, host_to)) {
         is_local = 1;
     }
 
     if (is_for_dc || is_for_dcib || is_for_te) {
         if (AM_I_DC && is_for_te) {
             ROUTER_RESULT("Message result: Local relay");
             send_msg_via_ipc(msg, sys_to);
 
         } else if (AM_I_DC) {
             ROUTER_RESULT("Message result: DC/CRMd process");
             processing_complete = FALSE;        /* more to be done by caller */
         } else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE)
                    && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) {
 
             /* Neither the TE or PE should be sending messages
              *   to DC's on other nodes
              *
              * By definition, if we are no longer the DC, then
              *   the PE or TE's data should be discarded
              */
 
 #if SUPPORT_COROSYNC
             if (is_openais_cluster()) {
                 dest = text2msg_type(sys_to);
             }
 #endif
             ROUTER_RESULT("Message result: External relay to DC");
             send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
 
         } else {
             /* discard */
             ROUTER_RESULT("Message result: Discard, not DC");
         }
 
     } else if (is_local && (is_for_crm || is_for_cib)) {
         ROUTER_RESULT("Message result: CRMd process");
         processing_complete = FALSE;    /* more to be done by caller */
 
     } else if (is_local) {
         ROUTER_RESULT("Message result: Local relay");
         send_msg_via_ipc(msg, sys_to);
 
     } else {
         /* Destined for another node: hand it to the cluster layer */
         crm_node_t *node_to = NULL;
 
 #if SUPPORT_COROSYNC
         if (is_openais_cluster()) {
             dest = text2msg_type(sys_to);
 
             /* unrecognised or out-of-range destinations fall back to the crmd */
             if (dest == crm_msg_none || dest > crm_msg_stonith_ng) {
                 dest = crm_msg_crmd;
             }
         }
 #endif
 
         if (host_to) {
             node_to = crm_find_peer(0, host_to);
             if (node_to == NULL) {
                crm_err("Cannot route message to unknown node %s", host_to);
                return TRUE;
             }
         }
 
         ROUTER_RESULT("Message result: External relay");
         send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE);
     }
 
     return processing_complete;
 }
 
 /*!
  * \internal
  * \brief Extract and validate the client details from a hello message
  *
  * \param[in]  hello          Hello message data (may be NULL)
  * \param[out] client_name    Set to a newly allocated copy of the client name
  * \param[out] major_version  Set to a newly allocated copy of the major version
  * \param[out] minor_version  Set to a newly allocated copy of the minor version
  *
  * \return TRUE if all three fields were present and non-empty, else FALSE
  *         (in which case all outputs are NULL)
  */
 static gboolean
 process_hello_message(xmlNode * hello,
                       char **client_name, char **major_version, char **minor_version)
 {
     const char *name = NULL;
     const char *major = NULL;
     const char *minor = NULL;
 
     *client_name = NULL;
     *major_version = NULL;
     *minor_version = NULL;
 
     if (hello == NULL) {
         return FALSE;
     }
 
     name = crm_element_value(hello, "client_name");
     major = crm_element_value(hello, "major_version");
     minor = crm_element_value(hello, "minor_version");
 
     /* every field must be present and non-empty */
     if (name == NULL || name[0] == '\0') {
         crm_err("Hello message was not valid (field %s not found)", "client name");
         return FALSE;
     }
 
     if (major == NULL || major[0] == '\0') {
         crm_err("Hello message was not valid (field %s not found)", "major version");
         return FALSE;
     }
 
     if (minor == NULL || minor[0] == '\0') {
         crm_err("Hello message was not valid (field %s not found)", "minor version");
         return FALSE;
     }
 
     *client_name = strdup(name);
     *major_version = strdup(major);
     *minor_version = strdup(minor);
 
     crm_trace("Hello message ok");
     return TRUE;
 }
 
 /*!
  * \brief Check whether a client message may be processed, handling hello messages
  *
  * \param[in] client_msg     Message received from the client
  * \param[in] curr_client    Client that sent the message (may be NULL)
  * \param[in] proxy_session  Session ID used when there is no client object
  *
  * \return TRUE if the message should be processed further; FALSE for
  *         unidentified senders and for all hello messages, which are
  *         consumed here whether or not they are accepted
  */
 gboolean
 crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session)
 {
     char *client_name = NULL;
     char *major_version = NULL;
     char *minor_version = NULL;
     gboolean accepted = FALSE;
     const char *uuid = proxy_session;
     const char *op = crm_element_value(client_msg, F_CRM_TASK);
 
     if (curr_client) {
         uuid = curr_client->id;
     }
 
     if (uuid == NULL) {
         crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE));
         return FALSE;
     }
 
     if (safe_str_neq(CRM_OP_HELLO, op)) {
         /* only hello messages need the checks below */
         return TRUE;
     }
 
     accepted = process_hello_message(get_message_xml(client_msg, F_CRM_DATA),
                                      &client_name, &major_version, &minor_version);
 
     if (accepted && client_name == NULL) {
         crm_err("Bad client details (client_name=%s, uuid=%s)",
                 crm_str(client_name), uuid);
         accepted = FALSE;
     }
 
     if (accepted) {
         /* check version */
         int mav = atoi(major_version);
         int miv = atoi(minor_version);
 
         crm_trace("Checking client version number");
         if (mav < 0 || miv < 0) {
             crm_err("Client version (%d:%d) is not acceptable", mav, miv);
             accepted = FALSE;
         }
     }
 
     if (accepted) {
         crm_trace("Accepted client %s", client_name);
         if (curr_client) {
             curr_client->userdata = strdup(client_name);
         }
 
         crm_trace("Triggering FSA: %s", __FUNCTION__);
         mainloop_set_trigger(fsa_source);
 
     } else {
         crm_warn("Rejected client logon request");
         if (curr_client) {
             qb_ipcs_disconnect(curr_client->ipcs);
         }
     }
 
     free(minor_version);
     free(major_version);
     free(client_name);
 
     /* hello messages should never be processed further */
     return FALSE;
 }
 
 /*!
  * \brief Dispatch a message to the request or response handler
  *
  * \param[in] msg    Message XML
  * \param[in] cause  Cause to pass on to the request handler
  *
  * \return Next FSA input (always I_NULL for responses and unknown types)
  */
 enum crmd_fsa_input
 handle_message(xmlNode * msg, enum crmd_fsa_cause cause)
 {
     const char *type = NULL;
 
     CRM_CHECK(msg != NULL, return I_NULL);
 
     type = crm_element_value(msg, F_CRM_MSG_TYPE);
 
     if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) {
         return handle_request(msg, cause);
     }
 
     if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) {
         handle_response(msg);
 
     } else {
         crm_err("Unknown message type: %s", type);
     }
     return I_NULL;
 }
 
 /*!
  * \internal
  * \brief Handle a request to clear a resource's fail count and last failure
  *
  * \param[in] stored_msg  Request XML naming the resource and target node
  *
  * \return Next FSA input (always I_NULL)
  */
 static enum crmd_fsa_input
 handle_failcount_op(xmlNode * stored_msg)
 {
     static const char *attr_prefixes[] = { "fail-count", "last-failure" };
 
     unsigned int lpc = 0;
     const char *rsc = NULL;
     const char *uname = NULL;
     gboolean is_remote_node = FALSE;
     xmlNode *xml_rsc = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR);
 
     if (xml_rsc) {
         rsc = ID(xml_rsc);
     }
 
     if (rsc == NULL) {
         crm_log_xml_warn(stored_msg, "invalid failcount op");
         return I_NULL;
     }
 
     uname = crm_element_value(stored_msg, XML_LRM_ATTR_TARGET);
 
     /* the presence of a router node marks the target as a remote node */
     if (crm_element_value(stored_msg, XML_LRM_ATTR_ROUTER_NODE)) {
         is_remote_node = TRUE;
     }
 
     crm_info("Removing failcount for %s", rsc);
 
     /* clear "<prefix>-<rsc>" for each failure-tracking attribute in turn */
     for (lpc = 0; lpc < sizeof(attr_prefixes) / sizeof(attr_prefixes[0]); lpc++) {
         char *attr = crm_concat(attr_prefixes[lpc], rsc, '-');
 
         update_attrd(uname, attr, NULL, NULL, is_remote_node);
         free(attr);
     }
 
     lrm_clear_last_failure(rsc, uname);
     return I_NULL;
 }
 
+/*!
+ * \brief Process a CRM_OP_REMOTE_STATE message, refreshing the remote peer cache
+ *
+ * \param[in] msg  Message XML carrying the remote node's ID and membership flag
+ *
+ * \return Next FSA input (always I_NULL)
+ */
+static enum crmd_fsa_input
+handle_remote_state(xmlNode *msg)
+{
+    crm_node_t *remote_peer = NULL;
+    const char *new_state = NULL;
+    const char *remote_uname = ID(msg);
+    const char *remote_is_up = crm_element_value(msg, XML_NODE_IN_CLUSTER);
+
+    CRM_CHECK(remote_uname && remote_is_up, return I_NULL);
+
+    remote_peer = crm_remote_peer_get(remote_uname);
+    CRM_CHECK(remote_peer, return I_NULL);
+
+    /* map the boolean attribute onto the matching membership state */
+    if (crm_is_true(remote_is_up)) {
+        new_state = CRM_NODE_MEMBER;
+    } else {
+        new_state = CRM_NODE_LOST;
+    }
+
+    crm_update_peer_state(__FUNCTION__, remote_peer, new_state, 0);
+    return I_NULL;
+}
+
 /*!
  * \brief Process a request message addressed to this crmd
  *
  * The operation named in the message's F_CRM_TASK field selects what is
  * done. DC-only operations are checked first, then operations any node
  * may receive.
  *
  * \param[in] stored_msg  Request XML
  * \param[in] cause       How the request arrived (only consulted for
  *                        CRM_OP_RM_NODE_CACHE)
  *
  * \return Next FSA input, or I_NULL if the request needs no further input
  */
 enum crmd_fsa_input
 handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
 {
     xmlNode *msg = NULL;
     const char *op = crm_element_value(stored_msg, F_CRM_TASK);
 
     /* Optimize this for the DC - it has the most to do */
 
     if (op == NULL) {
         crm_log_xml_err(stored_msg, "Bad message");
         return I_NULL;
     }
 
     /* Every node records that the sender expects to go down;
      * only the DC goes on to act on the request (further below)
      */
     if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
         const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
         crm_node_t *node = crm_find_peer(0, from);
 
         crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
         if(AM_I_DC == FALSE) {
             return I_NULL; /* Done */
         }
     }
 
     /*========== DC-Only Actions ==========*/
     if (AM_I_DC) {
         if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
             return I_NODE_JOIN;
 
         } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
             return I_JOIN_REQUEST;
 
         } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
             return I_JOIN_RESULT;
 
         } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
             const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
             gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);
 
             if (is_set(fsa_input_register, R_SHUTDOWN)) {
                 crm_info("Shutting ourselves down (DC)");
                 return I_STOP;
 
             } else if (dc_match) {
                 crm_err("We didn't ask to be shut down, yet our"
                         " TE is telling us to. Better get out now!");
                 return I_TERMINATE;
 
             } else if (fsa_state != S_STOPPING) {
                 crm_err("Another node is asking us to shutdown" " but we think we're ok.");
                 return I_ELECTION;
             }
             /* otherwise fall through to the common actions below */
 
         } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
             /* a slave wants to shut down */
             /* create cib fragment and add to message */
             return handle_shutdown_request(stored_msg);
+
+        } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
+            /* a remote connection host is letting us know the node state */
+            return handle_remote_state(stored_msg);
         }
     }
 
     /*========== common actions ==========*/
     if (strcmp(op, CRM_OP_NOVOTE) == 0) {
         ha_msg_input_t fsa_input;
 
         /* queue the message for election counting; only the msg member is set */
         fsa_input.msg = stored_msg;
         register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                                A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);
 
     } else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
         throttle_update(stored_msg);
         return I_NULL;
 
     } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
         return handle_failcount_op(stored_msg);
 
     } else if (strcmp(op, CRM_OP_VOTE) == 0) {
         /* count the vote and decide what to do after that */
         ha_msg_input_t fsa_input;
 
         fsa_input.msg = stored_msg;
         register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                                A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);
 
         /* Sometimes we _must_ go into S_ELECTION */
         if (fsa_state == S_HALT) {
             crm_debug("Forcing an election from S_HALT");
             return I_ELECTION;
 #if 0
         } else if (AM_I_DC) {
             /* This is the old way of doing things but what is gained? */
             return I_ELECTION;
 #endif
         }
 
     } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
         crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
         return I_JOIN_OFFER;
 
     } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
         crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
         return I_JOIN_RESULT;
 
     } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0
                || strcmp(op, CRM_OP_LRM_FAIL) == 0
                || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) {
 
         /* re-address the message to the LRMD and let the router deliver it */
         crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
         return I_ROUTER;
 
     } else if (strcmp(op, CRM_OP_NOOP) == 0) {
         return I_NULL;
 
     } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {
 
         crm_shutdown(SIGTERM);
         /*return I_SHUTDOWN; */
         return I_NULL;
 
         /*========== (NOT_DC)-Only Actions ==========*/
     } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) {
 
         const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
         gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);
 
         /* only obey a shutdown that comes from our DC (or when we have none) */
         if (dc_match || fsa_our_dc == NULL) {
             if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) {
                 crm_err("We didn't ask to be shut down, yet our DC is telling us to.");
                 set_bit(fsa_input_register, R_STAYDOWN);
                 return I_STOP;
             }
             crm_info("Shutting down");
             return I_STOP;
 
         } else {
             crm_warn("Discarding %s op from %s", op, host_from);
         }
 
     } else if (strcmp(op, CRM_OP_PING) == 0) {
         /* eventually do some stuff to figure out
          * if we /are/ ok
          */
         const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO);
         xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING);
 
         crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
         crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to);
         crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state));
 
         /* Ok, so technically not so interesting, but CTS needs to see this */
         crm_notice("Current ping state: %s", fsa_state2string(fsa_state));
 
         msg = create_reply(stored_msg, ping);
         if (msg) {
             (void)relay_message(msg, TRUE);
         }
 
         free_xml(ping);
         free_xml(msg);
 
     } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
         int id = 0;
         const char *name = NULL;
 
         crm_element_value_int(stored_msg, XML_ATTR_ID, &id);
         name = crm_element_value(stored_msg, XML_ATTR_UNAME);
 
         /* a request arriving over IPC is broadcast to the cluster;
          * one arriving any other way is applied to the local cache
          */
         if(cause == C_IPC_MESSAGE) {
             /* NOTE(review): the request is created without data, so id/name do
              * not appear to be forwarded to peers here - confirm this is intended
              */
             msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
             /* NOTE(review): id is an int but is logged with %u below */
             if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
                 crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
             } else {
                 crm_notice("Instructing peers to remove references to node %s/%u", name, id);
             }
             free_xml(msg);
 
         } else {
             reap_crm_member(id, name);
         }
 
     } else {
         crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
         crm_log_xml_err(stored_msg, "Unexpected");
     }
 
     return I_NULL;
 }
 
 /*!
  * \brief Process a response message addressed to this crmd
  *
  * \param[in] stored_msg  Response XML
  */
 void
 handle_response(xmlNode * stored_msg)
 {
     const char *op = crm_element_value(stored_msg, F_CRM_TASK);
 
     if (op == NULL) {
         crm_log_xml_err(stored_msg, "Bad message");
         return;
     }
 
     if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
         /* Check if the PE answer been superseded by a subsequent request? */
         const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE);
 
         if (msg_ref == NULL) {
             crm_err("%s - Ignoring calculation with no reference", op);
 
         } else if (safe_str_eq(msg_ref, fsa_pe_ref)) {
             ha_msg_input_t fsa_input;
 
             fsa_input.msg = stored_msg;
             register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
             crm_trace("Completed: %s...", fsa_pe_ref);
 
         } else {
             crm_info("%s calculation %s is obsolete", op, msg_ref);
         }
         return;
     }
 
     if (strcmp(op, CRM_OP_VOTE) == 0
         || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
         /* replies to these operations are accepted and need no action */
         return;
     }
 
     crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
             op, crm_element_value(stored_msg, F_CRM_HOST_FROM), AM_I_DC ? "DC" : "CRMd");
 }
 
 /*!
  * \brief Record a node's shutdown request as a node attribute
  *
  * The request is handled here to avoid potential version issues where the
  * shutdown message/procedure may have been changed in later versions; this
  * way the DC is always in control of the shutdown.
  *
  * \param[in] stored_msg  Shutdown request XML
  *
  * \return Next FSA input (always I_NULL)
  */
 enum crmd_fsa_input
 handle_shutdown_request(xmlNode * stored_msg)
 {
     time_t requested_at = time(NULL);
     char *timestamp = NULL;
     const char *requester = crm_element_value(stored_msg, F_CRM_HOST_FROM);
 
     if (requester == NULL) {
         /* no sender recorded: we're shutting down and the DC */
         requester = fsa_our_uname;
     }
 
     crm_info("Creating shutdown request for %s (state=%s)", requester, fsa_state2string(fsa_state));
     crm_log_xml_trace(stored_msg, "message");
 
     /* the attribute value is the time at which the request was received */
     timestamp = crm_itoa(requested_at);
     update_attrd(requester, XML_CIB_ATTR_SHUTDOWN, timestamp, NULL, FALSE);
     free(timestamp);
 
     /* will be picked up by the TE as long as its running */
     return I_NULL;
 }
 
 /* msg is deleted by the time this returns */
 extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data);
 
 /*!
  * \brief Deliver a message to a local subsystem or IPC client
  *
  * \param[in] msg  Message XML (F_CRM_HOST_FROM is filled in if missing)
  * \param[in] sys  ID of the client or name of the subsystem to deliver to
  *
  * \return Result of the IPC send for a connected client, TRUE for messages
  *         handed to the TE, the LRMD or a proxy session, FALSE if \p sys
  *         is not recognised
  */
 gboolean
 send_msg_via_ipc(xmlNode * msg, const char *sys)
 {
     crm_client_t *client_channel = crm_client_get_by_id(sys);
 
     if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) {
         crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname);
     }
 
     if (client_channel != NULL) {
         /* Transient clients such as crmadmin */
         return crm_ipcs_send(client_channel, 0, msg, crm_ipc_server_event);
     }
 
     if (sys != NULL) {
         if (strcmp(sys, CRM_SYSTEM_TENGINE) == 0) {
             xmlNode *data = get_message_xml(msg, F_CRM_DATA);
 
             process_te_message(msg, data);
             return TRUE;
         }
 
         if (strcmp(sys, CRM_SYSTEM_LRMD) == 0) {
             /* build a temporary FSA input on the stack and invoke the LRM directly */
             fsa_data_t fsa_data;
             ha_msg_input_t fsa_input;
 
             fsa_input.msg = msg;
             fsa_input.xml = get_message_xml(msg, F_CRM_DATA);
 
             fsa_data.id = 0;
             fsa_data.actions = 0;
             fsa_data.data = &fsa_input;
             fsa_data.fsa_input = I_MESSAGE;
             fsa_data.fsa_cause = C_IPC_MESSAGE;
             fsa_data.origin = __FUNCTION__;
             fsa_data.data_type = fsa_dt_ha_msg;
 
 #ifdef FSA_TRACE
             crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE);
 #endif
             do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data);
             return TRUE;
         }
 
         if (crmd_is_proxy_session(sys)) {
             crmd_proxy_send(sys, msg);
             return TRUE;
         }
     }
 
     crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys));
     return FALSE;
 }
 
 /*!
  * \brief Wrap a message in a newly allocated ha_msg_input_t
  *
  * \param[in] orig  Message XML to store; it is not copied here
  *
  * \return Newly allocated input whose xml member is whatever
  *         get_message_xml() finds for F_CRM_DATA in \p orig
  * \note The result is meant to be released with delete_ha_msg_input(),
  *       which also frees the stored message.
  */
 ha_msg_input_t *
 new_ha_msg_input(xmlNode * orig)
 {
     ha_msg_input_t *input_copy = NULL;
 
     input_copy = calloc(1, sizeof(ha_msg_input_t));
     /* fail loudly on allocation failure instead of dereferencing NULL below */
     CRM_ASSERT(input_copy != NULL);
 
     input_copy->msg = orig;
     input_copy->xml = get_message_xml(input_copy->msg, F_CRM_DATA);
     return input_copy;
 }
 
 /*!
  * \brief Free an ha_msg_input_t together with the message it holds
  *
  * \param[in] orig  Input to free (NULL is ignored)
  */
 void
 delete_ha_msg_input(ha_msg_input_t * orig)
 {
     if (orig != NULL) {
         /* only msg is freed explicitly; xml is not freed separately here */
         free_xml(orig->msg);
         free(orig);
     }
 }
+
+/*!
+ * \internal
+ * \brief Tell the DC that a remote node has come up or gone down
+ *
+ * \param[in] node_name  Name of the remote node
+ * \param[in] node_up    TRUE if the node is up, FALSE if it is down
+ */
+void
+send_remote_state_message(const char *node_name, gboolean node_up)
+{
+    xmlNode *msg = NULL;
+    const char *direction = (node_up? "coming up" : "going down");
+
+    /* If we don't have a DC, or the message fails, we have a failsafe:
+     * the DC will eventually pick up the change via the CIB node state.
+     * The message allows it to happen sooner if possible.
+     */
+    if (fsa_our_dc == NULL) {
+        crm_debug("No DC to notify of pacemaker_remote node %s %s",
+                  node_name, direction);
+        return;
+    }
+
+    msg = create_request(CRM_OP_REMOTE_STATE, NULL, fsa_our_dc,
+                         CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
+
+    crm_info("Notifying DC %s of pacemaker_remote node %s %s",
+             fsa_our_dc, node_name, direction);
+    crm_xml_add(msg, XML_ATTR_ID, node_name);
+    crm_xml_add_boolean(msg, XML_NODE_IN_CLUSTER, node_up);
+    send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, msg,
+                         TRUE);
+    free_xml(msg);
+}
+
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
index ca4923f39d..181c462e31 100644
--- a/crmd/remote_lrmd_ra.c
+++ b/crmd/remote_lrmd_ra.c
@@ -1,907 +1,1064 @@
 /* 
  * Copyright (C) 2013 David Vossel <davidvossel@gmail.com>
  * 
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  * 
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 
 #include <crmd.h>
 #include <crmd_fsa.h>
 #include <crmd_messages.h>
 #include <crmd_callbacks.h>
 #include <crmd_lrm.h>
 #include <crm/lrmd.h>
 #include <crm/services.h>
 
 #define REMOTE_LRMD_RA "remote"
 
 /* The max start timeout before cmd retry */
 #define MAX_START_TIMEOUT_MS 10000
 
 typedef struct remote_ra_cmd_s {
     /*! the local node the cmd is issued from */
     char *owner;
     /*! the remote node the cmd is executed on */
     char *rsc_id;
     /*! the action to execute */
     char *action;
     /*! some string the client wants us to give it back */
     char *userdata;
     /*! start delay in ms */
     int start_delay;
     /*! timer id used for start delay. */
     int delay_id;
     /*! timeout in ms for cmd */
     int timeout;
     int remaining_timeout;
     /*! recurring interval in ms */
     int interval;
     /*! interval timer id */
     int interval_id;
     int reported_success;
     int monitor_timeout_id;
     int takeover_timeout_id;
     /*! action parameters */
     lrmd_key_value_t *params;
     /*! executed rc */
     int rc;
     int op_status;
     int call_id;
     time_t start_time;
     gboolean cancel;
 } remote_ra_cmd_t;
 
 enum remote_migration_status {
     expect_takeover = 1,
     takeover_complete,
 };
 
 typedef struct remote_ra_data_s {
     crm_trigger_t *work;
     remote_ra_cmd_t *cur_cmd;
     GList *cmds;
     GList *recurring_cmds;
 
     enum remote_migration_status migrate_status;
 
     gboolean active;
 } remote_ra_data_t;
 
 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
 static GList *fail_all_monitor_cmds(GList * list);
 
 static void
 free_cmd(gpointer user_data)
 {
     remote_ra_cmd_t *cmd = user_data;
 
     if (!cmd) {
         return;
     }
     if (cmd->delay_id) {
         g_source_remove(cmd->delay_id);
     }
     if (cmd->interval_id) {
         g_source_remove(cmd->interval_id);
     }
     if (cmd->monitor_timeout_id) {
         g_source_remove(cmd->monitor_timeout_id);
     }
     if (cmd->takeover_timeout_id) {
         g_source_remove(cmd->takeover_timeout_id);
     }
     free(cmd->owner);
     free(cmd->rsc_id);
     free(cmd->action);
     free(cmd->userdata);
     lrmd_key_value_freeall(cmd->params);
     free(cmd);
 }
 
 static int
 generate_callid(void)
 {
     static int remote_ra_callid = 0;
 
     remote_ra_callid++;
     if (remote_ra_callid <= 0) {
         remote_ra_callid = 1;
     }
 
     return remote_ra_callid;
 }
 
 static gboolean
 recurring_helper(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *connection_rsc = NULL;
 
     cmd->interval_id = 0;
     connection_rsc = lrm_state_find(cmd->rsc_id);
     if (connection_rsc && connection_rsc->remote_ra_data) {
         remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
 
         ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
 
         ra_data->cmds = g_list_append(ra_data->cmds, cmd);
         mainloop_set_trigger(ra_data->work);
     }
     return FALSE;
 }
 
 static gboolean
 start_delay_helper(gpointer data)
 {
     remote_ra_cmd_t *cmd = data;
     lrm_state_t *connection_rsc = NULL;
 
     cmd->delay_id = 0;
     connection_rsc = lrm_state_find(cmd->rsc_id);
     if (connection_rsc && connection_rsc->remote_ra_data) {
         remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
 
         mainloop_set_trigger(ra_data->work);
     }
     return FALSE;
 }
 
+/*!
+ * \internal
+ * \brief Handle cluster communication related to pacemaker_remote node joining
+ *
+ * Resets the node's recorded status, marks it as a member in the remote peer
+ * cache, then tells the DC (by message) and the CIB (by node state update).
+ *
+ * \param[in] node_name  Name of newly integrated pacemaker_remote node
+ */
+static void
+remote_node_up(const char *node_name)
+{
+    int call_opt, call_id = 0;
+    xmlNode *update, *state;
+    crm_node_t *node;
+
+    CRM_CHECK(node_name != NULL, return);
+    crm_info("Announcing pacemaker_remote node %s", node_name);
+
+    /* Clear node's operation history and transient attributes */
+    call_opt = crmd_cib_smart_opt();
+    erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
+    erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
+
+    /* Clear node's probed attribute */
+    update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
+
+    /* Ensure node is in the remote peer cache with member status */
+    node = crm_remote_peer_get(node_name);
+    CRM_CHECK(node != NULL, return);
+    crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
+
+    /* pacemaker_remote nodes don't participate in the membership layer, so
+     * cluster nodes aren't automatically notified when they come and go. We
+     * message the DC and update the CIB node state entry, so the DC learns of
+     * it sooner (message) or later (CIB refresh); others can query the CIB. */
+    send_remote_state_message(node_name, TRUE);
+
+    /* Build a status update holding this node's state entry */
+    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+    state = do_update_node_cib(node, node_update_cluster, update, __FUNCTION__);
+
+    /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
+     * needs to be fenced, this flag will allow various actions to determine
+     * whether the fencing has happened yet. */
+    crm_xml_add(state, XML_NODE_IS_FENCED, "0");
+
+    /* TODO: If the remote connection drops, and this (async) CIB update either
+     * failed or has not yet completed, later actions could mistakenly think the
+     * node has already been fenced (if the XML_NODE_IS_FENCED attribute was
+     * previously set, because it won't have been cleared). This could prevent
+     * actual fencing or allow recurring monitor failures to be cleared too
+     * soon. Ideally, we wouldn't rely on the CIB for the fenced status. */
+    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
+    if (call_id < 0) {
+        crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
+    }
+    free_xml(update);
+}
+
+/*!
+ * \internal
+ * \brief Handle cluster communication related to pacemaker_remote node leaving
+ *
+ * \param[in] node_name  Name of lost node (a NULL name is rejected)
+ */
+static void
+remote_node_down(const char *node_name)
+{
+    xmlNode *update;
+    int call_id = 0, call_opt = crmd_cib_smart_opt();
+    crm_node_t *node;
+
+    /* Reject a NULL name as remote_node_up() does, then clear all attributes */
+    CRM_CHECK(node_name != NULL, return);
+    update_attrd_remote_node_removed(node_name, NULL);
+
+    /* Ensure node is in the remote peer cache with lost state */
+    node = crm_remote_peer_get(node_name);
+    CRM_CHECK(node != NULL, return);
+    crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
+
+    /* Notify DC */
+    send_remote_state_message(node_name, FALSE);
+
+    /* Update CIB node state */
+    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+    do_update_node_cib(node, node_update_cluster, update, __FUNCTION__);
+    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
+    if (call_id < 0) {
+        crm_perror(LOG_ERR, "%s CIB node state update", node_name);
+    }
+    free_xml(update);
+}
+
+/*!
+ * \internal
+ * \brief Update remote node state according to a finished remote RA command
+ *
+ * \param[in] cmd  Completed remote RA command
+ */
+static void
+check_remote_node_state(remote_ra_cmd_t *cmd)
+{
+    const char *action = cmd->action;
+
+    /* A failed action never changes node state */
+    if (cmd->rc != PCMK_OCF_OK) {
+        return;
+    }
+
+    /* Successful monitors are deliberately ignored. That is right for routine
+     * recurring monitors and for monitors on nodes where the connection should
+     * not be (the cluster will stop it there). If an initial probe finds the
+     * connection already active where we want it, remote_node_up() would
+     * arguably be appropriate, but that case cannot be told apart here; since
+     * connections must be initiated by the cluster, it should be rare. */
+    if (safe_str_eq(action, "start")) {
+        remote_node_up(cmd->rsc_id);
+
+    } else if (safe_str_eq(action, "migrate_from")) {
+        /* A successful migration needs no remote_node_up(): the DC already
+         * knows the node is up, and LRM history etc. must not be cleared.
+         * The node does have to be added to this host's remote peer cache,
+         * because (unless this host is DC) it has not been tracking the
+         * remote node, and other code relies on the cache to distinguish
+         * remote nodes from unseen cluster nodes.
+         */
+        crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
+
+        CRM_CHECK(node != NULL, return);
+        crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
+
+    } else if (safe_str_eq(action, "stop")) {
+        lrm_state_t *lrm = lrm_state_find(cmd->rsc_id);
+        remote_ra_data_t *ra_data = lrm? lrm->remote_ra_data : NULL;
+
+        if (ra_data == NULL) {
+            /* No connection data to consult, so leave node state alone */
+
+        } else if (ra_data->migrate_status != takeover_complete) {
+            /* Without a completed takeover elsewhere, stop means down */
+            remote_node_down(cmd->rsc_id);
+
+        } else if (AM_I_DC == FALSE) {
+            /* Only the connection host and DC track node state, so after a
+             * migration away from a non-DC host, drop the node from the cache
+             * to avoid keeping stale info
+             */
+            crm_remote_peer_cache_remove(cmd->rsc_id);
+        }
+    }
+}
+
 static void
 report_remote_ra_result(remote_ra_cmd_t * cmd)
 {
     lrmd_event_data_t op = { 0, };
 
+    check_remote_node_state(cmd);
+
     op.type = lrmd_event_exec_complete;
     op.rsc_id = cmd->rsc_id;
     op.op_type = cmd->action;
     op.user_data = cmd->userdata;
     op.timeout = cmd->timeout;
     op.interval = cmd->interval;
     op.rc = cmd->rc;
     op.op_status = cmd->op_status;
     op.t_run = cmd->start_time;
     op.t_rcchange = cmd->start_time;
     if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
         op.t_rcchange = time(NULL);
         /* This edge case will likely never ever occur, but if it does the
          * result is that a failure will not be processed correctly. This is only
          * remotely possible because we are able to detect a connection resource's tcp
          * connection has failed at any moment after start has completed. The actual
          * recurring operation is just a connectivity ping.
          *
          * basically, we are not guaranteed that the first successful monitor op and
          * a subsequent failed monitor op will not occur in the same timestamp. We have to
          * make it look like the operations occurred at separate times though. */
         if (op.t_rcchange == op.t_run) {
             op.t_rcchange++;
         }
     }
 
     if (cmd->params) {
         lrmd_key_value_t *tmp;
 
         op.params = g_hash_table_new_full(crm_str_hash,
                                           g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
         for (tmp = cmd->params; tmp; tmp = tmp->next) {
             g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
         }
 
     }
     op.call_id = cmd->call_id;
     op.remote_nodename = cmd->owner;
 
     lrm_op_callback(&op);
 
     if (op.params) {
         g_hash_table_destroy(op.params);
     }
 }
 
 static void
 update_remaining_timeout(remote_ra_cmd_t * cmd)
 {
     cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
 }
 
 static gboolean
 retry_start_cmd_cb(gpointer data)
 {
     lrm_state_t *lrm_state = data;
     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
     remote_ra_cmd_t *cmd = NULL;
     int rc = -1;
 
     if (!ra_data || !ra_data->cur_cmd) {
         return FALSE;
     }
     cmd = ra_data->cur_cmd;
     if (safe_str_neq(cmd->action, "start") && safe_str_neq(cmd->action, "migrate_from")) {
         return FALSE;
     }
     update_remaining_timeout(cmd);
 
     if (cmd->remaining_timeout > 0) {
         rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
     }
 
     if (rc != 0) {
         cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
         cmd->op_status = PCMK_LRM_OP_ERROR;
         report_remote_ra_result(cmd);
 
         if (ra_data->cmds) {
             mainloop_set_trigger(ra_data->work);
         }
         ra_data->cur_cmd = NULL;
         free_cmd(cmd);
     } else {
         /* wait for connection event */
     }
 
     return FALSE;
 }
 
 
 static gboolean
 connection_takeover_timeout_cb(gpointer data)
 {
     lrm_state_t *lrm_state = NULL;
     remote_ra_cmd_t *cmd = data;
 
     crm_info("takeover event timed out for node %s", cmd->rsc_id);
     cmd->takeover_timeout_id = 0;
 
     lrm_state = lrm_state_find(cmd->rsc_id);
 
     handle_remote_ra_stop(lrm_state, cmd);
     free_cmd(cmd);
 
     return FALSE;
 }
 
 static gboolean
 monitor_timeout_cb(gpointer data)
 {
     lrm_state_t *lrm_state = NULL;
     remote_ra_cmd_t *cmd = data;
 
     lrm_state = lrm_state_find(cmd->rsc_id);
 
     crm_info("Poke async response timed out for node %s (%p)", cmd->rsc_id, lrm_state);
     cmd->monitor_timeout_id = 0;
     cmd->op_status = PCMK_LRM_OP_TIMEOUT;
     cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
 
     if (lrm_state && lrm_state->remote_ra_data) {
         remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
 
         if (ra_data->cur_cmd == cmd) {
             ra_data->cur_cmd = NULL;
         }
         if (ra_data->cmds) {
             mainloop_set_trigger(ra_data->work);
         }
     }
 
     report_remote_ra_result(cmd);
     free_cmd(cmd);
 
     if(lrm_state) {
         lrm_state_disconnect(lrm_state);
     }
     return FALSE;
 }
 
-xmlNode *
-simple_remote_node_status(const char *node_name, xmlNode * parent, const char *source)
-{
-    xmlNode *state = create_xml_node(parent, XML_CIB_TAG_STATE);
-
-    crm_xml_add(state, XML_NODE_IS_REMOTE, "true");
-    crm_xml_add(state, XML_ATTR_UUID,  node_name);
-    crm_xml_add(state, XML_ATTR_UNAME, node_name);
-    crm_xml_add(state, XML_ATTR_ORIGIN, source);
-
-    return state;
-}
-
 void
 remote_lrm_op_callback(lrmd_event_data_t * op)
 {
     gboolean cmd_handled = FALSE;
     lrm_state_t *lrm_state = NULL;
     remote_ra_data_t *ra_data = NULL;
     remote_ra_cmd_t *cmd = NULL;
 
     crm_debug("remote connection event - event_type:%s node:%s action:%s rc:%s op_status:%s",
               lrmd_event_type2str(op->type),
               op->remote_nodename,
               op->op_type ? op->op_type : "none",
               services_ocf_exitcode_str(op->rc), services_lrm_status_str(op->op_status));
 
     lrm_state = lrm_state_find(op->remote_nodename);
     if (!lrm_state || !lrm_state->remote_ra_data) {
         crm_debug("lrm_state info not found for remote lrmd connection event");
         return;
     }
     ra_data = lrm_state->remote_ra_data;
 
     /* Another client has connected to the remote daemon,
      * determine if this is expected. */
     if (op->type == lrmd_event_new_client) {
         /* great, we new this was coming */
         if (ra_data->migrate_status == expect_takeover) {
             ra_data->migrate_status = takeover_complete;
         } else {
             crm_err("Unexpected pacemaker_remote client takeover. Disconnecting");
             lrm_state_disconnect(lrm_state);
         }
         return;
     }
 
     /* filter all EXEC events up */
     if (op->type == lrmd_event_exec_complete) {
         if (ra_data->migrate_status == takeover_complete) {
             crm_debug("ignoring event, this connection is taken over by another node");
         } else {
             lrm_op_callback(op);
         }
         return;
     }
 
     if ((op->type == lrmd_event_disconnect) &&
         (ra_data->cur_cmd == NULL) &&
         (ra_data->active == TRUE)) {
 
         crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
         ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
         ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
         return;
     }
 
     if (!ra_data->cur_cmd) {
         crm_debug("no event to match");
         return;
     }
 
     cmd = ra_data->cur_cmd;
 
     /* Start actions and migrate from actions complete after connection
      * comes back to us. */
     if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") ||
                                            safe_str_eq(cmd->action, "migrate_from"))) {
 
         if (op->connection_rc < 0) {
             update_remaining_timeout(cmd);
             /* There isn't much of a reason to reschedule if the timeout is too small */
             if (cmd->remaining_timeout > 3000) {
                 crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                 g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                 return;
             } else {
                 crm_trace("can't reschedule start, remaining timeout too small %d",
                           cmd->remaining_timeout);
             }
             cmd->op_status = PCMK_LRM_OP_TIMEOUT;
             cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
 
         } else {
-
-            if (safe_str_eq(cmd->action, "start")) {
-                /* clear PROBED value if it happens to be set after start completes. */
-                update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
-            }
             lrm_state_reset_tables(lrm_state);
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             ra_data->active = TRUE;
         }
 
         crm_debug("remote lrmd connect event matched %s action. ", cmd->action);
         report_remote_ra_result(cmd);
         cmd_handled = TRUE;
 
     } else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) {
 
         if (cmd->monitor_timeout_id) {
             g_source_remove(cmd->monitor_timeout_id);
             cmd->monitor_timeout_id = 0;
         }
 
         /* Only report success the first time, after that only worry about failures.
          * For this function, if we get the poke pack, it is always a success. Pokes
          * only fail if the send fails, or the response times out. */
         if (!cmd->reported_success) {
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             report_remote_ra_result(cmd);
             cmd->reported_success = 1;
         }
 
         crm_debug("remote lrmd poke event matched %s action. ", cmd->action);
 
         /* success, keep rescheduling if interval is present. */
         if (cmd->interval && (cmd->cancel == FALSE)) {
             ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
             cmd->interval_id = g_timeout_add(cmd->interval, recurring_helper, cmd);
             cmd = NULL;         /* prevent free */
         }
         cmd_handled = TRUE;
 
     } else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) {
         if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
             cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
             cmd->op_status = PCMK_LRM_OP_ERROR;
             report_remote_ra_result(cmd);
             crm_err("remote-node %s unexpectedly disconneced during monitor operation", lrm_state->node_name);
         }
         cmd_handled = TRUE;
 
     } else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) {
 
         handle_remote_ra_stop(lrm_state, cmd);
         cmd_handled = TRUE;
 
     } else {
         crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
     }
 
     if (cmd_handled) {
         ra_data->cur_cmd = NULL;
         if (ra_data->cmds) {
             mainloop_set_trigger(ra_data->work);
         }
         free_cmd(cmd);
     }
 }
 
 static void
 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
 {
     remote_ra_data_t *ra_data = NULL;
 
     CRM_ASSERT(lrm_state);
     ra_data = lrm_state->remote_ra_data;
 
     if (ra_data->migrate_status != takeover_complete) {
-        /* only clear the status if this stop is not apart of a successful migration */
-        update_attrd_remote_node_removed(lrm_state->node_name, NULL);
         /* delete pending ops when ever the remote connection is intentionally stopped */
         g_hash_table_remove_all(lrm_state->pending_ops);
     } else {
         /* we no longer hold the history if this connection has been migrated */
         lrm_state_reset_tables(lrm_state);
     }
 
     ra_data->active = FALSE;
     lrm_state_disconnect(lrm_state);
     cmd->rc = PCMK_OCF_OK;
     cmd->op_status = PCMK_LRM_OP_DONE;
 
     if (ra_data->cmds) {
         g_list_free_full(ra_data->cmds, free_cmd);
     }
     if (ra_data->recurring_cmds) {
         g_list_free_full(ra_data->recurring_cmds, free_cmd);
     }
     ra_data->cmds = NULL;
     ra_data->recurring_cmds = NULL;
     ra_data->cur_cmd = NULL;
 
     report_remote_ra_result(cmd);
 }
 
 static int
 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
 {
     const char *server = NULL;
     lrmd_key_value_t *tmp = NULL;
     int port = 0;
     int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
 
     for (tmp = cmd->params; tmp; tmp = tmp->next) {
         if (safe_str_eq(tmp->key, "addr") || safe_str_eq(tmp->key, "server")) {
             server = tmp->value;
         }
         if (safe_str_eq(tmp->key, "port")) {
             port = atoi(tmp->value);
         }
     }
 
     return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
 }
 
 static gboolean
 handle_remote_ra_exec(gpointer user_data)
 {
     int rc = 0;
     lrm_state_t *lrm_state = user_data;
     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
     remote_ra_cmd_t *cmd;
     GList *first = NULL;
 
     if (ra_data->cur_cmd) {
         /* still waiting on previous cmd */
         return TRUE;
     }
 
     while (ra_data->cmds) {
         first = ra_data->cmds;
         cmd = first->data;
         if (cmd->delay_id) {
             /* still waiting for start delay timer to trip */
             return TRUE;
         }
 
         ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
         g_list_free_1(first);
 
         if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
             ra_data->migrate_status = 0;
             rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
             if (rc == 0) {
                 /* take care of this later when we get async connection result */
                 crm_debug("began remote lrmd connect, waiting for connect event.");
                 ra_data->cur_cmd = cmd;
                 return TRUE;
             } else {
                 crm_debug("connect failed, not expecting to match any connection event later");
                 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                 cmd->op_status = PCMK_LRM_OP_ERROR;
             }
             report_remote_ra_result(cmd);
 
         } else if (!strcmp(cmd->action, "monitor")) {
 
             if (lrm_state_is_connected(lrm_state) == TRUE) {
                 rc = lrm_state_poke_connection(lrm_state);
                 if (rc < 0) {
                     cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                     cmd->op_status = PCMK_LRM_OP_ERROR;
                 }
             } else {
                 rc = -1;
                 cmd->op_status = PCMK_LRM_OP_DONE;
                 cmd->rc = PCMK_OCF_NOT_RUNNING;
             }
 
             if (rc == 0) {
                 crm_debug("poked remote lrmd at node %s, waiting for async response.", cmd->rsc_id);
                 ra_data->cur_cmd = cmd;
                 cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                 return TRUE;
             }
             report_remote_ra_result(cmd);
 
         } else if (!strcmp(cmd->action, "stop")) {
 
             if (ra_data->migrate_status == expect_takeover) {
                 /* briefly wait on stop for the takeover event to occur. If the
                  * takeover event does not occur during the wait period, that's fine.
                  * It just means that the remote-node's lrm_status section is going to get
                  * cleared which will require all the resources running in the remote-node
                  * to be explicitly re-detected via probe actions.  If the takeover does occur
                  * successfully, then we can leave the status section intact. */
                 cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                 ra_data->cur_cmd = cmd;
                 return TRUE;
             }
 
             handle_remote_ra_stop(lrm_state, cmd);
 
         } else if (!strcmp(cmd->action, "migrate_to")) {
             ra_data->migrate_status = expect_takeover;
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             report_remote_ra_result(cmd);
         } else if (!strcmp(cmd->action, "reload")) {
             /* reloads are a no-op right now, add logic here when they become important */
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
             report_remote_ra_result(cmd);
         }
 
         free_cmd(cmd);
     }
 
     return TRUE;
 }
 
 static void
 remote_ra_data_init(lrm_state_t * lrm_state)
 {
     remote_ra_data_t *ra_data = NULL;
 
     if (lrm_state->remote_ra_data) {
         return;
     }
 
     ra_data = calloc(1, sizeof(remote_ra_data_t));
     ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
     lrm_state->remote_ra_data = ra_data;
 }
 
 void
 remote_ra_cleanup(lrm_state_t * lrm_state)
 {
     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
 
     if (!ra_data) {
         return;
     }
 
     if (ra_data->cmds) {
         g_list_free_full(ra_data->cmds, free_cmd);
     }
 
     if (ra_data->recurring_cmds) {
         g_list_free_full(ra_data->recurring_cmds, free_cmd);
     }
     mainloop_destroy_trigger(ra_data->work);
     free(ra_data);
     lrm_state->remote_ra_data = NULL;
 }
 
 gboolean
 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
 {
     if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
         return TRUE;
     }
     if (id && lrm_state_find(id) && safe_str_neq(id, fsa_our_uname)) {
         return TRUE;
     }
 
     return FALSE;
 }
 
 lrmd_rsc_info_t *
 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
 {
     lrmd_rsc_info_t *info = NULL;
 
     if ((lrm_state_find(rsc_id))) {
         info = calloc(1, sizeof(lrmd_rsc_info_t));
 
         info->id = strdup(rsc_id);
         info->type = strdup(REMOTE_LRMD_RA);
         info->class = strdup("ocf");
         info->provider = strdup("pacemaker");
     }
 
     return info;
 }
 
 static gboolean
 is_remote_ra_supported_action(const char *action)
 {
     if (!action) {
         return FALSE;
     } else if (strcmp(action, "start") &&
                strcmp(action, "stop") &&
                strcmp(action, "reload") &&
                strcmp(action, "migrate_to") &&
                strcmp(action, "migrate_from") && strcmp(action, "monitor")) {
         return FALSE;
     }
 
     return TRUE;
 }
 
 static GList *
 fail_all_monitor_cmds(GList * list)
 {
     GList *rm_list = NULL;
     remote_ra_cmd_t *cmd = NULL;
     GListPtr gIter = NULL;
 
     for (gIter = list; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
         if (cmd->interval > 0 && safe_str_eq(cmd->action, "monitor")) {
             rm_list = g_list_append(rm_list, cmd);
         }
     }
 
     for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
 
         cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
         cmd->op_status = PCMK_LRM_OP_ERROR;
         crm_trace("Pre-emptively failing %s %s (interval=%d, %s)", cmd->action, cmd->rsc_id, cmd->interval, cmd->userdata);
         report_remote_ra_result(cmd);
 
         list = g_list_remove(list, cmd);
         free_cmd(cmd);
     }
 
     /* frees only the list data, not the cmds */
     g_list_free(rm_list);
     return list;
 }
 
 static GList *
 remove_cmd(GList * list, const char *action, int interval)
 {
     remote_ra_cmd_t *cmd = NULL;
     GListPtr gIter = NULL;
 
     for (gIter = list; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
         if (cmd->interval == interval && safe_str_eq(cmd->action, action)) {
             break;
         }
         cmd = NULL;
     }
     if (cmd) {
         list = g_list_remove(list, cmd);
         free_cmd(cmd);
     }
     return list;
 }
 
 int
 remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval)
 {
     lrm_state_t *connection_rsc = NULL;
     remote_ra_data_t *ra_data = NULL;
 
     connection_rsc = lrm_state_find(rsc_id);
     if (!connection_rsc || !connection_rsc->remote_ra_data) {
         return -EINVAL;
     }
 
     ra_data = connection_rsc->remote_ra_data;
     ra_data->cmds = remove_cmd(ra_data->cmds, action, interval);
     ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, interval);
     if (ra_data->cur_cmd &&
         (ra_data->cur_cmd->interval == interval) &&
         (safe_str_eq(ra_data->cur_cmd->action, action))) {
 
         ra_data->cur_cmd->cancel = TRUE;
     }
 
     return 0;
 }
 
 static remote_ra_cmd_t *
 handle_dup_monitor(remote_ra_data_t *ra_data, int interval, const char *userdata)
 {
     GList *gIter = NULL;
     remote_ra_cmd_t *cmd = NULL;
 
     /* there are 3 places a potential duplicate monitor operation
      * could exist.
      * 1. recurring_cmds list. where the op is waiting for its next interval
      * 2. cmds list, where the op is queued to get executed immediately
      * 3. cur_cmd, which means the monitor op is in flight right now.
      */
     if (interval == 0) {
         return NULL;
     }
 
     if (ra_data->cur_cmd &&
         ra_data->cur_cmd->cancel == FALSE &&
         ra_data->cur_cmd->interval == interval &&
         safe_str_eq(ra_data->cur_cmd->action, "monitor")) {
 
         cmd = ra_data->cur_cmd;
         goto handle_dup;
     }
 
     for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
         if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) {
             goto handle_dup;
         }
     }
 
     for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
         cmd = gIter->data;
         if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) {
             goto handle_dup;
         }
     }
 
     return NULL;
 
 handle_dup:
 
     crm_trace("merging duplicate monitor cmd %s_monitor_%d", cmd->rsc_id, interval);
 
     /* update the userdata */
     if (userdata) {
        free(cmd->userdata);
        cmd->userdata = strdup(userdata);
     }
 
     /* if we've already reported success, generate a new call id */
     if (cmd->reported_success) {
         cmd->start_time = time(NULL);
         cmd->call_id = generate_callid();
         cmd->reported_success = 0;
     }
 
     /* if we have an interval_id set, that means we are in the process of
      * waiting for this cmd's next interval. instead of waiting, cancel
      * the timer and execute the action immediately */
     if (cmd->interval_id) {
         g_source_remove(cmd->interval_id);
         cmd->interval_id = 0;
         recurring_helper(cmd);
     }
 
     return cmd;  
 }
 
 int
 remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval,     /* ms */
                int timeout,     /* ms */
                int start_delay, /* ms */
                lrmd_key_value_t * params)
 {
     int rc = 0;
     lrm_state_t *connection_rsc = NULL;
     remote_ra_cmd_t *cmd = NULL;
     remote_ra_data_t *ra_data = NULL;
 
     if (is_remote_ra_supported_action(action) == FALSE) {
         rc = -EINVAL;
         goto exec_done;
     }
 
     connection_rsc = lrm_state_find(rsc_id);
     if (!connection_rsc) {
         rc = -EINVAL;
         goto exec_done;
     }
 
     remote_ra_data_init(connection_rsc);
     ra_data = connection_rsc->remote_ra_data;
 
     cmd = handle_dup_monitor(ra_data, interval, userdata);
     if (cmd) {
        return cmd->call_id;
     }
 
     cmd = calloc(1, sizeof(remote_ra_cmd_t));
     cmd->owner = strdup(lrm_state->node_name);
     cmd->rsc_id = strdup(rsc_id);
     cmd->action = strdup(action);
     cmd->userdata = strdup(userdata);
     cmd->interval = interval;
     cmd->timeout = timeout;
     cmd->start_delay = start_delay;
     cmd->params = params;
     cmd->start_time = time(NULL);
 
     cmd->call_id = generate_callid();
 
     if (cmd->start_delay) {
         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
     }
 
     ra_data->cmds = g_list_append(ra_data->cmds, cmd);
     mainloop_set_trigger(ra_data->work);
 
     return cmd->call_id;
   exec_done:
 
     lrmd_key_value_freeall(params);
     return rc;
 }
+
+/*!
+ * \internal
+ * \brief Immediately fail all monitors of a remote node, if proxied here
+ *
+ * \param[in] node_name  Name of pacemaker_remote node
+ */
+void
+remote_ra_fail(const char *node_name)
+{
+    lrm_state_t *lrm_state = lrm_state_find(node_name);
+    remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
+
+    /* remote_ra_data may be unset, e.g. after remote_ra_cleanup() */
+    if (ra_data && lrm_state_is_connected(lrm_state)) {
+        crm_info("Failing monitors on pacemaker_remote node %s", node_name);
+        ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+        ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+    }
+}
+
diff --git a/crmd/te_events.c b/crmd/te_events.c
index 09abb131cf..21739c44c7 100644
--- a/crmd/te_events.c
+++ b/crmd/te_events.c
@@ -1,642 +1,573 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 #include <crm/cib.h>
 #include <crm/msg_xml.h>
 
 #include <crm/common/xml.h>
 #include <tengine.h>
 
 #include <crmd_fsa.h>
 
 char *failed_stop_offset = NULL;
 char *failed_start_offset = NULL;
 
 /*!
  * \brief Fail unconfirmed graph actions that target or route through a node
  *
  * Synapses that are already confirmed or failed are skipped, as are
  * pseudo-actions, confirmed actions and fencing operations. Any other action
  * is marked failed when its target UUID, or the UUID of its router node,
  * equals \p down_node. If at least one action was failed, the transition is
  * aborted.
  *
  * \param[in,out] graph      Transition graph to scan (may be NULL)
  * \param[in]     down_node  Node identifier compared against each action's
  *                           target UUID and router node UUID
  *
  * \return TRUE if at least one action was failed, FALSE otherwise
  */
 gboolean
 fail_incompletable_actions(crm_graph_t * graph, const char *down_node)
 {
     xmlNode *last_action = NULL;
 
     GListPtr gIter = NULL;
     GListPtr gIter2 = NULL;
 
     if (graph == NULL || graph->complete) {
         return FALSE;
     }
 
     gIter = graph->synapses;
     for (; gIter != NULL; gIter = gIter->next) {
         synapse_t *synapse = (synapse_t *) gIter->data;
 
         if (synapse->confirmed || synapse->failed) {
             /* We've already been here */
             continue;
         }
 
         gIter2 = synapse->actions;
         for (; gIter2 != NULL; gIter2 = gIter2->next) {
             /* These describe a single action and must start out NULL on
              * every iteration. Otherwise a router UUID resolved for an
              * earlier action would still be compared against down_node for
              * a later action that has no (or an unknown) router node.
              */
             const char *target_uuid = NULL;
             const char *router = NULL;
             const char *router_uuid = NULL;
             crm_action_t *action = (crm_action_t *) gIter2->data;
 
             if (action->type == action_type_pseudo || action->confirmed) {
                 continue;
             } else if (action->type == action_type_crm) {
                 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
 
                 /* Fencing operations are never failed here */
                 if (safe_str_eq(task, CRM_OP_FENCE)) {
                     continue;
                 }
             }
 
             target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
             router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
             if (router) {
                 crm_node_t *node = crm_get_peer(0, router);
                 if (node) {
                     router_uuid = node->uuid;
                 }
             }
 
             if (safe_str_eq(target_uuid, down_node) || safe_str_eq(router_uuid, down_node)) {
                 action->failed = TRUE;
                 synapse->failed = TRUE;
                 last_action = action->xml;
                 stop_te_timer(action->timer);
                 update_graph(graph, action);
 
                 if (synapse->executed) {
                     crm_notice("Action %d (%s) was pending on %s (offline)",
                                action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
                 } else {
                     crm_info("Action %d (%s) is scheduled for %s (offline)",
                              action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
                 }
             }
         }
     }
 
     /* last_action doubles as the "something was failed" flag and as the
      * XML handed to abort_transition()
      */
     if (last_action != NULL) {
         crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
         abort_transition(INFINITY, tg_restart, "Node failure", last_action);
         return TRUE;
     }
 
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief Update failure-related node attributes if warranted
  *
  * \param[in] event            XML describing operation that (maybe) failed
  * \param[in] event_node_uuid  Node that event occurred on
  * \param[in] rc               Actual operation return code
  * \param[in] target_rc        Expected operation return code
  * \param[in] do_update        If TRUE, do update regardless of operation type
  * \param[in] ignore_failures  If TRUE, update last failure but not fail count
  *
  * \return TRUE if this was not a direct nack, success or lrm status refresh
  *
  * \note TRUE is also returned when the sanity checks on the node name or the
  *       operation key fail; in that case no attribute is updated.
  */
 static gboolean
 update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
                  int target_rc, gboolean do_update, gboolean ignore_failures)
 {
     int interval = 0;
 
     char *task = NULL;
     char *rsc_id = NULL;
 
     const char *value = NULL;
     const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
     const char *on_uname = crm_peer_uname(event_node_uuid);
     const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
 
     /* Nothing needs to be done for success, lrm status refresh,
      * or direct nack (internal code for "busy, try again")
      */
     if ((rc == CRM_DIRECT_NACK_RC) || (rc == target_rc)) {
         return FALSE;
     } else if (safe_str_eq(origin, "build_active_RAs")) {
         crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
                   id, rc, on_uname);
         return FALSE;
     }
 
     /* Sanity check */
     CRM_CHECK(on_uname != NULL, return TRUE);
     CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval),
               crm_err("Couldn't parse: %s", ID(event)); goto bail);
     CRM_CHECK(task != NULL, goto bail);
     CRM_CHECK(rsc_id != NULL, goto bail);
 
     /* Decide whether update is necessary and what value to use */
     if ((interval > 0) || safe_str_eq(task, CRMD_ACTION_PROMOTE)
         || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
         /* Recurring operations, promote and demote: value stays NULL here,
          * so the fail count is incremented below
          */
         do_update = TRUE;
 
     } else if (safe_str_eq(task, CRMD_ACTION_START)) {
         do_update = TRUE;
         /* Fall back to infinity if no start failure offset has been set */
         if (failed_start_offset == NULL) {
             failed_start_offset = strdup(INFINITY_S);
         }
         value = failed_start_offset;
 
     } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
         do_update = TRUE;
         /* Fall back to infinity if no stop failure offset has been set */
         if (failed_stop_offset == NULL) {
             failed_stop_offset = strdup(INFINITY_S);
         }
         value = failed_stop_offset;
     }
 
     /* Fail count will be either incremented or set to infinity */
     if (value == NULL || safe_str_neq(value, INFINITY_S)) {
         value = XML_NVPAIR_ATTR_VALUE "++";
     }
 
     if (do_update) {
         /* Current time as a string, stored as the last-failure value */
         char *now = crm_itoa(time(NULL));
         char *attr_name = NULL;
         gboolean is_remote_node = FALSE;
 
         /* Nodes found in the remote peer cache are flagged as remote when
          * the attribute updates are sent
          */
         if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
             is_remote_node = TRUE;
         }
 
         crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
                  (ignore_failures? "last failure" : "failcount"),
                  rsc_id, on_uname, task, rc, value, now);
 
         /* Update the fail count, if we're not ignoring failures */
         if (!ignore_failures) {
             attr_name = crm_concat("fail-count", rsc_id, '-');
             update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
             free(attr_name);
         }
 
         /* Update the last failure time (even if we're ignoring failures,
          * so that failure can still be detected and shown, e.g. by crm_mon)
          */
         attr_name = crm_concat("last-failure", rsc_id, '-');
         update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
         free(attr_name);
 
         free(now);
     }
 
   bail:
     /* Both pointers are either still NULL or were filled in through
      * parse_op_key() (presumably heap-allocated there -- confirm)
      */
     free(rsc_id);
     free(task);
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief Collapse an operation result into "done" or "error"
  *
  * The decision is based purely on whether the actual return code equals the
  * expected one; the originally reported status is only used for tracing.
  * A mismatch is logged as a warning unless the return code is the direct
  * nack code.
  *
  * \param[in] action       CRM action instance of operation
  * \param[in] orig_status  Original reported operation status
  * \param[in] rc           Actual operation return code
  * \param[in] target_rc    Expected operation return code
  *
  * \return PCMK_LRM_OP_DONE if rc equals target_rc, PCMK_LRM_OP_ERROR otherwise
  *
  * \note This assumes that PCMK_LRM_OP_PENDING operations have already been
  *       filtered (otherwise they will get simplified as well).
  */
 static int
 status_from_rc(crm_action_t * action, int orig_status, int rc, int target_rc)
 {
     if (target_rc != rc) {
         /* Unexpected result: warn about everything except direct nacks */
         if (rc != CRM_DIRECT_NACK_RC) {
             const char *op_key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
             const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
 
             crm_warn("Action %d (%s) on %s failed (target: %d vs. rc: %d): %s",
                      action->id, op_key, node, target_rc, rc,
                      services_lrm_status_str(PCMK_LRM_OP_ERROR));
         }
         return PCMK_LRM_OP_ERROR;
     }
 
     /* Expected result */
     crm_trace("Target rc: == %d", rc);
     if (orig_status != PCMK_LRM_OP_DONE) {
         crm_trace("Re-mapping op status to PCMK_LRM_OP_DONE for rc=%d", rc);
     }
     return PCMK_LRM_OP_DONE;
 }
 
-static void
-process_remote_node_action(crm_action_t *action, xmlNode *event)
-{
-    xmlNode *child = NULL;
-
-    /* The whole point of this function is to detect when a remote-node
-     * is integrated into the cluster or has failed, and properly abort
-     * the transition so resources can be placed on the new node or fail
-     * all pending actions on a lost node.
-     */
-
-    if (crm_remote_peer_cache_size() == 0) {
-        return;
-    } else if (action->type != action_type_rsc) {
-        return;
-    } else if (action->confirmed == FALSE) {
-        return;
-    } else if (!action->failed || safe_str_neq(crm_element_value(action->xml, XML_LRM_ATTR_TASK), "start")) {
-        /* we only care about failed remote nodes, or remote nodes that have just come online. */
-        return;
-    }
-
-    for (child = __xml_first_child(action->xml); child != NULL; child = __xml_next(child)) {
-        const char *provider;
-        const char *type;
-        const char *rsc;
-        const char *action_type;
-        crm_node_t *remote_peer;
-
-        if (safe_str_neq(crm_element_name(child), XML_CIB_TAG_RESOURCE)) {
-            continue;
-        }
-
-        provider = crm_element_value(child, XML_AGENT_ATTR_PROVIDER);
-        type = crm_element_value(child, XML_ATTR_TYPE);
-        rsc = ID(child);
-        action_type = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
-
-        if (safe_str_neq(provider, "pacemaker") || safe_str_neq(type, "remote") || rsc == NULL) {
-            break;
-        }
-
-        remote_peer = crm_get_peer_full(0, rsc, CRM_GET_PEER_REMOTE);
-        if (remote_peer == NULL) {
-            break;
-        }
-
-        /* if a remote node connection failed, and this failure is not related to a probe
-         * action, make sure to cancel any in-flight operations occurring on that remote node
-         * since those actions will timeout. we don't want to wait around for the timeouts */
-        if (action->failed &&
-            !(safe_str_eq(action_type, "monitor") && action->interval == 0)) {
-
-            /* the rsc id is actually the remote node id. we want to mark all
-             * in-flight actions on a failed remote node as incompletable */
-            fail_incompletable_actions(transition_graph, rsc);
-
-        } else if (!action->failed &&
-                   safe_str_eq(remote_peer->state, CRM_NODE_LOST) &&
-                   safe_str_eq(action_type, "start")) {
-            /* A remote node will be placed in the "lost" state after
-             * it has been successfully fenced.  After successfully connecting
-             * to a remote-node after being fenced, we need to abort the transition
-             * so resources can be placed on the newly integrated remote-node */
-            abort_transition(INFINITY, tg_restart, "Remote-node re-discovered.", event);
-        }
-
-        return;
-    }
-}
-
 /*!
  * \internal
  * \brief Confirm action and update transition graph, aborting transition on failures
  *
  * \param[in/out] action           CRM action instance of this operation
  * \param[in]     event            Event instance of this operation
  * \param[in]     op_status        Original reported operation status (remapped
  *                                 internally from the return codes)
  * \param[in]     op_rc            Actual operation return code
  * \param[in]     target_rc        Expected operation return code
  * \param[in]     ignore_failures  Whether to ignore operation failures
  *
  * \note This assumes that PCMK_LRM_OP_PENDING operations have already been
  *       filtered (otherwise they may be treated as failures).
  */
 static void
 match_graph_event(crm_action_t *action, xmlNode *event, int op_status,
                   int op_rc, int target_rc, gboolean ignore_failures)
 {
     const char *target = NULL;
     const char *this_event = NULL;
     const char *ignore_s = "";
 
     /* Remap operation status based on return code */
     op_status = status_from_rc(action, op_status, op_rc, target_rc);
 
     /* Process OP status */
     switch (op_status) {
         case PCMK_LRM_OP_DONE:
             break;
         case PCMK_LRM_OP_ERROR:
         case PCMK_LRM_OP_TIMEOUT:
         case PCMK_LRM_OP_NOTSUPPORTED:
             /* An ignored failure is only noted in the log message below;
              * the action itself is not marked failed
              */
             if (ignore_failures) {
                 ignore_s = ", ignoring failure";
             } else {
                 action->failed = TRUE;
             }
             break;
         case PCMK_LRM_OP_CANCELLED:
             /* do nothing?? */
             crm_err("Don't know what to do for cancelled ops yet");
             break;
         default:
             /*
              PCMK_LRM_OP_ERROR_HARD,
              PCMK_LRM_OP_ERROR_FATAL,
              PCMK_LRM_OP_NOT_INSTALLED
              */
             action->failed = TRUE;
             crm_err("Unsupported action result: %d", op_status);
     }
 
     /* stop this event's timer if it had one */
     stop_te_timer(action->timer);
     te_action_confirmed(action);
 
     update_graph(transition_graph, action);
     trigger_graph();
 
     /* Abort with a priority one above that of the failed action's synapse */
     if (action->failed) {
         abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event);
     }
 
     this_event = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
     target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     crm_info("Action %s (%d) confirmed on %s (rc=%d%s)",
              crm_str(this_event), action->id, crm_str(target), op_rc, ignore_s);
-
-    /* determine if this action affects a remote-node's online/offline status */
-    process_remote_node_action(action, event);
 }
 
 /*!
  * \brief Look up an action of the current transition graph by its ID
  *
  * \param[in] id         Action ID to search for
  * \param[in] confirmed  If TRUE, stop the matching action's timer and mark
  *                       the action confirmed before returning it
  *
  * \return First action whose ID equals \p id, or NULL if there is none
  */
 crm_action_t *
 get_action(int id, gboolean confirmed)
 {
     GListPtr syn_iter = NULL;
 
     for (syn_iter = transition_graph->synapses; syn_iter != NULL; syn_iter = syn_iter->next) {
         GListPtr act_iter = NULL;
         synapse_t *synapse = (synapse_t *) syn_iter->data;
 
         for (act_iter = synapse->actions; act_iter != NULL; act_iter = act_iter->next) {
             crm_action_t *candidate = (crm_action_t *) act_iter->data;
 
             if (candidate->id != id) {
                 continue;
             }
 
             if (confirmed) {
                 stop_te_timer(candidate->timer);
                 te_action_confirmed(candidate);
             }
             return candidate;
         }
     }
 
     return NULL;
 }
 
 /*!
  * \brief Find the cancel action for an operation key in the transition graph
  *
  * \param[in] id    Operation key that the cancel action must carry
  * \param[in] node  If not NULL, target node UUID that the action must match
  *
  * \return First matching cancel action, or NULL if there is none
  */
 crm_action_t *
 get_cancel_action(const char *id, const char *node)
 {
     GListPtr syn_iter = NULL;
 
     for (syn_iter = transition_graph->synapses; syn_iter != NULL; syn_iter = syn_iter->next) {
         GListPtr act_iter = NULL;
         synapse_t *synapse = (synapse_t *) syn_iter->data;
 
         for (act_iter = synapse->actions; act_iter != NULL; act_iter = act_iter->next) {
             crm_action_t *candidate = (crm_action_t *) act_iter->data;
             const char *op_name = crm_element_value(candidate->xml, XML_LRM_ATTR_TASK);
             const char *op_key = NULL;
             const char *target_uuid = NULL;
 
             /* Only cancellations are of interest */
             if (safe_str_neq(CRMD_ACTION_CANCEL, op_name)) {
                 continue;
             }
 
             op_key = crm_element_value(candidate->xml, XML_LRM_ATTR_TASK_KEY);
             if (safe_str_neq(op_key, id)) {
                 crm_trace("Wrong key %s for %s on %s", op_key, id, node);
                 continue;
             }
 
             /* The node is only checked when the caller supplied one */
             target_uuid = crm_element_value(candidate->xml, XML_LRM_ATTR_TARGET_UUID);
             if (node && safe_str_neq(target_uuid, node)) {
                 crm_trace("Wrong node %s for %s on %s", target_uuid, id, node);
                 continue;
             }
 
             crm_trace("Found %s on %s", id, node);
             return candidate;
         }
     }
 
     return NULL;
 }
 
 /*!
  * \brief Find a transition event that would have made a specified node down
  *
  * \param[in] id      If nonzero, also consider this action ID a match
  * \param[in] target  UUID of node to match
  * \param[in] filter  If not NULL, only match CRM actions of this type
  * \param[in] quiet   If FALSE, log a warning if no match found
  *
  * \return Matching event if found, NULL otherwise
  *
  * \note "Down" events are CRM_OP_FENCE and CRM_OP_SHUTDOWN.
+ * \todo This should detect normal pacemaker_remote node stop events,
+ *       where action->type is action_type_rsc,
+ *       XML_LRM_ATTR_TASK is CRMD_ACTION_STOP,
+ *       and the affected resource creates a remote node that matches target.
+ *       Then, peer_update_callback() could ignore these.
  */
 crm_action_t *
 match_down_event(int id, const char *target, const char *filter, bool quiet)
 {
     const char *this_action = NULL;
     const char *this_node = NULL;
     crm_action_t *match = NULL;
 
     GListPtr gIter = NULL;
     GListPtr gIter2 = NULL;
 
     gIter = transition_graph->synapses;
     for (; gIter != NULL; gIter = gIter->next) {
         synapse_t *synapse = (synapse_t *) gIter->data;
 
         /* lookup event */
         gIter2 = synapse->actions;
         for (; gIter2 != NULL; gIter2 = gIter2->next) {
             crm_action_t *action = (crm_action_t *) gIter2->data;
 
             /* An explicit ID match wins before any type, task or node
              * filtering is applied
              */
             if (id > 0 && action->id == id) {
                 match = action;
                 break;
             }
 
             this_action = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
 
             if (action->type != action_type_crm) {
                 continue;
 
             } else if (filter != NULL && safe_str_neq(this_action, filter)) {
                 continue;
 
             } else if (safe_str_neq(this_action, CRM_OP_FENCE)
                        && safe_str_neq(this_action, CRM_OP_SHUTDOWN)) {
                 continue;
             }
 
             this_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
 
             /* A missing UUID is logged but not treated specially; the
              * comparison with target below still runs
              */
             if (this_node == NULL) {
                 crm_log_xml_err(action->xml, "No node uuid");
             }
 
             if (safe_str_neq(this_node, target)) {
                 crm_trace("Action %d node %s is not a match for %s",
                           action->id, this_node, target);
                 continue;
             }
 
             /* Remember the matched action's ID for the log message below */
             match = action;
             id = action->id;
             break;
         }
 
         /* Leave the outer loop as soon as the inner one found something */
         if (match != NULL) {
             break;
         }
     }
 
     if (match != NULL) {
         crm_debug("Match found for action %d: %s on %s", id,
                   crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
 
     } else if (id > 0) {
         /* A specific action was requested but is not in the graph */
         crm_err("No match for action %d", id);
 
     } else if(quiet == FALSE) {
         crm_warn("No match for shutdown action on %s", target);
     }
 
     return match;
 }
 
 /*!
  * \brief Process an operation result event against the current transition graph
  *
  * Events without a transition key are ignored. Pending operations are
  * skipped. Everything else either aborts the transition (unexpected,
  * foreign, old, late or unknown events) or is matched against its graph
  * action, after which failure attributes are updated where warranted.
  *
  * \param[in] event       XML of the operation result (must not be NULL)
  * \param[in] event_node  Node the event occurred on, passed on to
  *                        update_failcount() as the node UUID
  *
  * \return TRUE if the event was initiated by a different node or belongs to
  *         another transition and update_failcount() did not treat it as a
  *         failure, FALSE otherwise
  */
 gboolean
 process_graph_event(xmlNode * event, const char *event_node)
 {
     int rc = -1;
     int status = -1;
     int callid = -1;
 
     int action_num = -1;
     crm_action_t *action = NULL;
 
     int target_rc = -1;
     int transition_num = -1;
     char *update_te_uuid = NULL;
 
     gboolean stop_early = FALSE;
     gboolean ignore_failures = FALSE;
     const char *id = NULL;
     const char *desc = NULL;
     const char *magic = NULL;
 
     CRM_ASSERT(event != NULL);
 
 /*
 <lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1355361636" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
 */
 
     id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
     crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
     crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
     crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
 
     magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
     if (magic == NULL) {
         /* non-change */
         return FALSE;
     }
 
     if (decode_transition_key(magic, &update_te_uuid, &transition_num,
                               &action_num, &target_rc) == FALSE) {
         crm_err("Invalid event %s.%d detected: %s", id, callid, magic);
         abort_transition(INFINITY, tg_restart, "Bad event", event);
         return FALSE;
     }
 
     if (status == PCMK_LRM_OP_PENDING) {
         goto bail;
     }
 
     if (transition_num == -1) {
         desc = "initiated outside of the cluster";
         abort_transition(INFINITY, tg_restart, "Unexpected event", event);
 
     } else if ((action_num < 0) || (crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE)) {
         desc = "initiated by a different node";
         abort_transition(INFINITY, tg_restart, "Foreign event", event);
         stop_early = TRUE;      /* This could be an lrm status refresh */
 
     } else if (transition_graph->id != transition_num) {
         desc = "arrived really late";
         abort_transition(INFINITY, tg_restart, "Old event", event);
         stop_early = TRUE;      /* This could be an lrm status refresh */
 
     } else if (transition_graph->complete) {
         desc = "arrived late";
         abort_transition(INFINITY, tg_restart, "Inactive graph", event);
 
     } else {
         action = get_action(action_num, FALSE);
 
         if (action == NULL) {
             desc = "unknown";
             abort_transition(INFINITY, tg_restart, "Unknown event", event);
 
         } else {
             ignore_failures = safe_str_eq(
                 crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore");
             match_graph_event(action, event, status, rc, target_rc, ignore_failures);
         }
     }
 
     if (action && (rc == target_rc)) {
         crm_trace("Processed update to %s: %s", id, magic);
     } else {
         if (update_failcount(event, event_node, rc, target_rc,
                              (transition_num == -1), ignore_failures)) {
-            /* Turns out this wasn't an lrm status refresh update aferall */
+            /* Turns out this wasn't an lrm status refresh update after all */
             stop_early = FALSE;
             desc = "failed";
         }
         /* desc is still NULL when a matched action produced a result that
          * update_failcount() does not count as a failure (e.g. a direct
          * nack), so never hand it to %s unguarded
          */
         crm_info("Detected action (%d.%d) %s.%d=%s: %s", transition_num,
                  action_num, id, callid, services_ocf_exitcode_str(rc), crm_str(desc));
     }
 
   bail:
     free(update_te_uuid);
     return stop_early;
 }
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 3a9f491e4f..c2e16f5e5f 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -1,638 +1,648 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 
 #include <crm/msg_xml.h>
 
 #include <crm/common/xml.h>
 #include <tengine.h>
 #include <crmd_fsa.h>
+#include <crmd_lrm.h>
 #include <crmd_messages.h>
 #include <throttle.h>
 #include <crm/fencing/internal.h>
 
 crm_trigger_t *stonith_reconnect = NULL;
 
 /*
  * stonith cleanup list
  *
  * If the DC is shot, proper notifications might not go out.
  * The stonith cleanup list allows the cluster to (re-)send
  * notifications once a new DC is elected.
  */
 
 static GListPtr stonith_cleanup_list = NULL;
 
 /*!
  * \internal
  * \brief Append a node to the stonith cleanup list
  *
  * The list stores its own copy of the name, so the caller keeps ownership
  * of \p target.
  *
  * \param[in] target  Name of node to add
  */
 void
 add_stonith_cleanup(const char *target) {
     char *name_copy = strdup(target);
 
     stonith_cleanup_list = g_list_append(stonith_cleanup_list, name_copy);
 }
 
 /*!
  * \internal
  * \brief Drop every entry for a node from the stonith cleanup list
  *
  * Each matching list link is unlinked and the name stored in it is freed.
  *
  * \param[in] target  Name of node to remove
  */
 void
 remove_stonith_cleanup(const char *target)
 {
     GListPtr cursor = stonith_cleanup_list;
 
     while (cursor != NULL) {
         GListPtr link = cursor;
         char *stored_name = link->data;
 
         /* Step forward first: the current link may be deleted below */
         cursor = cursor->next;
 
         if (!safe_str_eq(target, stored_name)) {
             continue;
         }
 
         crm_trace("Removing %s from the cleanup list", stored_name);
         stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, link);
         free(stored_name);
     }
 }
 
 /*!
  * \internal
  * \brief Discard the whole stonith cleanup list without sending updates
  *
  * Every stored name is logged and freed, then the list itself is released
  * and reset to empty. An already empty list is left alone.
  */
 void
 purge_stonith_cleanup()
 {
     GListPtr entry = stonith_cleanup_list;
 
     if (entry == NULL) {
         return;
     }
 
     while (entry != NULL) {
         char *node_name = entry->data;
 
         crm_info("Purging %s from stonith cleanup list", node_name);
         free(node_name);
         entry = entry->next;
     }
 
     g_list_free(stonith_cleanup_list);
     stonith_cleanup_list = NULL;
 }
 
 /*!
  * \internal
  * \brief Send a stonith update for each cleanup list entry, then empty the list
  *
  * For every stored node name the peer is looked up, a stonith update is sent
  * with the peer's UUID, and the stored name is freed. Afterwards the list
  * itself is released and reset.
  */
 void
 execute_stonith_cleanup()
 {
     GListPtr entry = stonith_cleanup_list;
 
     while (entry != NULL) {
         char *node_name = entry->data;
         crm_node_t *peer = crm_get_peer(0, node_name);
         const char *peer_uuid = crm_peer_uuid(peer);
 
         crm_notice("Marking %s, target of a previous stonith action, as clean", node_name);
         send_stonith_update(NULL, node_name, peer_uuid);
         free(node_name);
 
         entry = entry->next;
     }
 
     g_list_free(stonith_cleanup_list);
     stonith_cleanup_list = NULL;
 }
 
 /* end stonith cleanup list functions */
 
 /*!
  * \internal
  * \brief Fail every unconfirmed fencing action in a transition graph
  *
  * Only CRM-type actions whose task is CRM_OP_FENCE are affected, and only in
  * synapses that are not yet confirmed. If at least one action was failed,
  * the transition is aborted.
  *
  * \param[in,out] graph  Transition graph to scan (may be NULL)
  *
  * \return TRUE if at least one action was failed, FALSE otherwise
  */
 static gboolean
 fail_incompletable_stonith(crm_graph_t * graph)
 {
     GListPtr syn_iter = NULL;
     xmlNode *last_failed_xml = NULL;
 
     if (graph == NULL) {
         return FALSE;
     }
 
     for (syn_iter = graph->synapses; syn_iter != NULL; syn_iter = syn_iter->next) {
         GListPtr act_iter = NULL;
         synapse_t *synapse = (synapse_t *) syn_iter->data;
 
         if (synapse->confirmed) {
             continue;
         }
 
         for (act_iter = synapse->actions; act_iter != NULL; act_iter = act_iter->next) {
             const char *op_name = NULL;
             crm_action_t *action = (crm_action_t *) act_iter->data;
 
             if (action->confirmed || action->type != action_type_crm) {
                 continue;
             }
 
             op_name = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
             if (op_name == NULL || !safe_str_eq(op_name, CRM_OP_FENCE)) {
                 continue;
             }
 
             action->failed = TRUE;
             last_failed_xml = action->xml;
             update_graph(graph, action);
             crm_notice("Failing action %d (%s): STONITHd terminated",
                        action->id, ID(action->xml));
         }
     }
 
     if (last_failed_xml == NULL) {
         return FALSE;
     }
 
     crm_warn("STONITHd failure resulted in un-runnable actions");
     abort_transition(INFINITY, tg_restart, "Stonith failure", last_failed_xml);
     return TRUE;
 }
 
 /*!
  * \internal
  * \brief React to the loss of the fencing daemon connection
  *
  * If fencing is flagged as required, the loss is logged as critical and the
  * reconnect trigger is set; otherwise it is only logged. The API object, if
  * any, is marked disconnected, and on the DC all outstanding fencing actions
  * of the transition graph are failed.
  *
  * \param[in] st  Unused
  * \param[in] e   Unused
  */
 static void
 tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
 {
     if (!is_set(fsa_input_register, R_ST_REQUIRED)) {
         crm_info("Fencing daemon disconnected");
 
     } else {
         crm_crit("Fencing daemon connection failed");
         mainloop_set_trigger(stonith_reconnect);
     }
 
     /* cbchan will be garbage at this point, arrange for it to be reset */
     if (stonith_api != NULL) {
         stonith_api->state = stonith_disconnected;
     }
 
     /* Only the DC owns a transition graph worth cleaning up */
     if (!AM_I_DC) {
         return;
     }
 
     fail_incompletable_stonith(transition_graph);
     trigger_graph();
 }
 
 #if SUPPORT_CMAN
 #  include <libfenced.h>
 #endif
 
 char *te_client_id = NULL;
 
 #ifdef HAVE_SYS_REBOOT_H
 #  include <unistd.h>
 #  include <sys/reboot.h>
 #endif
 
 static void
 tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
 {
     if(te_client_id == NULL) {
         te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid());
     }
 
     if (st_event == NULL) {
         crm_err("Notify data not found");
         return;
     }
 
     crmd_notify_fencing_op(st_event);
 
     if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
         crm_notice("%s was successfully unfenced by %s (at the request of %s)",
                    st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
                 /* TODO: Hook up st_event->device */
         return;
 
     } else if (safe_str_eq("on", st_event->action)) {
         crm_err("Unfencing of %s by %s failed: %s (%d)",
                 st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
                 pcmk_strerror(st_event->result), st_event->result);
         return;
 
     } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
         crm_crit("We were allegedly just fenced by %s for %s!",
                  st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */
 
         qb_log_fini(); /* Try to get the above log message to disk - somehow */
 
         /* Get out ASAP and do not come back up.
          *
          * Triggering a reboot is also not the worst idea either since
          * the rest of the cluster thinks we're safely down
          */
 
 #ifdef RB_HALT_SYSTEM
         reboot(RB_HALT_SYSTEM);
 #endif
 
         /*
          * If reboot() fails or is not supported, coming back up will
          * probably lead to a situation where the other nodes set our
          * status to 'lost' because of the fencing callback and will
          * discard subsequent election votes with:
          *
          * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
          *
          * So just stay dead, something is seriously messed up anyway.
          *
          */
         exit(100); /* None of our wrappers since we already called qb_log_fini() */
         return;
     }
 
     if (st_event->result == pcmk_ok &&
         safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
         st_fail_count_reset(st_event->target);
     }
 
     crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
                st_event->target, st_event->result == pcmk_ok ? "" : " not",
                st_event->action,
                st_event->executioner ? st_event->executioner : "<anyone>",
                st_event->origin, pcmk_strerror(st_event->result), st_event->id,
                st_event->client_origin ? st_event->client_origin : "<unknown>");
 
 #if SUPPORT_CMAN
     if (st_event->result == pcmk_ok && is_cman_cluster()) {
         int local_rc = 0;
         int confirm = 0;
         char *target_copy = strdup(st_event->target);
 
         /* In case fenced hasn't noticed yet
          *
          * Any fencing that has been initiated will be completed by way of the fence_pcmk redirect
          */
         local_rc = fenced_external(target_copy);
         if (local_rc != 0) {
             crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                     local_rc);
         } else {
             crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
         }
 
         /* In case fenced is already trying to shoot it */
         confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
         if (confirm >= 0) {
             int ignore = 0;
             int len = strlen(target_copy);
 
             errno = 0;
             local_rc = write(confirm, target_copy, len);
             ignore = write(confirm, "\n", 1);
 
             if(ignore < 0 && errno == EBADF) {
                 crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);
 
             } else if (local_rc < len) {
                 crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);
 
             } else {
                 fsync(confirm);
                 crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
             }
             close(confirm);
         }
         free(target_copy);
     }
 #endif
 
     if (st_event->result == pcmk_ok) {
         crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
         const char *uuid = NULL;
         gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
 
         if (peer == NULL) {
             return;
         }
 
         uuid = crm_peer_uuid(peer);
 
         crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
         if(AM_I_DC) {
             /* The DC always sends updates */
             send_stonith_update(NULL, st_event->target, uuid);
 
             if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
 
                 /* Abort the current transition graph if it wasn't us
                  * that invoked stonith to fence someone
                  */
                 crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                 abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
             }
 
             /* Assume it was our leader if we don't currently have one */
         } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
             && !is_set(peer->flags, crm_remote_node)) {
 
             crm_notice("Target %s our leader %s (recorded: %s)",
                        fsa_our_dc ? "was" : "may have been", st_event->target,
                        fsa_our_dc ? fsa_our_dc : "<unset>");
 
             /* Given the CIB resyncing that occurs around elections,
              * have one node update the CIB now and, if the new DC is different,
              * have them do so too after the election
              */
             if (we_are_executioner) {
                 send_stonith_update(NULL, st_event->target, uuid);
             }
             add_stonith_cleanup(st_event->target);
         }
 
+        /* If the target is a remote node, and we host its connection,
+         * immediately fail all monitors so it can be recovered quickly.
+         * The connection won't necessarily drop when a remote node is fenced,
+         * so the failure might not otherwise be detected until the next poke.
+         */
+        if (is_set(peer->flags, crm_remote_node)) {
+            remote_ra_fail(st_event->target);
+        }
+
         crmd_peer_down(peer, TRUE);
      }
 }
 
 /*
  * Connect to the fencing daemon and register for its notifications.
  *
  * Creates the stonith API object on first use.  If the API is not in the
  * disconnected state nothing else is done.  Otherwise up to 30 connection
  * attempts are made, each preceded by a one second sleep.
  *
  * When user_data is not NULL a failed attempt is not retried in-line:
  * the stonith_reconnect trigger is set (if fencing is still required)
  * and the function returns straight away.
  *
  * Always returns TRUE.
  */
 gboolean
 te_connect_stonith(gpointer user_data)
 {
     int attempts_left = 30;
     int rc = pcmk_ok;
 
     if (stonith_api == NULL) {
         stonith_api = stonith_api_new();
     }
 
     if (stonith_api->state != stonith_disconnected) {
         crm_trace("Still connected");
         return TRUE;
     }
 
     while (attempts_left-- > 0) {
         crm_debug("Attempting connection to fencing daemon...");
 
         sleep(1);
         rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
         if (rc == pcmk_ok) {
             break;
         }
 
         if (user_data == NULL) {
             /* Blocking mode: one more second here plus the one at the top
              * of the loop gives the advertised 2s pause
              */
             crm_err("Sign-in failed: pausing and trying again in 2s...");
             sleep(1);
             continue;
         }
 
         /* Trigger-driven mode: hand the retry back to the mainloop */
         if (is_set(fsa_input_register, R_ST_REQUIRED)) {
             crm_err("Sign-in failed: triggered a retry");
             mainloop_set_trigger(stonith_reconnect);
         } else {
             crm_info("Sign-in failed, but no longer required");
         }
         return TRUE;
     }
 
     CRM_CHECK(rc == pcmk_ok, return TRUE);      /* If not, we failed 30 times... just get out */
 
     stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
                                              tengine_stonith_connection_destroy);
     stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
                                              tengine_stonith_notify);
 
     crm_trace("Connected");
     return TRUE;
 }
 
 /*
  * Stop a transition-engine timer.
  *
  * Returns FALSE if timer is NULL or has no active source (it was already
  * stopped); otherwise removes the glib source, clears source_id and
  * returns TRUE.
  *
  * A timer whose reason is timeout_abort is described as the "global timer"
  * in trace messages, anything else as an "action timer".
  */
 gboolean
 stop_te_timer(crm_action_timer_t * timer)
 {
     const char *timer_desc = "action timer";
 
     if (timer == NULL) {
         return FALSE;
     }
 
     if (timer->reason == timeout_abort) {
         timer_desc = "global timer";
     }
 
     if (timer->source_id == 0) {
         crm_trace("%s was already stopped", timer_desc);
         return FALSE;
     }
 
     /* Log "Stopping" exactly once, and only when something is really stopped */
     crm_trace("Stopping %s", timer_desc);
     g_source_remove(timer->source_id);
     timer->source_id = 0;
 
     return TRUE;
 }
 
 /*
  * Run the current transition graph, if there is one.
  *
  * Nothing is run while the FSA is in one of the states listed in the first
  * switch below.  For an incomplete graph, the batch limit is replaced by the
  * value from throttle_get_total_job_limit() for the duration of the run and
  * then restored.  Once the graph is finished (successfully or not) it is
  * marked complete and the crmd is notified.
  *
  * Always returns TRUE.
  */
 gboolean
 te_graph_trigger(gpointer user_data)
 {
     if (transition_graph == NULL) {
         crm_debug("Nothing to do");
         return TRUE;
     }
 
     crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
 
     switch (fsa_state) {
         case S_STARTING:
         case S_PENDING:
         case S_NOT_DC:
         case S_HALT:
         case S_ILLEGAL:
         case S_STOPPING:
         case S_TERMINATE:
             return TRUE;
         default:
             break;
     }
 
     if (!transition_graph->complete) {
         enum transition_status graph_rc = -1;
         const int configured_limit = transition_graph->batch_limit;
 
         transition_graph->batch_limit = throttle_get_total_job_limit(configured_limit);
         graph_rc = run_graph(transition_graph);
         transition_graph->batch_limit = configured_limit; /* Restore the configured value */
 
         /* significant overhead... */
         /* print_graph(LOG_DEBUG_3, transition_graph); */
 
         switch (graph_rc) {
             case transition_active:
                 crm_trace("Transition not yet complete");
                 return TRUE;
 
             case transition_pending:
                 crm_trace("Transition not yet complete - no actions fired");
                 return TRUE;
 
             case transition_complete:
                 break;
 
             default:
                 crm_warn("Transition failed: %s", transition_status(graph_rc));
                 print_graph(LOG_NOTICE, transition_graph);
                 break;
         }
     }
 
     crm_debug("Transition %d is now complete", transition_graph->id);
     transition_graph->complete = TRUE;
     notify_crmd(transition_graph);
 
     return TRUE;
 }
 
 /*
  * Request that the transition graph be processed by setting the
  * transition_trigger mainloop trigger.
  *
  * fn and line identify the caller and are used only in the trace message.
  */
 void
 trigger_graph_processing(const char *fn, int line)
 {
     crm_trace("%s:%d - Triggered graph processing", fn, line);
     mainloop_set_trigger(transition_trigger);
 }
 
 /*
  * Abort the current transition and log why.
  *
  * abort_priority, abort_action and abort_text are passed on to
  * update_abort_priority() when the graph has not completed yet; abort_text
  * is also included in the log message.
  *
  * reason, if not NULL, is the XML element that caused the abort.  Its
  * ancestors are searched for an enclosing diff element (to report the CIB
  * version) and, if a diff was found, for an enclosing change element (to
  * report the operation and path).
  *
  * fn and line identify the caller and are used only for logging.
  *
  * Nothing happens (beyond an info message) while the FSA is in one of the
  * states listed in the switch below.  If the graph had already completed,
  * either the transition timer is restarted or I_PE_CALC is registered;
  * otherwise the transition trigger is set.
  */
 void
 abort_transition_graph(int abort_priority, enum transition_action abort_action,
                        const char *abort_text, xmlNode * reason, const char *fn, int line)
 {
     int add[] = { 0, 0, 0 };
     int del[] = { 0, 0, 0 };
     int level = LOG_INFO;
     xmlNode *diff = NULL;
     xmlNode *change = NULL;
 
     CRM_CHECK(transition_graph != NULL, return);
 
     switch (fsa_state) {
         case S_STARTING:
         case S_PENDING:
         case S_NOT_DC:
         case S_HALT:
         case S_ILLEGAL:
         case S_STOPPING:
         case S_TERMINATE:
             crm_info("Abort %s suppressed: state=%s (complete=%d)",
                      abort_text, fsa_state2string(fsa_state), transition_graph->complete);
             return;
         default:
             break;
     }
 
     /* Make sure any queued calculations are discarded ASAP */
     free(fsa_pe_ref);
     fsa_pe_ref = NULL;
 
     if (transition_graph->complete == FALSE) {
         if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
             level = LOG_NOTICE;
         }
     }
 
     if(reason) {
         xmlNode *search = NULL;
 
         /* Find the diff that contains the reason, if any */
         for(search = reason; search; search = search->parent) {
             if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
                 diff = search;
                 break;
             }
         }
 
         if(diff) {
             xml_patch_versions(diff, add, del);
 
             /* Find the individual change that contains the reason, if any */
             for(search = reason; search; search = search->parent) {
                 if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
                     change = search;
                     break;
                 }
             }
         }
     }
 
     if(reason == NULL) {
         do_crm_log(level, "Transition aborted: %s (source=%s:%d, %d)",
                    abort_text, fn, line, transition_graph->complete);
 
     } else if(change == NULL) {
         char *local_path = xml_get_path(reason);
 
         do_crm_log(level, "Transition aborted by %s.%s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                    TYPE(reason), ID(reason), abort_text, add[0], add[1], add[2], fn, line, local_path, transition_graph->complete);
         free(local_path);
 
     } else {
         const char *kind = NULL;
         const char *op = crm_element_value(change, XML_DIFF_OP);
         const char *path = crm_element_value(change, XML_DIFF_PATH);
 
         /* Either attribute may be missing, in which case op and/or path is
          * NULL; never hand NULL to strcmp()/strrchr()
          */
         gboolean op_is_create = (op != NULL) && (strcmp(op, "create") == 0);
         gboolean op_is_modify = (op != NULL) && (strcmp(op, "modify") == 0);
         gboolean op_is_delete = (op != NULL) && (strcmp(op, "delete") == 0);
 
         if(change == reason) {
             /* The change itself was given as the reason: describe the
              * element it carries instead
              */
             if(op_is_create) {
                 reason = reason->children;
 
             } else if(op_is_modify) {
                 reason = first_named_child(reason, XML_DIFF_RESULT);
                 if(reason) {
                     reason = reason->children;
                 }
             }
         }
 
         kind = TYPE(reason);
         if(op_is_delete) {
             const char *shortpath = path ? strrchr(path, '/') : NULL;
 
             do_crm_log(level, "Transition aborted by deletion of %s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                        shortpath?shortpath+1:path, abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
 
         } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) { 
             do_crm_log(level, "Transition aborted by %s, %s=%s: %s (%s cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                        crm_element_value(reason, XML_ATTR_ID),
                        crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
                        crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
                        abort_text, op, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
 
         } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
             const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
 
             do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (magic=%s, cib=%d.%d.%d, source=%s:%d, %d)",
                        crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
                        crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
                        magic, add[0], add[1], add[2], fn, line, transition_graph->complete);
 
         } else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
                    || safe_str_eq(XML_CIB_TAG_NODE, kind)) {
             const char *uname = crm_peer_uname(ID(reason));
 
             do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (cib=%d.%d.%d, source=%s:%d, %d)",
                        kind, op, uname ? uname : ID(reason), abort_text,
                        add[0], add[1], add[2], fn, line, transition_graph->complete);
 
         } else {
             do_crm_log(level, "Transition aborted by %s.%s '%s': %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                        TYPE(reason), ID(reason), op?op:"change", abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
         }
     }
 
     if (transition_graph->complete) {
         /* Nothing is running: schedule a fresh calculation instead */
         if (transition_timer->period_ms > 0) {
             crm_timer_stop(transition_timer);
             crm_timer_start(transition_timer);
         } else {
             register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
         }
         return;
     }
 
     mainloop_set_trigger(transition_trigger);
 }
diff --git a/cts/CTStests.py b/cts/CTStests.py
index 6aeb818449..3b0a7f6f88 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -1,3167 +1,3168 @@
 '''CTS: Cluster Testing System: Tests module
 
 There are a few things we want to do here:
 
  '''
 
 __copyright__ = '''
 Copyright (C) 2000, 2001 Alan Robertson <alanr@unix.sh>
 Licensed under the GNU GPL.
 
 Add RecourceRecover testcase Zhao Kai <zhaokai@cn.ibm.com>
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 
 #
 #        SPECIAL NOTE:
 #
 #        Tests may NOT implement any cluster-manager-specific code in them.
 #        EXTEND the ClusterManager object to provide the base capabilities
 #        the test needs if you need to do something that the current CM classes
 #        do not.  Otherwise you screw up the whole point of the object structure
 #        in CTS.
 #
 #                Thank you.
 #
 
 import time, os, re, string, subprocess, tempfile
 from stat import *
 from cts import CTS
 from cts.CTSaudits import *
 from cts.CTSvars   import *
 from cts.patterns  import PatternSelector
 from cts.logging   import LogFactory
 from cts.remote    import RemoteFactory
 from cts.watcher   import LogWatcher
 from cts.environment import EnvFactory
 
 AllTestClasses = [ ]
 
 
 class CTSTest:
     '''
     A Cluster test.
     We implement the basic set of properties and behaviors for a generic
     cluster test.
 
     Cluster tests track their own statistics.
     We keep each of the kinds of counts we track as separate {name,value}
     pairs.
     '''
 
     def __init__(self, cm):
         '''Set up the statistics, helper objects and default test flags.
 
         cm is stored as self.CM; cm["Name"] selects the pattern templates.
         '''
         #self.name="the unnamed test"
         self.Stats = {
             "calls": 0,
             "success": 0,
             "failure": 0,
             "skipped": 0,
             "auditfail": 0,
         }
 
         #if not issubclass(cm.__class__, ClusterManager):
         #    raise ValueError("Must be a ClusterManager object")
         self.CM = cm
         self.Env = EnvFactory().getInstance()
         self.rsh = RemoteFactory().getInstance()
         self.logger = LogFactory()
         self.templates = PatternSelector(cm["Name"])
         self.Audits = []
         self.timeout = 120
         self.passed = 1
         self.is_loop = 0
         self.is_unsafe = 0
         self.is_docker_unsafe = 0
         self.is_experimental = 0
         self.is_container = 0
         self.is_valgrind = 0
         self.benchmark = 0  # which tests to benchmark
         self.timer = {}  # timers
 
     def log(self, args):
         '''Pass args to the logger's log() method'''
         self.logger.log(args)
 
     def debug(self, args):
         '''Pass args to the logger's debug() method'''
         self.logger.debug(args)
 
     def has_key(self, key):
         '''Return True if a statistic called key exists'''
         return key in self.Stats
 
     def __setitem__(self, key, value):
         '''Set the statistic called key'''
         self.Stats[key] = value
 
     def __getitem__(self, key):
         '''Return the statistic called key, or None if there is none'''
         # The class defines no __contains__, so "foo in X" would start
         # probing X[0], X[1], ...; fail loudly instead
         if str(key) == "0":
             raise ValueError("Bad call to 'foo in X', should reference 'foo in X.Stats' instead")
 
         if key in self.Stats:
             return self.Stats[key]
         return None
 
     def log_mark(self, msg):
         '''Write a debug marker containing the test name, msg and the time'''
         self.debug("MARK: test %s %s %d" % (self.name,msg,time.time()))
 
     def get_timer(self,key = "test"):
         '''Return the start time recorded for timer key, or 0 if not set'''
         return self.timer.get(key, 0)
 
     def set_timer(self,key = "test"):
         '''Record (and return) the current time as the start of timer key'''
         self.timer[key] = time.time()
         return self.timer[key]
 
     def log_timer(self,key = "test"):
         '''Log, discard and return the elapsed time of timer key.
 
         Returns 0 (and logs nothing) if the timer was never started.
         '''
         elapsed = 0
         if key in self.timer:
             elapsed = time.time() - self.timer[key]
             s = key == "test" and self.name or "%s:%s" % (self.name,key)
             self.debug("%s runtime: %.2f" % (s, elapsed))
             del self.timer[key]
         return elapsed
 
     def incr(self, name):
         '''Increment (or initialize) the value associated with the given name'''
         if name not in self.Stats:
             self.Stats[name] = 0
         self.Stats[name] = self.Stats[name]+1
 
         # Reset the test passed boolean
         if name == "calls":
             self.passed = 1
 
     def failure(self, reason="none"):
         '''Increment the failure count'''
         self.passed = 0
         self.incr("failure")
         self.logger.log(("Test %s" % self.name).ljust(35) + " FAILED: %s" % reason)
         return None
 
     def success(self):
         '''Increment the success count'''
         self.incr("success")
         return 1
 
     def skipped(self):
         '''Increment the skipped count'''
         self.incr("skipped")
         return 1
 
     def __call__(self, node):
         '''Perform the given test (abstract: always raises ValueError)'''
         raise ValueError("Abstract Class member (__call__)")
 
     def audit(self):
         '''Run every audit in self.Audits; return 1 if all passed, else 0'''
         passed = 1
         for audit in self.Audits:
             if not audit():
                 self.logger.log("Internal %s Audit %s FAILED." % (self.name, audit.name()))
                 self.incr("auditfail")
                 passed = 0
         return passed
 
     def setup(self, node):
         '''Setup the given test'''
         return self.success()
 
     def teardown(self, node):
         '''Tear down the given test'''
         return self.success()
 
     def create_watch(self, patterns, timeout, name=None):
         '''Return a LogWatcher for patterns, named after the test by default'''
         if not name:
             name = self.name
         return LogWatcher(self.Env["LogFileName"], patterns, name, timeout, kind=self.Env["LogWatcher"], hosts=self.Env["nodes"])
 
     def local_badnews(self, prefix, watch, local_ignore=[]):
         '''Log every match pending on watch that is not ignored.
 
         Matches containing " CTS: ", the prefix itself, or any regex in
         local_ignore are skipped.  Gives up after 100 logged matches.
         Ends the watch and returns the number of matches logged.
         '''
         errcount = 0
         if not prefix:
             prefix = "LocalBadNews:"
 
         # local_ignore is only read, never modified, so the shared default
         # list is harmless here
         ignorelist = [" CTS: ", prefix]
         ignorelist.extend(local_ignore)
 
         while errcount < 100:
             match = watch.look(0)
             if not match:
                 break
             if not any(re.search(ignore, match) for ignore in ignorelist):
                 self.logger.log(prefix + " " + match)
                 errcount = errcount + 1
         else:
             self.logger.log("Too many errors!")
 
         watch.end()
         return errcount
 
     def is_applicable(self):
         '''Return TRUE if we are applicable in the current test configuration'''
         return self.is_applicable_common()
 
     def is_applicable_common(self):
         '''Return TRUE if we are applicable in the current test configuration'''
         #raise ValueError("Abstract Class member (is_applicable)")
 
         if self.is_loop and not self.Env["loop-tests"]:
             return 0
         elif self.is_unsafe and not self.Env["unsafe-tests"]:
             return 0
         elif self.is_valgrind and not self.Env["valgrind-tests"]:
             return 0
         elif self.is_experimental and not self.Env["experimental-tests"]:
             return 0
         elif self.is_docker_unsafe and self.Env["docker"]:
             return 0
         elif self.is_container and not self.Env["container-tests"]:
             return 0
         elif self.Env["benchmark"] and self.benchmark == 0:
             return 0
 
         return 1
 
     def find_ocfs2_resources(self, node):
         '''Find the o2cb resource and the resources colocated with it.
 
         Parses the output of "crm_resource -c" run on node, stores the
         results in self.r_o2cb and self.r_ocfs2, and returns the number of
         colocated resources found.
         '''
         self.r_o2cb = None
         self.r_ocfs2 = []
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 r = AuditResource(self.CM, line)
                 if r.rtype == "o2cb" and r.parent != "NA":
                     # Record first so that the message shows what was found
                     self.r_o2cb = r.parent
                     self.debug("Found o2cb: %s" % self.r_o2cb)
             if re.search("^Constraint", line):
                 c = AuditConstraint(self.CM, line)
                 if c.type == "rsc_colocation" and c.target == self.r_o2cb:
                     self.r_ocfs2.append(c.rsc)
 
         self.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2))
         return len(self.r_ocfs2)
 
     def canrunnow(self, node):
         '''Return TRUE if we can meaningfully run right now'''
         return 1
 
     def errorstoignore(self):
         '''Return list of errors which are 'normal' and should be ignored'''
         return []
 
 
 class StopTest(CTSTest):
     '''Stop (deactivate) the cluster manager on a node'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Stop"
 
     def __call__(self, node):
         '''Stop the cluster manager on node and check the logs for it.
 
         Skipped unless the node is expected to be up.
         '''
         self.incr("calls")
         if self.CM.ShouldBeStatus[node] != "up":
             return self.skipped()
 
         # The stopping node should always report its own shutdown
         patterns = [self.templates["Pat:We_stopped"] % node]
 
         #if self.Env["use_logd"]:
         #    patterns.append(self.templates["Pat:Logd_stopped"] % node)
 
         # Every other active node should report that this one left
         # NOTE: This wont work if we have multiple partitions
         for peer in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[peer] == "up" and peer != node:
                 patterns.append(self.templates["Pat:They_stopped"] % (peer, self.CM.key_for_node(node)))
 
         watch = self.create_watch(patterns, self.Env["DeadTime"])
         watch.setwatch()
 
         # Keep count of which kind of node was stopped
         if node == self.CM.OurNode:
             self.incr("us")
         elif self.CM.upcount() <= 1:
             self.incr("all")
         else:
             self.incr("them")
 
         self.CM.StopaCM(node)
         watch.lookforall()
 
         failreason = None
         UnmatchedList = "||"
         if watch.unmatched:
             # Collect some state from the node to help with debugging
             for command in ("/bin/ps axf", "/usr/sbin/dlm_tool dump"):
                 (rc, output) = self.rsh(node, command, None)
                 for line in output:
                     self.debug(line)
 
             for regex in watch.unmatched:
                 self.logger.log("ERROR: Shutdown pattern not found: %s" % (regex))
                 UnmatchedList += regex + "||"
                 failreason = "Missing shutdown pattern"
 
         self.CM.cluster_stable(self.Env["DeadTime"])
 
         if not watch.unmatched or self.CM.upcount() == 0:
             return self.success()
 
         if len(watch.unmatched) >= self.CM.upcount():
             return self.failure("no match against (%s)" % UnmatchedList)
 
         if failreason is None:
             return self.success()
         return self.failure(failreason)
 #
 # We don't register StopTest because it's better when called by
 # another test...
 #
 
 
 class StartTest(CTSTest):
     '''Start (activate) the cluster manager on a node'''
     def __init__(self, cm, debug=None):
         CTSTest.__init__(self,cm)
         self.name = "start"
         # NOTE(review): this replaces the debug() method inherited from
         # CTSTest with whatever was passed in (None by default) on this
         # instance - confirm that is intended
         self.debug = debug
 
     def __call__(self, node):
         '''Start the cluster manager on node.
 
         Skipped unless the node is expected to be down.
         '''
         self.incr("calls")
 
         # Count whether this is the first node to come up or a later one
         if self.CM.upcount() != 0:
             self.incr("them")
         else:
             self.incr("us")
 
         if self.CM.ShouldBeStatus[node] != "down":
             return self.skipped()
 
         if self.CM.StartaCM(node):
             return self.success()
 
         return self.failure("Startup %s on node %s failed"
                             % (self.Env["Name"], node))
 
 #
 # We don't register StartTest because it's better when called by
 # another test...
 #
 
 
 class FlipTest(CTSTest):
     '''If it's running, stop it.  If it's stopped start it.
        Overthrow the status quo...
     '''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Flip"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
 
     def __call__(self, node):
         '''Stop node if it should be up, start it if it should be down.
 
         Skipped when the expected status is neither "up" nor "down".
         '''
         self.incr("calls")
 
         expected = self.CM.ShouldBeStatus[node]
         if expected == "up":
             self.incr("stopped")
             ret = self.stop(node)
             direction = "up->down"
             # Give the cluster time to recognize it's gone...
             time.sleep(self.Env["StableTime"])
         elif expected == "down":
             self.incr("started")
             ret = self.start(node)
             direction = "down->up"
         else:
             return self.skipped()
 
         self.incr(direction)
         if not ret:
             return self.failure("%s failure" % direction)
         return self.success()
 
 #        Register FlipTest as a good test to run
 AllTestClasses.append(FlipTest)
 
 
 class RestartTest(CTSTest):
     '''Stop and restart a node'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Restart"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
         self.benchmark = 1
 
     def __call__(self, node):
         '''Stop node and then start it again, timing the pair.
 
         A preliminary start is attempted first when StataCM() is true for
         the node.
         '''
         self.incr("calls")
         self.incr("node:" + node)
 
         if self.CM.StataCM(node):
             self.incr("WasStopped")
             if not self.start(node):
                 return self.failure("start (setup) failure: "+node)
 
         self.set_timer()
         # Run both phases in order, giving up at the first one that fails
         for phase, complaint in ((self.stop, "stop failure: "),
                                  (self.start, "start failure: ")):
             if not phase(node):
                 return self.failure(complaint+node)
         return self.success()
 
 #        Register RestartTest as a good test to run
 AllTestClasses.append(RestartTest)
 
 
 class StonithdTest(CTSTest):
     '''Fence a node with stonith_admin and check that the cluster recovers'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Stonithd"
         self.startall = SimulStartLite(cm)
         self.benchmark = 1
 
     def __call__(self, node):
         '''Reboot node via stonith_admin issued from a randomly chosen node.
 
         Skipped when fewer than two nodes are configured.
         '''
         self.incr("calls")
         if len(self.Env["nodes"]) < 2:
             return self.skipped()
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         is_dc = self.CM.is_node_dc(node)
 
         # Log patterns that must show up for the test to pass
         watchpats = []
         watchpats.append(self.templates["Pat:FenceOpOK"] % node)
         watchpats.append(self.templates["Pat:NodeFenced"] % node)
 
         if self.Env["at-boot"] == 0:
             self.debug("Expecting %s to stay down" % node)
             self.CM.ShouldBeStatus[node] = "down"
         else:
             self.debug("Expecting %s to come up again %d" % (node, self.Env["at-boot"]))
             watchpats.append("%s.* S_STARTING -> S_PENDING" % node)
             watchpats.append("%s.* S_PENDING -> S_NOT_DC" % node)
 
         watch = self.create_watch(watchpats, 30 + self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
         watch.setwatch()
 
         # The node issuing the fencing request may be the target itself
         origin = self.Env.RandomGen.choice(self.Env["nodes"])
 
         rc = self.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node)
 
         if rc == 194:
             # 194 - 256 = -62 = Timer expired
             #
             # Look for the patterns, usually this means the required
             # device was running on the node to be fenced - or that
             # the required devices were in the process of being loaded
             # and/or moved
             #
             # Effectively the node committed suicide so there will be
             # no confirmation, but pacemaker should be watching and
             # fence the node again
 
             self.logger.log("Fencing command on %s to fence %s timed out" % (origin, node))
 
         elif origin != node and rc != 0:
             self.debug("Waiting for the cluster to recover")
             self.CM.cluster_stable()
 
             self.debug("Waiting STONITHd node to come back up")
             self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
             self.logger.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc))
 
         elif origin == node and rc != 255:
             # 255 == broken pipe, ie. the node was fenced as expected
             self.logger.log("Locally originated fencing returned %d" % rc)
 
         # "fence" times the wait for the log patterns, "reform" the wait
         # for the cluster to become whole and stable again
         self.set_timer("fence")
         matched = watch.lookforall()
         self.log_timer("fence")
         self.set_timer("reform")
         if watch.unmatched:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         self.debug("Waiting STONITHd node to come back up")
         self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         is_stable = self.CM.cluster_stable(self.Env["StartTime"])
 
         if not matched:
             return self.failure("Didn't find all expected patterns")
         elif not is_stable:
             return self.failure("Cluster did not become stable")
 
         self.log_timer("reform")
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which are 'normal' and should be ignored'''
         return [
             self.templates["Pat:Fencing_start"] % ".*",
             self.templates["Pat:Fencing_ok"] % ".*",
             r"error.*: Resource .*stonith::.* is active on 2 nodes attempting recovery",
             r"error.*: Operation reboot of .*by .* for stonith_admin.*: Timer expired",
         ]
 
     def is_applicable(self):
         '''Apply the common checks, then honour Env["DoFencing"] if it is set'''
         if not self.is_applicable_common():
             return 0
 
         if "DoFencing" in self.Env.keys():
             return self.Env["DoFencing"]
 
         return 1
 
 AllTestClasses.append(StonithdTest)
 
 
 class StartOnebyOne(CTSTest):
     '''Start all the nodes ~ one by one'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "StartOnebyOne"
         self.stopall = SimulStopLite(cm)
         self.start = StartTest(cm)
         self.ns = CTS.NodeStatus(cm.Env)
 
     def __call__(self, dummy):
         '''Stop every node, then start them again one after another.
 
         The argument is ignored.
         '''
         self.incr("calls")
 
         # Begin from a fully stopped cluster
         if not self.stopall(None):
             return self.failure("Test setup failed")
 
         self.set_timer()
         # Try every node, remembering the ones that would not start
         failed = [node for node in self.Env["nodes"] if not self.start(node)]
 
         if failed:
             return self.failure("Some node failed to start: " + repr(failed))
 
         return self.success()
 
 #        Register StartOnebyOne as a good test to run
 AllTestClasses.append(StartOnebyOne)
 
 
 class SimulStart(CTSTest):
     '''Stop every node, then start all the nodes ~ simultaneously'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "SimulStart"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
 
     def __call__(self, dummy):
         '''Perform the 'SimulStart' test. The "node" argument is ignored.'''
         self.incr("calls")
 
         # Setup: shut down all the nodes
         stopped = self.stopall(None)
         if not stopped:
             return self.failure("Setup failed")
 
         self.CM.clear_all_caches()
 
         # The actual test: bring everything up in one go
         started = self.startall(None)
         if started:
             return self.success()
         return self.failure("Startall failed")
 
 #        Register SimulStart as a good test to run
 AllTestClasses.append(SimulStart)
 
 
 class SimulStop(CTSTest):
     '''Start every node, then stop all the nodes ~ simultaneously'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "SimulStop"
         self.startall = SimulStartLite(cm)
         self.stopall = SimulStopLite(cm)
 
     def __call__(self, dummy):
         '''Perform the 'SimulStop' test. The "node" argument is ignored.'''
         self.incr("calls")
 
         # Setup: start up all the nodes
         started = self.startall(None)
         if not started:
             return self.failure("Setup failed")
 
         # The actual test: take everything down in one go
         stopped = self.stopall(None)
         if stopped:
             return self.success()
         return self.failure("Stopall failed")
 
 #     Register SimulStop as a good test to run
 AllTestClasses.append(SimulStop)
 
 
 class StopOnebyOne(CTSTest):
     '''Start the whole cluster, then stop all the nodes in order'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "StopOnebyOne"
         # Helper tests: one starts every node, the other stops a single node
         self.startall = SimulStartLite(cm)
         self.stop = StopTest(cm)
 
     def __call__(self, dummy):
         '''Perform the 'StopOnebyOne' test. The "node" argument is ignored.'''
         self.incr("calls")
 
         # Setup: start up all the nodes
         if not self.startall(None):
             return self.failure("Setup failed")
 
         self.set_timer()
         # Stop the nodes in list order and remember those that did not stop
         failed = [node for node in self.Env["nodes"] if not self.stop(node)]
 
         if failed:
             return self.failure("Some node failed to stop: " + repr(failed))
 
         # Caches are only cleared when every node stopped cleanly
         self.CM.clear_all_caches()
         return self.success()
 
 #     Register StopOnebyOne as a good test to run
 AllTestClasses.append(StopOnebyOne)
 
 
 class RestartOnebyOne(CTSTest):
     '''Start the whole cluster, then restart all the nodes in order'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "RestartOnebyOne"
         self.startall = SimulStartLite(cm)
 
     def __call__(self, dummy):
         '''Perform the 'RestartOnebyOne' test. The "node" argument is ignored.'''
         self.incr("calls")
 
         # Setup: start up all the nodes
         if not self.startall(None):
             return self.failure("Setup failed")
 
         self.set_timer()
         # The per-node restart helper is created afresh on every run
         self.restart = RestartTest(self.CM)
         did_fail = [node for node in self.Env["nodes"] if not self.restart(node)]
 
         if not did_fail:
             return self.success()
 
         return self.failure("Could not restart %d nodes: %s"
                             % (len(did_fail), repr(did_fail)))
 
 #     Register RestartOnebyOne as a good test to run
 AllTestClasses.append(RestartOnebyOne)
 
 
 class PartialStart(CTSTest):
     '''Start a node - but tell it to stop before it finishes starting up'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "PartialStart"
         self.startall = SimulStartLite(cm)
         self.stopall = SimulStopLite(cm)
         self.stop = StopTest(cm)
         #self.is_unsafe = 1
 
     def __call__(self, node):
         '''Perform the 'PartialStart' test. '''
         self.incr("calls")
 
         # Setup: everything has to be down before the partial start
         if not self.stopall(None):
             return self.failure("Setup failed")
 
         # FIXME!  This should use the CM class to get the pattern
         #         then it would be applicable in general
         startup_pats = ["crmd.*Connecting to cluster infrastructure"]
         watch = self.create_watch(startup_pats, self.Env["DeadTime"]+10)
         watch.setwatch()
 
         # Kick off the start without waiting for it to complete
         self.CM.StartaCMnoBlock(node)
         if not watch.lookforall():
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
             return self.failure("Setup of %s failed" % node)
 
         # The node is on its way up; now ask it to stop again
         if not self.stop(node):
             return self.failure("%s did not stop in time" % node)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
 
         # We might do some fencing in the 2-node case if we make it up far enough
         return ["Executing reboot fencing operation"]
 
 #     Register PartialStart as a good test to run
 AllTestClasses.append(PartialStart)
 
 
 class StandbyTest(CTSTest):
     '''Put a node into standby mode and back, checking its status and resources'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Standby"
         self.benchmark = 1
 
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
 
     # Outline of the test:
     #  - make sure the node is active
     #  - set the node to standby mode
     #  - check resources: no resource should be running on the node
     #  - set the node back to active mode
     #  - check resources: they should have been migrated back (SHOULD THEY?)
     #    (this last check is not performed by the code below)
 
     def __call__(self, node):
         '''Perform the 'Standby' test on the given node. '''
         self.incr("calls")
         if not self.startall(None):
             return self.failure("Start all nodes failed")
 
         self.debug("Make sure node %s is active" % node)
         if self.CM.StandbyStatus(node) != "off" and not self.CM.SetStandbyMode(node, "off"):
             return self.failure("can't set node %s to active mode" % node)
 
         self.CM.cluster_stable()
 
         current = self.CM.StandbyStatus(node)
         if current != "off":
             return self.failure("standby status of %s is [%s] but we expect [off]" % (node, current))
 
         self.debug("Getting resources running on node %s" % node)
         # The result is not used afterwards; the query itself is kept as-is
         rsc_on_node = self.CM.active_resources(node)
 
         # The watch must be armed before standby is requested
         transition_pats = [r"State transition .* -> S_POLICY_ENGINE"]
         watch = self.create_watch(transition_pats, self.Env["DeadTime"]+10)
         watch.setwatch()
 
         self.debug("Setting node %s to standby mode" % node)
         if not self.CM.SetStandbyMode(node, "on"):
             return self.failure("can't set node %s to standby mode" % node)
 
         self.set_timer("on")
 
         if not watch.lookforall():
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
             # Put the node back into active mode before giving up
             self.CM.SetStandbyMode(node, "off")
             return self.failure("cluster didn't react to standby change on %s" % node)
 
         self.CM.cluster_stable()
 
         current = self.CM.StandbyStatus(node)
         if current != "on":
             return self.failure("standby status of %s is [%s] but we expect [on]" % (node, current))
         self.log_timer("on")
 
         self.debug("Checking resources")
         still_running = self.CM.active_resources(node)
         if len(still_running) > 0:
             # Record the failure first, then restore active mode
             outcome = self.failure("%s set to standby, %s is still running on it" % (node, repr(still_running)))
             self.debug("Setting node %s to active mode" % node)
             self.CM.SetStandbyMode(node, "off")
             return outcome
 
         self.debug("Setting node %s to active mode" % node)
         if not self.CM.SetStandbyMode(node, "off"):
             return self.failure("can't set node %s to active mode" % node)
 
         self.set_timer("off")
         self.CM.cluster_stable()
 
         current = self.CM.StandbyStatus(node)
         if current != "off":
             return self.failure("standby status of %s is [%s] but we expect [off]" % (node, current))
         self.log_timer("off")
 
         return self.success()
 
 AllTestClasses.append(StandbyTest)
 
 
 class ValgrindTest(CTSTest):
     '''Check for memory leaks
 
     setup() restarts the cluster with the "valgrind-prefix" environment
     setting enabled, the test body inspects the valgrind logs on every node,
     and teardown() disables the setting again and stops the cluster.
     '''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Valgrind"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
         self.is_valgrind = 1
         self.is_loop = 1
 
     def setup(self, node):
         '''Restart every node with valgrind enabled.
 
         Returns the result of self.failure() if the cluster could not be
         stopped or started, otherwise the result of self.success().
         '''
         self.incr("calls")
 
         ret = self.stopall(None)
         if not ret:
             return self.failure("Stop all nodes failed")
 
         # Enable valgrind
         self.logger.logPat = "/tmp/%s-*.valgrind" % self.name
 
         self.Env["valgrind-prefix"] = self.name
 
         # find_leaks() inspects the logs of every node, so stale logs have to
         # be removed everywhere, not just on the node under test
         for target in self.Env["nodes"]:
             self.rsh(target, "rm -f %s" % self.logger.logPat, None)
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Start all nodes failed")
 
         for target in self.Env["nodes"]:
             (rc, output) = self.rsh(target, "ps u --ppid `pidofproc aisexec`", None)
             for line in output:
                 self.debug(line)
 
         return self.success()
 
     def teardown(self, node):
         '''Disable valgrind and stop every node'''
         # Disable valgrind
         self.Env["valgrind-prefix"] = None
 
         # Return all nodes to normal
         ret = self.stopall(None)
         if not ret:
             return self.failure("Stop all nodes failed")
 
         return self.success()
 
     def find_leaks(self):
         '''Stop every node and grep its valgrind logs for leaks and errors.
 
         Returns the list of nodes whose logs contained a match (or could not
         be checked: any grep exit status other than 1 counts as a hit).
         '''
         # Check for leaks
         leaked = []
         self.stop = StopTest(self.CM)
 
         # The patterns are quoted because an unquoted "(ERROR|error)" is a
         # syntax error for the remote shell, and -E is needed for the
         # alternation to be treated as such.  The log glob stays unquoted so
         # that the remote shell expands it.
         # NOTE(review): assumes self.rsh passes double quotes through to the
         # remote shell unchanged - confirm against the remote helper.
         leak_cmd = ('grep -E -e "indirectly.*lost:.*[1-9]"'
                     ' -e "definitely.*lost:.*[1-9]"'
                     ' -e "(ERROR|error).*SUMMARY:.*[1-9].*errors" %s'
                     % self.logger.logPat)
 
         for node in self.Env["nodes"]:
             (rc, ps_out) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None)
             rc = self.stop(node)
             if not rc:
                 self.failure("Couldn't shut down %s" % node)
 
             rc = self.rsh(node, leak_cmd, 0)
             if rc != 1:
                 leaked.append(node)
                 self.failure("Valgrind errors detected on %s" % node)
                 for line in ps_out:
                     self.logger.log(line)
                 (rc, output) = self.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logger.logPat, None)
                 for line in output:
                     self.logger.log(line)
                 (rc, output) = self.rsh(node, "cat %s" % self.logger.logPat, None)
                 for line in output:
                     self.debug(line)
 
             # Each node keeps its own logs: remove them once inspected so they
             # are not reported again on the next run
             self.rsh(node, "rm -f %s" % self.logger.logPat, None)
 
         return leaked
 
     def __call__(self, node):
         '''Perform the 'Valgrind' test: fail if any node reported leaks'''
         leaked = self.find_leaks()
         if len(leaked) > 0:
             return self.failure("Nodes %s leaked" % repr(leaked))
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [
             r"cib.*: \*\*\*\*\*\*\*\*\*\*\*\*\*",
             r"cib.*: .* avoid confusing Valgrind",
             r"HA_VALGRIND_ENABLED",
         ]
 
 
 class StandbyLoopTest(ValgrindTest):
     '''Check for memory leaks by putting a node in and out of standby for an hour'''
 
     def __init__(self, cm):
         ValgrindTest.__init__(self, cm)
         self.name = "StandbyLoop"
 
     def __call__(self, node):
         '''Toggle standby on the node until Env["loop-minutes"] have elapsed
         (or a toggle fails), then look for leaks.'''
         pause = 2
         iteration = 0
         failed = 0
         deadline = time.time() + self.Env["loop-minutes"] * 60
 
         while time.time() <= deadline and not failed:
             iteration += 1
 
             # Both halves of the cycle are attempted even if the first one fails;
             # "failed" records the iteration number, which also ends the loop
             for (mode, label) in (("on", "standby"), ("off", "active")):
                 time.sleep(pause)
                 if not self.CM.SetStandbyMode(node, mode):
                     self.failure("can't set node %s to %s mode" % (node, label))
                     failed = iteration
 
         # Leaks are collected even when the loop was cut short
         leaked = self.find_leaks()
         if failed:
             return self.failure("Iteration %d failed" % failed)
         if len(leaked) > 0:
             return self.failure("Nodes %s leaked" % repr(leaked))
 
         return self.success()
 
 AllTestClasses.append(StandbyLoopTest)
 
 
 class BandwidthTest(CTSTest):
 #        Tests should not be cluster-manager-specific
 #        If you need to find out cluster manager configuration to do this, then
 #        it should be added to the generic cluster manager API.
     '''Test the bandwidth which heartbeat uses'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "Bandwidth"
         self.start = StartTest(cm)
         self.__setitem__("min",0)
         self.__setitem__("max",0)
         self.__setitem__("totalbandwidth",0)
         # Local scratch file that receives a copy of the remote tcpdump output
         (handle, self.tempfile) = tempfile.mkstemp(".cts")
         os.close(handle)
         self.startall = SimulStartLite(cm)
 
     def __call__(self, node):
         '''Perform the Bandwidth test'''
         self.incr("calls")
 
         if self.CM.upcount() < 1:
             return self.skipped()
 
         Path = self.CM.InternalCommConfig()
         if "ip" not in Path["mediatype"]:
             return self.skipped()
 
         port = Path["port"][0]
         port = int(port)
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Test setup failed")
         time.sleep(5)  # We get extra messages right after startup.
 
         # Capture 102 UDP packets on the cluster port into a file on the node
         fstmpfile = "/var/run/band_estimate"
         dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \
         %                (port, fstmpfile)
 
         rc = self.rsh(node, dumpcmd)
         if rc == 0:
             farfile = "root@%s:%s" % (node, fstmpfile)
             self.rsh.cp(farfile, self.tempfile)
             Bandwidth = self.countbandwidth(self.tempfile)
             if not Bandwidth:
                 self.logger.log("Could not compute bandwidth.")
                 return self.success()
             intband = int(Bandwidth + 0.5)
             self.logger.log("...bandwidth: %d bits/sec" % intband)
             self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth
             if self.Stats["min"] == 0:
                 self.Stats["min"] = Bandwidth
             if Bandwidth > self.Stats["max"]:
                 self.Stats["max"] = Bandwidth
             if Bandwidth < self.Stats["min"]:
                 self.Stats["min"] = Bandwidth
             self.rsh(node, "rm -f %s" % fstmpfile)
             os.unlink(self.tempfile)
             return self.success()
         else:
             return self.failure("no response from tcpdump command [%d]!" % rc)
 
     def _packet_length(self, tokens):
         '''Return the packet length found in a space-split tcpdump line.
 
         The length is the token following the first "udp" or "length:" token;
         when neither marker is present the last token of the line is used.
         Raises ValueError if that token is not an integer.
         '''
         marker = len(tokens) - 2
         for idx in range(len(tokens) - 1):
             if tokens[idx] in ("udp", "length:"):
                 marker = idx
                 break
         return int(tokens[marker + 1])
 
     def _stamp_seconds(self, stamp):
         '''Convert an "HH:MM:SS.ffffff" timestamp into seconds (as a float)'''
         fields = stamp.split(":")
         secs = fields[2].split(".")
         return (int(fields[0])*60+int(fields[1]))*60+int(secs[0])+int(secs[1])*0.000001
 
     def countbandwidth(self, file):
         '''Estimate the bandwidth, in bits per second, from tcpdump output.
 
         file is the path of a text file holding tcpdump output.  The lengths
         of the first 100 lines mentioning "udp" or "UDP," are added up and
         divided by the time elapsed between the first and the last of them.
 
         Returns None when the file holds fewer than 100 such lines or a
         length cannot be parsed, 0 when no time elapsed, and the bandwidth
         otherwise.
         '''
         fp = open(file, "r")
         try:
             count = 0
             total = 0
             start = None
             tokens = None
             while count < 100:
                 line = fp.readline()
                 if not line:
                     return None
                 if not (re.search("udp",line) or re.search("UDP,", line)):
                     continue
 
                 count = count + 1
                 tokens = line.split(" ")
                 try:
                     total = total + self._packet_length(tokens)
                 except ValueError:
                     self.logger.log("Invalid tcpdump line: %s" % line)
                     return None
 
                 if count == 1:
                     # The first matching line provides the start time
                     start = self._stamp_seconds(tokens[0])
 
             # tokens now belongs to the 100th matching line
             elapsed = self._stamp_seconds(tokens[0]) - start
             if elapsed <= 0:
                 return 0
             return (total*8)/elapsed
         finally:
             # Always release the file handle, whichever way we leave
             fp.close()
 
     def is_applicable(self):
         '''BandwidthTest never applicable'''
         return 0
 
 AllTestClasses.append(BandwidthTest)
 
 
 ###################################################################
 class MaintenanceMode(CTSTest):
 ###################################################################
     '''Check that resources become unmanaged in maintenance mode and that a
     failed resource is only recovered after maintenance mode is turned off'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "MaintenanceMode"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.max = 30
         #self.is_unsafe = 1
         self.benchmark = 1
         # Operation name and interval used to build the expected log patterns
         self.action = "asyncmon"
         self.interval = 0
         # Id of the dummy resource that is added, failed and removed again
         self.rid = "maintenanceDummy"
 
     def toggleMaintenanceMode(self, node, action):
         '''Turn maintenance mode "On" or "Off" and wait for the expected logs.
 
         Returns "" when all patterns were found, otherwise the repr() of the
         patterns that were not matched.
         '''
         turning_on = (action == "On")
         timer = "recover%s" % (action)
 
         expected = [self.templates["Pat:DC_IDLE"]]
 
         # fail the resource right after turning Maintenance mode on
         # verify it is not recovered until maintenance mode is turned off
         if turning_on:
             expected.append(r"pengine.*:\s+warning:.*Processing failed op %s for %s on" % (self.action, self.rid))
         else:
             for op in ("stop_0", "start_0"):
                 expected.append(self.templates["Pat:RscOpOK"] % (self.rid, op))
 
         watch = self.create_watch(expected, 60)
         watch.setwatch()
 
         self.debug("Turning maintenance mode %s" % action)
         self.rsh(node, self.templates["MaintenanceMode%s" % (action)])
         if turning_on:
             self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
 
         self.set_timer(timer)
         watch.lookforall()
         self.log_timer(timer)
 
         if not watch.unmatched:
             return ""
 
         self.debug("Failed to find patterns when turning maintenance mode %s" % action)
         return repr(watch.unmatched)
 
     def insertMaintenanceDummy(self, node):
         '''Add the dummy resource on node and wait for its start to be logged.
 
         Returns "" on success, otherwise the repr() of the unmatched patterns.
         '''
         start_pat = ("%s.*" % node) + (self.templates["Pat:RscOpOK"] % (self.rid, "start_0"))
 
         watch = self.create_watch([start_pat], 60)
         watch.setwatch()
 
         self.CM.AddDummyRsc(node, self.rid)
 
         self.set_timer("addDummy")
         watch.lookforall()
         self.log_timer("addDummy")
 
         if not watch.unmatched:
             return ""
 
         self.debug("Failed to find patterns when adding maintenance dummy resource")
         return repr(watch.unmatched)
 
     def removeMaintenanceDummy(self, node):
         '''Remove the dummy resource and wait for its stop to be logged.
 
         Returns "" on success, otherwise the repr() of the unmatched patterns.
         '''
         stop_pat = self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")
 
         watch = self.create_watch([stop_pat], 60)
         watch.setwatch()
         self.CM.RemoveDummyRsc(node, self.rid)
 
         self.set_timer("removeDummy")
         watch.lookforall()
         self.log_timer("removeDummy")
 
         if not watch.unmatched:
             return ""
 
         self.debug("Failed to find patterns when removing maintenance dummy resource")
         return repr(watch.unmatched)
 
     def managedRscList(self, node):
         '''Return the ids of the managed resources listed by "crm_resource -c"'''
         managed_ids = []
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if not re.search("^Resource", line):
                 continue
             rsc = AuditResource(self.CM, line)
             if rsc.managed():
                 managed_ids.append(rsc.id)
 
         return managed_ids
 
     def verifyResources(self, node, rscList, managed):
         '''Check that every resource in rscList is in the wanted managed state.
 
         managed selects the state to look for (true: managed, false:
         unmanaged).  Returns True when all of rscList was found in that
         state, False otherwise.
         '''
         # Work on a copy; found resources get struck off one by one
         outstanding = list(rscList)
         if managed:
             managed_str = "managed"
         else:
             managed_str = "unmanaged"
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if not re.search("^Resource", line):
                 continue
             rsc = AuditResource(self.CM, line)
             # Skip resources that are in the opposite state
             if bool(rsc.managed()) != bool(managed):
                 continue
             if rsc.id in outstanding:
                 outstanding.remove(rsc.id)
 
         if len(outstanding) == 0:
             self.debug("Found all %s resources on %s" % (managed_str, node))
             return True
 
         self.logger.log("Could not find all %s resources on %s. %s" % (managed_str, node, outstanding))
         return False
 
     def __call__(self, node):
         '''Perform the 'MaintenanceMode' test. '''
         self.incr("calls")
         unmatched = []
 
         if not self.startall(None):
             return self.failure("Setup failed")
 
         # get a list of all the managed resources. We use this list
         # after enabling maintenance mode to verify all managed resources
         # become un-managed.  After maintenance mode is turned off, we use
         # this list to verify all the resources become managed again.
         managedResources = self.managedRscList(node)
         if len(managedResources) == 0:
             self.logger.log("No managed resources on %s" % node)
             return self.skipped()
 
         # insert a fake resource we can fail during maintenance mode
         # so we can verify recovery does not take place until after maintenance
         # mode is disabled.
         unmatched.append(self.insertMaintenanceDummy(node))
 
         # toggle maintenance mode ON, then fail dummy resource.
         unmatched.append(self.toggleMaintenanceMode(node, "On"))
 
         # verify all the resources are now unmanaged
         verify_unmanaged = bool(self.verifyResources(node, managedResources, False))
 
         # Toggle maintenance mode  OFF, verify dummy is recovered.
         unmatched.append(self.toggleMaintenanceMode(node, "Off"))
 
         # verify all the resources are now managed again
         verify_managed = bool(self.verifyResources(node, managedResources, True))
 
         # Remove our maintenance dummy resource.
         unmatched.append(self.removeMaintenanceDummy(node))
 
         self.CM.cluster_stable()
 
         # Each step contributed "" on success, so any text means a failure
         failPat = "".join(unmatched)
         if failPat != "":
             return self.failure("Unmatched patterns: %s" % (failPat))
         if not verify_unmanaged:
             return self.failure("Failed to verify resources became unmanaged during maintenance mode")
         if not verify_managed:
             return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode")
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         op_key = "%s_%d" % (self.action, self.interval)
         return [
             r"Updating failcount for %s" % self.rid,
             r"pengine.*: Recover %s\s*\(.*\)" % self.rid,
             r"Unknown operation: fail",
             r"(ERROR|error): sending stonithRA op to stonithd failed.",
             self.templates["Pat:RscOpOK"] % (self.rid, op_key),
             r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
         ]
 
 AllTestClasses.append(MaintenanceMode)
 
 
 class ResourceRecover(CTSTest):
     '''Fail a randomly chosen active resource on a node and check the logs
     and resource location afterwards'''
 
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "ResourceRecover"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.max = 30
         # Id of the resource being failed, and the id it was first picked under
         self.rid = None
         self.rid_alt = None
         #self.is_unsafe = 1
         self.benchmark = 1
 
         # these are the values used for the new LRM API call
         self.action = "asyncmon"
         self.interval = 0
 
     def __call__(self, node):
         '''Perform the 'ResourceRecover' test. '''
         self.incr("calls")
 
         if not self.startall(None):
             return self.failure("Setup failed")
 
         # Nothing to do when the node runs no resources at all
         candidates = self.CM.active_resources(node)
         if len(candidates) == 0:
             self.logger.log("No active resources on %s" % node)
             return self.skipped()
 
         self.rid = self.Env.RandomGen.choice(candidates)
         self.rid_alt = self.rid
 
         # Look the chosen resource up in the cluster's resource list
         rsc = None
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if not re.search("^Resource", line):
                 continue
             entry = AuditResource(self.CM, line)
             if entry.id == self.rid:
                 rsc = entry
                 # Handle anonymous clones that get renamed
                 self.rid = rsc.clone_id
                 break
 
         if not rsc:
             return self.failure("Could not find %s in the resource list" % self.rid)
 
         self.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id))
 
         expected = [
             r"pengine.*:\s+warning:.*Processing failed op %s for (%s|%s) on" % (self.action,
                 rsc.id, rsc.clone_id),
         ]
 
         if rsc.managed():
             expected.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0"))
             if rsc.unique():
                 restarted_as = self.rid
             else:
                 # Anonymous clones may get restarted with a different clone number
                 restarted_as = ".*"
             expected.append(self.templates["Pat:RscOpOK"] % (restarted_as, "start_0"))
 
         # Arm the watch before failing the resource
         watch = self.create_watch(expected, 60)
         watch.setwatch()
 
         self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
 
         self.set_timer("recover")
         watch.lookforall()
         self.log_timer("recover")
 
         self.CM.cluster_stable()
         recovered = self.CM.ResourceLocation(self.rid)
 
         if watch.unmatched:
             return self.failure("Patterns not found: %s" % repr(watch.unmatched))
 
         if rsc.unique() and len(recovered) > 1:
             return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered)))
 
         if len(recovered) > 0:
             self.debug("%s is running on: %s" % (self.rid, repr(recovered)))
         elif rsc.managed():
             return self.failure("%s was not recovered and is inactive" % self.rid)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         op_key = "%s_%d" % (self.action, self.interval)
         return [
             r"Updating failcount for %s" % self.rid,
             r"pengine.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt),
             r"Unknown operation: fail",
             r"(ERROR|error): sending stonithRA op to stonithd failed.",
             self.templates["Pat:RscOpOK"] % (self.rid, op_key),
             r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
         ]
 
 AllTestClasses.append(ResourceRecover)
 
 
 class ComponentFail(CTSTest):
     def __init__(self, cm):
         '''Set up the test and fetch the list of components from the cluster manager'''
         CTSTest.__init__(self, cm)
         self.name = "ComponentFail"
         self.is_unsafe = 1
         # TODO make this work correctly in docker.
         self.is_docker_unsafe = 1
         # Both pattern lists are reset at the start of every __call__
         self.patterns = []
         self.okerrpatterns = []
         self.startall = SimulStartLite(cm)
         self.complist = cm.Components()
 
     def __call__(self, node):
         '''Perform the 'ComponentFail' test.
 
         Kills one randomly chosen cluster component on node and then checks
         that either the node got fenced or the expected recovery patterns
         show up in the logs and the cluster becomes stable again.
         '''
         self.incr("calls")
         # Start every run with empty pattern lists
         self.patterns = []
         self.okerrpatterns = []
 
         # start all nodes
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         if not self.CM.cluster_stable(self.Env["StableTime"]):
             return self.failure("Setup failed - unstable")
 
         node_is_dc = self.CM.is_node_dc(node, None)
 
         # select a component to kill
         # (components flagged dc_only are re-drawn unless the node is the DC)
         chosen = self.Env.RandomGen.choice(self.complist)
         while chosen.dc_only == 1 and node_is_dc == 0:
             chosen = self.Env.RandomGen.choice(self.complist)
 
         self.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot))
         self.incr(chosen.name)
 
         # The killed/respawn patterns are not expected for aisexec and
         # corosync, nor for pengine when Env["Name"] is "crm-lha"
         if chosen.name != "aisexec" and chosen.name != "corosync":
             if self.Env["Name"] != "crm-lha" or chosen.name != "pengine":
                 self.patterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name))
                 self.patterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
 
         # Add the component's own patterns, plus its DC-specific ones if needed
         self.patterns.extend(chosen.pats)
         if node_is_dc:
           self.patterns.extend(chosen.dc_pats)
 
         # In an ideal world, this next stuff should be in the "chosen" object as a member function
         if self.Env["Name"] == "crm-lha" and chosen.triggersreboot:
             # Make sure the node goes down and then comes back up if it should reboot...
             for other in self.Env["nodes"]:
                 if other != node:
                     self.patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node)))
             self.patterns.append(self.templates["Pat:Slave_started"] % node)
             self.patterns.append(self.templates["Pat:Local_started"] % node)
 
             if chosen.dc_only:
                 # Sometimes these will be in the log, and sometimes they won't...
                 self.okerrpatterns.append("%s .*Process %s:.* exited" % (node, chosen.name))
                 self.okerrpatterns.append("%s .*I_ERROR.*crmdManagedChildDied" % node)
                 self.okerrpatterns.append("%s .*The %s subsystem terminated unexpectedly" % (node, chosen.name))
                 self.okerrpatterns.append("(ERROR|error): Client .* exited with return code")
             else:
                 # Sometimes this won't be in the log...
                 self.okerrpatterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name))
                 self.okerrpatterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
                 self.okerrpatterns.append(self.templates["Pat:ChildExit"])
 
         if chosen.name == "stonith":
             # Ignore actions for STONITH resources
             (rc, lines) = self.rsh(node, "crm_resource -c", None)
             for line in lines:
                 if re.search("^Resource", line):
                     r = AuditResource(self.CM, line)
                     if r.rclass == "stonith":
                         self.okerrpatterns.append(self.templates["Pat:Fencing_recover"] % r.id)
 
         # supply a copy so self.patterns doesn't end up empty
         # (the copy is taken before badnews_ignore is added, so the watch
         # below only waits for the recovery patterns)
         tmpPats = []
         tmpPats.extend(self.patterns)
         self.patterns.extend(chosen.badnews_ignore)
 
         # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status
         stonithPats = []
         stonithPats.append(self.templates["Pat:Fencing_ok"] % node)
         stonith = self.create_watch(stonithPats, 0)
         stonith.setwatch()
 
         # set the watch for stable
         watch = self.create_watch(
             tmpPats, self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
         watch.setwatch()
 
         # kill the component
         # (both watches above are armed first so no log line is missed)
         chosen.kill(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         self.debug("Waiting for any STONITHd node to come back up")
         self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         self.CM.cluster_stable(self.Env["StartTime"])
 
         self.debug("Checking if %s was shot" % node)
         shot = stonith.look(60)
         if shot:
             self.debug("Found: " + repr(shot))
             self.okerrpatterns.append(self.templates["Pat:Fencing_start"] % node)
 
             # Record the node as expected to be down when at-boot is 0
             if self.Env["at-boot"] == 0:
                 self.CM.ShouldBeStatus[node] = "down"
 
             # If fencing occurred, chances are many (if not all) the expected logs
             # will not be sent - or will be lost when the node reboots
             return self.success()
 
         # check for logs indicating a graceful recovery
         matched = watch.lookforall(allow_multiple_matches=1)
         if watch.unmatched:
             self.logger.log("Patterns not found: " + repr(watch.unmatched))
 
         self.debug("Waiting for the cluster to re-stabilize with all nodes")
         is_stable = self.CM.cluster_stable(self.Env["StartTime"])
 
         # Missing patterns take precedence over instability in the verdict
         if not matched:
             return self.failure("Didn't find all expected %s patterns" % chosen.name)
         elif not is_stable:
             return self.failure("Cluster did not become stable after killing %s" % chosen.name)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return the list of log patterns that should be ignored as errors.

         The expected patterns in self.patterns are appended (in place) to
         self.okerrpatterns, and that same list object is returned.
         '''
         # Note that okerrpatterns refers to the last time we ran this test.
         # The good news is that this works fine for us...
         self.okerrpatterns.extend(self.patterns)
         return self.okerrpatterns
 
 AllTestClasses.append(ComponentFail)
 
 
 class SplitBrainTest(CTSTest):
     '''Split the cluster into isolated partitions, audit it, then heal it.
 
     The nodes are randomly spread over at least two partitions which are
     then isolated from each other.  Once the expected number of partitions
     has formed, the cluster is audited, the partitions are healed and the
     cluster is expected to re-form as a single, stable partition.
     '''
     def __init__(self,cm):
         CTSTest.__init__(self,cm)
         self.name = "SplitBrain"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.is_experimental = 1
 
     def _other_nodes(self, partition, log_suffix=""):
         '''Return the configured nodes that are not members of partition.
 
         Members of partition that are not in self.Env["nodes"] are logged
         (with log_suffix appended to the message) and otherwise ignored.
         '''
         other_nodes = list(self.Env["nodes"])
 
         for node in partition:
             try:
                 other_nodes.remove(node)
             except ValueError:
                 self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]) + log_suffix)
 
         return other_nodes
 
     def isolate_partition(self, partition):
         '''Isolate every node in partition from all nodes outside of it.
 
         Returns 1 on success (or if there is nobody to isolate from) and
         0 as soon as one node could not be isolated.
         '''
         other_nodes = self._other_nodes(partition, " from " + repr(partition))
 
         if len(other_nodes) == 0:
             return 1
 
         self.debug("Creating partition: " + repr(partition))
         self.debug("Everyone else: " + repr(other_nodes))
 
         for node in partition:
             if not self.CM.isolate_node(node, other_nodes):
                 self.logger.log("Could not isolate %s" % node)
                 return 0
 
         return 1
 
     def heal_partition(self, partition):
         '''Undo isolate_partition() for every node in partition.
 
         Always returns 1; the result of unisolate_node() is not checked.
         '''
         other_nodes = self._other_nodes(partition)
 
         if len(other_nodes) == 0:
             return 1
 
         self.debug("Healing partition: " + repr(partition))
         self.debug("Everyone else: " + repr(other_nodes))
 
         for node in partition:
             self.CM.unisolate_node(node, other_nodes)
 
         # Return a value on every path, consistent with isolate_partition()
         return 1
 
     def __call__(self, node):
         '''Perform split-brain test'''
         self.incr("calls")
         # NOTE(review): presumably self.failure() clears self.passed, which is
         # what the final check relies on - confirm in CTSTest.
         self.passed = 1
         partitions = {}
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed")
 
         while 1:
             # Retry until we get multiple partitions
             partitions = {}
             p_max = len(self.Env["nodes"])
             # Use a separate loop variable so that the 'node' argument is
             # still the node chosen by the caller when we run commands below
             for member in self.Env["nodes"]:
                 p = self.Env.RandomGen.randint(1, p_max)
                 if not p in partitions:
                     partitions[p] = []
                 partitions[p].append(member)
             p_max = len(partitions.keys())
             if p_max > 1:
                 break
             # else, try again
 
         self.debug("Created %d partitions" % p_max)
         for key in list(partitions.keys()):
             self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key]))
 
         # Disabling STONITH to reduce test complexity for now
         self.rsh(node, "crm_attribute -V -n stonith-enabled -v false")
 
         for key in list(partitions.keys()):
             self.isolate_partition(partitions[key])
 
         # Wait (up to 30 polls, 10s apart) for the expected partitions to form.
         # The counter must be decremented, otherwise this loop never ends and
         # the 'else' branch below can never report the failure.
         count = 30
         while count > 0:
             if len(self.CM.find_partitions()) != p_max:
                 time.sleep(10)
                 count -= 1
             else:
                 break
         else:
             self.failure("Expected partitions were not created")
 
         # Target number of partitions formed - wait for stability
         if not self.CM.cluster_stable():
             self.failure("Partitioned cluster not stable")
 
         # Now audit the cluster state
         self.CM.partitions_expected = p_max
         if not self.audit():
             self.failure("Audits failed")
         self.CM.partitions_expected = 1
 
         # And heal them again
         for key in list(partitions.keys()):
             self.heal_partition(partitions[key])
 
         # Wait for a single partition to form
         count = 30
         while count > 0:
             if len(self.CM.find_partitions()) != 1:
                 time.sleep(10)
                 count -= 1
             else:
                 break
         else:
             self.failure("Cluster did not reform")
 
         # Wait for it to have the right number of members
         count = 30
         while count > 0:
             members = []
 
             partitions = self.CM.find_partitions()
             if len(partitions) > 0:
                 members = partitions[0].split()
 
             if len(members) != len(self.Env["nodes"]):
                 time.sleep(10)
                 count -= 1
             else:
                 break
         else:
             self.failure("Cluster did not completely reform")
 
         # Wait up to 20 minutes - the delay is more preferable than
         # trying to continue with in a messed up state
         if not self.CM.cluster_stable(1200):
             self.failure("Reformed cluster not stable")
             if self.Env["continue"] == 1:
                 answer = "Y"
             else:
                 # raw_input() only exists on Python 2; fall back to input()
                 try:
                     ask = raw_input
                 except NameError:
                     ask = input
                 try:
                     answer = ask('Continue? [nY]')
                 except EOFError:
                     answer = "n"
             if answer == "n":
                 raise ValueError("Reformed cluster not stable")
 
         # Turn fencing back on
         if self.Env["DoFencing"]:
             self.rsh(node, "crm_attribute -V -D -n stonith-enabled")
 
         self.CM.cluster_stable()
 
         if self.passed:
             return self.success()
         return self.failure("See previous errors")
 
     def errorstoignore(self):
         '''Return list of errors which are 'normal' and should be ignored'''
         return [
             r"Another DC detected:",
             r"(ERROR|error).*: .*Application of an update diff failed",
             r"crmd.*:.*not in our membership list",
             r"CRIT:.*node.*returning after partition",
         ]
 
     def is_applicable(self):
         '''Only applicable when the common checks pass and there are more than two nodes'''
         if not self.is_applicable_common():
             return 0
         return len(self.Env["nodes"]) > 2
 
 AllTestClasses.append(SplitBrainTest)
 
 
 class Reattach(CTSTest):
     '''Restart the whole cluster while resources are unmanaged.
 
     Resource management is disabled, the cluster is stopped and started
     again, and the logs are checked for resource operations both during the
     restart and after management has been re-enabled.
     '''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "Reattach"
         self.startall = SimulStartLite(cm)
         self.restart1 = RestartTest(cm)
         self.stopall = SimulStopLite(cm)
         self.is_unsafe = 0 # Handled by canrunnow()
 
     def _is_managed(self, node):
         '''Return True if the is-managed resource default reads back as "true"'''
         output = self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -Q -G -d true", 1)
         # Drop the final character (the trailing newline) before comparing
         return output[:-1] == "true"
 
     def _set_unmanaged(self, node):
         '''Set the is-managed resource default to false'''
         self.debug("Disable resource management")
         self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -v false")
 
     def _set_managed(self, node):
         '''Delete the is-managed resource default'''
         self.debug("Re-enable resource management")
         self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -D")
 
     def setup(self, node):
         '''Start all nodes and wait until the cluster is stable; 1 on success, None otherwise'''
         if not self.startall(None):
             return None
 
         # Make sure we are really _really_ stable and that all
         # resources, including those that depend on transient node
         # attributes, are started
         retries = 0
         while not self.CM.cluster_stable(double_check=True):
             if retries >= 5:
                 self.logger.log("Cluster is not stable")
                 return None
             retries += 1
             self.debug("Not stable yet, re-testing")
 
         return 1
 
     def teardown(self, node):
         '''Make sure node is up and resource management is enabled again; 1 on success, 0 otherwise'''
 
         # Make sure 'node' is up
         StartTest(self.CM)(node)
 
         if self._is_managed(node):
             return 1
 
         self.logger.log("Attempting to re-enable resource management on %s" % node)
         self._set_managed(node)
         self.CM.cluster_stable()
         if self._is_managed(node):
             return 1
 
         self.logger.log("Could not re-enable resource management")
         return 0
 
     def canrunnow(self, node):
         '''Return TRUE if we can meaningfully run right now'''
         if not self.find_ocfs2_resources(node):
             return 1
         self.logger.log("Detach/Reattach scenarios are not possible with OCFS2 services present")
         return 0
 
     def __call__(self, node):
         '''Perform the 'Reattach' test. '''
         self.incr("calls")
 
         # Conveniently, pengine will display this message when disabling management,
         # even if fencing is not enabled, so we can rely on it.
         managed = self.create_watch(["Delaying fencing operations"], 60)
         managed.setwatch()
 
         self._set_unmanaged(node)
 
         if not managed.lookforall():
             self.logger.log("Patterns not found: " + repr(managed.unmatched))
             return self.failure("Resource management not disabled")
 
         # Any successful resource operation of these kinds counts as activity
         pats = [self.templates["Pat:RscOpOK"] % (".*", action)
                 for action in ("start", "stop", "promote", "demote", "migrate")]
 
         watch = self.create_watch(pats, 60, "ShutdownActivity")
         watch.setwatch()
 
         self.debug("Shutting down the cluster")
         ret = self.stopall(None)
         if not ret:
             self._set_managed(node)
             return self.failure("Couldn't shut down the cluster")
 
         self.debug("Bringing the cluster back up")
         ret = self.startall(None)
         time.sleep(5) # allow ping to update the CIB
         if not ret:
             self._set_managed(node)
             return self.failure("Couldn't restart the cluster")
 
         if self.local_badnews("ResourceActivity:", watch):
             self._set_managed(node)
             return self.failure("Resources stopped or started during cluster restart")
 
         watch = self.create_watch(pats, 60, "StartupActivity")
         watch.setwatch()
 
         # Re-enable resource management (and verify it happened).
         self._set_managed(node)
         self.CM.cluster_stable()
         if not self._is_managed(node):
             return self.failure("Could not re-enable resource management")
 
         # Ignore actions for STONITH resources
         ignore = []
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if not re.search("^Resource", line):
                 continue
             rsc = AuditResource(self.CM, line)
             if rsc.rclass == "stonith":
                 self.debug("Ignoring start actions for %s" % rsc.id)
                 ignore.append(self.templates["Pat:RscOpOK"] % (rsc.id, "start_0"))
 
         if self.local_badnews("ResourceActivity:", watch, ignore):
             return self.failure("Resources stopped or started after resource management was re-enabled")
 
         # The result of the cluster start above is what gets reported
         return ret
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [
             r"resources were active at shutdown",
         ]
 
     def is_applicable(self):
         '''Not applicable when the cluster stack name is "crm-lha"'''
         if self.Env["Name"] != "crm-lha":
             return 1
         return None
 
 AllTestClasses.append(Reattach)
 
 
 class SpecialTest1(CTSTest):
     '''Set up a custom test to cause quorum failure issues for Andrew'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SpecialTest1"
         self.startall = SimulStartLite(cm)
         self.restart1 = RestartTest(cm)
         self.stopall = SimulStopLite(cm)
 
     def __call__(self, node):
         '''Perform the 'SpecialTest1' test for Andrew. '''
         self.incr("calls")
 
         # Step 1: shut down all the nodes
         if not self.stopall(None):
             return self.failure("Could not stop all nodes")
 
         # Step 2: remove the CIB files on the selected node, to test
         # config recovery when the other nodes come up
         self.rsh(node, "rm -f %s/cib*" % CTSvars.CRM_CONFIG_DIR)
 
         # Step 3: start the selected node
         if not self.restart1(node):
             return self.failure("Could not start %s" % node)
 
         # Step 4: start all remaining nodes
         if not self.startall(None):
             return self.failure("Could not start the remaining nodes")
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         # Errors that occur as a result of the CIB being wiped
         wiped_cib_errors = [
             r"error.*: v1 patchset error, patch failed to apply: Application of an update diff failed",
             r"error.*: Resource start-up disabled since no STONITH resources have been defined",
             r"error.*: Either configure some or disable STONITH with the stonith-enabled option",
             r"error.*: NOTE: Clusters with shared data need STONITH to ensure data integrity",
         ]
         return wiped_cib_errors
 
 AllTestClasses.append(SpecialTest1)
 
 
 class HAETest(CTSTest):
     '''Common base for the looping HAE tests.
 
     Provides setup/teardown that start/stop all nodes, plus helpers for
     locating the dlm resource and for waiting until a resource is reported
     active the expected number of times.
     '''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "HAETest"
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
         self.is_loop = 1
 
     def setup(self, node):
         '''Start all nodes before the test runs'''
         ret = self.startall(None)
         if not ret:
             return self.failure("Couldn't start all nodes")
         return self.success()
 
     def teardown(self, node):
         '''Stop all nodes after the test has run'''
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
         return self.success()
 
     def wait_on_state(self, node, resource, expected_clones, attempts=240):
         '''Poll crm_resource until resource is listed expected_clones times.
 
         "crm_resource -r <resource> -W -Q" is run on node once per second,
         at most attempts times.  Returns 1 as soon as the number of output
         lines equals expected_clones.  Returns 0 when the attempts are
         exhausted, or immediately when crm_resource exits with 234 (unknown
         resource), 246 (cluster inactive) or any other non-zero code except 1.
         '''
         while attempts > 0:
             active = 0
             (rc, lines) = self.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None)
 
             # Hack until crm_resource does the right thing
             if rc == 0 and lines:
                 active = len(lines)
 
             # Note: the line count is compared regardless of rc
             if len(lines) == expected_clones:
                 return 1
 
             elif rc == 1:
                 self.debug("Resource %s is still inactive" % resource)
 
             elif rc == 234:
                 self.logger.log("Unknown resource %s" % resource)
                 return 0
 
             elif rc == 246:
                 self.logger.log("Cluster is inactive")
                 return 0
 
             elif rc != 0:
                 self.logger.log("Call to crm_resource failed, rc=%d" % rc)
                 return 0
 
             else:
                 self.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones))
 
             attempts -= 1
             time.sleep(1)
 
         return 0
 
     def find_dlm(self, node):
         '''Locate the parent of the first "controld" resource that has one.
 
         The parent's id is stored in self.r_dlm.  Returns 1 if found,
         otherwise 0 (and self.r_dlm is left as None).
         '''
         self.r_dlm = None
 
         (rc, lines) = self.rsh(node, "crm_resource -c", None)
         for line in lines:
             if re.search("^Resource", line):
                 r = AuditResource(self.CM, line)
                 if r.rtype == "controld" and r.parent != "NA":
                     # Assign before logging, otherwise the message always
                     # reports "None"
                     self.r_dlm = r.parent
                     self.debug("Found dlm: %s" % self.r_dlm)
                     return 1
         return 0
 
     def find_hae_resources(self, node):
         '''Reset the cached resource names, then look up dlm and, if present, the OCFS2 resources'''
         self.r_dlm = None
         self.r_o2cb = None
         self.r_ocfs2 = []
 
         if self.find_dlm(node):
             self.find_ocfs2_resources(node)
 
     def is_applicable(self):
         '''Only applicable when the common checks pass and the schema is "hae"'''
         if not self.is_applicable_common():
             return 0
         if self.Env["Schema"] == "hae":
             return 1
         return None
 
 
 class HAERoleTest(HAETest):
     '''Lars' mount/unmount test for the HA extension.
 
     Repeatedly sets the dlm resource's target-role to Stopped and back to
     Started for the configured number of loop minutes, checking each time
     that dlm, o2cb and every OCFS2 resource reach the expected state.
     '''
     def __init__(self, cm):
         HAETest.__init__(self,cm)
         self.name = "HAERoleTest"
 
     def change_state(self, node, resource, target):
         '''Set the target-role meta attribute of resource to target'''
         return self.rsh(node, "crm_resource -V -r %s -p target-role -v %s  --meta" % (resource, target))
 
     def __call__(self, node):
         '''Perform the 'HAERoleTest' test. '''
         self.incr("calls")
         iteration = 0
         failed = 0
         deadline = time.time() + self.Env["loop-minutes"]*60
         self.find_hae_resources(node)
 
         clone_max = len(self.Env["nodes"])
         while time.time() <= deadline and not failed:
             iteration += 1
 
             # Take dlm down and expect no active instances
             self.change_state(node, self.r_dlm, "Stopped")
             if not self.wait_on_state(node, self.r_dlm, 0):
                 self.failure("%s did not go down correctly" % self.r_dlm)
                 failed = iteration
 
             # Bring dlm back; it and everything above it must return everywhere
             self.change_state(node, self.r_dlm, "Started")
             for rsc in [self.r_dlm, self.r_o2cb] + list(self.r_ocfs2):
                 if not self.wait_on_state(node, rsc, clone_max):
                     self.failure("%s did not come up correctly" % rsc)
                     failed = iteration
 
         if not failed:
             return self.success()
         return self.failure("iteration %d failed" % failed)
 
 AllTestClasses.append(HAERoleTest)
 
 
 class HAEStandbyTest(HAETest):
     '''Repeatedly put a node into standby and take it out again.
 
     For the configured number of loop minutes, standby is switched on and
     off for the node, checking each time that dlm, o2cb and every OCFS2
     resource reach the expected number of active instances.
     '''
     def __init__(self, cm):
         HAETest.__init__(self,cm)
         self.name = "HAEStandbyTest"
 
     def change_state(self, node, resource, target):
         '''Run crm_standby on node with value target (resource is not used)'''
         return self.rsh(node, "crm_standby -V -l reboot -v %s" % (target))
 
     def __call__(self, node):
         '''Perform the 'HAEStandbyTest' test. '''
         self.incr("calls")
 
         iteration = 0
         failed = 0
         deadline = time.time() + self.Env["loop-minutes"]*60
         self.find_hae_resources(node)
 
         clone_max = len(self.Env["nodes"])
         while time.time() <= deadline and not failed:
             iteration += 1
 
             # Standby on: one instance fewer is expected
             self.change_state(node, self.r_dlm, "true")
             if not self.wait_on_state(node, self.r_dlm, clone_max-1):
                 self.failure("%s did not go down correctly" % self.r_dlm)
                 failed = iteration
 
             # Standby off: everything must be active on all nodes again
             self.change_state(node, self.r_dlm, "false")
             for rsc in [self.r_dlm, self.r_o2cb] + list(self.r_ocfs2):
                 if not self.wait_on_state(node, rsc, clone_max):
                     self.failure("%s did not come up correctly" % rsc)
                     failed = iteration
 
         if not failed:
             return self.success()
         return self.failure("iteration %d failed" % failed)
 
 AllTestClasses.append(HAEStandbyTest)
 
 
 class NearQuorumPointTest(CTSTest):
     '''
     This test brings larger clusters near the quorum point (50%).
     In addition, it will test doing starts and stops at the same time.
 
     Here is how it works:
     - loop over the nodes and randomly pick "start" or "stop" for each one
     - work out which of them actually need to change state
     - without blocking, stop those going down and start those going up
     '''
 
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "NearQuorumPoint"
 
     def __call__(self, dummy):
         '''Perform the 'NearQuorumPoint' test. '''
         self.incr("calls")
         startset = []
         stopset = []
 
         stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint")
 
         # Decide what to do with each node
         for node in self.Env["nodes"]:
             action = self.Env.RandomGen.choice(["start","stop"])
             if action == "start":
                 startset.append(node)
             elif action == "stop":
                 stopset.append(node)
 
         self.debug("start nodes:" + repr(startset))
         self.debug("stop nodes:" + repr(stopset))
 
         # Build the search patterns.  Nothing has been started or stopped
         # yet, so the list of nodes that will really go down can be worked
         # out once up front.
         going_down = [peer for peer in stopset if self.CM.ShouldBeStatus[peer] == "up"]
         watchpats = [self.templates["Pat:We_stopped"] % peer for peer in going_down]
 
         for node in startset:
             if self.CM.ShouldBeStatus[node] == "down":
                 watchpats.append(self.templates["Pat:Local_started"] % node)
                 continue
             # Nodes that stay up should notice each departing peer
             for stopping in going_down:
                 watchpats.append(self.templates["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping)))
 
         if not watchpats:
             return self.skipped()
 
         if startset:
             watchpats.append(self.templates["Pat:DC_IDLE"])
 
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
 
         watch.setwatch()
 
         # Begin actions
         for node in stopset:
             if self.CM.ShouldBeStatus[node] == "up":
                 self.CM.StopaCMnoBlock(node)
 
         for node in startset:
             if self.CM.ShouldBeStatus[node] == "down":
                 self.CM.StartaCMnoBlock(node)
 
         # Get the result
         if watch.lookforall():
             self.CM.cluster_stable()
             self.CM.fencing_cleanup("NearQuorumPoint", stonith)
             return self.success()
 
         self.logger.log("Warn: Patterns not found: " + repr(watch.unmatched))
 
         # Get the "bad" nodes: still up although stopped, or still down
         # although started
         upnodes = [peer for peer in stopset if self.CM.StataCM(peer) == 1]
         downnodes = [peer for peer in startset if self.CM.StataCM(peer) == 0]
 
         self.CM.fencing_cleanup("NearQuorumPoint", stonith)
         if not upnodes and not downnodes:
             self.CM.cluster_stable()
 
             # Make sure they're completely down with no residue
             for node in stopset:
                 self.rsh(node, self.templates["StopCmd"])
 
             return self.success()
 
         if upnodes:
             self.logger.log("Warn: Unstoppable nodes: " + repr(upnodes))
 
         if downnodes:
             self.logger.log("Warn: Unstartable nodes: " + repr(downnodes))
 
         return self.failure()
 
     def is_applicable(self):
         '''Not applicable when the cluster stack name is "crm-cman"'''
         if self.Env["Name"] != "crm-cman":
             return 1
         return None
 
 AllTestClasses.append(NearQuorumPointTest)
 
 
 class RollingUpgradeTest(CTSTest):
     '''Perform a rolling upgrade of the cluster'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RollingUpgrade"
         self.start = StartTest(cm)
         self.stop = StopTest(cm)
         self.stopall = SimulStopLite(cm)
         self.startall = SimulStartLite(cm)
 
     def setup(self, node):
         '''Stop the cluster, install the previous version everywhere, then start all nodes'''
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
 
         for target in self.Env["nodes"]:
             if not self.downgrade(target, None):
                 return self.failure("Couldn't downgrade %s" % target)
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Couldn't start all nodes")
         return self.success()
 
     def teardown(self, node):
         '''Stop the cluster and make sure every node runs the current version'''
         # Stop everything
         ret = self.stopall(None)
         if not ret:
             return self.failure("Couldn't stop all nodes")
 
         for target in self.Env["nodes"]:
             if not self.upgrade(target, None):
                 return self.failure("Couldn't upgrade %s" % target)
 
         return self.success()
 
     def install(self, node, version, start=1, flags="--force"):
         '''Install the RPMs of version on node.
 
         The node is stopped, the RPMs found in <rpm-dir>/<version> are copied
         to /tmp/rpm-<version> on the node and installed with
         "rpm -Uvh <flags>".  If start is true the node is started again.
         Returns the result of self.success() or self.failure(); the exit
         codes of the copy and rpm commands are not checked.
         '''
 
         target_dir = "/tmp/rpm-%s" % version
         src_dir = "%s/%s" % (self.Env["rpm-dir"], version)
 
         self.logger.log("Installing %s on %s with %s" % (version, node, flags))
         if not self.stop(node):
             return self.failure("stop failure: "+node)
 
         rc = self.rsh(node, "mkdir -p %s" % target_dir)
         rc = self.rsh(node, "rm -f %s/*.rpm" % target_dir)
         (rc, lines) = self.rsh(node, "ls -1 %s/*.rpm" % src_dir, None)
         for line in lines:
             # Only strip an actual trailing newline, never a real character
             line = line.rstrip("\n")
             rc = self.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir))
         rc = self.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir))
 
         if start and not self.start(node):
             return self.failure("start failure: "+node)
 
         return self.success()
 
     def upgrade(self, node, start=1):
         '''Install the version named by Env["current-version"] on node'''
         return self.install(node, self.Env["current-version"], start)
 
     def downgrade(self, node, start=1):
         '''Install the version named by Env["previous-version"] on node, ignoring dependencies'''
         return self.install(node, self.Env["previous-version"], start, "--force --nodeps")
 
     def __call__(self, node):
         '''Perform the 'Rolling Upgrade' test. '''
         self.incr("calls")
 
         for target in self.Env["nodes"]:
             # upgrade() returns a true value on success, so the test must
             # fail when it does NOT (same convention as setup/teardown)
             if not self.upgrade(target):
                 return self.failure("Couldn't upgrade %s" % target)
 
             self.CM.cluster_stable()
 
         return self.success()
 
     def is_applicable(self):
         '''Applicable only if the RPM directory and both versions are configured'''
         if not self.is_applicable_common():
             return None
 
         if not "rpm-dir" in self.Env.keys():
             return None
         if not "current-version" in self.Env.keys():
             return None
         if not "previous-version" in self.Env.keys():
             return None
 
         return 1
 
 #        Register RollingUpgradeTest as a good test to run
 AllTestClasses.append(RollingUpgradeTest)
 
 
 class BSC_AddResource(CTSTest):
     '''Add a resource to the cluster'''
     def __init__(self, cm):
         CTSTest.__init__(self, cm)
         self.name = "AddResource"
         self.resource_offset = 0
         self.cib_cmd = """cibadmin -C -o %s -X '%s' """
 
     def __call__(self, node):
         '''Create a new IPaddr resource preferring node and wait for it to start'''
         self.incr("calls")
         self.resource_offset = self.resource_offset + 1
 
         r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset)
         start_pat = "crmd.*%s_start_0.*confirmed.*ok"
 
         patterns = []
         patterns.append(start_pat % r_id)
 
         # The watch must be in place before the resource is created
         watch = self.create_watch(patterns, self.Env["DeadTime"])
         watch.setwatch()
 
         ip = self.NextIP()
         if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip):
             return self.failure("Make resource %s failed" % r_id)
 
         watch.lookforall()
         if watch.unmatched:
             for regex in watch.unmatched:
                 self.logger.log ("Warn: Pattern not found: %s" % (regex))
             return self.failure("Resource pattern(s) not found")
 
         if not self.CM.cluster_stable(self.Env["DeadTime"]):
             return self.failure("Unstable cluster")
 
         return self.success()
 
     def NextIP(self):
         '''Advance Env["IPBase"] by one and return the new address.
 
         The last group of the address is incremented: as hexadecimal for an
         address containing ":" (IPv6), as decimal otherwise.  No overflow
         into the preceding group is performed.
         '''
         ip = self.Env["IPBase"]
         # rpartition() returns an immutable 3-tuple, so unpack it instead of
         # assigning into it
         if ":" in ip:
             (prefix, sep, suffix) = ip.rpartition(":")
             # An address ending in "::" has an empty last group; count from 0.
             # "%x" avoids the "0x" prefix that hex() would add.
             suffix = "%x" % (int(suffix or "0", 16) + 1)
         else:
             (prefix, sep, suffix) = ip.rpartition('.')
             suffix = str(int(suffix) + 1)
 
         ip = prefix + sep + suffix
         self.Env["IPBase"] = ip
         return ip.strip()
 
     def make_ip_resource(self, node, id, rclass, type, ip):
         '''Create a location constraint for node and then the IP resource itself.
 
         Both are created by running cibadmin on node.  Returns 1 on success,
         None if either cibadmin call exits non-zero.
         '''
         self.logger.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node))
         rsc_xml="""
 <primitive id="%s" class="%s" type="%s"  provider="heartbeat">
     <instance_attributes id="%s"><attributes>
         <nvpair id="%s" name="ip" value="%s"/>
     </attributes></instance_attributes>
 </primitive>""" % (id, rclass, type, id, id, ip)
 
         node_constraint = """
       <rsc_location id="run_%s" rsc="%s">
         <rule id="pref_run_%s" score="100">
           <expression id="%s_loc_expr" attribute="#uname" operation="eq" value="%s"/>
         </rule>
       </rsc_location>""" % (id, id, id, id, node)
 
         # The constraint goes in first, before the resource exists
         (rc, lines) = self.rsh(node, self.cib_cmd % ("constraints", node_constraint), None)
         if rc != 0:
             self.logger.log("Constraint creation failed: %d" % rc)
             return None
 
         (rc, lines) = self.rsh(node, self.cib_cmd % ("resources", rsc_xml), None)
         if rc != 0:
             self.logger.log("Resource creation failed: %d" % rc)
             return None
 
         return 1
 
     def is_applicable(self):
         '''Only applicable when Env["DoBSC"] is set'''
         if self.Env["DoBSC"]:
             return 1
         return None
 
 AllTestClasses.append(BSC_AddResource)
 
 
 class SimulStopLite(CTSTest):
     '''Stop any active nodes ~ simultaneously'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStopLite"
 
     def __call__(self, dummy):
         '''Perform the 'SimulStopLite' setup work. '''
         self.incr("calls")
 
         self.debug("Setup: " + self.name)
 
         # The argument is ignored: every node that should be up is stopped
         watchpats = []
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] != "up":
                 continue
             self.incr("WasStarted")
             watchpats.append(self.templates["Pat:We_stopped"] % node)
 
         # Nothing is running, so there is nothing to stop
         if not watchpats:
             self.CM.clear_all_caches()
             return self.success()
 
         # Stop all the nodes - at about the same time...
         watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
 
         watch.setwatch()
         self.set_timer()
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == "up":
                 self.CM.StopaCMnoBlock(node)
 
         if watch.lookforall():
             self.CM.clear_all_caches()
 
             # Make sure they're completely down with no residue
             for node in self.Env["nodes"]:
                 self.rsh(node, self.templates["StopCmd"])
 
             return self.success()
 
         # Not every stop message was seen: check which nodes are still up
         up_nodes = [peer for peer in self.Env["nodes"] if self.CM.StataCM(peer) == 1]
         if up_nodes:
             return self.failure("Active nodes exist: " + repr(up_nodes))
 
         self.logger.log("Warn: All nodes stopped but CTS didnt detect: "
                     + repr(watch.unmatched))
 
         self.CM.clear_all_caches()
         return self.failure("Missing log message: "+repr(watch.unmatched))
 
     def is_applicable(self):
         '''SimulStopLite is a setup test and never applicable'''
         return 0
 
 
 class SimulStartLite(CTSTest):
     '''Start any stopped nodes ~ simultaneously'''
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "SimulStartLite"
 
     def _forget_patterns(self, watch, patterns):
         '''Remove each given pattern from watch.unmatched, if it is still there.
 
         A pattern that cannot be removed is only reported via self.debug().
         Only Exception subclasses are caught, so KeyboardInterrupt and
         SystemExit still propagate.
         '''
         for pattern in patterns:
             try:
                 watch.unmatched.remove(pattern)
             except Exception:
                 self.debug("Already matched: %s" % pattern)
 
     def __call__(self, dummy):
         '''Perform the 'SimulStartLite' setup work.
 
         The node argument is ignored; every node whose expected status is
         "down" is started.  Returns self.success() when all nodes are up and
         stable, self.failure() with the reason otherwise.
         '''
         self.incr("calls")
         self.debug("Setup: " + self.name)
 
         #        We ignore the "node" parameter...
         node_list = []
         for node in self.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == "down":
                 self.incr("WasStopped")
                 node_list.append(node)
 
         self.set_timer()
         while len(node_list) > 0:
             # Repeat until all nodes come up
             watchpats = [ ]
 
             uppat = self.templates["Pat:Slave_started"]
             if self.CM.upcount() == 0:
                 uppat = self.templates["Pat:Local_started"]
 
             watchpats.append(self.templates["Pat:DC_IDLE"])
             for node in node_list:
                 watchpats.append(uppat % node)
                 watchpats.append(self.templates["Pat:InfraUp"] % node)
                 watchpats.append(self.templates["Pat:PacemakerUp"] % node)
 
             #   Start all the nodes - at about the same time...
             watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
             watch.setwatch()
 
             stonith = self.CM.prepare_fencing_watcher(self.name)
 
             for node in node_list:
                 self.CM.StartaCMnoBlock(node)
 
             watch.lookforall()
 
             # fencing_cleanup() hands back the nodes for the next round of
             # this loop, or None when the cluster failed to settle
             node_list = self.CM.fencing_cleanup(self.name, stonith)
 
             if node_list is None:
                 return self.failure("Cluster did not stabilize")
 
             # Remove node_list messages from watch.unmatched
             for node in node_list:
                 self.logger.debug("Dealing with stonith operations for %s" % repr(node_list))
                 if watch.unmatched:
                     self._forget_patterns(watch, [
                         uppat % node,
                         self.templates["Pat:InfraUp"] % node,
                         self.templates["Pat:PacemakerUp"] % node,
                     ])
 
             if watch.unmatched:
                 for regex in watch.unmatched:
                     self.logger.log ("Warn: Startup pattern not found: %s" %(regex))
 
             if not self.CM.cluster_stable():
                 return self.failure("Cluster did not stabilize")
 
         # Every node must now report as running ...
         unstarted = []
         for node in self.Env["nodes"]:
             if self.CM.StataCM(node) == 0:
                 unstarted.append(node)
 
         if unstarted:
             return self.failure("Unstarted nodes exist: " + repr(unstarted))
 
         # ... and as stable
         unstable = []
         for node in self.Env["nodes"]:
             if not self.CM.node_stable(node):
                 unstable.append(node)
 
         if unstable:
             return self.failure("Unstable cluster nodes exist: " + repr(unstable))
 
         return self.success()
 
     def is_applicable(self):
         '''SimulStartLite is a setup test and never applicable'''
         return 0
 
 
 def TestList(cm, audits):
     '''Instantiate every class in AllTestClasses and return the applicable tests.
 
     Each test is built with the given cluster manager; the ones whose
     is_applicable() is true get their Audits attribute set to audits.
     '''
     applicable = []
     for test_cls in AllTestClasses:
         candidate = test_cls(cm)
         if not candidate.is_applicable():
             continue
         candidate.Audits = audits
         applicable.append(candidate)
     return applicable
 
 
 class RemoteLXC(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteLXC"
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.num_containers = 2
         self.is_container = 1
         self.is_docker_unsafe = 1
         self.failed = 0
         self.fail_string = ""
 
     def start_lxc_simple(self, node):
 
         # restore any artifacts laying around from a previous test.
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
 
         # generate the containers, put them in the config, add some resources to them
         pats = [ ]
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc1", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc2", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "start_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "promote_0"))
 
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers)
         self.set_timer("remoteSimpleInit")
         watch.lookforall()
         self.log_timer("remoteSimpleInit")
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
     def cleanup_lxc_simple(self, node):
 
         pats = [ ]
         # if the test failed, attempt to clean up the cib and libvirt environment
         # as best as possible 
         if self.failed == 1:
             # restore libvirt and cib
             self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
             self.rsh(node, "crm_resource -C -r container1 &>/dev/null")
             self.rsh(node, "crm_resource -C -r container2 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc1 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc2 &>/dev/null")
             self.rsh(node, "crm_resource -C -r lxc-ms &>/dev/null")
             time.sleep(20)
             return
 
         watch = self.create_watch(pats, 120)
         watch.setwatch()
 
         pats.append(self.templates["Pat:RscOpOK"] % ("container1", "stop_0"))
         pats.append(self.templates["Pat:RscOpOK"] % ("container2", "stop_0"))
 
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null")
         self.set_timer("remoteSimpleCleanup")
         watch.lookforall()
         self.log_timer("remoteSimpleCleanup")
 
         if watch.unmatched:
             self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
             self.failed = 1
 
         # cleanup libvirt
         self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null")
 
     def __call__(self, node):
         '''Perform the 'RemoteLXC' test. '''
         self.incr("calls")
 
         ret = self.startall(None)
         if not ret:
             return self.failure("Setup failed, start all nodes failed.")
 
         rc = self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null")
         if rc == 1:
             self.log("Environment test for lxc support failed.")
             return self.skipped()
 
         self.start_lxc_simple(node)
         self.cleanup_lxc_simple(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if self.failed == 1:
             return self.failure(self.fail_string)
 
         return self.success()
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [
             r"Updating failcount for ping",
             r"pengine.*: Recover (ping|lxc-ms|container)\s*\(.*\)",
             # The orphaned lxc-ms resource causes an expected transition error
             # that is a result of the pengine not having knowledge that the 
             # ms resource used to be a clone.  As a result it looks like that 
             # resource is running in multiple locations when it shouldn't... But in
             # this instance we know why this error is occurring and that it is expected.
             r"Calculated Transition .* /var/lib/pacemaker/pengine/pe-error",
             r"Resource lxc-ms .* is active on 2 nodes attempting recovery",
             r"Unknown operation: fail",
             r"(ERROR|error): sending stonithRA op to stonithd failed.",
+            r"VirtualDomain.*ERROR: Unable to determine emulator",
         ]
 
 AllTestClasses.append(RemoteLXC)
 
 
 class RemoteDriver(CTSTest):
     '''Base class for tests that turn a cluster node into a Pacemaker Remote node.
 
     It provides the individual steps (start, migrate, fail, clean up);
     subclasses combine them in their own __call__().  Steps that follow
     start_new_test() do nothing once self.failed has been set.
     '''
 
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = self.__class__.__name__
         self.is_docker_unsafe = 1
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.stop = StopTest(cm)
         self.remote_rsc = "remote-rsc"
         self.cib_cmd = """cibadmin -C -o %s -X '%s' """
         self.reset()
 
     def reset(self):
         '''Return all per-run state to its initial values'''
         self.pcmk_started = 0
         self.failed = False
         self.fail_string = ""
         self.remote_node_added = 0
         self.remote_rsc_added = 0
         # Each run randomly decides whether to exercise reconnect_interval
         self.remote_use_reconnect_interval = self.Env.RandomGen.choice([True,False])
 
     def fail(self, msg):
         """ Mark test as failed. """
 
         self.failed = True
 
         # Always log the failure.
         self.logger.log(msg)
 
         # Use first failure as test status, as it's likely to be most useful.
         if not self.fail_string:
             self.fail_string = msg
 
     def get_othernode(self, node):
         '''Return the first cluster node that is not node (None if there is no other)'''
         for othernode in self.Env["nodes"]:
             # we don't want to try and use the cib that we just shutdown.
             # find a cluster node that is not our soon to be remote-node.
             if othernode != node:
                 return othernode
         return None
 
     def del_rsc(self, node, rsc):
         '''Delete primitive rsc, running the command on a node other than node'''
         othernode = self.get_othernode(node)
         rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc))
         if rc != 0:
             self.fail("Removal of resource '%s' failed" % rsc)
 
     def add_rsc(self, node, rsc_xml):
         '''Create rsc_xml in the CIB resources section via a node other than node'''
         othernode = self.get_othernode(node)
         rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml))
         if rc != 0:
             self.fail("resource creation failed")
 
     def add_primitive_rsc(self, node):
         '''Add the Dummy resource (self.remote_rsc) used to exercise the remote node'''
         rsc_xml = """
 <primitive class="ocf" id="%s" provider="heartbeat" type="Dummy">
     <operations>
       <op id="remote-rsc-monitor-interval-10s" interval="10s" name="monitor"/>
     </operations>
     <meta_attributes id="remote-meta_attributes"/>
 </primitive>""" % (self.remote_rsc)
         self.add_rsc(node, rsc_xml)
         if not self.failed:
             self.remote_rsc_added = 1
 
     def add_connection_rsc(self, node):
         '''Add the ocf:pacemaker:remote connection resource that points at node'''
         # NOTE(review): both XML variants contain an empty, self-closed
         # <instance_attributes> element with the same id as the populated one
         # that follows it, and in the reconnect variant the start op id says
         # "timeout-120" while its timeout attribute is 60 - confirm whether
         # either is intentional before touching the literals.
         if self.remote_use_reconnect_interval:
             # use reconnect interval and make sure to set cluster-recheck-interval as well.
             rsc_xml = """
 <primitive class="ocf" id="%s" provider="pacemaker" type="remote">
     <instance_attributes id="remote-instance_attributes"/>
         <instance_attributes id="remote-instance_attributes">
           <nvpair id="remote-instance_attributes-server" name="server" value="%s"/>
           <nvpair id="remote-instance_attributes-reconnect_interval" name="reconnect_interval" value="60s"/>
         </instance_attributes>
     <operations>
       <op id="remote-monitor-interval-60s" interval="60s" name="monitor"/>
       <op id="remote-name-start-interval-0-timeout-120" interval="0" name="start" timeout="60"/>
     </operations>
 </primitive>""" % (self.remote_node, node)
             self.rsh(self.get_othernode(node), self.templates["SetCheckInterval"] % ("45s"))
         else:
             # not using reconnect interval
             rsc_xml = """
 <primitive class="ocf" id="%s" provider="pacemaker" type="remote">
     <instance_attributes id="remote-instance_attributes"/>
         <instance_attributes id="remote-instance_attributes">
           <nvpair id="remote-instance_attributes-server" name="server" value="%s"/>
         </instance_attributes>
     <operations>
       <op id="remote-monitor-interval-60s" interval="60s" name="monitor"/>
       <op id="remote-name-start-interval-0-timeout-120" interval="0" name="start" timeout="120"/>
     </operations>
 </primitive>""" % (self.remote_node, node)
 
         self.add_rsc(node, rsc_xml)
         if not self.failed:
             self.remote_node_added = 1
 
     def stop_pcmk_remote(self, node):
         '''Stop the pacemaker_remote service on node (up to 10 attempts, 6s apart)'''
         # disable pcmk remote
         for i in range(10):
             rc = self.rsh(node, "service pacemaker_remote stop")
             if rc == 0:
                 break
             time.sleep(6)
 
     def start_pcmk_remote(self, node):
         '''Start the pacemaker_remote service on node (up to 10 attempts, 6s apart).
 
         self.pcmk_started is set to 1 on success and left untouched otherwise.
         '''
         for i in range(10):
             rc = self.rsh(node, "service pacemaker_remote start")
             if rc == 0:
                 self.pcmk_started = 1
                 break
             time.sleep(6)
 
     def kill_pcmk_remote(self, node):
         """ Simulate a Pacemaker Remote daemon failure. """
 
         # We kill the process to prevent a graceful stop,
         # then stop it to prevent the OS from restarting it.
         self.rsh(node, "killall -9 pacemaker_remoted")
         self.stop_pcmk_remote(node)
 
     def start_metal(self, node):
         '''Stop the cluster on node, start pacemaker_remote there, and add its connection resource'''
 
         # make sure the resource doesn't already exist for some reason
         self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_rsc))
         self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_node))
 
         if not self.stop(node):
             self.fail("Failed to shutdown cluster node %s" % node)
             return
 
         self.start_pcmk_remote(node)
 
         if self.pcmk_started == 0:
             self.fail("Failed to start pacemaker_remote on node %s" % node)
             return
 
         # convert node to baremetal node now that it has shut down the cluster stack
         pats = [ ]
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start"))
         pats.append(self.templates["Pat:DC_IDLE"])
 
         self.add_connection_rsc(node)
 
         self.set_timer("remoteMetalInit")
         watch.lookforall()
         self.log_timer("remoteMetalInit")
         if watch.unmatched:
             self.fail("Unmatched patterns: %s" % watch.unmatched)
 
     def migrate_connection(self, node):
         '''Move the connection resource and wait for migrate_to/migrate_from to complete'''
         if self.failed:
             return
 
         pats = [ ]
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_to"))
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_from"))
         pats.append(self.templates["Pat:DC_IDLE"])
         watch = self.create_watch(pats, 120)
         watch.setwatch()
 
         (rc, lines) = self.rsh(node, "crm_resource -M -r %s" % (self.remote_node), None)
         if rc != 0:
             self.fail("failed to move remote node connection resource")
             return
 
         self.set_timer("remoteMetalMigrate")
         watch.lookforall()
         self.log_timer("remoteMetalMigrate")
 
         if watch.unmatched:
             self.fail("Unmatched patterns: %s" % watch.unmatched)
 
     def fail_rsc(self, node):
         '''Make the Dummy resource fail and wait for it to be stopped and started again'''
         if self.failed:
             return
 
         watchpats = [ ]
         watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "stop", self.remote_node))
         watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node))
         watchpats.append(self.templates["Pat:DC_IDLE"])
 
         watch = self.create_watch(watchpats, 120)
         watch.setwatch()
 
         self.debug("causing dummy rsc to fail.")
 
         # The exit status is deliberately not checked
         self.rsh(node, "rm -f /var/run/resource-agents/Dummy*")
 
         self.set_timer("remoteRscFail")
         watch.lookforall()
         self.log_timer("remoteRscFail")
         if watch.unmatched:
             self.fail("Unmatched patterns during rsc fail: %s" % watch.unmatched)
 
     def fail_connection(self, node):
         '''Kill pacemaker_remote on node, expect fencing, then bring the remote node back'''
         if self.failed:
             return
 
         watchpats = [ ]
         watchpats.append(self.templates["Pat:FenceOpOK"] % self.remote_node)
         watchpats.append(self.templates["Pat:NodeFenced"] % self.remote_node)
 
         watch = self.create_watch(watchpats, 120)
         watch.setwatch()
 
         # force stop the pcmk remote daemon. this will result in fencing
         self.debug("Force stopped active remote node")
         self.kill_pcmk_remote(node)
 
         self.debug("Waiting for remote node to be fenced.")
         self.set_timer("remoteMetalFence")
         watch.lookforall()
         self.log_timer("remoteMetalFence")
         if watch.unmatched:
             self.fail("Unmatched patterns: %s" % watch.unmatched)
             return
 
         self.debug("Waiting for the remote node to come back up")
         self.CM.ns.WaitForNodeToComeUp(node, 120)
 
         pats = [ ]
         watch = self.create_watch(pats, 240)
         watch.setwatch()
         pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start"))
         if self.remote_rsc_added == 1:
             pats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node))
 
         # start the remote node again watch it integrate back into cluster.
         # NOTE(review): pcmk_started is still 1 from the initial start, so the
         # check below cannot detect a failed restart - confirm intent.
         self.start_pcmk_remote(node)
         if self.pcmk_started == 0:
             self.fail("Failed to start pacemaker_remote on node %s" % node)
             return
 
         self.debug("Waiting for remote node to rejoin cluster after being fenced.")
         self.set_timer("remoteMetalRestart")
         watch.lookforall()
         self.log_timer("remoteMetalRestart")
         if watch.unmatched:
             self.fail("Unmatched patterns: %s" % watch.unmatched)
 
     def add_dummy_rsc(self, node):
         '''Add the Dummy resource and force it onto the remote node'''
         if self.failed:
             return
 
         # verify we can put a resource on the remote node
         pats = [ ]
         watch = self.create_watch(pats, 120)
         watch.setwatch()
         pats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node))
         pats.append(self.templates["Pat:DC_IDLE"])
 
         # Add a resource that must live on remote-node
         self.add_primitive_rsc(node)
 
         # force that rsc to prefer the remote node.
         (rc, line) = self.CM.rsh(node, "crm_resource -M -r %s -N %s -f" % (self.remote_rsc, self.remote_node), None)
         if rc != 0:
             self.fail("Failed to place remote resource on remote node.")
             return
 
         self.set_timer("remoteMetalRsc")
         watch.lookforall()
         self.log_timer("remoteMetalRsc")
         if watch.unmatched:
             self.fail("Unmatched patterns: %s" % watch.unmatched)
 
     def test_attributes(self, node):
         '''Set, query and delete a permanent attribute of the remote node'''
         if self.failed:
             return
 
         # This verifies permanent attributes can be set on a remote-node. It also
         # verifies the remote-node can edit its own cib node section remotely.
         (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -v testval -N %s" % (self.remote_node), None)
         if rc != 0:
             self.fail("Failed to set remote-node attribute. rc:%s output:%s" % (rc, line))
             return
 
         (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -Q -N %s" % (self.remote_node), None)
         if rc != 0:
             self.fail("Failed to get remote-node attribute")
             return
 
         (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -D -N %s" % (self.remote_node), None)
         if rc != 0:
             self.fail("Failed to delete remote-node attribute")
 
     def cleanup_metal(self, node):
         '''Remove whatever this run added and stop pacemaker_remote on node.
 
         Does nothing if pacemaker_remote was never started by this run.
         '''
         if self.pcmk_started == 0:
             return
 
         pats = [ ]
 
         watch = self.create_watch(pats, 120)
         watch.setwatch()
 
         if self.remote_rsc_added == 1:
             pats.append(self.templates["Pat:RscOpOK"] % (self.remote_rsc, "stop"))
         if self.remote_node_added == 1:
             pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "stop"))
 
         self.set_timer("remoteMetalCleanup")
 
         if self.remote_use_reconnect_interval:
             self.debug("Cleaning up re-check interval")
             self.rsh(self.get_othernode(node), self.templates["ClearCheckInterval"])
 
         if self.remote_rsc_added == 1:
 
             # Remove dummy resource added for remote node tests
             self.debug("Cleaning up dummy rsc put on remote node")
             self.rsh(node, "crm_resource -U -r %s" % self.remote_rsc)
             self.del_rsc(node, self.remote_rsc)
 
         if self.remote_node_added == 1:
 
             # Remove remote node's connection resource
             self.debug("Cleaning up remote node connection resource")
             self.rsh(node, "crm_resource -U -r %s" % (self.remote_node))
             self.del_rsc(node, self.remote_node)
 
         watch.lookforall()
         self.log_timer("remoteMetalCleanup")
 
         if watch.unmatched:
             self.fail("Unmatched patterns: %s" % watch.unmatched)
 
         self.stop_pcmk_remote(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if self.remote_node_added == 1:
             # Remove remote node itself
             self.debug("Cleaning up node entry for remote node")
             self.rsh(self.get_othernode(node), "crm_node --force --remove %s" % self.remote_node)
 
     def setup_env(self, node):
         '''Choose the remote node name and make sure every node has an auth key.
 
         If /etc/pacemaker/authkey is missing on any node, a new key is
         generated locally and copied to all nodes.  An exception from the
         key generation (e.g. subprocess.CalledProcessError) still
         propagates, but the temporary key file is always removed.
         '''
 
         self.remote_node = "remote_%s" % (node)
 
         # we are assuming if all nodes have a key, that it is
         # the right key... If any node doesn't have a remote
         # key, we regenerate it everywhere.
         if self.rsh.exists_on_all("/etc/pacemaker/authkey", self.Env["nodes"]):
             return
 
         # create key locally
         (handle, keyfile) = tempfile.mkstemp(".cts")
         os.close(handle)
         try:
             devnull = open(os.devnull, 'wb')
             try:
                 subprocess.check_call(["dd", "if=/dev/urandom", "of=%s" % keyfile, "bs=4096", "count=1"],
                     stdout=devnull, stderr=devnull)
             finally:
                 devnull.close()
 
             # sync key throughout the cluster
             for target in self.Env["nodes"]:
                 self.rsh(target, "mkdir -p --mode=0750 /etc/pacemaker")
                 self.rsh.cp(keyfile, "root@%s:/etc/pacemaker/authkey" % target)
                 self.rsh(target, "chgrp haclient /etc/pacemaker /etc/pacemaker/authkey")
                 self.rsh(target, "chmod 0640 /etc/pacemaker/authkey")
         finally:
             # Never leave key material behind, even if something above raised
             os.unlink(keyfile)
 
     def is_applicable(self):
         '''Applicable only if the common checks pass and every node has pacemaker_remoted'''
         if not self.is_applicable_common():
             return False
 
         for node in self.Env["nodes"]:
             rc = self.rsh(node, "type pacemaker_remoted >/dev/null 2>&1")
             if rc != 0:
                 return False
         return True
 
     def start_new_test(self, node):
         '''Reset state, start all nodes and convert node into a remote node.
 
         Problems are recorded through self.fail(), so callers are expected
         to check self.failed afterwards; nothing useful is returned.
         '''
         self.incr("calls")
         self.reset()
 
         ret = self.startall(None)
         if not ret:
             # Callers ignore the return value, so the failure must be recorded
             # in self.failed: that makes the remaining steps no-ops and the
             # final self.failed check report it.
             self.fail("Setup failed, start all nodes failed.")
             return
 
         self.setup_env(node)
         self.start_metal(node)
         self.add_dummy_rsc(node)
 
     def __call__(self, node):
         '''Always fails: subclasses must supply the actual test'''
         return self.failure("This base class is not meant to be called directly.")
 
     def errorstoignore(self):
         '''Return list of errors which should be ignored'''
         return [ """is running on remote.*which isn't allowed""",
                  """Connection terminated""",
                  """Failed to send remote""",
                 ]
 
 # RemoteDriver is just a base class for other tests, so it is not added to AllTestClasses
 
 
 class RemoteBasic(RemoteDriver):
     '''Bring up a remote node, exercise node attributes on it, then clean up'''
 
     def __call__(self, node):
         '''Perform the 'RemoteBasic' test. '''
 
         for step in (self.start_new_test, self.test_attributes, self.cleanup_metal):
             step(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if not self.failed:
             return self.success()
         return self.failure(self.fail_string)
 
 AllTestClasses.append(RemoteBasic)
 
 class RemoteStonithd(RemoteDriver):
     '''Bring up a remote node, break its connection so it gets fenced, then clean up'''
 
     def __call__(self, node):
         '''Perform the 'RemoteStonithd' test. '''
 
         for step in (self.start_new_test, self.fail_connection, self.cleanup_metal):
             step(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if not self.failed:
             return self.success()
         return self.failure(self.fail_string)
 
     def is_applicable(self):
         '''Needs everything RemoteDriver needs; Env["DoFencing"], when present, decides'''
         if RemoteDriver.is_applicable(self):
             if "DoFencing" in self.Env.keys():
                 return self.Env["DoFencing"]
             return True
         return False
 
     def errorstoignore(self):
         '''Return the fencing-related errors to ignore plus those of RemoteDriver'''
         return [
             r"Unexpected disconnect on remote-node",
             r"crmd.*:\s+error.*: Operation remote_.*_monitor",
             r"pengine.*:\s+Recover remote_.*\s*\(.*\)",
             r"Calculated Transition .* /var/lib/pacemaker/pengine/pe-error",
             r"error.*: Resource .*ocf::.* is active on 2 nodes attempting recovery",
         ] + RemoteDriver.errorstoignore(self)
 
 AllTestClasses.append(RemoteStonithd)
 
 
 class RemoteMigrate(RemoteDriver):
     '''Bring up a remote node, migrate its connection resource, then clean up'''
 
     def __call__(self, node):
         '''Perform the 'RemoteMigrate' test. '''
 
         for step in (self.start_new_test, self.migrate_connection, self.cleanup_metal):
             step(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if not self.failed:
             return self.success()
         return self.failure(self.fail_string)
 
 AllTestClasses.append(RemoteMigrate)
 
 
 class RemoteRscFailure(RemoteDriver):
     '''Bring up a remote node, migrate its connection, fail a resource on it, then clean up'''
 
     def __call__(self, node):
         '''Perform the 'RemoteRscFailure' test. '''
 
         # The migration before the resource failure is an important step:
         # it verifies that the migration has properly maintained control
         # over the remote-node.
         steps = (
             self.start_new_test,
             self.migrate_connection,
             self.fail_rsc,
             self.cleanup_metal,
         )
         for step in steps:
             step(node)
 
         self.debug("Waiting for the cluster to recover")
         self.CM.cluster_stable()
 
         if not self.failed:
             return self.success()
         return self.failure(self.fail_string)
 
     def errorstoignore(self):
         '''Return the recovery message to ignore plus the errors RemoteDriver ignores'''
         return [
             r"pengine.*: Recover remote-rsc\s*\(.*\)",
         ] + RemoteDriver.errorstoignore(self)
 
 AllTestClasses.append(RemoteRscFailure)
 
 # vim:ts=4:sw=4:et:
diff --git a/cts/README.md b/cts/README.md
index 0486e9eac1..15ecdd0805 100644
--- a/cts/README.md
+++ b/cts/README.md
@@ -1,253 +1,272 @@
 # Pacemaker Cluster Test Suite (CTS)
 
 ## Purpose
 
 CTS thoroughly exercises a pacemaker test cluster by running a randomized
 series of predefined tests on the cluster. CTS can be run against a
 pre-existing cluster configuration or (more typically) overwrite the existing
 configuration with a test configuration.
 
 
 ## Requirements
 
 * Three or more machines (one test exerciser and two or more test cluster
   machines).
 
 * The test cluster machines should be on the same subnet and have journalling
   filesystems (ext3, ext4, xfs, etc.) for all of their filesystems other than
   /boot. You also need a number of free IP addresses on that subnet if you
   intend to test mutual IP address takeover.
 
 * The test exerciser machine doesn't need to be on the same subnet as the test
   cluster machines.  Minimal demands are made on the exerciser machine - it
   just has to stay up during the tests.
 
 * It helps a lot in tracking problems if all machines' clocks are closely
   synchronized. NTP does this automatically, but you can do it by hand if you
   want.
 
 * The exerciser needs to be able to ssh over to the cluster nodes as root
   without a password challenge. Configure ssh accordingly (see the Mini-HOWTO
   at the end of this document for more details).
 
 * The exerciser needs to be able to resolve the machine names of the
   test cluster - either by DNS or by /etc/hosts.
 
 * CTS is not guaranteed to run on all platforms that pacemaker itself does.
   It calls commands such as service that may not be provided by all OSes.
 	
 ## Preparation
 
 Install Pacemaker (including CTS) on all machines. These scripts are
 coordinated with particular versions of Pacemaker, so you need the same version
 of CTS as the rest of Pacemaker, and you need the same version of
 pacemaker and CTS on both the test exerciser and the test cluster machines.
 
 You can install CTS from source, although many distributions provide
 packages that include it (e.g. pacemaker-cts or pacemaker-dev).
 Typically, packages will install CTS as /usr/share/pacemaker/tests/cts.
 
 Configure cluster communications (Corosync, CMAN or Heartbeat) on the
 cluster machines and verify everything works.
 
 NOTE: Do not run the cluster on the test exerciser machine.
 
 NOTE: Wherever machine names are mentioned in these configuration files,
 they must match the machines' `uname -n` name.  This may or may not match
 the machines' FQDN (fully qualified domain name) - it depends on how
 you (and your OS) have named the machines.
 
 
 ## Run CTS
 
 Now assuming you did all this, what you need to do is run CTSlab.py:
 
     python ./CTSlab.py [options] number-of-tests-to-run
 
 You must specify which nodes are part of the cluster with --nodes, e.g.:
 
     --nodes "pcmk-1 pcmk-2 pcmk-3"
 
 Most people will want to save the output with --outputfile, e.g.:
 
     --outputfile ~/cts.log
 
 Unless you want to test your pre-existing cluster configuration, you also want:
 
     --clobber-cib
     --populate-resources
     --test-ip-base $IP    # e.g. --test-ip-base 192.168.9.100
 
 and configure some sort of fencing:
 
     --stonith $TYPE  # e.g. "--stonith xvm" to use fence_xvm or "--stonith lha" to use external/ssh
 
 A complete command line might look like:
   
     python ./CTSlab.py --nodes "pcmk-1 pcmk-2 pcmk-3" --outputfile ~/cts.log \
         --clobber-cib --populate-resources --test-ip-base 192.168.9.100   \
         --stonith xvm 50
 
 For more options, use the --help option.
 
 NOTE: A perhaps more convenient way to compile a command line like the one
       above is to use the cluster_test script that, at least in the source
       repository, sits in the same directory as this very file.
 
 To extract the result of a particular test, run:
 
     crm_report -T $test
 
 
 ## Optional/advanced testing
 
 ### Memory testing
 
 Pacemaker and CTS have various options for testing memory management. On the
 cluster nodes, pacemaker components will use various environment variables to
 control these options. How these variables are set varies by OS, but usually
 they are set in the /etc/sysconfig/pacemaker or /etc/default/pacemaker file.
 
 Valgrind is a program for detecting memory management problems (such as
 use-after-free errors). If you have valgrind installed, you can enable it by
 setting the following environment variables on all cluster nodes:
 
     PCMK_valgrind_enabled=attrd,cib,crmd,lrmd,pengine,stonith-ng
     VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25
         --log-file=/var/lib/pacemaker/valgrind-%p
         --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions
         --gen-suppressions=all"
 
 and running CTS with these options:
 
     --valgrind-tests --valgrind-procs="attrd cib crmd lrmd pengine stonith-ng"
 
 These options should only be set while specifically testing memory management,
 because they may slow down the cluster significantly, and they will disable
 writes to the CIB. If desired, you can enable valgrind on a subset of pacemaker
 components rather than all of them as listed above.
 
 Valgrind will put a text file for each process in the location specified by
 valgrind's --log-file option. For explanations of the messages valgrind
 generates, see http://valgrind.org/docs/manual/mc-manual.html
 
 Separately, if you are using the GNU C library, the G_SLICE, MALLOC_PERTURB_,
 and MALLOC_CHECK_ environment variables can be set to affect the library's
 memory management functions.
 
 When using valgrind, G_SLICE should be set to "always-malloc", which helps
 valgrind track memory by always using the malloc() and free() routines
 directly. When not using valgrind, G_SLICE can be left unset, or set to
 "debug-blocks", which enables the C library to catch many memory errors
 but may impact performance.
 
 If the MALLOC_PERTURB_ environment variable is set to an 8-bit integer, the C
 library will initialize all newly allocated bytes of memory to the integer
 value, and will set all newly freed bytes of memory to the bitwise inverse of
 the integer value. This helps catch uses of uninitialized or freed memory
 blocks that might otherwise go unnoticed. Example:
 
     MALLOC_PERTURB_=221
 
 If the MALLOC_CHECK_ environment variable is set, the C library will check for
 certain heap corruption errors. The most useful value in testing is 3, which
 will cause the library to print a message to stderr and abort execution.
 Example:
 
     MALLOC_CHECK_=3
 
 Valgrind should be enabled for either all nodes or none, but the C library
 variables may be set differently on different nodes.
 
 
 ### Remote node testing
 
 If the pacemaker_remoted daemon is installed on all cluster nodes, CTS will
 enable remote node tests.
 
 The remote node tests choose a random node, stop the cluster on it, start
 pacemaker_remote on it, and add an ocf:pacemaker:remote resource to turn it
 into a remote node. When the test is done, CTS will turn the node back into
 a cluster node.
 
 To avoid conflicts, CTS will rename the node, prefixing the original node name
 with "remote_". For example, "pcmk-1" will become "remote_pcmk-1".
 
 The name change may require special stonith configuration, if the fence agent
 expects the node name to be the same as its hostname. A common approach is to
 specify the "remote_" names in pcmk_host_list. If you use pcmk_host_list=all,
 CTS will expand that to all cluster nodes and their "remote_" names.
 You may additionally need a pcmk_host_map argument to map the "remote_" names
 to the hostnames. Example:
 
     --stonith xvm --stonith-args \
     pcmk_arg_map=domain:uname,pcmk_host_list=all,pcmk_host_map=remote_pcmk-1:pcmk-1;remote_pcmk-2:pcmk-2
 
 ### Remote node testing with valgrind
 
 When running the remote node tests, the pacemaker components on the cluster
 nodes can be run under valgrind as described in the "Memory testing" section.
 However, pacemaker_remote cannot be run under valgrind that way, because it is
 started by the OS's regular boot system and not by pacemaker.
 
 Details vary by system, but the goal is to set the VALGRIND_OPTS environment
 variable and then start pacemaker_remoted by prefixing it with the path to
 valgrind.
 
 The init script and systemd service file provided with pacemaker_remote will
 load the pacemaker environment variables from the same location used by other
 pacemaker components, so VALGRIND_OPTS will be set correctly if using one of
 those.
 
 For an OS using systemd, you can override the ExecStart parameter to run
 valgrind. For example:
 
     mkdir /etc/systemd/system/pacemaker_remote.service.d
     cat >/etc/systemd/system/pacemaker_remote.service.d/valgrind.conf <<EOF
     [Service]
     ExecStart=
     ExecStart=/usr/bin/valgrind /usr/sbin/pacemaker_remoted
     EOF
 
+### Container testing
+
+If the --container-tests option is given to CTS, it will enable
+testing of LXC resources (currently only the RemoteLXC test,
+which starts a remote node using an LXC container).
+
+The container tests have additional package dependencies (see the toplevel
+README). Also, SELinux must be enabled (in either permissive or enforcing mode),
+libvirtd must be enabled and running, and root must be able to ssh without a
+password between all cluster nodes (not just from the test machine). Before
+running the tests, you can verify your environment with:
+
+    /usr/share/pacemaker/tests/cts/lxc_autogen.sh -v
+
+LXC tests will create two containers with hardcoded parameters: a NAT'ed bridge
+named virbr0 using the IP network 192.168.123.0/24 will be created on the
+cluster node hosting the containers; the host will be assigned
+52:54:00:A8:12:35 as the MAC address and 192.168.123.1 as the IP address.
+Each container will be assigned a random MAC address starting with 52:54:,
+the IP address 192.168.123.11 or 192.168.123.12, the hostname lxc1 or lxc2
+(which will be added to the host's /etc/hosts file), and 196MB RAM.
+
+The test will revert all of the configuration when it is done.
+
 
 ## Mini-HOWTOs
 
 ### Allow passwordless remote SSH connections
 
 The CTS scripts run "ssh -l root" so you don't have to do any of your testing
 logged in as root on the test machine. Here is how to allow such connections
 without requiring a password to be entered each time:
 
 * On your test exerciser, create an SSH key if you do not already have one.
   Most commonly, SSH keys will be in your ~/.ssh directory, with the
   private key file not having an extension, and the public key file
-  named the same with the extension ".pub" (for example, ~/.ssh/id_dsa.pub).
+  named the same with the extension ".pub" (for example, ~/.ssh/id_rsa.pub).
 
   If you don't already have a key, you can create one with:
 
-      ssh-keygen -t dsa
+      ssh-keygen -t rsa
 
 * From your test exerciser, authorize your SSH public key for root on all test
   machines (both the exerciser and the cluster test machines):
 
-      ssh-copy-id -i ~/.ssh/id_dsa.pub root@$MACHINE
+      ssh-copy-id -i ~/.ssh/id_rsa.pub root@$MACHINE
 
   You will probably have to provide your password, and possibly say
   "yes" to some questions about accepting the identity of the test machines.
 
-  The above assumes you have a DSA SSH key in the specified location;
-  if you have some other type of key (RSA, ECDSA, etc.), use its file name
+  The above assumes you have an RSA SSH key in the specified location;
+  if you have some other type of key (DSA, ECDSA, etc.), use its file name
   in the -i option above.
 
-  If you have an old version of SSH that doesn't have ssh-copy-id,
-  you can take the single line out of your public key file
-  (e.g. ~/.ssh/identity.pub or ~/.ssh/id_dsa.pub) and manually add it to
-  root's ~/.ssh/authorized_keys file on each test machine.
-
 * To test, try this command from the exerciser machine for each
   of your cluster machines, and for the exerciser machine itself.
 
       ssh -l root $MACHINE
 
   If this works without prompting for a password, you're in business.
   If not, look at the documentation for your version of ssh.
diff --git a/cts/lxc_autogen.sh.in b/cts/lxc_autogen.sh.in
index e11532b33c..d06ba2ddb5 100755
--- a/cts/lxc_autogen.sh.in
+++ b/cts/lxc_autogen.sh.in
@@ -1,409 +1,424 @@
 #!/bin/bash
 
 containers="2"
 download=0
 share_configs=0
 # different than default libvirt network in case this is run nested in a KVM instance
 addr="192.168.123.1"
 restore=0
 restore_pcmk=0
 restore_all=0
 generate=0
 key_gen=0
 cib=0
 anywhere=0
 add_master=0
 verify=0
 working_dir="@CRM_CONFIG_CTS@/lxc"
 curdir=$(pwd)
+run_dirs="/run /var/run /usr/var/run"
 
 function helptext() {
 	echo "lxc_autogen.sh - A tool for generating libvirt lxc containers for testing purposes."
 	echo ""
 	echo "Usage: lxc-autogen [options]"
 	echo ""
 	echo "Options:"
 	echo "-g, --generate         Generate libvirt lxc environment in the directory this script is run from."
 	echo "-k, --key-gen          Generate local pacemaker remote key only."
 	echo "-r, --restore-libvirt  Restore the default network, and libvirt config to before this script ran."
 	echo "-p, --restore-cib      Remove cib entries this script generated."
 	echo "-R, --restore-all      Restore both libvirt and cib plus clean working directory. This will leave libvirt xml files though so rsc can be stopped properly."
 	echo ""
 	echo "-A, --allow-anywhere   Allow the containers to live anywhere in the cluster"
 	echo "-a, --add-cib          Add remote-node entries for each lxc instance into the cib"
 	echo "-m, --add-master       Add master resource shared between remote-nodes"
 	echo "-d, --download-agent   Download and install the latest VirtualDomain agent."
 	echo "-s, --share-configs    Copy container configs to all other known cluster nodes, (crm_node -l)"
 	echo "-c, --containers       Specify the number of containers to generate, defaults to $containers. Used with -g"
 	echo "-n, --network          What network to override default libvirt network to. Example: -n 192.168.123.1. Used with -g"
 	echo "-v, --verify           Verify environment is capable of running lxc"
 	echo ""
 	exit $1
 }
 
 while true ; do
 	case "$1" in
 	--help|-h|-\?) helptext 0;;
 	-c|--containers) containers="$2"; shift; shift;;
 	-d|--download-agent) download=1; shift;;
 	-s|--share-configs) share_configs=1; shift;;
 	-n|--network) addr="$2"; shift; shift;;
 	-r|--restore-libvirt) restore=1; shift;;
 	-p|--restore-cib) restore_pcmk=1; shift;;
 	-R|--restore-all)
 		restore_all=1
 		restore=1
 		restore_pcmk=1
 		shift;;
 	-g|--generate) generate=1; shift;;
 	-k|--key-gen) key_gen=1; shift;;
 	-a|--add-cib) cib=1; shift;;
 	-A|--allow-anywhere) anywhere=1; shift;;
 	-m|--add-master) add_master=1; shift;;
 	-v|--verify) verify=1; shift;;
 	"") break;;
 	*) helptext 1;;
 	esac
 done
 
 if [ $verify -eq 1 ]; then
 	# verify virsh tool is available and that 
 	# we can connect to lxc driver.
 	virsh -c lxc:/// list --all > /dev/null 2>&1
 	if [ $? -ne 0 ]; then
 		echo "Could not connect 'virsh -c lxc:///' check that libvirt lxc driver is installed"
 		# yum install -y libvirt-daemon-driver-lxc libvirt-daemon-lxc libvirt-login-shell
 		exit 1
 	fi
 
 
 	cat /etc/selinux/config  | grep -e "SELINUX.*=.*permissive" -e "SELINUX.*=.*enforcing" > /dev/null 2>&1
 	if [ $? -ne 0 ]; then
 		echo "/etc/selinux/config must have SELINUX set to permissive or enforcing mode."
 		exit 1
 	fi
 
 	ps x > /tmp/lxc-autogen-libvirt-test.txt
 	grep "libvirtd" /tmp/lxc-autogen-libvirt-test.txt
 	if [ $? -ne 0 ]; then
 		rm -f /tmp/lxc-autogen-libvirt-test.txt
 		echo "libvirtd isn't up."
 		exit 1
 	fi
 	rm -f /tmp/lxc-autogen-libvirt-test.txt
 
 	which rsync > /dev/null 2>&1
 	if [ $? -ne 0 ]; then
 		echo "rsync is required"
 	fi
 
 	which pacemaker_remoted > /dev/null 2>&1
 	if [ $? -ne 0 ]; then
 		echo "pacemaker_remoted is required"
 	fi
 fi
 
 #strip last digits off addr
 addr=$(echo $addr | awk -F. '{print $1"."$2"."$3}')
 
 set_network()
 {
 	rm -f cur_network.xml
 	cat << END >> cur_network.xml
 <network>
   <name>default</name>
   <uuid>41ebdb84-7134-1111-a136-91f0f1119225</uuid>
   <forward mode='nat'/>
   <bridge name='virbr0' stp='on' delay='0' />
   <mac address='52:54:00:A8:12:35'/>
   <ip address='$addr.1' netmask='255.255.255.0'>
     <dhcp>
       <range start='$addr.2' end='$addr.254' />
     </dhcp>
   </ip>
 </network>
 END
 
-	ls restore_default.xml > /dev/null 2>&1
-	if [ $? -ne 0 ]; then
-		virsh net-dumpxml default > restore_default.xml
+	virsh net-info default >/dev/null 2>&1
+	if [ $? -eq 0 ]; then
+		if [ ! -f restore_default.xml ]; then
+			virsh net-dumpxml default > restore_default.xml
+		fi
+		virsh net-destroy default
+		virsh net-undefine default
 	fi
-	virsh net-destroy default
-	virsh net-undefine default
 	virsh net-define cur_network.xml
 	virsh net-start default
 	virsh net-autostart default
 }
 
 generate_key()
 {
 	#generate pacemaker remote key
 	ls /etc/pacemaker/authkey > /dev/null 2>&1
 	if [ $? != 0 ]; then
 			mkdir -p /etc/pacemaker
 			dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1
 	fi
 }
 
 generate()
 {
 	set_network
 
 	# Generate libvirt domains in xml
 	for (( c=1; c <= $containers; c++ ))
 	do
 		rm -rf lxc$c-filesystem
-		mkdir -p lxc$c-filesystem/var/run/
-		mkdir -p lxc$c-filesystem/usr/var/run
+		for dir in $run_dirs; do
+			mkdir -p lxc$c-filesystem/$dir
+		done
 		rm -f lxc$c.xml
 
 		suffix=$((10 + $c))
 		prefix=$(echo $addr | awk -F. '{print $1"."$2}')
 		subnet=$(echo $addr | awk -F. '{print $3}')
 		while [ $suffix -gt 255 ]; do
 		    subnet=$(($subnet + 1))
 		    suffix=$(($subnet - 255))
 		done
 
 		cat << END >> lxc$c.xml
 <domain type='lxc'>
   <name>lxc$c</name>
   <memory unit='KiB'>200704</memory>
   <os>
     <type>exe</type>
     <init>$working_dir/lxc$c-filesystem/launch-helper</init>
   </os>
   <devices>
     <console type='pty'/>
     <filesystem type='ram'>
         <source usage='150528'/>
         <target dir='/dev/shm'/>
     </filesystem>
+END
+		for dir in $run_dirs; do
+			cat << END >> lxc$c.xml
     <filesystem type='mount'>
-      <source dir='$working_dir/lxc$c-filesystem/var/run'/>
-      <target dir='/var/run'/>
-    </filesystem>
-    <filesystem type='mount'>
-      <source dir='$working_dir/lxc$c-filesystem/usr/var/run'/>
-      <target dir='/usr/var/run'/>
+      <source dir='$working_dir/lxc$c-filesystem/$dir'/>
+      <target dir='$dir'/>
     </filesystem>
+END
+		done
+		cat << END >> lxc$c.xml
     <interface type='network'>
       <mac address='52:54:$(($RANDOM % 9))$(($RANDOM % 9)):$(($RANDOM % 9))$(($RANDOM % 9)):$(($RANDOM % 9))$(($RANDOM % 9)):$(($RANDOM % 9))$(($RANDOM % 9))'/>
       <source network='default'/>
     </interface>
   </devices>
 </domain>
 END
 		rm -f container$c.cib
 		cat << END >> container$c.cib
       <primitive class="ocf" id="container$c" provider="heartbeat" type="VirtualDomain">
         <instance_attributes id="container$c-instance_attributes">
           <nvpair id="container$c-instance_attributes-force_stop" name="force_stop" value="true"/>
           <nvpair id="container$c-instance_attributes-hypervisor" name="hypervisor" value="lxc:///"/>
           <nvpair id="container$c-instance_attributes-config" name="config" value="$working_dir/lxc$c.xml"/>
         </instance_attributes>
         <utilization id="container$c-utilization">
           <nvpair id="container$c-utilization-cpu" name="cpu" value="1"/>
           <nvpair id="container$c-utilization-hv_memory" name="hv_memory" value="100"/>
         </utilization>
         <meta_attributes id="container$c-meta_attributes">
           <nvpair id="container$c-meta_attributes-remote-node" name="remote-node" value="lxc$c"/>
         </meta_attributes>
       </primitive>
 END
 
 		rm -f lxc$c-filesystem/launch-helper
 		cat << END >> lxc$c-filesystem/launch-helper
 #!/bin/bash
 ip -f inet addr add $prefix.$subnet.$suffix/24 dev eth0
-route add 0.0.0.0 gw $addr.1 eth0
+ip link set eth0 up
+ip route add default via $addr.1
 hostname lxc$c
 df > $working_dir/lxc$c-filesystem/disk_usage.txt
 export PCMK_debugfile=/var/log/pacemaker_remote_lxc$c.log
 /usr/sbin/pacemaker_remoted
 END
 		chmod 711 lxc$c-filesystem/launch-helper
 
 		cat << END >> /etc/hosts
 $prefix.$subnet.$suffix     lxc$c
 END
 	done
 
 	rm -f lxc-ms.cib
 	cat << END >> lxc-ms.cib
       <master id="lxc-ms-master">
         <primitive class="ocf" id="lxc-ms" provider="pacemaker" type="Stateful">
           <instance_attributes id="lxc-ms-instance_attributes"/>
           <operations>
             <op id="lxc-ms-monitor-interval-10s" interval="10s" name="monitor"/>
           </operations>
         </primitive>
         <meta_attributes id="lxc-ms-meta_attributes">
           <nvpair id="lxc-ms-meta_attributes-master-max" name="master-max" value="1"/>
           <nvpair id="lxc-ms-meta_attributes-clone-max" name="clone-max" value="$containers"/>
         </meta_attributes>
       </master>
 END
 
 }
 
 apply_cib_master()
 {
 	cibadmin -Q > cur.cib
 	export CIB_file=cur.cib
 
 	cibadmin -o resources -Mc -x lxc-ms.cib
 	for tmp in $(ls lxc*.xml); do
 		tmp=$(echo $tmp | sed -e 's/\.xml//g')
 		echo "<rsc_location id=\"lxc-ms-location-${tmp}\" node=\"${tmp}\" rsc=\"lxc-ms-master\" score=\"INFINITY\"/>" > tmp_constraint
 		cibadmin -o constraints -Mc -x tmp_constraint
 	done
 	# Make sure the version changes even if the content doesn't
 	cibadmin -B
 	unset CIB_file
 
-	cibadmin --replace --xml-file cur.cib
+	cibadmin --replace -o configuration --xml-file cur.cib
 	rm -f cur.cib
 }
 
 apply_cib_entries()
 {
 	node=$(crm_node -n)
 
 	cibadmin -Q > cur.cib
 	export CIB_file=cur.cib
 	for tmp in $(ls container*.cib); do
 		cibadmin -o resources -Mc -x $tmp
 
 		remote_node=$(cat ${tmp} | grep remote-node | sed -n -e 's/^.*value=\"\(.*\)\".*/\1/p')
 		if [ $anywhere -eq 0 ]; then
 			tmp=$(echo $tmp | sed -e 's/\.cib//g')
 			crm_resource -M -r $tmp -H $node
 		fi
 		echo "<rsc_location id=\"lxc-ping-location-${remote_node}\" node=\"${remote_node}\" rsc=\"Connectivity\" score=\"-INFINITY\"/>" > tmp_constraint
 		# it's fine if applying this constraint fails. it's just to help with cts
 		# when the connectivity resources are in use. those resources fail the remote-nodes.
 		cibadmin -o constraints -Mc -x tmp_constraint > /dev/null 2>&1
 
 		for rsc in $(crm_resource -l | grep rsc_ ); do
 			echo "<rsc_location id=\"lxc-${rsc}-location-${remote_node}\" node=\"${remote_node}\" rsc=\"${rsc}\" score=\"-INFINITY\"/>" > tmp_constraint
 			cibadmin -o constraints -Mc -x tmp_constraint > /dev/null 2>&1
 		done
 
 		rm -f tmp_constraint
 	done
 
 	# Make sure the version changes even if the content doesn't
 	cibadmin -B
 
 	unset CIB_file
 
-	cibadmin --replace --xml-file cur.cib
+	cibadmin --replace -o configuration --xml-file cur.cib
 	rm -f cur.cib
 }
 
 restore_cib()
 {
 	node=$(crm_node -n)
 	cibadmin -Q > cur.cib
 	export CIB_file=cur.cib
 
 	for tmp in $(ls lxc*.xml); do
 		tmp=$(echo $tmp | sed -e 's/\.xml//g')
 		echo "<rsc_location id=\"lxc-ms-location-${tmp}\" node=\"${tmp}\" rsc=\"lxc-ms-master\" score=\"INFINITY\"/>" > tmp_constraint
 		cibadmin -o constraints -D -x tmp_constraint
 		echo "<rsc_location id=\"lxc-ping-location-${tmp}\" node=\"${tmp}\" rsc=\"Connectivity\" score=\"-INFINITY\"/>" > tmp_constraint
 		cibadmin -o constraints -D -x tmp_constraint
 
 		for rsc in $(crm_resource -l | grep rsc_ ); do
 			echo "<rsc_location id=\"lxc-${rsc}-location-${tmp}\" node=\"${tmp}\" rsc=\"${rsc}\" score=\"-INFINITY\"/>" > tmp_constraint
 			cibadmin -o constraints -D -x tmp_constraint
 		done
 		rm -f tmp_constraint
 	done
 	cibadmin -o resources -D -x lxc-ms.cib
 
 	for tmp in $(ls container*.cib); do
 		tmp=$(echo $tmp | sed -e 's/\.cib//g')
 		crm_resource -U -r $tmp -H $node
 		crm_resource -D -r $tmp -t primitive
 	done
 	# Make sure the version changes even if the content doesn't
 	cibadmin -B
 	unset CIB_file
 
-	cibadmin --replace --xml-file cur.cib
+	cibadmin --replace -o configuration --xml-file cur.cib
 	rm -f  cur.cib 
+
+	# Allow the cluster to stabilize before continuing
+	crm_resource --wait
+
+	# Purge nodes from caches and CIB status section
+	for tmp in $(ls lxc*.xml); do
+		tmp=$(echo $tmp | sed -e 's/\.xml//g')
+		crm_node --force --remove $tmp
+	done
 }
 
 restore_libvirt()
 {
 	for tmp in $(ls lxc*.xml); do
 		tmp=$(echo $tmp | sed -e 's/\.xml//g')
 		virsh -c lxc:/// destroy $tmp > /dev/null 2>&1
 		virsh -c lxc:/// undefine $tmp > /dev/null 2>&1
 
 		sed -i.bak "/...\....\....\..* ${tmp}/d" /etc/hosts
 		echo "$tmp destroyed"
 	done
 
-	ls restore_default.xml > /dev/null 2>&1
-	if [ $? -eq 0 ]; then
-		virsh net-destroy default > /dev/null 2>&1
-		virsh net-undefine default > /dev/null 2>&1
+	virsh net-destroy default > /dev/null 2>&1
+	virsh net-undefine default > /dev/null 2>&1
+	if [ -f restore_default.xml ]; then
 		virsh net-define restore_default.xml
 		virsh net-start default
 		if [ $? -eq 0 ]; then
 			echo "default network restored"
 		fi
 	fi
 	rm -f restore_default.xml > /dev/null 2>&1 
 }
 
 distribute_configs()
 {
     nodes=`crm_node -l | awk '{print $2}'`
     for node in $nodes; do
 		ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 -o BatchMode=yes -l root $node mkdir -p /$working_dir
 		rsync -ave 'ssh -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o StrictHostKeyChecking=no' $working_dir/lxc*.xml $node:/$working_dir
 		rsync -ave 'ssh -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o StrictHostKeyChecking=no' $working_dir/lxc*-filesystem $node:/$working_dir
     done
 }
 
 mkdir -p $working_dir
 cd $working_dir
 
 if [ $download -eq 1 ]; then
 	wget https://raw.github.com/ClusterLabs/resource-agents/master/heartbeat/VirtualDomain
 	chmod 755 VirtualDomain
 	mv -f VirtualDomain /usr/lib/ocf/resource.d/heartbeat/VirtualDomain
 fi
 if [ $restore_pcmk -eq 1 ]; then
 	restore_cib
 fi
 if [ $restore -eq 1 ]; then
 	restore_libvirt
 fi
 if [ $key_gen -eq 1 ]; then
 	generate_key
 fi
 if [ $generate -eq 1 ]; then
 	if [ $key_gen -eq 0 ]; then
 		generate_key
 	fi
 	generate
 fi
 if [ $cib -eq 1 ]; then
 	apply_cib_entries
 fi
 if [ $add_master -eq 1 ]; then
 	apply_cib_master
 fi
 if [ $share_configs -eq 1 ]; then
 	distribute_configs
 fi
 if [ $restore_all -eq 1 ]; then
 	ls | grep -v "lxc.\.xml" | xargs rm -rf
 fi
 
 cd $curdir
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.txt b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.txt
index 6b25b36105..2f85501b14 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.txt
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.txt
@@ -1,153 +1,153 @@
 = Configure STONITH =
 
 == What is STONITH? ==
 
 STONITH (Shoot The Other Node In The Head aka. fencing) protects your data from
 being corrupted by rogue nodes or unintended concurrent access.
 
 Just because a node is unresponsive doesn't mean it has stopped
 accessing your data. The only way to be 100% sure that your data is
 safe, is to use STONITH to ensure that the node is truly
 offline before allowing the data to be accessed from another node.
 
 STONITH also has a role to play in the event that a clustered service
 cannot be stopped. In this case, the cluster uses STONITH to force the
 whole node offline, thereby making it safe to start the service
 elsewhere.
 
 == Choose a STONITH Device ==
 
 It is crucial that your STONITH device can allow the cluster to
 differentiate between a node failure and a network failure.
 
 A common mistake people make when choosing a STONITH device is to use a remote
 power switch (such as many on-board IPMI controllers) that shares power with
 the node it controls. If the power fails in such a case, the cluster cannot be
 sure whether the node is really offline, or active and suffering from a network
 fault, so the cluster will stop all resources to avoid a possible split-brain
 situation.
 
 Likewise, any device that relies on the machine being active (such as
 SSH-based "devices" sometimes used during testing) is inappropriate.
 
 == Configure the Cluster for STONITH ==
 
 . Install the STONITH agent(s). To see what packages are available, run `yum
   search fence-`. Be sure to install the package(s) on all cluster nodes.
 
 . Configure the STONITH device itself to be able to fence your nodes and accept
   fencing requests. This includes any necessary configuration on the device and
   on the nodes, and any firewall or SELinux changes needed. Test the
   communication between the device and your nodes.
 
 . Find the correct STONITH agent script: `pcs stonith list`
 
 . Find the parameters associated with the device: +pcs stonith describe pass:[<replaceable>agent_name</replaceable>]+
 
 . Create a local copy of the CIB: `pcs cluster cib stonith_cfg`
 
 . Create the fencing resource: +pcs -f stonith_cfg stonith create pass:[<replaceable>stonith_id
   stonith_device_type &#91;stonith_device_options&#93;</replaceable>]+
-  
-  If the any flags that do not take arguments, such as `--ssl` should be passed as `ssl=1`
++
+Any flags that do not take arguments, such as +--ssl+, should be passed as +ssl=1+.
 
 . Enable STONITH in the cluster: `pcs -f stonith_cfg property set stonith-enabled=true`
 
 . If the device does not know how to fence nodes based on their uname,
   you may also need to set the special *pcmk_host_map* parameter.  See
   `man stonithd` for details.
 
 . If the device does not support the *list* command, you may also need
   to set the special *pcmk_host_list* and/or *pcmk_host_check*
   parameters.  See `man stonithd` for details.
 
 . If the device does not expect the victim to be specified with the
   *port* parameter, you may also need to set the special
   *pcmk_host_argument* parameter. See `man stonithd` for details.
 
 . Commit the new configuration: `pcs cluster cib-push stonith_cfg`
 
 . Once the STONITH resource is running, test it (you might want to stop
   the cluster on that machine first): +stonith_admin --reboot pass:[<replaceable>nodename</replaceable>]+
 
 == Example ==
 
 For this example, assume we have a chassis containing four nodes
 and an IPMI device active on 10.0.0.1. Following the steps above
 would go something like this:
 
 Step 1: Install the *fence-agents-ipmilan* package on both nodes.
 
 Step 2: Configure the IP address, authentication credentials, etc. in the IPMI device itself.
 
 Step 3: Choose the *fence_ipmilan* STONITH agent.
 
 Step 4: Obtain the agent's possible parameters:
 ----
 [root@pcmk-1 ~]# pcs stonith describe fence_ipmilan
 Stonith options for: fence_ipmilan
   ipport: TCP/UDP port to use for connection with device
   inet6_only: Forces agent to use IPv6 addresses only
   ipaddr (required): IP Address or Hostname
   passwd_script: Script to retrieve password
   method: Method to fence (onoff|cycle)
   inet4_only: Forces agent to use IPv4 addresses only
   passwd: Login password or passphrase
   lanplus: Use Lanplus to improve security of connection
   auth: IPMI Lan Auth type.
   cipher: Ciphersuite to use (same as ipmitool -C parameter)
   privlvl: Privilege level on IPMI device
   action (required): Fencing Action
   login: Login Name
   verbose: Verbose mode
   debug: Write debug information to given file
   version: Display version information and exit
   help: Display help and exit
   power_wait: Wait X seconds after issuing ON/OFF
   login_timeout: Wait X seconds for cmd prompt after login
   power_timeout: Test X seconds for status change after ON/OFF
   delay: Wait X seconds before fencing is started
   ipmitool_path: Path to ipmitool binary
   shell_timeout: Wait X seconds for cmd prompt after issuing command
   retry_on: Count of attempts to retry power on
   sudo: Use sudo (without password) when calling 3rd party sotfware.
   stonith-timeout: How long to wait for the STONITH action (reboot, on, off) to complete per a stonith device.
   priority: The priority of the stonith resource. Devices are tried in order of highest priority to lowest.
   pcmk_host_map: A mapping of host names to ports numbers for devices that do not support host names.
   pcmk_host_list: A list of machines controlled by this device (Optional unless pcmk_host_check=static-list).
   pcmk_host_check: How to determine which machines are controlled by the device.
 ----
 
 Step 5: `pcs cluster cib stonith_cfg`
 
 Step 6: Here are example parameters for creating our STONITH resource:
 ----
 [root@pcmk-1 ~]# pcs -f stonith_cfg stonith create ipmi-fencing fence_ipmilan \
       pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser \
       passwd=acd123 op monitor interval=60s
 [root@pcmk-1 ~]# pcs -f stonith_cfg stonith
  ipmi-fencing	(stonith:fence_ipmilan):	Stopped 
 ----
 
 Steps 7-10: Enable STONITH in the cluster:
 ----
 [root@pcmk-1 ~]# pcs -f stonith_cfg property set stonith-enabled=true
 [root@pcmk-1 ~]# pcs -f stonith_cfg property
 Cluster Properties:
  cluster-infrastructure: corosync
  cluster-name: mycluster
  dc-version: 1.1.12-a14efad
  have-watchdog: false
  stonith-enabled: true
 ----
 
 Step 11: `pcs cluster cib-push stonith_cfg`
 
 Step 12: Test:
 ----
 [root@pcmk-1 ~]# pcs cluster stop pcmk-2
 [root@pcmk-1 ~]# stonith_admin --reboot pcmk-2
 ----
 
 After a successful test, log in to any rebooted nodes, and start the cluster
 (with `pcs cluster start`).
diff --git a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt
index 80439e6a1e..7b49af0433 100644
--- a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt
+++ b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt
@@ -1,827 +1,835 @@
 = Cluster Resources =
 
 == What is a Cluster Resource? ==
 
 indexterm:[Resource]
 
 A resource is a service made highly available by a cluster.
 The simplest type of resource, a 'primitive' resource, is described
 in this chapter. More complex forms, such as groups and clones,
 are described in later chapters.
 
 Every primitive resource has a 'resource agent'. A resource agent is an
 external program that abstracts the service it provides and presents a
 consistent view to the cluster.
 
 This allows the cluster to be agnostic about the resources it manages.
 The cluster doesn't need to understand how the resource works because
 it relies on the resource agent to do the right thing when given a
 `start`, `stop` or `monitor` command. For this reason, it is crucial that
 resource agents are well-tested.
 
 Typically, resource agents come in the form of shell scripts. However,
 they can be written using any technology (such as C, Python or Perl)
 that the author is comfortable with.
 
 [[s-resource-supported]]
 == Resource Classes ==
 
 indexterm:[Resource,class]
 
 Pacemaker supports several classes of agents:
 
 * OCF
 * LSB
 * Upstart
 * Systemd
 * Service
 * Fencing
 * Nagios Plugins
 
 === Open Cluster Framework ===
 
 indexterm:[Resource,OCF]
 indexterm:[OCF,Resources]
 indexterm:[Open Cluster Framework,Resources]
 
 The OCF standard
 footnote:[See
 http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD
  -- at least as it relates to resource agents.  The Pacemaker implementation has
 been somewhat extended from the OCF specs, but none of those changes are
 incompatible with the original OCF specification.]
 is basically an extension of the Linux Standard Base conventions for
 init scripts to:
 
 * support parameters,
 * make them self-describing, and
 * make them extensible
 
 OCF specs have strict definitions of the exit codes that actions must return.
 footnote:[
 The resource-agents source code includes the `ocf-tester` script, which
 can be useful in this regard.
 ]
 
 The cluster follows these specifications exactly, and giving the wrong
 exit code will cause the cluster to behave in ways you will likely
 find puzzling and annoying.  In particular, the cluster needs to
 distinguish a completely stopped resource from one which is in some
 erroneous and indeterminate state.
 
 Parameters are passed to the resource agent as environment variables, with the
 special prefix +OCF_RESKEY_+.  So, a parameter which the user thinks
 of as +ip+ will be passed to the resource agent as +OCF_RESKEY_ip+.  The
 number and purpose of the parameters is left to the resource agent; however,
 the resource agent should use the `meta-data` command to advertise any that it
 supports.
 
 The OCF class is the most preferred as it is an industry standard,
 highly flexible (allowing parameters to be passed to agents in a
 non-positional manner) and self-describing.
 
 For more information, see the
 http://www.linux-ha.org/wiki/OCF_Resource_Agents[reference] and
 <<ap-ocf>>.
 
 === Linux Standard Base ===
 indexterm:[Resource,LSB]
 indexterm:[LSB,Resources]
 indexterm:[Linux Standard Base,Resources]
 
 LSB resource agents are those found in +/etc/init.d+.
 
 Generally, they are provided by the OS distribution and, in order to be used
 with the cluster, they must conform to the LSB Spec.
 footnote:[
 See
 http://refspecs.linux-foundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
 for the LSB Spec as it relates to init scripts.
 ]
 
 [WARNING]
 ====
 Many distributions claim LSB compliance but ship with broken init
 scripts.  For details on how to check whether your init script is
 LSB-compatible, see <<ap-lsb>>. Common problematic violations of
 the LSB standard include:
 
 * Not implementing the status operation at all
 * Not observing the correct exit status codes for `start/stop/status` actions
 * Starting a started resource returns an error
 * Stopping a stopped resource returns an error
 ====
 
 [IMPORTANT]
 ====
 Remember to make sure the computer is _not_ configured to start any
 services at boot time -- that should be controlled by the cluster.
 ====
 
 === Systemd ===
 indexterm:[Resource,Systemd]
 indexterm:[Systemd,Resources]
 
 Some newer distributions have replaced the old
 http://en.wikipedia.org/wiki/Init#SysV-style["SysV"] style of
 initialization daemons and scripts with an alternative called
 http://www.freedesktop.org/wiki/Software/systemd[Systemd].
 
 Pacemaker is able to manage these services _if they are present_.
 
 Instead of init scripts, systemd has 'unit files'.  Generally, the
 services (unit files) are provided by the OS distribution, but there
 are online guides for converting from init scripts.
 footnote:[For example,
 http://0pointer.de/blog/projects/systemd-for-admins-3.html]
 
 [IMPORTANT]
 ====
 Remember to make sure the computer is _not_ configured to start any
 services at boot time -- that should be controlled by the cluster.
 ====
 
 === Upstart ===
 indexterm:[Resource,Upstart]
 indexterm:[Upstart,Resources]
 
 Some newer distributions have replaced the old
 http://en.wikipedia.org/wiki/Init#SysV-style["SysV"] style of
 initialization daemons (and scripts) with an alternative called
 http://upstart.ubuntu.com/[Upstart].
 
 Pacemaker is able to manage these services _if they are present_.
 
 Instead of init scripts, upstart has 'jobs'.  Generally, the
 services (jobs) are provided by the OS distribution.
 
 [IMPORTANT]
 ====
 Remember to make sure the computer is _not_ configured to start any
 services at boot time -- that should be controlled by the cluster.
 ====
 
 === System Services ===
 indexterm:[Resource,System Services]
 indexterm:[System Service,Resources]
 
 Since there are various types of system services (+systemd+,
 +upstart+, and +lsb+), Pacemaker supports a special +service+ alias which
 intelligently figures out which one applies to a given cluster node.
 
 This is particularly useful when the cluster contains a mix of
 +systemd+, +upstart+, and +lsb+.
 
 In order, Pacemaker will try to find the named service as:
 
 . an LSB init script
 . a Systemd unit file
 . an Upstart job
 
 === STONITH ===
 indexterm:[Resource,STONITH]
 indexterm:[STONITH,Resources]
 
 The STONITH class is used exclusively for fencing-related resources.  This is
 discussed later in <<ch-stonith>>.
 
 === Nagios Plugins ===
 indexterm:[Resource,Nagios Plugins]
 indexterm:[Nagios Plugins,Resources]
 
 Nagios Plugins
 footnote:[The project has two independent forks, hosted at
 https://www.nagios-plugins.org/ and https://www.monitoring-plugins.org/. Output
 from both projects' plugins is similar, so plugins from either project can be
 used with pacemaker.]
 allow us to monitor services on remote hosts.
 
 Pacemaker is able to do remote monitoring with the plugins _if they are
 present_.
 
 A common use case is to configure them as resources belonging to a resource
 container (usually a virtual machine), and the container will be restarted
 if any of them has failed. Another use is to configure them as ordinary
 resources to be used for monitoring hosts or services via the network.
 
 The supported parameters are the same as the long options of the plugin.
 
 [[primitive-resource]]
 == Resource Properties ==
 
 These values tell the cluster which resource agent to use for the resource,
 where to find that resource agent and what standards it conforms to.
 
 .Properties of a Primitive Resource
 [width="95%",cols="1m,6<",options="header",align="center"]
 |=========================================================
 
 |Field
 |Description
 
 |id
 |Your name for the resource
  indexterm:[id,Resource]
  indexterm:[Resource,Property,id]
 
 |class
 
 |The standard the resource agent conforms to. Allowed values:
 +lsb+, +nagios+, +ocf+, +service+, +stonith+, +systemd+, +upstart+
  indexterm:[class,Resource]
  indexterm:[Resource,Property,class]
 
 |type
 |The name of the Resource Agent you wish to use. E.g. +IPaddr+ or +Filesystem+
  indexterm:[type,Resource]
  indexterm:[Resource,Property,type]
 
 |provider
 |The OCF spec allows multiple vendors to supply the same
  resource agent. To use the OCF resource agents supplied by
  the Heartbeat project, you would specify +heartbeat+ here.
  indexterm:[provider,Resource]
  indexterm:[Resource,Property,provider]
 
 |=========================================================
 
 The XML definition of a resource can be queried with the `crm_resource` tool.
 For example:
 
 ----
 # crm_resource --resource Email --query-xml
 ----
 
 might produce:
 
 .A system resource definition
 =====
 [source,XML]
 <primitive id="Email" class="service" type="exim"/>
 =====
 
 [NOTE]
 =====
 One of the main drawbacks of system service resources (LSB, systemd or
 Upstart) is that they do not allow any parameters!
 =====
 
 ////
 See https://tools.ietf.org/html/rfc5737 for choice of example IP address
 ////
 
 .An OCF resource definition
 =====
 [source,XML]
 -------
 <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat">
    <instance_attributes id="Public-IP-params">
       <nvpair id="Public-IP-ip" name="ip" value="192.0.2.2"/>
    </instance_attributes>
 </primitive>
 -------
 =====
 
 [[s-resource-options]]
 == Resource Options ==
 
 Resources have two types of options: 'meta-attributes' and 'instance attributes'.
 Meta-attributes apply to any type of resource, while instance attributes
 are specific to each resource agent.
 
 === Resource Meta-Attributes ===
 
 Meta-attributes are used by the cluster to decide how a resource should
 behave and can be easily set using the `--meta` option of the
 `crm_resource` command.
 
 .Meta-attributes of a Primitive Resource
 [width="95%",cols="2m,2,5<a",options="header",align="center"]
 |=========================================================
 
 |Field
 |Default
 |Description
 
 |priority
 |0
 |If not all resources can be active, the cluster will stop lower
 priority resources in order to keep higher priority ones active.
 indexterm:[priority,Resource Option]
 indexterm:[Resource,Option,priority]
 
 |target-role
 |started
 |What state should the cluster attempt to keep this resource in? Allowed values:
 
 * +stopped:+ Force the resource to be stopped
 * +started:+ Allow the resource to be started (In the case of
   <<s-resource-multistate,multi-state>> resources, they will not be promoted to
   master)
 * +master:+ Allow the resource to be started and, if appropriate, promoted
 indexterm:[target-role,Resource Option]
 indexterm:[Resource,Option,target-role]
 
 |is-managed
 |TRUE
 |Is the cluster allowed to start and stop the resource?  Allowed
  values: +true+, +false+
  indexterm:[is-managed,Resource Option]
  indexterm:[Resource,Option,is-managed]
 
 |resource-stickiness
 |value of +resource-stickiness+ in the +rsc_defaults+ section
 |How much does the resource prefer to stay where it is?
  indexterm:[resource-stickiness,Resource Option]
  indexterm:[Resource,Option,resource-stickiness]
 
 |requires
 |fencing (unless +stonith-enabled+ is +false+ or +class+ is
 +stonith+, in which case it defaults to quorum)
 |Conditions under which the resource can be started ('Since 1.1.8')
 Allowed values:
 
 * +nothing:+ can always be started
 * +quorum:+ The cluster can only start this resource if a majority of
   the configured nodes are active
 * +fencing:+ The cluster can only start this resource if a majority
   of the configured nodes are active _and_ any failed or unknown nodes
   have been powered off
 * +unfencing:+ The cluster can only start this resource if a majority
   of the configured nodes are active _and_ any failed or unknown nodes
   have been powered off _and_ only on nodes that have been 'unfenced'
 
 indexterm:[requires,Resource Option]
 indexterm:[Resource,Option,requires]
 
 |migration-threshold
 |INFINITY
 |How many failures may occur for this resource on a node, before this
  node is marked ineligible to host this resource. A value of INFINITY
  indicates that this feature is disabled.
  indexterm:[migration-threshold,Resource Option]
  indexterm:[Resource,Option,migration-threshold]
 
 |failure-timeout
 |0
 |How many seconds to wait before acting as if the failure had not
  occurred, and potentially allowing the resource back to the node on
  which it failed. A value of 0 indicates that this feature is disabled.
  indexterm:[failure-timeout,Resource Option]
  indexterm:[Resource,Option,failure-timeout]
 
 |multiple-active
 |stop_start
 |What should the cluster do if it ever finds the resource active on
  more than one node? Allowed values:
 
 * +block:+ mark the resource as unmanaged
 * +stop_only:+ stop all active instances and leave them that way
 * +stop_start:+ stop all active instances and start the resource in
   one location only
 
 indexterm:[multiple-active,Resource Option]
 indexterm:[Resource,Option,multiple-active]
 
 |remote-node
 |
 |The name of the remote-node this resource defines.  This both enables the
 resource as a remote-node and defines the unique name used to identify the
 remote-node. If no other parameters are set, this value will also be assumed as
 the hostname to connect to at the port specified by +remote-port+. +WARNING:+
 This value cannot overlap with any resource or node IDs. If not specified,
 this feature is disabled.
 
 |remote-port
 |3121
 |Port to use for the guest connection to pacemaker_remote
 
 |remote-addr
 |value of +remote-node+
 |The IP address or hostname to connect to if remote-node's name is not the
 hostname of the guest.
 
 |remote-connect-timeout
 |60s
 |How long before a pending guest connection will time out.
 
 |=========================================================
 
 [NOTE]
 ====
 Support for remote nodes was added in pacemaker 1.1.10. If you are using an
 earlier version, options related to remote nodes will not be available.
 ====
 
 As an example of setting resource options, if you performed the following
 commands on an LSB Email resource:
 
 -------
 # crm_resource --meta --resource Email --set-parameter priority --parameter-value 100
 # crm_resource -m -r Email -p multiple-active -v block
 -------
 
 the resulting resource definition might be:
 
 .An LSB resource with cluster options
 =====
 [source,XML]
 -------
 <primitive id="Email" class="lsb" type="exim">
   <meta_attributes id="Email-meta_attributes">
     <nvpair id="Email-meta_attributes-priority" name="priority" value="100"/>
     <nvpair id="Email-meta_attributes-multiple-active" name="multiple-active" value="block"/>
   </meta_attributes>
 </primitive>
 -------
 =====
 
 [[s-resource-defaults]]
 === Setting Global Defaults for Resource Meta-Attributes ===
 
 To set a default value for a resource option, add it to the
 +rsc_defaults+ section with `crm_attribute`. For example,
 
 ----
 # crm_attribute --type rsc_defaults --name is-managed --update false
 ----
 
 would prevent the cluster from starting or stopping any of the
 resources in the configuration (unless of course the individual
 resources were specifically enabled by having their +is-managed+ set to
 +true+).
 
 === Resource Instance Attributes ===
 
 The resource agents of some resource classes (lsb, systemd and upstart 'not' among them)
 can be given parameters which determine how they behave and which instance
 of a service they control.
 
 If your resource agent supports parameters, you can add them with the
 `crm_resource` command. For example,
 
 ----
 # crm_resource --resource Public-IP --set-parameter ip --parameter-value 192.0.2.2
 ----
 
 would create an entry in the resource like this:
 
 .An example OCF resource with instance attributes
 =====
 [source,XML]
 -------
 <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat">
    <instance_attributes id="params-public-ip">
       <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/>
    </instance_attributes>
 </primitive>
 -------
 =====
 
 For an OCF resource, the result would be an environment variable
 called +OCF_RESKEY_ip+ with a value of +192.0.2.2+.
 
 The list of instance attributes supported by an OCF resource agent can be
 found by calling the resource agent with the `meta-data` command.
 The output contains an XML description of all the supported
 attributes, their purpose and default values.
 
 .Displaying the metadata for the Dummy resource agent template
 =====
 ----
 # export OCF_ROOT=/usr/lib/ocf
 # $OCF_ROOT/resource.d/pacemaker/Dummy meta-data
 ----
 [source,XML]
 -------
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="Dummy" version="1.0">
 <version>1.0</version>
 
 <longdesc lang="en">
 This is a Dummy Resource Agent. It does absolutely nothing except 
 keep track of whether its running or not.
 Its purpose in life is for testing and to serve as a template for RA writers.
 
 NB: Please pay attention to the timeouts specified in the actions
 section below. They should be meaningful for the kind of resource
 the agent manages. They should be the minimum advised timeouts,
 but they shouldn't/cannot cover _all_ possible resource
 instances. So, try to be neither overly generous nor too stingy,
 but moderate. The minimum timeouts should never be below 10 seconds.
 </longdesc>
 <shortdesc lang="en">Example stateless resource agent</shortdesc>
 
 <parameters>
 <parameter name="state" unique="1">
 <longdesc lang="en">
 Location to store the resource state in.
 </longdesc>
 <shortdesc lang="en">State file</shortdesc>
-<content type="string" default="/var/run//Dummy-{OCF_RESOURCE_INSTANCE}.state" />
+<content type="string" default="/var/run//Dummy-default.state" />
 </parameter>
 
 <parameter name="fake" unique="0">
 <longdesc lang="en">
 Fake attribute that can be changed to cause a reload
 </longdesc>
 <shortdesc lang="en">Fake attribute that can be changed to cause a reload</shortdesc>
 <content type="string" default="dummy" />
 </parameter>
 
 <parameter name="op_sleep" unique="1">
 <longdesc lang="en">
 Number of seconds to sleep during operations.  This can be used to test how
 the cluster reacts to operation timeouts.
 </longdesc>
 <shortdesc lang="en">Operation sleep duration in seconds.</shortdesc>
 <content type="string" default="0" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"        timeout="20" />
 <action name="stop"         timeout="20" />
 <action name="monitor"      timeout="20" interval="10" depth="0"/>
 <action name="reload"       timeout="20" />
 <action name="migrate_to"   timeout="20" />
 <action name="migrate_from" timeout="20" />
 <action name="validate-all" timeout="20" />
 <action name="meta-data"    timeout="5" />
 </actions>
 </resource-agent>
 -------
 =====
 
 == Resource Operations ==
 
 indexterm:[Resource,Action]
 
 'Operations' are actions the cluster can perform on a resource by calling the
 resource agent. Resource agents must support certain common operations such as
 start, stop and monitor, and may implement any others.
 
 Some operations are generated by the cluster itself, for example, stopping and
 starting resources as needed.
 
 You can configure operations in the cluster configuration. As an example, by
 default the cluster will 'not' ensure your resources stay healthy once they are
 started. footnote:[Currently, anyway. Automatic monitoring operations may be
 added in a future version of Pacemaker.] To instruct the cluster to do this,
 you need to add a +monitor+ operation to the resource's definition.
 
 .An OCF resource with a recurring health check
 =====
 [source,XML]
 -------
 <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat">
   <operations>
      <op id="public-ip-check" name="monitor" interval="60s"/>
   </operations>
   <instance_attributes id="params-public-ip">
      <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/>
   </instance_attributes>
 </primitive>
 -------
 =====
 
 .Properties of an Operation
 [width="95%",cols="2m,3,6<a",options="header",align="center"]
 |=========================================================
 
 |Field
 |Default
 |Description
 
 |id
 |
 |A unique name for the operation.
  indexterm:[id,Action Property]
  indexterm:[Action,Property,id]
 
 |name
 |
 |The action to perform. This can be any action supported by the agent; common
  values include +monitor+, +start+, and +stop+.
  indexterm:[name,Action Property]
  indexterm:[Action,Property,name]
 
 |interval
 |0
 |How frequently (in seconds) to perform the operation. A value of 0 means never.
+ A positive value defines a 'recurring action', which is typically used with
+ <<s-resource-monitoring,monitor>>.
  indexterm:[interval,Action Property]
  indexterm:[Action,Property,interval]
 
 |timeout
 |
 |How long to wait before declaring the action has failed
  indexterm:[timeout,Action Property]
  indexterm:[Action,Property,timeout]
 
 |on-fail
 |restart '(except for stop operations, which default to' fence 'when
  STONITH is enabled and' block 'otherwise)'
 |The action to take if this action ever fails. Allowed values:
 
 * +ignore:+ Pretend the resource did not fail.
 * +block:+ Don't perform any further operations on the resource.
 * +stop:+ Stop the resource and do not start it elsewhere.
 * +restart:+ Stop the resource and start it again (possibly on a different node).
 * +fence:+ STONITH the node on which the resource failed.
 * +standby:+ Move _all_ resources away from the node on which the resource failed.
 
 indexterm:[on-fail,Action Property]
 indexterm:[Action,Property,on-fail]
 
 |enabled
 |TRUE
-|If +false+, the operation is treated as if it does not exist. Allowed
- values: +true+, +false+
+|If +false+, ignore this operation definition.  This is typically used to pause
+ a particular recurring monitor operation;  for instance, it can complement
+ the respective resource being unmanaged (+is-managed=false+), as this alone
+ will <<s-monitoring-unmanaged,not block any configured monitoring>>.
+ Disabling the operation does not suppress all actions of the given type.
+ Allowed values: +true+, +false+.
  indexterm:[enabled,Action Property]
  indexterm:[Action,Property,enabled]
 
 |record-pending
 |
 |If +true+, the intention to perform the operation is recorded so that
  GUIs and CLI tools can indicate that an operation is in progress.
  This is best set as an 'operation default' (see next section).
  Allowed values: +true+, +false+.
  indexterm:[record-pending,Action Property]
  indexterm:[Action,Property,record-pending]
 
 |role
 |
 |Run the operation only on node(s) that the cluster thinks should be in
  the specified role. This only makes sense for recurring monitor operations.
  Allowed (case-sensitive) values: +Stopped+, +Started+, and in the
  case of <<s-resource-multistate,multi-state>> resources, +Slave+ and +Master+.
  indexterm:[role,Action Property]
  indexterm:[Action,Property,role]
 
 |=========================================================
 
+[[s-resource-monitoring]]
 === Monitoring Resources for Failure ===
 
 When Pacemaker first starts a resource, it runs one-time monitor operations
 (referred to as 'probes') to ensure the resource is running where it's
 supposed to be, and not running where it's not supposed to be. (This behavior
 can be affected by the +resource-discovery+ location constraint property.)
 
 Other than those initial probes, Pacemaker will not (by default) check that
 the resource continues to stay healthy. As in the example above, you must
 configure monitor operations explicitly to perform these checks.
 
 By default, a monitor operation will ensure that the resource is running
 where it is supposed to. The +role+ property can be used for further
 checking.
 
 For example, if a resource has one monitor operation with
 +interval=10 role=Started+ and a second monitor operation with
 +interval=11 role=Stopped+, the cluster will run the first monitor on any nodes
 it thinks 'should' be running the resource, and the second monitor on any nodes
 that it thinks 'should not' be running the resource (for the truly paranoid,
 who want to know when an administrator manually starts a service by mistake).
 
+[[s-monitoring-unmanaged]]
 === Monitoring Resources When Administration is Disabled ===
 
 Recurring monitor operations behave differently under various administrative
 settings:
 
 * When a resource is unmanaged (by setting +is-managed=false+): No monitors
   will be stopped.
 +
 If the unmanaged resource is stopped on a node where the cluster thinks it
 should be running, the cluster will detect and report that it is not, but it
 will not consider the monitor failed, and will not try to start the resource
 until it is managed again.
 +
 Starting the unmanaged resource on a different node is strongly discouraged
 and will at least cause the cluster to consider the resource failed, and
 may require the resource's +target-role+ to be set to +Stopped+ then +Started+
 to be recovered.
 
 * When a node is put into standby: All resources will be moved away from the
   node, and all monitor operations will be stopped on the node, except those
   with +role=Stopped+. Monitor operations with +role=Stopped+ will be started
   on the node if appropriate.
 
 * When the cluster is put into maintenance mode: All resources will be marked
   as unmanaged. All monitor operations will be stopped, except those with
   +role=Stopped+. As with single unmanaged resources, starting a resource
   on a node other than where the cluster expects it to be will cause problems.
 
 [[s-operation-defaults]]
 === Setting Global Defaults for Operations ===
 
 You can change the global default values for operation properties
 in a given cluster. These are defined in an +op_defaults+ section 
 of the CIB's +configuration+ section, and can be set with `crm_attribute`.
 For example,
 
 ----
 # crm_attribute --type op_defaults --name timeout --update 20s
 ----
 
 would default each operation's +timeout+ to 20 seconds.  If an
 operation's definition also includes a value for +timeout+, then that
 value would be used for that operation instead.
 
 === When Implicit Operations Take a Long Time ===
 
 The cluster will always perform a number of implicit operations: +start+,
 +stop+ and a non-recurring +monitor+ operation used at startup to check
 whether the resource is already active.  If one of these is taking too long,
 then you can create an entry for them and specify a longer timeout.
 
 .An OCF resource with custom timeouts for its implicit actions
 =====
 [source,XML]
 -------
 <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat">
   <operations>
      <op id="public-ip-startup" name="monitor" interval="0" timeout="90s"/>
      <op id="public-ip-start" name="start" interval="0" timeout="180s"/>
      <op id="public-ip-stop" name="stop" interval="0" timeout="15min"/>
   </operations>
   <instance_attributes id="params-public-ip">
      <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/>
   </instance_attributes>
 </primitive>
 -------
 =====
 
 === Multiple Monitor Operations ===
 
 Provided no two operations (for a single resource) have the same name
 and interval, you can have as many monitor operations as you like.  In
 this way, you can do a superficial health check every minute and
 progressively more intense ones at higher intervals.
 
 To tell the resource agent what kind of check to perform, you need to
 provide each monitor with a different value for a common parameter.
 The OCF standard creates a special parameter called +OCF_CHECK_LEVEL+
 for this purpose and dictates that it is "made available to the
 resource agent without the normal +OCF_RESKEY+ prefix".
 
 Whatever name you choose, you can specify it by adding an
 +instance_attributes+ block to the +op+ tag. It is up to each
 resource agent to look for the parameter and decide how to use it.
 
 .An OCF resource with two recurring health checks, performing different levels of checks specified via +OCF_CHECK_LEVEL+.
 =====
 [source,XML]
 -------
 <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat">
    <operations>
       <op id="public-ip-health-60" name="monitor" interval="60">
          <instance_attributes id="params-public-ip-depth-60">
             <nvpair id="public-ip-depth-60" name="OCF_CHECK_LEVEL" value="10"/>
          </instance_attributes>
       </op>
       <op id="public-ip-health-300" name="monitor" interval="300">
          <instance_attributes id="params-public-ip-depth-300">
             <nvpair id="public-ip-depth-300" name="OCF_CHECK_LEVEL" value="20"/>
        </instance_attributes>
      </op>
    </operations>
    <instance_attributes id="params-public-ip">
        <nvpair id="public-ip-level" name="ip" value="192.0.2.2"/>
    </instance_attributes>
 </primitive>
 -------
 =====
 
 === Disabling a Monitor Operation ===
 
 The easiest way to stop a recurring monitor is to just delete it.
 However, there can be times when you only want to disable it
 temporarily.  In such cases, simply add +enabled="false"+ to the
 operation's definition.
 
 .Example of an OCF resource with a disabled health check
 =====
 [source,XML]
 -------
 <primitive id="Public-IP" class="ocf" type="IPaddr" provider="heartbeat">
    <operations>
       <op id="public-ip-check" name="monitor" interval="60s" enabled="false"/>
    </operations>
    <instance_attributes id="params-public-ip">
       <nvpair id="public-ip-addr" name="ip" value="192.0.2.2"/>
    </instance_attributes>
 </primitive>
 -------
 =====
 
 This can be achieved from the command line by executing:
 
 ----
 # cibadmin --modify --xml-text '<op id="public-ip-check" enabled="false"/>'
 ----
 
 Once you've done whatever you needed to do, you can then re-enable it with:
 ----
 # cibadmin --modify --xml-text '<op id="public-ip-check" enabled="true"/>'
 ----
diff --git a/extra/resources/Dummy b/extra/resources/Dummy
index 8a38ef5750..c24e7aa237 100644
--- a/extra/resources/Dummy
+++ b/extra/resources/Dummy
@@ -1,228 +1,228 @@
 #!/bin/sh
 #
 #
 #	Dummy OCF RA. Does nothing but wait a few seconds, can be
-#	configured to fail occassionally.
+#	configured to fail occasionally.
 #
 # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Br�e
 #                    All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
 . ${OCF_FUNCTIONS}
 : ${__OCF_ACTION=$1}
 
 #######################################################################
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="Dummy" version="1.0">
 <version>1.0</version>
 
 <longdesc lang="en">
 This is a Dummy Resource Agent. It does absolutely nothing except 
-keep track of whether its running or not.
+keep track of whether it's running or not.
 Its purpose in life is for testing and to serve as a template for RA writers.
 
 NB: Please pay attention to the timeouts specified in the actions
 section below. They should be meaningful for the kind of resource
 the agent manages. They should be the minimum advised timeouts,
 but they shouldn't/cannot cover _all_ possible resource
 instances. So, try to be neither overly generous nor too stingy,
 but moderate. The minimum timeouts should never be below 10 seconds.
 </longdesc>
 <shortdesc lang="en">Example stateless resource agent</shortdesc>
 
 <parameters>
 <parameter name="state" unique="1">
 <longdesc lang="en">
 Location to store the resource state in.
 </longdesc>
 <shortdesc lang="en">State file</shortdesc>
-<content type="string" default="${HA_VARRUN}/Dummy-{OCF_RESOURCE_INSTANCE}.state" />
+<content type="string" default="${HA_VARRUN}/Dummy-${OCF_RESOURCE_INSTANCE}.state" />
 </parameter>
 
 <parameter name="passwd" unique="1">
 <longdesc lang="en">
 Fake password field
 </longdesc>
 <shortdesc lang="en">Password</shortdesc>
 <content type="string" default="" />
 </parameter>
 
 <parameter name="fake" unique="0">
 <longdesc lang="en">
 Fake attribute that can be changed to cause a reload
 </longdesc>
 <shortdesc lang="en">Fake attribute that can be changed to cause a reload</shortdesc>
 <content type="string" default="dummy" />
 </parameter>
 
 <parameter name="op_sleep" unique="1">
 <longdesc lang="en">
 Number of seconds to sleep during operations.  This can be used to test how
 the cluster reacts to operation timeouts.
 </longdesc>
 <shortdesc lang="en">Operation sleep duration in seconds.</shortdesc>
 <content type="string" default="0" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"        timeout="20" />
 <action name="stop"         timeout="20" />
 <action name="monitor"      timeout="20" interval="10" depth="0"/>
 <action name="reload"       timeout="20" />
 <action name="migrate_to"   timeout="20" />
 <action name="migrate_from" timeout="20" />
 <action name="validate-all" timeout="20" />
 <action name="meta-data"    timeout="5" />
 </actions>
 </resource-agent>
 END
 }
 
 #######################################################################
 
 # don't exit on TERM, to test that lrmd makes sure that we do exit
 trap sigterm_handler TERM
 sigterm_handler() {
 	ocf_log info "They use TERM to bring us down. No such luck."
 	return
 }
 
 dummy_usage() {
 	cat <<END
-usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
+usage: $0 {start|stop|monitor|reload|migrate_to|migrate_from|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 dummy_start() {
     dummy_monitor
     if [ $? =  $OCF_SUCCESS ]; then
 	return $OCF_SUCCESS
     fi
     touch ${OCF_RESKEY_state}
 }
 
 dummy_stop() {
     dummy_monitor
     if [ $? =  $OCF_SUCCESS ]; then
 	rm ${OCF_RESKEY_state}
     fi
     rm -f ${VERIFY_SERIALIZED_FILE}
     return $OCF_SUCCESS
 }
 
 dummy_monitor() {
 	# Monitor _MUST!_ differentiate correctly between running
 	# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
 	# That is THREE states, not just yes/no.
 
 	if [ "$OCF_RESKEY_op_sleep" -ne "0" ]; then
 		if [ -f ${VERIFY_SERIALIZED_FILE} ]; then
 			# two monitor ops have occurred at the same time.
 			# this is to verify a condition in the lrmd regression tests.
 			ocf_log err "$VERIFY_SERIALIZED_FILE exists already"
 			return $OCF_ERR_GENERIC
 		fi
 
 		touch ${VERIFY_SERIALIZED_FILE}
 		sleep ${OCF_RESKEY_op_sleep}
 		rm ${VERIFY_SERIALIZED_FILE}
 	fi
 	
 	if [ -f ${OCF_RESKEY_state} ]; then
 		return $OCF_SUCCESS
 	fi
 	if false ; then
 		return $OCF_ERR_GENERIC
 	fi
 	return $OCF_NOT_RUNNING
 }
 
 dummy_validate() {
     
     # Is the state directory writable? 
     state_dir=`dirname "$OCF_RESKEY_state"`
     touch "$state_dir/$$"
     if [ $? != 0 ]; then
 	return $OCF_ERR_ARGS
     fi
     rm "$state_dir/$$"
 
     return $OCF_SUCCESS
 }
 
 : ${OCF_RESKEY_fake=dummy}
 : ${OCF_RESKEY_op_sleep=0}
 : ${OCF_RESKEY_CRM_meta_interval=0}
 : ${OCF_RESKEY_CRM_meta_globally_unique:="true"}
 
 if [ "x$OCF_RESKEY_state" = "x" ]; then
     if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
 	state="${HA_VARRUN}/Dummy-${OCF_RESOURCE_INSTANCE}.state"
 	
 	# Strip off the trailing clone marker
 	OCF_RESKEY_state=`echo $state | sed s/:[0-9][0-9]*\.state/.state/`
     else 
 	OCF_RESKEY_state="${HA_VARRUN}/Dummy-${OCF_RESOURCE_INSTANCE}.state"
     fi
 fi
 VERIFY_SERIALIZED_FILE="${OCF_RESKEY_state}.serialized"
 
 case $__OCF_ACTION in
 meta-data)	meta_data
 		exit $OCF_SUCCESS
 		;;
 start)		dummy_start;;
 stop)		dummy_stop;;
 monitor)	dummy_monitor;;
 migrate_to)	ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
 	        dummy_stop
 		;;
 migrate_from)	ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_source}."
 	        dummy_start
 		;;
 reload)		ocf_log err "Reloading..."
 	        dummy_start
 		;;
 validate-all)	dummy_validate;;
 usage|help)	dummy_usage
 		exit $OCF_SUCCESS
 		;;
 *)		dummy_usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 esac
 rc=$?
 ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
 exit $rc
 
diff --git a/extra/resources/HealthCPU b/extra/resources/HealthCPU
index 1ceaa01bbc..f48854fb04 100644
--- a/extra/resources/HealthCPU
+++ b/extra/resources/HealthCPU
@@ -1,222 +1,222 @@
 #!/bin/sh
 #
 #
 #	HealthCPU OCF RA. Measures CPUs idling and writes
 #	#health-cpu status into the CIB
 #
 # Copyright (c) 2009 Michael Schwartzkopff
 #	in collaboration with the Bull company. Merci!
 #
 #                    All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 ################################
 #
 #	TODO: Enter default values
 #		Error handling in getting uptime
 #
 ##################################
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
 . ${OCF_FUNCTIONS}
 : ${__OCF_ACTION=$1}
 
 #######################################################################
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="HealthCPU" version="0.1">
 <version>0.1</version>
 
 <longdesc lang="en">
-Systhem health agent that measures the CPU idling and updates the #health-cpu attribute.
+System health agent that measures the CPU idling and updates the #health-cpu attribute.
 </longdesc>
 <shortdesc lang="en">System health CPU usage</shortdesc>
 
 <parameters>
 <parameter name="state" unique="1">
 <longdesc lang="en">
 Location to store the resource state in.
 </longdesc>
 <shortdesc lang="en">State file</shortdesc>
-<content type="string" default="${HA_VARRUN}/health-cpu-{OCF_RESOURCE_INSTANCE}.state" />
+<content type="string" default="${HA_VARRUN}/health-cpu-${OCF_RESOURCE_INSTANCE}.state" />
 </parameter>
 
 <parameter name="yellow_limit" unique="1">
 <longdesc lang="en">
 Lower (!) limit of idle percentage to switch the health attribute to yellow. I.e.
 the #health-cpu will go yellow if the %idle of the CPU falls below 50%.
 </longdesc>
 <shortdesc lang="en">Lower limit for yellow health attribute</shortdesc>
 <content type="string" default="50"/>
 </parameter>
 
 <parameter name="red_limit" unique="1">
 <longdesc lang="en">
 Lower (!) limit of idle percentage to switch the health attribute to red. I.e.
 the #health-cpu will go red if the %idle of the CPU falls below 10%.
 </longdesc>
 <shortdesc lang="en">Lower limit for red health attribute</shortdesc>
 <content type="string" default="10"/>
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"        timeout="10" />
 <action name="stop"         timeout="10" />
 <action name="monitor"      timeout="10" interval="10" start-delay="0" />
 <action name="meta-data"    timeout="5" />
 <action name="validate-all"   timeout="10" />
 </actions>
 </resource-agent>
 END
 }
 
 #######################################################################
 
 # don't exit on TERM, to test that lrmd makes sure that we do exit
 trap sigterm_handler TERM
 sigterm_handler() {
 	ocf_log info "They use TERM to bring us down. No such luck."
 	return
 }
 
 dummy_usage() {
 	cat <<END
 usage: $0 {start|stop|monitor|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
 dummy_start() {
     dummy_monitor
     if [ $? =  $OCF_SUCCESS ]; then
 	return $OCF_SUCCESS
     fi
     touch ${OCF_RESKEY_state}
 }
 
 dummy_stop() {
     dummy_monitor
     if [ $? =  $OCF_SUCCESS ]; then
 	rm ${OCF_RESKEY_state}
     fi
     return $OCF_SUCCESS
 }
 
 dummy_monitor() {
 	# Monitor _MUST!_ differentiate correctly between running
 	# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
 	# That is THREE states, not just yes/no.
 	
 	if [ -f ${OCF_RESKEY_state} ]; then
 
 	  IDLE=`top -b -n2 | grep Cpu | tail -1 | awk -F",|.[0-9][ %]id" '{ print $4 }'`
 	  # echo "System idle: " $IDLE
 	  # echo "$OCF_RESKEY_red_limit"
 	  # echo $OCF_RESKEY_yellow_limit
 
 	  if [ $IDLE -lt ${OCF_RESKEY_red_limit} ] ; then
 	    # echo "System state RED!"
 	    attrd_updater -n "#health-cpu" -U "red" -d "30s"
 	    return $OCF_SUCCESS
           fi
 
 	  if [ $IDLE -lt ${OCF_RESKEY_yellow_limit} ] ; then
 	    # echo "System state yellow."
             attrd_updater -n "#health-cpu" -U "yellow" -d "30s"
 	  else
 	    # echo "System state green."
 	    attrd_updater -n "#health-cpu" -U "green" -d "30s"
 
           fi
 	
 	  return $OCF_SUCCESS
 	fi
 
 	if false ; then
                 return $OCF_ERR_GENERIC
         fi
         return $OCF_NOT_RUNNING
 
 }
 
 dummy_validate() {
     
     # Is the state directory writable? 
     state_dir=`dirname "$OCF_RESKEY_state"`
     touch "$state_dir/$$"
     if [ $? != 0 ]; then
 	return $OCF_ERR_ARGS
     fi
     rm "$state_dir/$$"
 
     return $OCF_SUCCESS
 }
 
 : ${OCF_RESKEY_CRM_meta_interval=0}
 : ${OCF_RESKEY_CRM_meta_globally_unique:="true"}
 
 if [ "x$OCF_RESKEY_state" = "x" ]; then
     if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
 	state="${HA_VARRUN}/Dummy-${OCF_RESOURCE_INSTANCE}.state"
 	
 	# Strip off the trailing clone marker
 	OCF_RESKEY_state=`echo $state | sed s/:[0-9][0-9]*\.state/.state/`
     else 
 	OCF_RESKEY_state="${HA_VARRUN}/Dummy-${OCF_RESOURCE_INSTANCE}.state"
     fi
 fi
 
 if [ "x${OCF_RESKEY_red_limit}" = "x" ] ; then
   OCF_RESKEY_red_limit=10
 fi
 
 if [ "x${OCF_RESKEY_yellow_limit}" = "x" ] ; then
   OCF_RESKEY_yellow_limit=50
 fi
 
 case $__OCF_ACTION in
 meta-data)	meta_data
 		exit $OCF_SUCCESS
 		;;
 start)		dummy_start;;
 stop)		dummy_stop;;
 monitor)	dummy_monitor;;
 validate-all)	dummy_validate;;
 usage|help)	dummy_usage
 		exit $OCF_SUCCESS
 		;;
 *)		dummy_usage
 		exit $OCF_ERR_UNIMPLEMENTED
 		;;
 esac
 rc=$?
 ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
 exit $rc
 
diff --git a/extra/resources/Stateful b/extra/resources/Stateful
index 2ae65add68..c5184d572d 100644
--- a/extra/resources/Stateful
+++ b/extra/resources/Stateful
@@ -1,213 +1,213 @@
 #!/bin/sh
 #
 #
 #	Example of a stateful OCF Resource Agent. 
 #
 # Copyright (c) 2006 Andrew Beekhof
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of version 2 of the GNU General Public License as
 # published by the Free Software Foundation.
 #
 # This program is distributed in the hope that it would be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
 # Further, this software is distributed without any warranty that it is
 # free of the rightful claim of any third person regarding infringement
 # or the like.  Any license provided herein, whether implied or
 # otherwise, applies only to this software file.  Patent licenses, if
 # any, provided herein do not apply to combinations of this program with
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 #
 
 #######################################################################
 # Initialization:
 
 : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
 . ${OCF_FUNCTIONS}
 : ${__OCF_ACTION=$1}
 CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
 
 #######################################################################
 
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="Stateful" version="1.0">
 <version>1.0</version>
 
 <longdesc lang="en">
-This is an example resource agent that impliments two states
+This is an example resource agent that implements two states
 </longdesc>
 <shortdesc lang="en">Example stateful resource agent</shortdesc>
 
 <parameters>
 
 <parameter name="state" unique="1">
 <longdesc lang="en">
 Location to store the resource state in
 </longdesc>
 <shortdesc lang="en">State file</shortdesc>
-<content type="string" default="${HA_VARRUN}/Stateful-{OCF_RESOURCE_INSTANCE}.state" />
+<content type="string" default="${HA_VARRUN}/Stateful-${OCF_RESOURCE_INSTANCE}.state" />
 </parameter>
 
 </parameters>
 
 <actions>
 <action name="start"   timeout="20" />
 <action name="stop"    timeout="20" />
 <action name="monitor" depth="0"  timeout="20" interval="10" role="Master"/>
 <action name="monitor" depth="0"  timeout="20" interval="10" role="Slave"/>
 <action name="meta-data"  timeout="5" />
 <action name="validate-all"  timeout="30" />
 </actions>
 </resource-agent>
 END
 	exit $OCF_SUCCESS
 }
 
 #######################################################################
 
 stateful_usage() {
 	cat <<END
 usage: $0 {start|stop|promote|demote|monitor|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
 	exit $1
 }
 
 stateful_update() {
 	echo $1 > ${OCF_RESKEY_state}
 }
 
 stateful_check_state() {
     target=$1
     if [ -f ${OCF_RESKEY_state} ]; then
 	state=`cat ${OCF_RESKEY_state}`
 	if [ "x$target" = "x$state" ]; then
 	    return 0
 	fi
 
     else
 	if [ "x$target" = "x" ]; then
 	    return 0
 	fi
     fi
 
     return 1
 }
 
 stateful_start() {
     stateful_check_state master
     if [ $? = 0 ]; then
        	# CRM Error - Should never happen
 	return $OCF_RUNNING_MASTER
     fi
     stateful_update slave
     $CRM_MASTER -v ${slave_score}
     return 0
 }
 
 stateful_demote() {
     stateful_check_state 
     if [ $? = 0 ]; then
        	# CRM Error - Should never happen
 	return $OCF_NOT_RUNNING
     fi
     stateful_update slave
     $CRM_MASTER -v ${slave_score}
     return 0
 }
 
 stateful_promote() {
     stateful_check_state 
     if [ $? = 0 ]; then
 	return $OCF_NOT_RUNNING
     fi
     stateful_update master
     $CRM_MASTER -v ${master_score}
     return 0
 }
 
 stateful_stop() {
     $CRM_MASTER -D
     stateful_check_state master
     if [ $? = 0 ]; then
        	# CRM Error - Should never happen
 	return $OCF_RUNNING_MASTER
     fi
     if [ -f ${OCF_RESKEY_state} ]; then
 	rm ${OCF_RESKEY_state}
     fi
     return 0
 }
 
 stateful_monitor() {
     stateful_check_state "master"
     if [ $? = 0 ]; then
 	if [ $OCF_RESKEY_CRM_meta_interval = 0 ]; then
 	    # Restore the master setting during probes
 	    $CRM_MASTER -v ${master_score}
 	fi
 	return $OCF_RUNNING_MASTER
     fi
 
     stateful_check_state "slave"
     if [ $? = 0 ]; then
 	if [ $OCF_RESKEY_CRM_meta_interval = 0 ]; then
 	    # Restore the master setting during probes
 	    $CRM_MASTER -v ${slave_score}
 	fi
 	return $OCF_SUCCESS
     fi
 
     if [ -f ${OCF_RESKEY_state} ]; then
 	echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents"
 	cat ${OCF_RESKEY_state}
 	return $OCF_ERR_GENERIC
     fi
     return 7
 }
 
 stateful_validate() {
 	exit $OCF_SUCCESS
 }
 
 : ${slave_score=5}
 : ${master_score=10}
 
 : ${OCF_RESKEY_CRM_meta_interval=0}
 : ${OCF_RESKEY_CRM_meta_globally_unique:="true"}
 
 if [ "x$OCF_RESKEY_state" = "x" ]; then
     if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then
 	state="${HA_VARRUN}/Stateful-${OCF_RESOURCE_INSTANCE}.state"
 	
 	# Strip off the trailing clone marker
 	OCF_RESKEY_state=`echo $state | sed s/:[0-9][0-9]*\.state/.state/`
     else 
 	OCF_RESKEY_state="${HA_VARRUN}/Stateful-${OCF_RESOURCE_INSTANCE}.state"
     fi
 fi
 
 case $__OCF_ACTION in
 meta-data)	meta_data;;
 start)		stateful_start;;
 promote)	stateful_promote;;
 demote)		stateful_demote;;
 stop)		stateful_stop;;
 monitor)	stateful_monitor;;
 validate-all)	stateful_validate;;
 usage|help)	stateful_usage $OCF_SUCCESS;;
 *)		stateful_usage $OCF_ERR_UNIMPLEMENTED;;
 esac
 
 exit $?
 
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index 2c380bf554..9b623073b8 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,234 +1,238 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef CRM_COMMON_CLUSTER__H
 #  define CRM_COMMON_CLUSTER__H
 
 #  include <crm/common/xml.h>
 #  include <crm/common/util.h>
 
 #  if SUPPORT_HEARTBEAT
 #    include <heartbeat/hb_api.h>
 #    include <ocf/oc_event.h>
 #  endif
 
 #  if SUPPORT_COROSYNC
 #    include <corosync/cpg.h>
 #  endif
 
 extern gboolean crm_have_quorum;
 extern GHashTable *crm_peer_cache;
 extern GHashTable *crm_remote_peer_cache;
 extern unsigned long long crm_peer_seq;
 
 #  ifndef CRM_SERVICE
 #    define CRM_SERVICE PCMK_SERVICE_ID
 #  endif
 
 /* *INDENT-OFF* */
 #define CRM_NODE_LOST      "lost"
 #define CRM_NODE_MEMBER    "member"
 #define CRM_NODE_ACTIVE    CRM_NODE_MEMBER
 #define CRM_NODE_EVICTED   "evicted"
 
 enum crm_join_phase
 {
     crm_join_nack       = -1,
     crm_join_none       = 0,
     crm_join_welcomed   = 1,
     crm_join_integrated = 2,
     crm_join_finalized  = 3,
     crm_join_confirmed  = 4,
 };
 
 enum crm_node_flags
 {
     /* node is not a cluster node and should not be considered for cluster membership */
     crm_remote_node          = 0x0001,
 
     /* deprecated (not used by cluster) */
     crm_remote_container     = 0x0002,
     crm_remote_baremetal     = 0x0004,
+
+    /* node's cache entry is dirty */
+    crm_node_dirty           = 0x0010,
 };
 /* *INDENT-ON* */
 
 typedef struct crm_peer_node_s {
     uint32_t id;                /* Only used by corosync derivatives */
     uint64_t born;              /* Only used by heartbeat and the legacy plugin */
     uint64_t last_seen;
     uint64_t flags;             /* Specified by crm_node_flags enum */
 
     int32_t votes;              /* Only used by the legacy plugin */
     uint32_t processes;
     enum crm_join_phase join;
 
     char *uname;
     char *uuid;
     char *state;
     char *expected;
 
     char *addr;                 /* Only used by the legacy plugin */
     char *version;              /* Unused */
 } crm_node_t;
 
 void crm_peer_init(void);
 void crm_peer_destroy(void);
 
 typedef struct crm_cluster_s {
     char *uuid;
     char *uname;
     uint32_t nodeid;
 
     void (*destroy) (gpointer);
 
 #  if SUPPORT_HEARTBEAT
     ll_cluster_t *hb_conn;
     void (*hb_dispatch) (HA_Message * msg, void *private);
 #  endif
 
 #  if SUPPORT_COROSYNC
     struct cpg_name group;
     cpg_callbacks_t cpg;
     cpg_handle_t cpg_handle;
 #  endif
 
 } crm_cluster_t;
 
 gboolean crm_cluster_connect(crm_cluster_t * cluster);
 void crm_cluster_disconnect(crm_cluster_t * cluster);
 
 /* *INDENT-OFF* */
 enum crm_ais_msg_class {
     crm_class_cluster = 0,
     crm_class_members = 1,
     crm_class_notify  = 2,
     crm_class_nodeid  = 3,
     crm_class_rmpeer  = 4,
     crm_class_quorum  = 5,
 };
 
 /* order here matters - its used to index into the crm_children array */
 enum crm_ais_msg_types {
     crm_msg_none     = 0,
     crm_msg_ais      = 1,
     crm_msg_lrmd     = 2,
     crm_msg_cib      = 3,
     crm_msg_crmd     = 4,
     crm_msg_attrd    = 5,
     crm_msg_stonithd = 6,
     crm_msg_te       = 7,
     crm_msg_pe       = 8,
     crm_msg_stonith_ng = 9,
 };
 
 /* used with crm_get_peer_full */
 enum crm_get_peer_flags {
     CRM_GET_PEER_CLUSTER   = 0x0001,
     CRM_GET_PEER_REMOTE    = 0x0002,
     CRM_GET_PEER_ANY       = CRM_GET_PEER_CLUSTER|CRM_GET_PEER_REMOTE,
 };
 /* *INDENT-ON* */
 
 gboolean send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service,
                               xmlNode * data, gboolean ordered);
 
 
 int crm_remote_peer_cache_size(void);
 
 /* Initialize and refresh the remote peer cache from a cib config */
 void crm_remote_peer_cache_refresh(xmlNode *cib);
 void crm_remote_peer_cache_add(const char *node_name);
+crm_node_t *crm_remote_peer_get(const char *node_name);
 void crm_remote_peer_cache_remove(const char *node_name);
 
 /* allows filtering of remote and cluster nodes using crm_get_peer_flags */
 crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags);
 
 /* only searches cluster nodes */
 crm_node_t *crm_get_peer(unsigned int id, const char *uname);
 
 guint crm_active_peers(void);
 gboolean crm_is_peer_active(const crm_node_t * node);
 guint reap_crm_member(uint32_t id, const char *name);
 int crm_terminate_member(int nodeid, const char *uname, void *unused);
 int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection);
 
 #  if SUPPORT_HEARTBEAT
 gboolean crm_is_heartbeat_peer_active(const crm_node_t * node);
 #  endif
 
 #  if SUPPORT_COROSYNC
 extern int ais_fd_sync;
 uint32_t get_local_nodeid(cpg_handle_t handle);
 
 gboolean cluster_connect_cpg(crm_cluster_t *cluster);
 void cluster_disconnect_cpg(crm_cluster_t * cluster);
 
 void pcmk_cpg_membership(cpg_handle_t handle,
                          const struct cpg_name *groupName,
                          const struct cpg_address *member_list, size_t member_list_entries,
                          const struct cpg_address *left_list, size_t left_list_entries,
                          const struct cpg_address *joined_list, size_t joined_list_entries);
 gboolean crm_is_corosync_peer_active(const crm_node_t * node);
 gboolean send_cluster_text(int class, const char *data, gboolean local,
                        crm_node_t * node, enum crm_ais_msg_types dest);
 #  endif
 
 const char *crm_peer_uuid(crm_node_t *node);
 const char *crm_peer_uname(const char *uuid);
 void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node);
 
 enum crm_status_type {
     crm_status_uname,
     crm_status_nstate,
     crm_status_processes,
     crm_status_rstate, /* remote node state */
 };
 
 enum crm_ais_msg_types text2msg_type(const char *text);
 void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *));
 void crm_set_autoreap(gboolean autoreap);
 
 /* *INDENT-OFF* */
 enum cluster_type_e
 {
     pcmk_cluster_unknown     = 0x0001,
     pcmk_cluster_invalid     = 0x0002,
     pcmk_cluster_heartbeat   = 0x0004,
     pcmk_cluster_classic_ais = 0x0010,
     pcmk_cluster_corosync    = 0x0020,
     pcmk_cluster_cman        = 0x0040,
 };
 /* *INDENT-ON* */
 
 enum cluster_type_e get_cluster_type(void);
 const char *name_for_cluster_type(enum cluster_type_e type);
 
 gboolean is_corosync_cluster(void);
 gboolean is_cman_cluster(void);
 gboolean is_openais_cluster(void);
 gboolean is_classic_ais_cluster(void);
 gboolean is_heartbeat_cluster(void);
 
 const char *get_local_node_name(void);
 char *get_node_name(uint32_t nodeid);
 
 #  if SUPPORT_COROSYNC
 char *pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *msg,
                         uint32_t *kind, const char **from);
 #  endif
 
 #endif
diff --git a/include/crm/crm.h b/include/crm/crm.h
index 09ec10a3d9..d035d16d9d 100644
--- a/include/crm/crm.h
+++ b/include/crm/crm.h
@@ -1,201 +1,202 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef CRM__H
 #  define CRM__H
 
 /**
  * \file
  * \brief A dumping ground
  * \ingroup core
  */
 
 #  include <crm_config.h>
 #  include <stdlib.h>
 #  include <glib.h>
 #  include <stdbool.h>
 
 #  undef MIN
 #  undef MAX
 #  include <string.h>
 
 #  include <libxml/tree.h>
 
 #  define CRM_FEATURE_SET		"3.0.10"
 
 #  define EOS		'\0'
 #  define DIMOF(a)	((int) (sizeof(a)/sizeof(a[0])) )
 
 #  ifndef MAX_NAME
 #    define MAX_NAME	256
 #  endif
 
 #  ifndef __GNUC__
 #    define __builtin_expect(expr, result) (expr)
 #  endif
 
 /* Some handy macros used by the Linux kernel */
 #  define __likely(expr) __builtin_expect(expr, 1)
 #  define __unlikely(expr) __builtin_expect(expr, 0)
 
 #  define CRM_META			"CRM_meta"
 
 extern char *crm_system_name;
 
 /* *INDENT-OFF* */
 
 /* Clean these up at some point, some probably should be runtime options */
 #  define SOCKET_LEN	1024
 #  define APPNAME_LEN	256
 #  define MAX_IPC_FAIL	5
 #  define MAX_IPC_DELAY   120
 
 #  define DAEMON_RESPAWN_STOP 100
 
 #  define MSG_LOG			1
 #  define DOT_FSA_ACTIONS		1
 #  define DOT_ALL_FSA_INPUTS	1
 /* #define FSA_TRACE		1 */
 
 #  define INFINITY_S        "INFINITY"
 #  define MINUS_INFINITY_S "-INFINITY"
 
 #  define INFINITY        1000000
 
 /* Sub-systems */
 #  define CRM_SYSTEM_DC		"dc"
 #  define CRM_SYSTEM_DCIB		"dcib"
                                         /*  The master CIB */
 #  define CRM_SYSTEM_CIB		"cib"
 #  define CRM_SYSTEM_CRMD		"crmd"
 #  define CRM_SYSTEM_LRMD		"lrmd"
 #  define CRM_SYSTEM_PENGINE	"pengine"
 #  define CRM_SYSTEM_TENGINE	"tengine"
 #  define CRM_SYSTEM_STONITHD	"stonithd"
 #  define CRM_SYSTEM_MCP	"pacemakerd"
 
 /* Valid operations */
 #  define CRM_OP_NOOP		"noop"
 
 #  define CRM_OP_JOIN_ANNOUNCE	"join_announce"
 #  define CRM_OP_JOIN_OFFER	"join_offer"
 #  define CRM_OP_JOIN_REQUEST	"join_request"
 #  define CRM_OP_JOIN_ACKNAK	"join_ack_nack"
 #  define CRM_OP_JOIN_CONFIRM	"join_confirm"
 
 #  define CRM_OP_DIE		"die_no_respawn"
 #  define CRM_OP_RETRIVE_CIB	"retrieve_cib"
 #  define CRM_OP_PING		"ping"
 #  define CRM_OP_THROTTLE	"throttle"
 #  define CRM_OP_VOTE		"vote"
 #  define CRM_OP_NOVOTE		"no-vote"
 #  define CRM_OP_HELLO		"hello"
 #  define CRM_OP_HBEAT		"dc_beat"
 #  define CRM_OP_PECALC		"pe_calc"
 #  define CRM_OP_ABORT		"abort"
 #  define CRM_OP_QUIT		"quit"
 #  define CRM_OP_LOCAL_SHUTDOWN 	"start_shutdown"
 #  define CRM_OP_SHUTDOWN_REQ	"req_shutdown"
 #  define CRM_OP_SHUTDOWN 	"do_shutdown"
 #  define CRM_OP_FENCE	 	"stonith"
 #  define CRM_OP_EVENTCC		"event_cc"
 #  define CRM_OP_TEABORT		"te_abort"
 #  define CRM_OP_TEABORTED	"te_abort_confirmed"    /* we asked */
 #  define CRM_OP_TE_HALT		"te_halt"
 #  define CRM_OP_TECOMPLETE	"te_complete"
 #  define CRM_OP_TETIMEOUT	"te_timeout"
 #  define CRM_OP_TRANSITION	"transition"
 #  define CRM_OP_REGISTER		"register"
 #  define CRM_OP_IPC_FWD		"ipc_fwd"
 #  define CRM_OP_DEBUG_UP		"debug_inc"
 #  define CRM_OP_DEBUG_DOWN	"debug_dec"
 #  define CRM_OP_INVOKE_LRM	"lrm_invoke"
 #  define CRM_OP_LRM_REFRESH	"lrm_refresh" /* Deprecated */
 #  define CRM_OP_LRM_QUERY	"lrm_query"
 #  define CRM_OP_LRM_DELETE	"lrm_delete"
 #  define CRM_OP_LRM_FAIL		"lrm_fail"
 #  define CRM_OP_PROBED		"probe_complete"
 #  define CRM_OP_NODES_PROBED	"probe_nodes_complete"
 #  define CRM_OP_REPROBE		"probe_again"
 #  define CRM_OP_CLEAR_FAILCOUNT  "clear_failcount"
+#  define CRM_OP_REMOTE_STATE     "remote_state"
 #  define CRM_OP_RELAXED_SET  "one-or-more"
 #  define CRM_OP_RELAXED_CLONE  "clone-one-or-more"
 #  define CRM_OP_RM_NODE_CACHE "rm_node_cache"
 
 #  define CRMD_JOINSTATE_DOWN           "down"
 #  define CRMD_JOINSTATE_PENDING        "pending"
 #  define CRMD_JOINSTATE_MEMBER         "member"
 #  define CRMD_JOINSTATE_NACK           "banned"
 
 #  define CRMD_ACTION_DELETE		"delete"
 #  define CRMD_ACTION_CANCEL		"cancel"
 
 #  define CRMD_ACTION_MIGRATE		"migrate_to"
 #  define CRMD_ACTION_MIGRATED		"migrate_from"
 
 #  define CRMD_ACTION_START		"start"
 #  define CRMD_ACTION_STARTED		"running"
 
 #  define CRMD_ACTION_STOP		"stop"
 #  define CRMD_ACTION_STOPPED		"stopped"
 
 #  define CRMD_ACTION_PROMOTE		"promote"
 #  define CRMD_ACTION_PROMOTED		"promoted"
 #  define CRMD_ACTION_DEMOTE		"demote"
 #  define CRMD_ACTION_DEMOTED		"demoted"
 
 #  define CRMD_ACTION_NOTIFY		"notify"
 #  define CRMD_ACTION_NOTIFIED		"notified"
 
 #  define CRMD_ACTION_STATUS		"monitor"
 
 /* short names */
 #  define RSC_DELETE	CRMD_ACTION_DELETE
 #  define RSC_CANCEL	CRMD_ACTION_CANCEL
 
 #  define RSC_MIGRATE	CRMD_ACTION_MIGRATE
 #  define RSC_MIGRATED	CRMD_ACTION_MIGRATED
 
 #  define RSC_START	CRMD_ACTION_START
 #  define RSC_STARTED	CRMD_ACTION_STARTED
 
 #  define RSC_STOP	CRMD_ACTION_STOP
 #  define RSC_STOPPED	CRMD_ACTION_STOPPED
 
 #  define RSC_PROMOTE	CRMD_ACTION_PROMOTE
 #  define RSC_PROMOTED	CRMD_ACTION_PROMOTED
 #  define RSC_DEMOTE	CRMD_ACTION_DEMOTE
 #  define RSC_DEMOTED	CRMD_ACTION_DEMOTED
 
 #  define RSC_NOTIFY	CRMD_ACTION_NOTIFY
 #  define RSC_NOTIFIED	CRMD_ACTION_NOTIFIED
 
 #  define RSC_STATUS	CRMD_ACTION_STATUS
 /* *INDENT-ON* */
 
 typedef GList *GListPtr;
 
 #  include <crm/common/logging.h>
 #  include <crm/common/util.h>
 #  include <crm/error.h>
 
 #  define crm_str_hash g_str_hash_traditional
 
 guint crm_strcase_hash(gconstpointer v);
 guint g_str_hash_traditional(gconstpointer v);
 
 #endif
diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
index 180b97e74c..66c1ff7065 100644
--- a/include/crm/pengine/status.h
+++ b/include/crm/pengine/status.h
@@ -1,426 +1,426 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef PENGINE_STATUS__H
 #  define PENGINE_STATUS__H
 
 #  include <glib.h>
 #  include <crm/common/iso8601.h>
 #  include <crm/pengine/common.h>
 
 typedef struct node_s pe_node_t;
 typedef struct node_s node_t;
 typedef struct pe_action_s action_t;
 typedef struct pe_action_s pe_action_t;
 typedef struct resource_s resource_t;
 typedef struct ticket_s ticket_t;
 
 typedef enum no_quorum_policy_e {
     no_quorum_freeze,
     no_quorum_stop,
     no_quorum_ignore,
     no_quorum_suicide
 } no_quorum_policy_t;
 
 enum node_type {
     node_ping,
     node_member,
     node_remote
 };
 
 enum pe_restart {
     pe_restart_restart,
     pe_restart_ignore
 };
 
 enum pe_find {
     pe_find_renamed = 0x001,
     pe_find_clone = 0x004,
     pe_find_current = 0x008,
     pe_find_inactive = 0x010,
 };
 
 #  define pe_flag_have_quorum		0x00000001ULL
 #  define pe_flag_symmetric_cluster	0x00000002ULL
 #  define pe_flag_is_managed_default	0x00000004ULL
 #  define pe_flag_maintenance_mode	0x00000008ULL
 
 #  define pe_flag_stonith_enabled	0x00000010ULL
 #  define pe_flag_have_stonith_resource	0x00000020ULL
 #  define pe_flag_enable_unfencing	0x00000040ULL
 #  define pe_flag_concurrent_fencing	0x00000080ULL
 
 #  define pe_flag_stop_rsc_orphans	0x00000100ULL
 #  define pe_flag_stop_action_orphans	0x00000200ULL
 #  define pe_flag_stop_everything	0x00000400ULL
 
 #  define pe_flag_start_failure_fatal	0x00001000ULL
 #  define pe_flag_remove_after_stop	0x00002000ULL
 
 #  define pe_flag_startup_probes	0x00010000ULL
 #  define pe_flag_have_status		0x00020000ULL
 #  define pe_flag_have_remote_nodes	0x00040000ULL
 
 #  define pe_flag_quick_location  	0x00100000ULL
 #  define pe_flag_sanitized             0x00200000ULL
 
 typedef struct pe_working_set_s {
     xmlNode *input;
     crm_time_t *now;
 
     /* options extracted from the input */
     char *dc_uuid;
     node_t *dc_node;
     const char *stonith_action;
     const char *placement_strategy;
 
     unsigned long long flags;
 
     int stonith_timeout;
     int default_resource_stickiness;
     no_quorum_policy_t no_quorum_policy;
 
     GHashTable *config_hash;
     GHashTable *tickets;
     GHashTable *singletons; /* Actions for which there can be only one - ie. fence nodeX */
 
     GListPtr nodes;
     GListPtr resources;
     GListPtr placement_constraints;
     GListPtr ordering_constraints;
     GListPtr colocation_constraints;
     GListPtr ticket_constraints;
 
     GListPtr actions;
     xmlNode *failed;
     xmlNode *op_defaults;
     xmlNode *rsc_defaults;
 
     /* stats */
     int num_synapse;
     int max_valid_nodes;
     int order_id;
     int action_id;
 
     /* final output */
     xmlNode *graph;
 
     GHashTable *template_rsc_sets;
     const char *localhost;
     GHashTable *tags;
 
 } pe_working_set_t;
 
 struct node_shared_s {
     const char *id;
     const char *uname;
 /* Make all these flags into a bitfield one day */
     gboolean online;
     gboolean standby;
     gboolean standby_onfail;
     gboolean pending;
     gboolean unclean;
     gboolean unseen;
     gboolean shutdown;
     gboolean expected_up;
     gboolean is_dc;
 
     int num_resources;
     GListPtr running_rsc;       /* resource_t* */
     GListPtr allocated_rsc;     /* resource_t* */
 
     resource_t *remote_rsc;
 
     GHashTable *attrs;          /* char* => char* */
     enum node_type type;
 
     GHashTable *utilization;
 
     /*! cache of calculated rsc digests for this node. */
     GHashTable *digest_cache;
 
     gboolean maintenance;
     gboolean rsc_discovery_enabled;
     gboolean remote_requires_reset;
     gboolean remote_was_fenced;
 };
 
 struct node_s {
     int weight;
     gboolean fixed;
     int count;
     struct node_shared_s *details;
     int rsc_discover_mode;
 };
 
 #  include <crm/pengine/complex.h>
 
 #  define pe_rsc_orphan		0x00000001ULL
 #  define pe_rsc_managed	0x00000002ULL
 #  define pe_rsc_block          0x00000004ULL   /* Further operations are prohibited due to failure policy */
 #  define pe_rsc_orphan_container_filler	0x00000008ULL
 
 #  define pe_rsc_notify		0x00000010ULL
 #  define pe_rsc_unique		0x00000020ULL
 #  define pe_rsc_fence_device   0x00000040ULL
 
 #  define pe_rsc_provisional	0x00000100ULL
 #  define pe_rsc_allocating	0x00000200ULL
 #  define pe_rsc_merging	0x00000400ULL
 #  define pe_rsc_munging	0x00000800ULL
 
 #  define pe_rsc_try_reload     0x00001000ULL
 #  define pe_rsc_reload         0x00002000ULL
 
 #  define pe_rsc_failed		0x00010000ULL
 #  define pe_rsc_shutdown	0x00020000ULL
 #  define pe_rsc_runnable	0x00040000ULL
 #  define pe_rsc_start_pending	0x00080000ULL
 
 #  define pe_rsc_starting       0x00100000ULL
 #  define pe_rsc_stopping       0x00200000ULL
 #  define pe_rsc_migrating      0x00400000ULL
 #  define pe_rsc_allow_migrate  0x00800000ULL
 
 #  define pe_rsc_failure_ignored 0x01000000ULL
 #  define pe_rsc_unexpectedly_running 0x02000000ULL
 #  define pe_rsc_maintenance	 0x04000000ULL
 
 #  define pe_rsc_needs_quorum	 0x10000000ULL
 #  define pe_rsc_needs_fencing	 0x20000000ULL
 #  define pe_rsc_needs_unfencing 0x40000000ULL
 #  define pe_rsc_have_unfencing  0x80000000ULL
 
 enum pe_graph_flags {
     pe_graph_none = 0x00000,
     pe_graph_updated_first = 0x00001,
     pe_graph_updated_then = 0x00002,
     pe_graph_disable = 0x00004,
 };
 
 /* *INDENT-OFF* */
 /* Bit flags describing an action; stored in the 'flags' member of struct pe_action_s */
 enum pe_action_flags {
     pe_action_pseudo = 0x00001,
     pe_action_runnable = 0x00002,
     pe_action_optional = 0x00004,
     pe_action_print_always = 0x00008,
 
     pe_action_have_node_attrs = 0x00010,
     pe_action_failure_is_fatal = 0x00020, /* no longer used, here for API compatibility */
     pe_action_implied_by_stonith = 0x00040,
     pe_action_migrate_runnable =   0x00080,
 
     pe_action_dumped = 0x00100,
     pe_action_processed = 0x00200,
     pe_action_clear = 0x00400,
     pe_action_dangle = 0x00800,
 
     pe_action_requires_any = 0x01000, /* This action requires one or more of its dependencies to be runnable.
                                        * We use this to clear the runnable flag before checking dependencies
                                        */
     pe_action_reschedule = 0x02000,
     pe_action_tracking = 0x04000,
 };
 /* *INDENT-ON* */
 
 struct resource_s {
     char *id;
     char *clone_name;
     xmlNode *xml;
     xmlNode *orig_xml;
     xmlNode *ops_xml;
 
     resource_t *parent;
     void *variant_opaque;
     enum pe_obj_types variant;
     resource_object_functions_t *fns;
     resource_alloc_functions_t *cmds;
 
     enum rsc_recovery_type recovery_type;
     enum pe_restart restart_type;
 
     int priority;
     int stickiness;
     int sort_index;
     int failure_timeout;
     int effective_priority;
     int migration_threshold;
 
     gboolean is_remote_node;
 
     unsigned long long flags;
 
     GListPtr rsc_cons_lhs;      /* rsc_colocation_t* */
     GListPtr rsc_cons;          /* rsc_colocation_t* */
     GListPtr rsc_location;      /* rsc_to_node_t*    */
     GListPtr actions;           /* action_t*         */
     GListPtr rsc_tickets;       /* rsc_ticket*       */
 
     node_t *allocated_to;
     GListPtr running_on;        /* node_t*   */
     GHashTable *known_on;       /* node_t*   */
     GHashTable *allowed_nodes;  /* node_t*   */
 
     enum rsc_role_e role;
     enum rsc_role_e next_role;
 
     GHashTable *meta;
     GHashTable *parameters;
     GHashTable *utilization;
 
     GListPtr children;          /* resource_t*   */
     GListPtr dangling_migrations;       /* node_t*       */
 
     node_t *partial_migration_target;
     node_t *partial_migration_source;
 
     resource_t *container;
     GListPtr fillers;
 
     char *pending_task;
 
     const char *isolation_wrapper;
     gboolean exclusive_discover;
     int remote_reconnect_interval;
 };
 
 /* A single action; also available through the action_t and pe_action_t typedefs */
 struct pe_action_s {
     int id;
     int priority;
 
     resource_t *rsc;
     node_t *node;
     xmlNode *op_entry;
 
     char *task;
     char *uuid;
     char *cancel_task;
 
     enum pe_action_flags flags;
     enum rsc_start_requirement needs;
     enum action_fail_response on_fail;
     enum rsc_role_e fail_role;
 
     action_t *pre_notify;
     action_t *pre_notified;
     action_t *post_notify;
     action_t *post_notified;
 
     int seen_count;
 
     GHashTable *meta;
     GHashTable *extra;
 
     /*
      * These two variables are associated with the constraint logic
      * that involves first having one or more actions runnable before
      * then allowing this action to execute.
      *
      * These variables are used with features such as 'clone-min' which
      * requires at minimum X number of cloned instances to be running
      * before an order dependency can run. Another option that uses
      * this is 'require-all=false' in ordering constraints. This option
      * says "only require one instance of a resource to start before
      * allowing dependencies to start"; basically, require-all=false is
      * the same as clone-min=1.
      */
 
     /* current number of known runnable actions in the before list. */
     int runnable_before;
     /* the number of "before" runnable actions required for this action
      * to be considered runnable */
     int required_runnable_before;
 
     GListPtr actions_before;    /* action_wrapper_t* */
     GListPtr actions_after;     /* action_wrapper_t* */
 };
 
 struct ticket_s {
     char *id;
     gboolean granted;
     time_t last_granted;
     gboolean standby;
     GHashTable *state;
 };
 
 typedef struct tag_s {
     char *id;
     GListPtr refs;
 } tag_t;
 
 enum pe_link_state {
     pe_link_not_dumped,
     pe_link_dumped,
     pe_link_dup,
 };
 
 /* *INDENT-OFF* */
 /* Bit flags describing an ordering between a 'first' and a 'then' action;
  * used as the 'type' member of struct action_wrapper_s
  */
 enum pe_ordering {
     pe_order_none                  = 0x0,       /* deleted */
     pe_order_optional              = 0x1,       /* pure ordering, nothing implied */
     pe_order_apply_first_non_migratable = 0x2,  /* Only apply this constraint's ordering if first is not migratable. */
 
     pe_order_implies_first         = 0x10,      /* If 'then' is required, ensure 'first' is too */
     pe_order_implies_then          = 0x20,      /* If 'first' is required, ensure 'then' is too */
     pe_order_implies_first_master  = 0x40,      /* Imply 'first' is required when 'then' is required and then's rsc holds Master role. */
 
     /* first requires then to be both runnable and migrate runnable. */
     pe_order_implies_first_migratable  = 0x80,
 
     pe_order_runnable_left         = 0x100,     /* 'then' requires 'first' to be runnable */
 
     pe_order_pseudo_left           = 0x200,     /* 'then' can only be pseudo if 'first' is runnable */
     pe_order_implies_then_on_node  = 0x400,     /* If 'first' is required on 'nodeX',
                                                  * ensure instances of 'then' on 'nodeX' are too.
                                                  * Only really useful if 'then' is a clone and 'first' is not
                                                  */
 
     pe_order_restart               = 0x1000,    /* 'then' is runnable if 'first' is optional or runnable */
     pe_order_stonith_stop          = 0x2000,    /* only applies if the action is non-pseudo */
     pe_order_serialize_only        = 0x4000,    /* serialize */
 
-    pe_order_implies_first_printed = 0x10000,   /* Like ..implies_first but only ensures 'first' is printed, not manditory */
-    pe_order_implies_then_printed  = 0x20000,   /* Like ..implies_then but only ensures 'then' is printed, not manditory */
+    pe_order_implies_first_printed = 0x10000,   /* Like ..implies_first but only ensures 'first' is printed, not mandatory */
+    pe_order_implies_then_printed  = 0x20000,   /* Like ..implies_then but only ensures 'then' is printed, not mandatory */
 
     pe_order_asymmetrical          = 0x100000,  /* Indicates asymmetrical one way ordering constraint. */
     pe_order_load                  = 0x200000,  /* Only relevant if... */
     pe_order_one_or_more           = 0x400000,  /* 'then' is only runnable if one or more of its dependencies are too */
     pe_order_anti_colocation       = 0x800000,
 
     pe_order_preserve              = 0x1000000, /* Hack for breaking user ordering constraints with container resources */
     pe_order_trace                 = 0x4000000, /* test marker */
 };
 /* *INDENT-ON* */
 
 typedef struct action_wrapper_s action_wrapper_t;
 struct action_wrapper_s {
     enum pe_ordering type;
     enum pe_link_state state;
     action_t *action;
 };
 
 const char *rsc_printable_id(resource_t *rsc);
 gboolean cluster_status(pe_working_set_t * data_set);
 void set_working_set_defaults(pe_working_set_t * data_set);
 void cleanup_calculations(pe_working_set_t * data_set);
 resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh);
 node_t *pe_find_node(GListPtr node_list, const char *uname);
 node_t *pe_find_node_id(GListPtr node_list, const char *id);
 node_t *pe_find_node_any(GListPtr node_list, const char *id, const char *uname);
 GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter,
                          pe_working_set_t * data_set);
 #endif
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index f443c99fa4..a91cad3084 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,658 +1,669 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 #include <dlfcn.h>
 
 #include <stdio.h>
 #include <unistd.h>
 #include <string.h>
 #include <stdlib.h>
 #include <time.h>
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/utsname.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 
 #include <crm/common/ipc.h>
 #include <crm/cluster/internal.h>
 
 CRM_TRACE_INIT_DATA(cluster);
 
 #if SUPPORT_HEARTBEAT
 void *hb_library = NULL;
 #endif
 
 /*!
  * \internal
  * \brief Ask heartbeat for the UUID of a node
  *
  * \param[in] uname  Name of the node to look up
  *
  * \return Newly allocated UUID string (the caller owns it), or NULL if built
  *         without heartbeat support, there is no heartbeat connection,
  *         \p uname is NULL, the lookup fails, memory cannot be allocated,
  *         or heartbeat reports the all-zero UUID
  */
 static char *
 get_heartbeat_uuid(const char *uname)
 {
     char *uuid_calc = NULL;
 
 #if SUPPORT_HEARTBEAT
     cl_uuid_t uuid_raw;
     const char *unknown = "00000000-0000-0000-0000-000000000000";
 
     if (heartbeat_cluster == NULL) {
         crm_warn("No connection to heartbeat, using uuid=uname");
         return NULL;
     } else if(uname == NULL) {
         return NULL;
     }
 
     if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) ==
         HA_FAIL) {
         crm_err("get_uuid_by_name() call failed for host %s", uname);
         /* Nothing has been allocated yet, so there is nothing to release */
         return NULL;
     }
 
     uuid_calc = calloc(1, 50);
     if (uuid_calc == NULL) {
         /* Do not hand a NULL buffer to cl_uuid_unparse() */
         crm_err("Could not allocate memory for the UUID of %s", uname);
         return NULL;
     }
     cl_uuid_unparse(&uuid_raw, uuid_calc);
 
     if (safe_str_eq(uuid_calc, unknown)) {
         crm_warn("Could not calculate UUID for %s", uname);
         free(uuid_calc);
         return NULL;
     }
 #endif
     return uuid_calc;
 }
 
 /*!
  * \internal
  * \brief Check whether node names should be used as node UUIDs
  *
  * The PCMK_uname_is_uuid environment variable is consulted until a value is
  * found; once a value (or the "false" default) is chosen it is remembered
  * for later calls.
  *
  * \return Result of crm_is_true() on the chosen value
  */
 static gboolean
 uname_is_uuid(void)
 {
     static const char *cached_pref = NULL;
 
     if (cached_pref == NULL) {
         const char *env_value = getenv("PCMK_uname_is_uuid");
 
         /* "true" is the legacy mode, so fall back to "false" when unset */
         cached_pref = (env_value != NULL) ? env_value : "false";
     }
 
     return crm_is_true(cached_pref);
 }
 
 /*!
  * \brief Derive a corosync node ID from a UUID string when none is known
  *
  * \param[in] id    Known node ID, or 0 if not known
  * \param[in] uuid  UUID string to convert when \p id is 0
  *
  * \return \p id unchanged if it is non-zero, if node names are used as UUIDs,
  *         or if this is not a corosync cluster; otherwise the value of
  *         crm_atoi(uuid, "0")
  */
 int
 get_corosync_id(int id, const char *uuid)
 {
     if (id != 0) {
         return id;
     }
     if (uname_is_uuid()) {
         return id;
     }
     if (!is_corosync_cluster()) {
         return id;
     }

     return crm_atoi(uuid, "0");
 }
 
 /*!
  * \brief Build the UUID string for a node
  *
  * \param[in] node  Node to build the UUID for (may be NULL)
  *
  * \return Newly allocated string: the decimal node ID on a corosync cluster
  *         that does not use names as UUIDs, otherwise a copy of the node
  *         name; NULL if \p node is NULL, the needed ID or name is not
  *         available, or allocation fails
  */
 char *
 get_corosync_uuid(crm_node_t *node)
 {
     const int id_len = 32;
     char *id_str = NULL;
 
     if (node == NULL) {
         return NULL;
     }
 
     if (uname_is_uuid() || !is_corosync_cluster()) {
         /* The node name doubles as the UUID */
         return (node->uname != NULL) ? strdup(node->uname) : NULL;
     }
 
     if (!(node->id > 0)) {
         crm_info("Node %s is not yet known by corosync", node->uname);
         return NULL;
     }
 
     /* One extra zeroed byte keeps the buffer terminated */
     id_str = calloc(1, (id_len + 1));
     if (id_str != NULL) {
         snprintf(id_str, id_len, "%u", node->id);
     }
     return id_str;
 }
 
 /*!
  * \brief Get (and remember) the UUID of a peer
  *
  * \param[in,out] peer  Peer to examine; its uuid member is filled in when
  *                      a UUID can be determined
  *
  * \return The peer's uuid member (owned by the peer), or NULL if \p peer is
  *         NULL or no UUID could be determined
  */
 const char *
 crm_peer_uuid(crm_node_t *peer)
 {
     enum cluster_type_e stack = get_cluster_type();
 
     if (peer == NULL) {
         return NULL;
     }
 
     /* avoid blocking heartbeat calls where possible */
     if (peer->uuid) {
         return peer->uuid;
     }
 
     switch (stack) {
         case pcmk_cluster_corosync:
             peer->uuid = get_corosync_uuid(peer);
             break;
 
         case pcmk_cluster_cman:
         case pcmk_cluster_classic_ais:
             /* These stacks use the node name as the UUID */
             peer->uuid = (peer->uname) ? strdup(peer->uname) : NULL;
             break;
 
         case pcmk_cluster_heartbeat:
             peer->uuid = get_heartbeat_uuid(peer->uname);
             break;
 
         case pcmk_cluster_unknown:
         case pcmk_cluster_invalid:
             crm_err("Unsupported cluster type");
             break;
     }
 
     return peer->uuid;
 }
 
 /*!
  * \brief Connect to the cluster infrastructure reported by get_cluster_type()
  *
  * \param[in,out] cluster  Connection object; for heartbeat its hb_conn member
  *                         is created if NULL, or signed off and reused if set
  *
  * \return Result of init_cs_connection() for corosync-based stacks, result of
  *         register_heartbeat_conn() for heartbeat, FALSE if the stack is not
  *         supported by this build
  */
 gboolean
 crm_cluster_connect(crm_cluster_t * cluster)
 {
     enum cluster_type_e type = get_cluster_type();
     const char *stack_env = NULL;
 
     crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type));
 #if SUPPORT_COROSYNC
     if (is_openais_cluster()) {
         crm_peer_init();
         return init_cs_connection(cluster);
     }
 #endif
 
 #if SUPPORT_HEARTBEAT
     if (is_heartbeat_cluster()) {
         int rv;
 
         /* coverity[var_deref_op] False positive */
         if (cluster->hb_conn == NULL) {
             /* No object passed in, create a new one. */
             ll_cluster_t *(*new_cluster) (const char *llctype) =
                 find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
 
             cluster->hb_conn = (*new_cluster) ("heartbeat");
             /* dlclose(handle); */
 
         } else {
             /* Object passed in. Disconnect first, then reconnect below. */
             cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE);
         }
 
         /* make sure we are disconnected first with the old object, if any. */
         if (heartbeat_cluster && heartbeat_cluster != cluster->hb_conn) {
             heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE);
         }
 
         CRM_ASSERT(cluster->hb_conn != NULL);
         heartbeat_cluster = cluster->hb_conn;
 
         rv = register_heartbeat_conn(cluster);
         if (rv) {
             /* we'll benefit from a bigger queue length on heartbeat side.
              * Otherwise, if peers send messages faster than we can consume
              * them right now, heartbeat messaging layer will kick us out once
              * it's (small) default queue fills up :(
              * If we fail to adjust the sendq length, that's not yet fatal, though.
              */
             if (HA_OK != heartbeat_cluster->llc_ops->set_sendq_len(heartbeat_cluster, 1024)) {
                 crm_warn("Cannot set sendq length: %s",
                          heartbeat_cluster->llc_ops->errmsg(heartbeat_cluster));
             }
         }
         return rv;
     }
 #endif
     /* The variable may be unset; passing NULL for a %s conversion is
      * undefined, so substitute a placeholder as get_cluster_type() does
      */
     stack_env = getenv("HA_cluster_type");
     crm_info("Unsupported cluster stack: %s", stack_env ? stack_env : "-unspecified-");
     return FALSE;
 }
 
 /*!
  * \brief Disconnect from the cluster infrastructure reported by get_cluster_type()
  *
  * \param[in,out] cluster  Connection object; for heartbeat its hb_conn member
  *                         is signed off and reset to NULL (may be NULL)
  */
 void
 crm_cluster_disconnect(crm_cluster_t * cluster)
 {
     enum cluster_type_e type = get_cluster_type();
     const char *type_str = name_for_cluster_type(type);
     const char *stack_env = NULL;
 
     crm_info("Disconnecting from cluster infrastructure: %s", type_str);
 #if SUPPORT_COROSYNC
     if (is_openais_cluster()) {
         crm_peer_destroy();
         terminate_cs_connection(cluster);
         crm_info("Disconnected from %s", type_str);
         return;
     }
 #endif
 
 #if SUPPORT_HEARTBEAT
     if (is_heartbeat_cluster()) {
         if (cluster == NULL) {
             crm_info("No cluster connection");
             return;
 
         } else if (cluster->hb_conn) {
             cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, TRUE);
             cluster->hb_conn = NULL;
             crm_info("Disconnected from %s", type_str);
             return;
 
         } else {
             crm_info("No %s connection", type_str);
             return;
         }
     }
 #endif
     /* The variable may be unset; passing NULL for a %s conversion is
      * undefined, so substitute a placeholder as get_cluster_type() does
      */
     stack_env = getenv("HA_cluster_type");
     crm_info("Unsupported cluster stack: %s", stack_env ? stack_env : "-unspecified-");
 }
 
 /*!
  * \brief Send an XML message over whichever cluster layer is in use
  *
  * \param[in] node     Destination peer, or NULL
  * \param[in] service  Message type handed to the corosync-based sender
  * \param[in] data     XML payload
  * \param[in] ordered  Handed to the heartbeat sender only
  *
  * \return Result of the stack-specific send function, or FALSE if no
  *         supported stack is active
  */
 gboolean
 send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode * data,
                      gboolean ordered)
 {
 #if SUPPORT_COROSYNC
     if (is_openais_cluster()) {
         return send_cluster_message_cs(data, FALSE, node, service);
     }
 #endif
 
 #if SUPPORT_HEARTBEAT
     if (is_heartbeat_cluster()) {
         const char *target = NULL;
 
         if (node) {
             target = node->uname;
         }
         return send_ha_message(heartbeat_cluster, data, target, ordered);
     }
 #endif
 
     return FALSE;
 }
 
 /*!
  * \brief Get the name of the local node
  *
  * The result of get_node_name(0) is kept in a static variable and reused
  * by later calls once it is non-NULL.
  *
  * \return Local node name as returned by get_node_name(0)
  */
 const char *
 get_local_node_name(void)
 {
     static char *cached_name = NULL;
 
     if (cached_name == NULL) {
         cached_name = get_node_name(0);
     }
     return cached_name;
 }
 
 /*!
  * \brief Get the name of a cluster node by its numeric ID
  *
  * \param[in] nodeid  ID of the desired node; 0 requests the local node
  *
  * \return Newly allocated node name, or NULL if no name could be obtained
  *         for a non-zero \p nodeid
  *
  * \note For the local node (\p nodeid 0) this calls
  *       crm_exit(DAEMON_RESPAWN_STOP) if no name can be obtained at all.
  */
 char *
 get_node_name(uint32_t nodeid)
 {
     char *name = NULL;
     const char *isolation_host = NULL;
     enum cluster_type_e stack;
 
     /* For the local node, an isolation host named in the environment wins */
     if (nodeid == 0) {
         isolation_host = getenv("OCF_RESKEY_"CRM_META"_isolation_host");
         if (isolation_host) {
             return strdup(isolation_host);
         }
     }
 
     /* Ask the active cluster layer; which cases exist depends on build options */
     stack = get_cluster_type();
     switch (stack) {
         case pcmk_cluster_heartbeat:
             /* No lookup here; the local node falls through to uname() below */
             break;
 
 #if SUPPORT_PLUGIN
         case pcmk_cluster_classic_ais:
             name = classic_node_name(nodeid);
             break;
 #else
 #  if SUPPORT_COROSYNC
         case pcmk_cluster_corosync:
             name = corosync_node_name(0, nodeid);
             break;
 #  endif
 #endif
 
 #if SUPPORT_CMAN
         case pcmk_cluster_cman:
             name = cman_node_name(nodeid);
             break;
 #endif
 
         default:
             crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack);
     }
 
     /* Local node only: fall back to the system's own node name */
     if(name == NULL && nodeid == 0) {
         struct utsname res;
         int rc = uname(&res);
 
         if (rc == 0) {
             crm_notice("Defaulting to uname -n for the local %s node name",
                        name_for_cluster_type(stack));
             name = strdup(res.nodename);
         }
 
         /* Either uname() or strdup() failed */
         if (name == NULL) {
             crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack));
             crm_exit(DAEMON_RESPAWN_STOP);
         }
     }
 
     if (name == NULL) {
         crm_notice("Could not obtain a node name for %s nodeid %u",
                    name_for_cluster_type(stack), nodeid);
     }
     return name;
 }
 
+/*!
+ * \brief Get the node name corresponding to a node UUID
+ *
+ * \param[in] uuid  UUID of desired node
+ *
+ * \return Name of the desired node, or NULL if it cannot be determined
+ *
+ * \note This relies on the remote peer cache being populated with all
+ *       remote nodes in the cluster, so callers should maintain that cache.
+ */
 const char *
 crm_peer_uname(const char *uuid)
 {
     GHashTableIter iter;
     crm_node_t *node = NULL;
 
     CRM_CHECK(uuid != NULL, return NULL);
 
     /* remote nodes have the same uname and uuid */
     /* NOTE(review): both caches are used here without a NULL check;
      * presumably callers initialise them first -- confirm
      */
     if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
         return uuid;
     }
 
     /* avoid blocking calls where possible */
     g_hash_table_iter_init(&iter, crm_peer_cache);
     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
         if(node->uuid && strcasecmp(node->uuid, uuid) == 0) {
             if(node->uname) {
                 return node->uname;
             }
             /* UUID matches but the name is unknown: stop scanning */
             break;
         }
     }
     /* Discard whatever entry the scan ended on before the lookups below */
+    node = NULL;
 
 #if SUPPORT_COROSYNC
     if (is_openais_cluster()) {
         if (uname_is_uuid() == FALSE && is_corosync_cluster()) {
             /* Here the UUID is expected to be the numeric node ID */
             uint32_t id = crm_int_helper(uuid, NULL);
             if(id != 0) {
                 node = crm_find_peer(id, NULL);
             } else {
                 crm_err("Invalid node id: %s", uuid);
             }
 
         } else {
             node = crm_find_peer(0, uuid);
         }
 
         if (node) {
             crm_info("Setting uuid for node %s[%u] to '%s'", node->uname, node->id, uuid);
             /* NOTE(review): any previous node->uuid is overwritten without
              * being freed -- confirm it cannot already be set here
              */
             node->uuid = strdup(uuid);
             if(node->uname) {
                 return node->uname;
             }
         }
         return NULL;
     }
 #endif
 
 #if SUPPORT_HEARTBEAT
     if (is_heartbeat_cluster()) {
         if (heartbeat_cluster != NULL) {
             cl_uuid_t uuid_raw;
             char *uuid_copy = strdup(uuid);
             char *uname = malloc(MAX_NAME);
 
             /* NOTE(review): neither allocation is checked before use */
             cl_uuid_parse(uuid_copy, &uuid_raw);
 
             if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname,
                                                              MAX_NAME) == HA_FAIL) {
                 crm_err("Could not calculate uname for %s", uuid);
             } else {
                 node = crm_get_peer(0, uname);
             }
 
             free(uuid_copy);
             free(uname);
         }
 
         if (node) {
             crm_info("Setting uuid for node %s to '%s'", node->uname, uuid);
             /* NOTE(review): same unconditional overwrite of node->uuid as above */
             node->uuid = strdup(uuid);
             if(node->uname) {
                 return node->uname;
             }
         }
         return NULL;
     }
 #endif
 
     return NULL;
 }
 
 /*!
  * \brief Store a node's UUID in an XML attribute
  *
  * \param[in,out] xml   XML element to modify
  * \param[in]     attr  Name of the attribute to set
  * \param[in,out] node  Node whose UUID (via crm_peer_uuid()) is stored
  */
 void
 set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
 {
     crm_xml_add(xml, attr, crm_peer_uuid(node));
 }
 
 const char *
 name_for_cluster_type(enum cluster_type_e type)
 {
     switch (type) {
         case pcmk_cluster_classic_ais:
             return "classic openais (with plugin)";
         case pcmk_cluster_cman:
             return "cman";
         case pcmk_cluster_corosync:
             return "corosync";
         case pcmk_cluster_heartbeat:
             return "heartbeat";
         case pcmk_cluster_unknown:
             return "unknown";
         case pcmk_cluster_invalid:
             return "invalid";
     }
     crm_err("Invalid cluster type: %d", type);
     return "invalid";
 }
 
 /* Do not expose these two */
 int set_cluster_type(enum cluster_type_e type);
 static enum cluster_type_e cluster_type = pcmk_cluster_unknown;
 
 /*!
  * \internal
  * \brief Record the cluster type to be reported by get_cluster_type()
  *
  * \param[in] type  Cluster type to record
  *
  * \return 0 if the type was recorded (or already matched), -1 if a different,
  *         known type had already been recorded
  */
 int
 set_cluster_type(enum cluster_type_e type)
 {
     if (cluster_type == pcmk_cluster_unknown) {
         crm_info("Cluster type set to: %s", name_for_cluster_type(type));
 
     } else if (cluster_type != type && type != pcmk_cluster_unknown) {
         /* A different known type is already in effect: refuse the change */
         crm_err("Cluster type already set to %s, ignoring %s",
                 name_for_cluster_type(cluster_type), name_for_cluster_type(type));
         return -1;
     }
 
     /* First assignment, a repeat of the current value, or a reset to unknown */
     cluster_type = type;
     return 0;
 }
 /*!
  * \brief Determine which cluster infrastructure is in use
  *
  * A previously determined type is returned as is. Otherwise, if the
  * HA_cluster_type environment variable is unset, the stacks compiled into
  * this build are probed; if it is set, its value is matched against the
  * supported stack names.
  *
  * \return Determined cluster type (pcmk_cluster_unknown if none was found)
  *
  * \note Calls crm_exit(DAEMON_RESPAWN_STOP) if the result is
  *       pcmk_cluster_invalid.
  */
 enum cluster_type_e
 get_cluster_type(void)
 {
     bool detected = FALSE;
     const char *cluster = NULL;
 
     /* Return the previous calculation, if any */
     if (cluster_type != pcmk_cluster_unknown) {
         return cluster_type;
     }
 
     cluster = getenv("HA_cluster_type");
 
 #if SUPPORT_HEARTBEAT
     /* If nothing is defined in the environment, try heartbeat (if supported) */
     if(cluster == NULL) {
         ll_cluster_t *hb;
         ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(
             &hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
 
         /* NOTE(review): hb is used without a NULL check and is not released
          * in this function -- confirm whether that is intentional
          */
         hb = (*new_cluster) ("heartbeat");
 
         crm_debug("Testing with Heartbeat");
         /*
          * Test as "casual" client (clientid == NULL; will be replaced by
          * current pid).  We are trying to detect if we can communicate with
          * heartbeat, not if we can register as some specific service.
          * Otherwise all but one of several concurrent invocations will get
          * HA_FAIL because of:
          * WARN: duplicate client add request
          * ERROR: api_process_registration_msg: cannot add client()
          * and then likely fail :(
          */
         if (hb->llc_ops->signon(hb, NULL) == HA_OK) {
             hb->llc_ops->signoff(hb, FALSE);
 
             cluster_type = pcmk_cluster_heartbeat;
             detected = TRUE;
             goto done;
         }
     }
 #endif
 
 #if SUPPORT_COROSYNC
     /* If nothing is defined in the environment, try corosync (if supported) */
     if(cluster == NULL) {
         crm_debug("Testing with Corosync");
         cluster_type = find_corosync_variant();
         if (cluster_type != pcmk_cluster_unknown) {
             detected = TRUE;
             goto done;
         }
     }
 #endif
 
     /* Either something was defined in the environment or detection found
      * nothing; test any configured value against what we support
      */
     crm_info("Verifying cluster type: '%s'", cluster?cluster:"-unspecified-");
     if (cluster == NULL) {
         /* Nothing configured and nothing detected: the type stays as it is */
 
 #if SUPPORT_HEARTBEAT
     } else if (safe_str_eq(cluster, "heartbeat")) {
         cluster_type = pcmk_cluster_heartbeat;
 #endif
 
 #if SUPPORT_COROSYNC
     } else if (safe_str_eq(cluster, "openais")
                || safe_str_eq(cluster, "classic openais (with plugin)")) {
         cluster_type = pcmk_cluster_classic_ais;
 
     } else if (safe_str_eq(cluster, "corosync")) {
         cluster_type = pcmk_cluster_corosync;
 #endif
 
 #if SUPPORT_CMAN
     } else if (safe_str_eq(cluster, "cman")) {
         cluster_type = pcmk_cluster_cman;
 #endif
 
     } else {
         cluster_type = pcmk_cluster_invalid;
         goto done; /* Keep the compiler happy when no stacks are supported */
     }
 
   done:
     /* Report the outcome; an invalid type terminates the process */
     if (cluster_type == pcmk_cluster_unknown) {
         crm_notice("Could not determine the current cluster type");
 
     } else if (cluster_type == pcmk_cluster_invalid) {
         crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.",
                    cluster);
         crm_exit(DAEMON_RESPAWN_STOP);
 
     } else {
         crm_info("%s an active '%s' cluster", detected?"Detected":"Assuming", name_for_cluster_type(cluster_type));
     }
 
     return cluster_type;
 }
 
 gboolean
 is_cman_cluster(void)
 {
     return get_cluster_type() == pcmk_cluster_cman;
 }
 
 gboolean
 is_corosync_cluster(void)
 {
     return get_cluster_type() == pcmk_cluster_corosync;
 }
 
 gboolean
 is_classic_ais_cluster(void)
 {
     return get_cluster_type() == pcmk_cluster_classic_ais;
 }
 
 /*!
  * \brief Check whether the cluster type is one of the corosync-based stacks
  *
  * \return TRUE if get_cluster_type() reports classic openais, corosync or
  *         cman; FALSE otherwise
  */
 gboolean
 is_openais_cluster(void)
 {
     switch (get_cluster_type()) {
         case pcmk_cluster_classic_ais:
         case pcmk_cluster_corosync:
         case pcmk_cluster_cman:
             return TRUE;
 
         default:
             return FALSE;
     }
 }
 
 gboolean
 is_heartbeat_cluster(void)
 {
     return get_cluster_type() == pcmk_cluster_heartbeat;
 }
 
 /*!
  * \brief Check whether a string is usable as a node name
  *
  * \param[in] key   Description of where \p name came from (used in logs only)
  * \param[in] name  Candidate node name
  *
  * \return FALSE if \p name is NULL, begins with four dot-separated integers
  *         (treated as an IPv4 address) or contains a colon (treated as an
  *         IPv6 address); TRUE otherwise
  */
 gboolean
 node_name_is_valid(const char *key, const char *name)
 {
     int octets[4];
 
     if (name == NULL) {
         crm_trace("%s is empty", key);
         return FALSE;
     }
 
     if (sscanf(name, "%d.%d.%d.%d", &octets[0], &octets[1], &octets[2], &octets[3]) == 4) {
         crm_trace("%s contains an ipv4 address, ignoring: %s", key, name);
         return FALSE;
     }
 
     if (strchr(name, ':') != NULL) {
         crm_trace("%s contains an ipv6 address, ignoring: %s", key, name);
         return FALSE;
     }
 
     crm_trace("%s is valid", key);
     return TRUE;
 }
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index 3ec2c251f5..9d17bfb259 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -1,954 +1,1102 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <crm_internal.h>
 
 #ifndef _GNU_SOURCE
 #  define _GNU_SOURCE
 #endif
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <string.h>
 #include <glib.h>
 #include <crm/common/ipc.h>
 #include <crm/cluster/internal.h>
 #include <crm/msg_xml.h>
 #include <crm/stonith-ng.h>
 
+/* The peer cache remembers cluster nodes that have been seen.
+ * This is managed mostly automatically by libcluster, based on
+ * cluster membership events.
+ *
+ * Because cluster nodes can have conflicting names or UUIDs,
+ * the hash table key is a uniquely generated ID.
+ */
 GHashTable *crm_peer_cache = NULL;
+
+/*
+ * The remote peer cache tracks pacemaker_remote nodes. While the
+ * value has the same type as the peer cache's, it is tracked separately for
+ * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
+ * so the name (which is also the UUID) is used as the hash table key; there
+ * is no equivalent of membership events, so management is not automatic; and
+ * most users of the peer cache need to exclude pacemaker_remote nodes.
+ *
+ * That said, using a single cache would be more logical and less error-prone,
+ * so it would be a good idea to merge them one day.
+ *
+ * libcluster provides two avenues for populating the cache:
+ * crm_remote_peer_get(), crm_remote_peer_cache_add() and
+ * crm_remote_peer_cache_remove() directly manage it,
+ * while crm_remote_peer_cache_refresh() populates it via the CIB.
+ */
 GHashTable *crm_remote_peer_cache = NULL;
+
 /* NOTE(review): presumably the most recent cluster membership sequence
  * number; it is not referenced in this part of the file -- confirm.
  */
 unsigned long long crm_peer_seq = 0;
 /* NOTE(review): presumably whether the local partition currently has quorum;
  * it is not referenced in this part of the file -- confirm.
  */
 gboolean crm_have_quorum = FALSE;
 /* Whether lost peers are reaped automatically; see crm_set_autoreap() */
 static gboolean crm_autoreap  = TRUE;
 
 /*!
  * \brief Get the number of entries in the remote peer cache
  *
  * \return Number of cached remote nodes, or 0 if the cache does not exist
  */
 int
 crm_remote_peer_cache_size(void)
 {
     return (crm_remote_peer_cache != NULL)?
            g_hash_table_size(crm_remote_peer_cache) : 0;
 }
 
-void
-crm_remote_peer_cache_add(const char *node_name)
+/*!
+ * \brief Get a remote node peer cache entry, creating it if necessary
+ *
+ * \param[in] node_name  Name of remote node
+ *
+ * \return Cache entry for node on success, NULL otherwise (errno is set to
+ *         EINVAL for a NULL name, ENOMEM if the name could not be copied)
+ *
+ * \note When creating a new entry, this will leave the node state undetermined,
+ *       so the caller should also call crm_update_peer_state() if the state is
+ *       known.
+ */
+crm_node_t *
+crm_remote_peer_get(const char *node_name)
 {
-    crm_node_t *node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
+    crm_node_t *node;
+
+    if (node_name == NULL) {
+        errno = EINVAL; /* errno codes are positive values */
+        return NULL;
+    }
 
+    /* Return existing cache entry if one exists */
+    node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
+    if (node) {
+        return node;
+    }
+
+    /* Allocate a new entry */
+    node = calloc(1, sizeof(crm_node_t));
     if (node == NULL) {
-            crm_trace("added %s to remote cache", node_name);
-            node = calloc(1, sizeof(crm_node_t));
-            node->flags = crm_remote_node;
-            CRM_ASSERT(node);
-            node->uname = strdup(node_name);
-            node->uuid = strdup(node_name);
-            node->state = strdup(CRM_NODE_MEMBER);
-            g_hash_table_replace(crm_remote_peer_cache, node->uname, node);
+        return NULL;
+    }
+
+    /* Populate the essential information */
+    node->flags = crm_remote_node;
+    node->uuid = strdup(node_name);
+    if (node->uuid == NULL) {
+        free(node);
+        errno = ENOMEM; /* set after free(), which could otherwise clobber it */
+        return NULL;
     }
+
+    /* Add the new entry to the cache */
+    g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
+    crm_trace("added %s to remote cache", node_name);
+
+    /* Update the entry's uname, ensuring peer status callbacks are called */
+    crm_update_peer_uname(node, node_name);
+    return node;
+}
+
+/*!
+ * \brief Add a node to the remote peer cache
+ *
+ * \param[in] node_name  Name of remote node
+ *
+ * \note This is a legacy convenience wrapper for crm_remote_peer_get()
+ *       for callers that don't need the cache entry returned.
+ */
+void
+crm_remote_peer_cache_add(const char *node_name)
+{
+    CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
 }
 
+/*!
+ * \brief Remove a node from the remote peer cache
+ *
+ * \param[in] node_name  Name of remote node, used as the cache lookup key
+ *
+ * \note A trace message is logged only if an entry was actually removed.
+ */
 void
 crm_remote_peer_cache_remove(const char *node_name)
 {
-    g_hash_table_remove(crm_remote_peer_cache, node_name);
+    if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
+        crm_trace("removed %s from remote peer cache", node_name);
+    }
 }
 
+/*!
+ * \internal
+ * \brief Translate a CIB node state entry into a peer state string
+ *
+ * \param[in] node_state  Node state XML from the CIB
+ *
+ * \return CRM_NODE_MEMBER if XML_NODE_IN_CLUSTER is absent or true,
+ *         CRM_NODE_LOST if it is present and not true
+ * \note A missing attribute counts as true (unlike most boolean XML
+ *       attributes), for backward compatibility with older crmd versions
+ *       that don't set it.
+ */
+static const char *
+remote_state_from_cib(xmlNode *node_state)
+{
+    const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
+
+    if ((in_cluster == NULL) || crm_is_true(in_cluster)) {
+        return CRM_NODE_MEMBER;
+    }
+    return CRM_NODE_LOST;
+}
+
+/* user data for looping through remote node xpath searches */
+struct refresh_data {
+    const char *field;  /* XML attribute to check for node name */
+    gboolean has_state; /* whether to update node state based on XML */
+};
+
+/*!
+ * \internal
+ * \brief Process one pacemaker_remote node xpath search result
+ *
+ * \param[in] result     XML search result
+ * \param[in] user_data  Pointer to a struct refresh_data saying which attribute
+ *                       of result holds the node name, and whether result
+ *                       carries node state
+ *
+ * \note A node that is already cached and no longer flagged crm_node_dirty
+ *       is left untouched.
+ */
 static void
-remote_cache_refresh_helper(xmlNode *cib, const char *xpath, const char *field)
+remote_cache_refresh_helper(xmlNode *result, void *user_data)
 {
-    const char *remote = NULL;
-    crm_node_t *node = NULL;
-    xmlXPathObjectPtr xpathObj = NULL;
-    int max = 0;
-    int lpc = 0;
-
-    xpathObj = xpath_search(cib, xpath);
-    max = numXpathResults(xpathObj);
-    for (lpc = 0; lpc < max; lpc++) {
-        xmlNode *xml = getXpathResult(xpathObj, lpc);
-
-        CRM_LOG_ASSERT(xml != NULL);
-        if(xml != NULL) {
-            remote = crm_element_value(xml, field);
+    struct refresh_data *data = user_data;
+    const char *remote = crm_element_value(result, data->field);
+    const char *state = NULL;
+    crm_node_t *node;
+
+    CRM_CHECK(remote != NULL, return);
+
+    /* Determine node's state, if the result has it */
+    if (data->has_state) {
+        state = remote_state_from_cib(result);
+    }
+
+    /* Check whether cache already has entry for node */
+    node = g_hash_table_lookup(crm_remote_peer_cache, remote);
+
+    if (node == NULL) {
+        /* Node is not in cache, so add a new entry for it */
+        node = crm_remote_peer_get(remote);
+        CRM_ASSERT(node);
+        if (state) {
+            crm_update_peer_state(__FUNCTION__, node, state, 0);
         }
 
-        if (remote) {
-            crm_trace("added %s to remote cache", remote);
-            node = calloc(1, sizeof(crm_node_t));
-            node->flags = crm_remote_node;
-            CRM_ASSERT(node);
-            node->uname = strdup(remote);
-            node->uuid = strdup(remote);
-            node->state = strdup(CRM_NODE_MEMBER);
-            g_hash_table_replace(crm_remote_peer_cache, node->uname, node);
+    } else if (is_set(node->flags, crm_node_dirty)) {
+        /* Node is in cache and hasn't been updated already, so mark it clean */
+        clear_bit(node->flags, crm_node_dirty);
+        if (state) {
+            crm_update_peer_state(__FUNCTION__, node, state, 0);
         }
     }
-    freeXpathObject(xpathObj);
+}
+
+/* g_hash_table_foreach() callback: flag one cached node as dirty */
+static void
+mark_dirty(gpointer key, gpointer value, gpointer user_data)
+{
+    crm_node_t *node = value;
+
+    set_bit(node->flags, crm_node_dirty);
+}
+
+/* g_hash_table_foreach_remove() predicate: is this cached node still dirty? */
+static gboolean
+is_dirty(gpointer key, gpointer value, gpointer user_data)
+{
+    crm_node_t *node = value;
+
+    return is_set(node->flags, crm_node_dirty);
 }
 
 /* XPath search string to find the CIB resource nvpairs named
  * XML_RSC_ATTR_REMOTE_NODE (under XML_TAG_META_SETS) that define guest nodes
  */
 #define XPATH_GUEST_NODE_CONFIG \
     "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
     "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
     "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
 
 /* XPath search string to find the CIB resource entries (type 'remote',
  * provider 'pacemaker') that define remote nodes
  */
 #define XPATH_REMOTE_NODE_CONFIG \
     "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
     "[@type='remote'][@provider='pacemaker']"
 
 /* XPath search string to find the CIB node status entries that are flagged
  * XML_NODE_IS_REMOTE (i.e. pacemaker_remote nodes)
  */
 #define XPATH_REMOTE_NODE_STATUS \
     "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
     "[@" XML_NODE_IS_REMOTE "='true']"
 
 /*!
  * \brief Repopulate the remote peer cache based on CIB XML
  *
  * \param[in] cib  CIB XML to parse
  */
-void crm_remote_peer_cache_refresh(xmlNode *cib)
+void
+crm_remote_peer_cache_refresh(xmlNode *cib)
 {
-    g_hash_table_remove_all(crm_remote_peer_cache);
-
-    /* remote nodes associated with a cluster resource */
-    remote_cache_refresh_helper(cib, XPATH_GUEST_NODE_CONFIG, "value");
-
-    /* baremetal nodes defined by connection resources*/
-    remote_cache_refresh_helper(cib, XPATH_REMOTE_NODE_CONFIG, "id");
-
-    /* baremetal nodes we have seen in the config that may or may not have connection
-     * resources associated with them anymore */
-    remote_cache_refresh_helper(cib, XPATH_REMOTE_NODE_STATUS, "id");
+    struct refresh_data data;
+
+    /* First, we mark all existing cache entries as dirty,
+     * so that later we can remove any that weren't in the CIB.
+     * We don't empty the cache, because we need to detect changes in state.
+     */
+    g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
+
+    /* Look for guest nodes and remote nodes in the status section */
+    data.field = "id";
+    data.has_state = TRUE;
+    crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_STATUS,
+                             remote_cache_refresh_helper, &data);
+
+    /* Look for guest nodes and remote nodes in the configuration section,
+     * because they may have just been added and not have a status entry yet.
+     * In that case, the cached node state will be left NULL, so that the
+     * peer status callback isn't called until we're sure the node started
+     * successfully.
+     */
+    data.field = "value";
+    data.has_state = FALSE;
+    crm_foreach_xpath_result(cib, XPATH_GUEST_NODE_CONFIG,
+                             remote_cache_refresh_helper, &data);
+    data.field = "id";
+    data.has_state = FALSE;
+    crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_CONFIG,
+                             remote_cache_refresh_helper, &data);
+
+    /* Remove all old cache entries that weren't seen in the CIB */
+    g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
 }
 
 /*!
  * \brief Check whether a node counts as an active cluster peer
  *
  * \param[in] node  Peer cache entry to check (may be NULL)
  *
  * \return FALSE for a NULL or remote node; otherwise the result of the check
  *         for the current cluster layer, or FALSE (after logging an error) if
  *         no compiled-in layer handles the current cluster type
  */
 gboolean
 crm_is_peer_active(const crm_node_t * node)
 {
     /* Remote nodes are never considered active members, which guarantees
      * they will never be considered for DC membership.
      */
     if ((node == NULL) || is_set(node->flags, crm_remote_node)) {
         return FALSE;
     }
 
 #if SUPPORT_COROSYNC
     if (is_openais_cluster()) {
         return crm_is_corosync_peer_active(node);
     }
 #endif
 
 #if SUPPORT_HEARTBEAT
     if (is_heartbeat_cluster()) {
         return crm_is_heartbeat_peer_active(node);
     }
 #endif
 
     crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
     return FALSE;
 }
 
 /*!
  * \internal
  * \brief g_hash_table_foreach_remove() predicate selecting peers to reap
  *
  * \param[in] key        Hash table key (unused)
  * \param[in] value      Peer cache entry being examined
  * \param[in] user_data  Search criteria (a crm_node_t with id and uname set)
  *
  * \return TRUE if the entry matches the criteria and is not an active peer
  *
  * \note The criteria's id is compared when it is nonzero; the uname is only
  *       compared when the id is 0.
  */
 static gboolean
 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
 {
     crm_node_t *peer = value;
     crm_node_t *wanted = user_data;
 
     if (wanted == NULL) {
         return FALSE;
     }
 
     if (wanted->id) {
         if (peer->id != wanted->id) {
             return FALSE;
         }
 
     } else if (safe_str_neq(peer->uname, wanted->uname)) {
         return FALSE;
     }
 
     if (crm_is_peer_active(value)) {
         return FALSE;
     }
 
     crm_notice("Removing %s/%u from the membership list", peer->uname, peer->id);
     return TRUE;
 }
 
 /*!
  * \brief Purge inactive peer cache entries matching a node ID and/or uname
  *
  * \param[in] id    ID of node to remove (or 0 to match by name instead)
  * \param[in] name  Uname of node to remove (or NULL to ignore)
  *
  * \return Number of cache entries removed (0 if the cache does not exist)
  *
  * \note Entries for which crm_is_peer_active() is true are never removed.
  */
 guint
 reap_crm_member(uint32_t id, const char *name)
 {
     int purged = 0;
     crm_node_t criteria;
 
     if (crm_peer_cache == NULL) {
         crm_trace("Nothing to do, cache not initialized");
         return 0;
     }
 
     /* Only the id and uname members are consulted by the predicate */
     criteria.id = id;
     criteria.uname = NULL;
     if (name != NULL) {
         criteria.uname = strdup(name);
     }
 
     purged = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &criteria);
     if (purged == 0) {
         crm_info("No peers with id=%u and/or uname=%s exist", id, name);
 
     } else {
         crm_notice("Purged %d peers with id=%u and/or uname=%s from the membership cache",
                    purged, criteria.id, criteria.uname);
     }
 
     free(criteria.uname);
     return purged;
 }
 
 /* g_hash_table_foreach() callback: increment the guint counter passed as
  * user_data if this peer cache entry is an active peer
  */
 static void
 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
 {
     guint *active = user_data;
 
     if (crm_is_peer_active(value)) {
         (*active)++;
     }
 }
 
 guint
 crm_active_peers(void)
 {
     guint count = 0;
 
     if (crm_peer_cache) {
         g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
     }
     return count;
 }
 
 /* Value destructor for the peer caches: free a crm_node_t and the strings
  * it owns
  */
 static void
 destroy_crm_node(gpointer data)
 {
     crm_node_t *doomed = data;
 
     /* Log before freeing, since the message uses the uname */
     crm_trace("Destroying entry for node %u: %s", doomed->id, doomed->uname);
 
     free(doomed->uuid);
     free(doomed->uname);
     free(doomed->addr);
     free(doomed->state);
     free(doomed->expected);
 
     free(doomed);
 }
 
 /*!
  * \brief Create the cluster and remote peer caches if they don't exist yet
  *
  * \note Calling this again once both caches exist changes nothing.
  */
 void
 crm_peer_init(void)
 {
     if (!crm_peer_cache) {
         /* Keys are freed with free() when their entry is removed */
         crm_peer_cache = g_hash_table_new_full(crm_strcase_hash,
                                                crm_strcase_equal,
                                                free, destroy_crm_node);
     }
 
     if (!crm_remote_peer_cache) {
         /* No key destructor is registered for this table */
         crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash,
                                                       crm_strcase_equal,
                                                       NULL, destroy_crm_node);
     }
 }
 
 /*!
  * \brief Destroy the cluster and remote peer caches, if they exist
  *
  * \note Both cache pointers are reset to NULL, so crm_peer_init() can
  *       recreate the caches afterwards.
  */
 void
 crm_peer_destroy(void)
 {
     if (crm_peer_cache) {
         crm_trace("Destroying peer cache with %d members",
                   g_hash_table_size(crm_peer_cache));
         g_hash_table_destroy(crm_peer_cache);
         crm_peer_cache = NULL;
     }
 
     if (crm_remote_peer_cache) {
         crm_trace("Destroying remote peer cache with %d members",
                   g_hash_table_size(crm_remote_peer_cache));
         g_hash_table_destroy(crm_remote_peer_cache);
         crm_remote_peer_cache = NULL;
     }
 }
 
 /* Client-supplied hook called by the peer update functions in this file
  * after a peer's uname, process list or state changes (NULL if none is set).
  * The last argument depends on the change type: NULL for uname changes, a
  * pointer to the previous process bitmask for process changes, and the
  * previous state string for state changes.
  */
 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
 
 /*!
  * \brief Set a client function that will be called after peer status changes
  *
  * \param[in] dispatch  Pointer to function to use as callback
  *                      (NULL to stop receiving callbacks)
  *
  * \note Previously, client callbacks were responsible for peer cache
  *       management. This is no longer the case, and client callbacks should do
  *       only client-specific handling. Callbacks MUST NOT add or remove entries
  *       in the peer caches.
  */
 void
 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
 {
     crm_status_callback = dispatch;
 }
 
 /*!
  * \brief Tell the library whether to automatically reap lost nodes
  *
  * If TRUE (the default), calling crm_update_peer_proc() will also update the
  * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and crm_update_peer_state()
  * will reap peers whose state changes to anything other than CRM_NODE_MEMBER.
  * Callers should leave this enabled unless they plan to manage the cache
  * separately on their own.
  *
  * \param[in] autoreap  TRUE to enable automatic reaping, FALSE to disable
  *
  * \note The setting is stored in a single file-scope flag, so it applies to
  *       every later peer update made through this file.
  */
 void
 crm_set_autoreap(gboolean autoreap)
 {
     crm_autoreap = autoreap;
 }
 
 static void crm_dump_peer_hash(int level, const char *caller)
 {
     GHashTableIter iter;
     const char *id = NULL;
     crm_node_t *node = NULL;
 
     g_hash_table_iter_init(&iter, crm_peer_cache);
     while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
         do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
     }
 }
 
 /* g_hash_table_foreach_remove() predicate: select the entry whose value is
  * the very object passed as user_data (pointer comparison)
  */
 static gboolean
 crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
 {
     return (value == user_data)? TRUE : FALSE;
 }
 
 /*!
  * \brief Find a node in the remote and/or cluster peer caches
  *
  * \param[in] id     Cluster node ID to look for (0 if unknown)
  * \param[in] uname  Node name to look for (NULL if unknown)
  * \param[in] flags  Bitmask of CRM_GET_PEER_REMOTE and/or CRM_GET_PEER_CLUSTER
  *                   selecting which caches to search
  *
  * \return Matching cache entry, or NULL if none was found
  *
  * \note At least one of id and uname must be given. The remote peer cache is
  *       searched first, and only when a name is available, because it is
  *       looked up by name.
  */
 crm_node_t *
 crm_find_peer_full(unsigned int id, const char *uname, int flags)
 {
     crm_node_t *node = NULL;
 
     CRM_ASSERT(id > 0 || uname != NULL);
 
     crm_peer_init();
 
     /* The assertion above allows a NULL uname when an ID is given; never pass
      * a NULL key to the remote cache's string hash.
      */
     if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
     }
 
     if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
         node = crm_find_peer(id, uname);
     }
     return node;
 }
 
 /*!
  * \brief Get a node from the remote and/or cluster peer caches
  *
  * \param[in] id     Cluster node ID to look for (0 if unknown)
  * \param[in] uname  Node name to look for (NULL if unknown)
  * \param[in] flags  Bitmask of CRM_GET_PEER_REMOTE and/or CRM_GET_PEER_CLUSTER
  *                   selecting which caches to use
  *
  * \return Matching cache entry; NULL only if nothing was found in the remote
  *         cache and CRM_GET_PEER_CLUSTER was not requested
  *
  * \note At least one of id and uname must be given. The remote peer cache is
  *       only searched (never added to), and only when a name is available.
  *       With CRM_GET_PEER_CLUSTER, the lookup falls through to
  *       crm_get_peer(), which creates a cluster cache entry if needed.
  */
 crm_node_t *
 crm_get_peer_full(unsigned int id, const char *uname, int flags)
 {
     crm_node_t *node = NULL;
 
     CRM_ASSERT(id > 0 || uname != NULL);
 
     crm_peer_init();
 
     /* The assertion above allows a NULL uname when an ID is given; never pass
      * a NULL key to the remote cache's string hash.
      */
     if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
     }
 
     if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
         node = crm_get_peer(id, uname);
     }
     return node;
 }
 
 /*!
  * \brief Search the cluster peer cache for a node by ID and/or name
  *
  * \param[in] id     Cluster node ID to match (0 to skip the ID search)
  * \param[in] uname  Node name to match case-insensitively (NULL to skip)
  *
  * \return Matching cache entry, or NULL if nothing matched or if the only
  *         match was by name and that entry already has a different ID
  *
  * \note At least one of id and uname must be given. This never creates
  *       entries, but when the ID and name searches find two different
  *       entries it may remove the one found by name from the cache.
  */
 crm_node_t *
 crm_find_peer(unsigned int id, const char *uname)
 {
     GHashTableIter iter;
     crm_node_t *node = NULL;
     crm_node_t *by_id = NULL;
     crm_node_t *by_name = NULL;
 
     CRM_ASSERT(id > 0 || uname != NULL);
 
     crm_peer_init();
 
     /* First pass: first entry whose uname matches, ignoring case */
     if (uname != NULL) {
         g_hash_table_iter_init(&iter, crm_peer_cache);
         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
             if(node->uname && strcasecmp(node->uname, uname) == 0) {
                 crm_trace("Name match: %s = %p", node->uname, node);
                 by_name = node;
                 break;
             }
         }
     }
 
     /* Second pass: first entry whose ID matches */
     if (id > 0) {
         g_hash_table_iter_init(&iter, crm_peer_cache);
         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
             if(node->id == id) {
                 crm_trace("ID match: %u = %p", node->id, node);
                 by_id = node;
                 break;
             }
         }
     }
 
     node = by_id; /* Good default */
     if(by_id == by_name) {
         /* Nothing to do if they match (both NULL counts) */
         crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
 
     } else if(by_id == NULL && by_name) {
         /* Found by name only */
         crm_trace("Only one: %p for %u/%s", by_name, id, uname);
 
         if(id && by_name->id) {
             /* The named entry already belongs to another ID */
             crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
             crm_crit("Node %u and %u share the same name '%s'",
                      id, by_name->id, uname);
             node = NULL; /* Create a new one */
 
         } else {
             node = by_name;
         }
 
     } else if(by_name == NULL && by_id) {
         /* Found by ID only: the ID match is returned even if its name differs */
         crm_trace("Only one: %p for %u/%s", by_id, id, uname);
 
         if(uname && by_id->uname) {
             crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
             crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
                      uname, by_id->uname, id, uname);
         }
 
     /* From here on, both searches matched, but different entries */
     } else if(uname && by_id->uname) {
         if(safe_str_eq(uname, by_id->uname)) {
             /* Two entries for the same name: keep the one matching the ID */
             crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
             g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
 
         } else {
             crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
             crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
             crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
         }
 
     } else if(id && by_name->id) {
         crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
 
     } else {
         /* Simple merge */
 
         /* Only corosync based clusters use nodeid's
          *
          * The functions that call crm_update_peer_state() only know nodeid
          * so 'by_id' is authorative when merging
          *
          * Same for crm_update_peer_proc()
          */
         crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);
 
         crm_info("Merging %p into %p", by_name, by_id);
         g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
     }
 
     return node;
 }
 
 #if SUPPORT_COROSYNC
 /*!
  * \internal
  * \brief Remove inactive peer cache entries whose name collides with a node's
  *
  * \param[in] node  Node whose ID and uname existing entries are compared to
  *
  * \return Number of cache entries removed
  *
  * \note An entry is removed only if it has a nonzero ID different from
  *       node's, the same uname (ignoring case), and is not an active peer.
  *       Nothing is done if node has no ID or no uname, or (in builds without
  *       plugin support) if corosync_cmap_has_config("nodelist") is nonzero.
  */
 static guint
 crm_remove_conflicting_peer(crm_node_t *node)
 {
     int matches = 0;
     GHashTableIter iter;
     crm_node_t *existing_node = NULL;
 
     if (node->id == 0 || node->uname == NULL) {
         return 0;
     }
 
 #  if !SUPPORT_PLUGIN
     if (corosync_cmap_has_config("nodelist") != 0) {
         return 0;
     }
 #  endif
 
     g_hash_table_iter_init(&iter, crm_peer_cache);
     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
         if (existing_node->id > 0
             && existing_node->id != node->id
             && existing_node->uname != NULL
             && strcasecmp(existing_node->uname, node->uname) == 0) {
 
             /* Never drop an entry for a peer that is still active */
             if (crm_is_peer_active(existing_node)) {
                 continue;
             }
 
             crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                      existing_node->id, existing_node->uname, node->id);
 
             /* Removing through the iterator keeps the iteration valid */
             g_hash_table_iter_remove(&iter);
             matches++;
         }
     }
 
     return matches;
 }
 #endif
 
 /*!
  * \brief Get a cluster peer cache entry, creating it if necessary
  *
  * \param[in] id     Cluster node ID (0 if unknown)
  * \param[in] uname  Node name (NULL if unknown)
  *
  * \return Existing or newly created peer cache entry
  *
  * \note At least one of id and uname must be given. If no name is given and
  *       none is cached, one is looked up with get_node_name(id). Missing ID,
  *       uname and UUID fields of the entry are filled in when known; values
  *       that are already set are not overwritten.
  */
 /* coverity[-alloc] Memory is referenced in one or both hashtables */
 crm_node_t *
 crm_get_peer(unsigned int id, const char *uname)
 {
     crm_node_t *node = NULL;
     char *uname_lookup = NULL;
 
     CRM_ASSERT(id > 0 || uname != NULL);
 
     crm_peer_init();
 
     node = crm_find_peer(id, uname);
 
     /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
      * we need to do a lookup of the node name using the id in the cluster membership. */
     if ((node == NULL || node->uname == NULL) && (uname == NULL)) { 
         uname_lookup = get_node_name(id);
     }
 
     if (uname_lookup) {
         uname = uname_lookup;
         crm_trace("Inferred a name of '%s' for node %u", uname, id);
 
         /* try to turn up the node one more time now that we know the uname. */
         if (node == NULL) {
             node = crm_find_peer(id, uname);
         }
     }
 
 
     if (node == NULL) {
         /* Still unknown: add a blank entry under a freshly generated key */
         char *uniqueid = crm_generate_uuid();
 
         node = calloc(1, sizeof(crm_node_t));
         CRM_ASSERT(node);
 
         crm_info("Created entry %s/%p for node %s/%u (%d total)",
                  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
         g_hash_table_replace(crm_peer_cache, uniqueid, node);
     }
 
     if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
         crm_info("Node %u is now known as %s", id, uname);
     }
 
     if(id > 0 && node->id == 0) {
         node->id = id;
     }
 
     if (uname && (node->uname == NULL)) {
         crm_update_peer_uname(node, uname);
     }
 
     if(node->uuid == NULL) {
         const char *uuid = crm_peer_uuid(node);
 
         if (uuid) {
             crm_info("Node %u has uuid %s", id, uuid);
 
         } else {
             crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
         }
     }
 
     /* uname may point at uname_lookup, so free it only now */
     free(uname_lookup);
 
     return node;
 }
 
 /*!
  * \internal
  * \brief Update all of a node's information (process list, state, etc.)
  *
  * \param[in] source      Caller's function name (for log messages)
  * \param[in] id          Node ID (first passed through get_corosync_id()
  *                        together with uuid)
  * \param[in] born        If nonzero, value to store as the node's "born"
  *                        field (heartbeat and plugin builds only)
  * \param[in] seen        Membership ID passed to crm_update_peer_state()
  * \param[in] votes       If positive, node's vote count (plugin builds only)
  * \param[in] children    If nonzero, process bitmask passed to
  *                        crm_update_peer_proc() along with state
  * \param[in] uuid        Node UUID, stored only if the node has none yet and
  *                        the cluster is not openais-based (in which case
  *                        crm_peer_uuid() is used instead)
  * \param[in] uname       Node name used to look up or create the cache entry
  * \param[in] addr        If not NULL, node address (plugin builds only)
  * \param[in] state       If not NULL, node's new state
  *
  * \return NULL if node was reaped from peer caches, pointer to node otherwise
  *
  * \note This function should not be called within a peer cache iteration,
  *       otherwise reaping could invalidate the iterator.
  */
 crm_node_t *
 crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
                 uint32_t children, const char *uuid, const char *uname, const char *addr,
                 const char *state)
 {
 #if SUPPORT_PLUGIN
     gboolean addr_changed = FALSE;
     gboolean votes_changed = FALSE;
 #endif
     crm_node_t *node = NULL;
 
     id = get_corosync_id(id, uuid);
     node = crm_get_peer(id, uname);
 
     CRM_ASSERT(node != NULL);
 
     if (node->uuid == NULL) {
         if (is_openais_cluster()) {
             /* Yes, overrule whatever was passed in */
             crm_peer_uuid(node);
 
         } else if (uuid != NULL) {
             node->uuid = strdup(uuid);
         }
     }
 
     if (children > 0) {
         /* A NULL result means the node was reaped; don't touch it again */
         if (crm_update_peer_proc(source, node, children, state) == NULL) {
             return NULL;
         }
     }
 
     if (state != NULL) {
         /* A NULL result means the node was reaped; don't touch it again */
         if (crm_update_peer_state(source, node, state, seen) == NULL) {
             return NULL;
         }
     }
 #if SUPPORT_HEARTBEAT
     if (born != 0) {
         node->born = born;
     }
 #endif
 
 #if SUPPORT_PLUGIN
     /* These were only used by the plugin */
     if (born != 0) {
         node->born = born;
     }
 
     if (votes > 0 && node->votes != votes) {
         votes_changed = TRUE;
         node->votes = votes;
     }
 
     if (addr != NULL) {
         if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
             addr_changed = TRUE;
             free(node->addr);
             node->addr = strdup(addr);
         }
     }
     if (addr_changed || votes_changed) {
         crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
                  " proc=%.32x", source, node->uname, node->id, node->state,
                  node->addr, addr_changed ? " (new)" : "", node->votes,
                  votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
     }
 #endif
 
     return node;
 }
 
 /*!
  * \internal
  * \brief Update a node's uname
  *
  * \param[in] node        Node object to update
  * \param[in] uname       New name to set
  *
  * \note Nothing is changed (and an error is logged) if either argument is
  *       NULL. After the name is set, the client status callback (if any) is
  *       called with crm_status_uname.
  * \note This function should not be called within a peer cache iteration,
  *       because in some cases it can remove conflicting cache entries,
  *       which would invalidate the iterator.
  */
 void
 crm_update_peer_uname(crm_node_t *node, const char *uname)
 {
     const char *c = NULL;
     char *new_uname = NULL;
 
     CRM_CHECK(uname != NULL,
               crm_err("Bug: can't update node name without name"); return);
     CRM_CHECK(node != NULL,
               crm_err("Bug: can't update node name to %s without node", uname);
               return);
 
     for (c = uname; *c != '\0'; c++) {
         if (*c >= 'A' && *c <= 'Z') {
             crm_warn("Node names with capitals are discouraged, consider changing '%s'",
                      uname);
             break;
         }
     }
 
     /* Copy before freeing, in case uname points at node->uname itself */
     new_uname = strdup(uname);
     free(node->uname);
     node->uname = new_uname;
 
     if (crm_status_callback) {
         crm_status_callback(crm_status_uname, node, NULL);
     }
 
 #if SUPPORT_COROSYNC
     /* Remote nodes are skipped here; only cluster nodes are checked for
      * conflicting entries.
      */
     if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) {
         crm_remove_conflicting_peer(node);
     }
 #endif
 }
 
 /*!
  * \internal
  * \brief Update a node's process information (and potentially state)
  *
  * \param[in] source      Caller's function name (for log messages)
  * \param[in] node        Node object to update
  * \param[in] flag        Bitmask of new process information
  * \param[in] status      node status (online, offline, etc.)
  *
  * \return NULL if any node was reaped from peer caches, value of node otherwise
  *
  * \note If this function returns NULL, the supplied node object was likely
  *       freed and should not be used again. This function should not be
  *       called within a cache iteration if reaping is possible, otherwise
  *       reaping could invalidate the iterator.
  * \note Remote nodes are returned unchanged.
  */
 crm_node_t *
 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
 {
     uint32_t last = 0;
     gboolean changed = FALSE;
 
     CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
                                     source, peer2text(flag), status); return NULL);
 
     /* Pacemaker doesn't spawn processes on remote nodes */
     if (is_set(node->flags, crm_remote_node)) {
         return node;
     }
 
     last = node->processes;
     if (status == NULL) {
         /* No status given: flag replaces the whole process bitmask */
         node->processes = flag;
         if (node->processes != last) {
             changed = TRUE;
         }
 
     } else if (safe_str_eq(status, ONLINESTATUS)) {
         /* Online: add the given process bit(s) if not all are set already */
         if ((node->processes & flag) != flag) {
             set_bit(node->processes, flag);
             changed = TRUE;
         }
 #if SUPPORT_PLUGIN
     } else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
         /* Member (plugin builds): a nonzero flag replaces the whole bitmask */
         if (flag > 0 && node->processes != flag) {
             node->processes = flag;
             changed = TRUE;
         }
 #endif
 
     } else if (node->processes & flag) {
         /* Any other status: clear the given process bit(s) */
         clear_bit(node->processes, flag);
         changed = TRUE;
     }
 
     if (changed) {
         if (status == NULL && flag <= crm_proc_none) {
             crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
                      node->id);
         } else {
             crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
                      peer2text(flag), status);
         }
 
         /* Call the client callback first, then update the peer state,
          * in case the node will be reaped
          */
         if (crm_status_callback) {
             crm_status_callback(crm_status_processes, node, &last);
         }
 
         /* The client callback shouldn't touch the peer caches,
          * but as a safety net, bail if the peer cache was destroyed.
          */
         if (crm_peer_cache == NULL) {
             return NULL;
         }
 
         if (crm_autoreap) {
             /* Derive membership from whether the cluster layer's own process
              * bit is set; this may reap the node and return NULL
              */
             node = crm_update_peer_state(__FUNCTION__, node,
                                          is_set(node->processes, crm_get_cluster_proc())?
                                          CRM_NODE_MEMBER : CRM_NODE_LOST, 0);
         }
     } else {
         crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
                   peer2text(flag), status);
     }
     return node;
 }
 
 /*!
  * \internal
  * \brief Update a node's expected join state
  *
  * \param[in] source    Caller's function name (for log messages)
  * \param[in] node      Node object to update
  * \param[in] expected  Node's new expected state (NULL leaves it unchanged)
  *
  * \note Remote nodes are ignored. No status callback is invoked.
  */
 void
 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
 {
     CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
               return);
 
     /* Remote nodes don't participate in joins */
     if (is_set(node->flags, crm_remote_node)) {
         return;
     }
 
     if (expected != NULL && safe_str_neq(node->expected, expected)) {
         /* Keep the old value alive until it has been logged */
         char *previous = node->expected;
 
         node->expected = strdup(expected);
         crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
                  expected, previous);
         free(previous);
 
     } else {
         crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
                   node->id, expected);
     }
 }
 
 /*!
  * \internal
  * \brief Set a node's state, notify the status callback, and optionally reap
  *
  * \param[in] source      Caller's function name (for log messages)
  * \param[in] node        Node object to update
  * \param[in] state       Node's new state
  * \param[in] membership  Node's new membership ID
  * \param[in] iter        If not NULL, pointer to node's peer cache iterator
  *
  * \return NULL if any node was reaped, value of node otherwise
  *
  * \note A NULL return means the supplied node object was likely freed and
  *       must not be used again. When called while iterating over the peer
  *       cache, pass the iterator so removal goes through it.
  */
 static crm_node_t *
 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
 {
     gboolean is_member;
     char *previous = NULL;
     enum crm_status_type status_type = crm_status_nstate;
 
     CRM_CHECK(node != NULL, crm_err("%s: Could not set 'state' to %s", source, state);
                             return NULL);
 
     is_member = safe_str_eq(state, CRM_NODE_MEMBER);
     if (membership && is_member) {
         node->last_seen = membership;
     }
 
     /* No new state given, or it matches the current one */
     if (state == NULL || !safe_str_neq(node->state, state)) {
         crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id,
                   state);
         return node;
     }
 
     previous = node->state;
     if (is_set(node->flags, crm_remote_node)) {
         status_type = crm_status_rstate;
     }
 
     node->state = strdup(state);
     crm_notice("%s: Node %s[%u] - state is now %s (was %s)",
                source, node->uname, node->id, state, previous);
 
     /* The callback receives the old state, so free it only afterwards */
     if (crm_status_callback) {
         crm_status_callback(status_type, node, previous);
     }
     free(previous);
 
     /* We only autoreap from the peer cache, not the remote peer cache,
      * because the latter should be managed only by
      * crm_remote_peer_cache_refresh().
      */
     if (!crm_autoreap || is_member || is_set(node->flags, crm_remote_node)) {
         return node;
     }
 
     if (iter == NULL) {
         reap_crm_member(node->id, node->uname);
 
     } else {
         crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
         g_hash_table_iter_remove(iter);
     }
     return NULL;
 }
 
 /*!
  * \brief Set a node's state and membership ID outside of any cache iteration
  *
  * \param[in] source      Caller's function name (for log messages)
  * \param[in] node        Node object to update
  * \param[in] state       Node's new state
  * \param[in] membership  Node's new membership ID
  *
  * \return NULL if any node was reaped, value of node otherwise
  *
  * \note A NULL return means the supplied node object was likely freed and
  *       must not be used again. Do not call this while iterating over a
  *       cache if reaping is possible: no iterator is passed along, so a
  *       reap could invalidate the caller's iterator.
  */
 crm_node_t *
 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
 {
     crm_node_t *updated = crm_update_peer_state_iter(source, node, state, membership, NULL);
 
     return updated;
 }
 
 /*!
  * \internal
  * \brief Mark as lost every cached peer not seen in the given membership
  *
  * \param[in] membership  Membership ID of nodes to keep
  *
  * \note Peers whose state is still unknown (NULL) are only logged.
  */
 void
 crm_reap_unseen_nodes(uint64_t membership)
 {
     GHashTableIter cache_iter;
     crm_node_t *peer = NULL;
 
     crm_trace("Reaping unseen nodes...");
     g_hash_table_iter_init(&cache_iter, crm_peer_cache);
     while (g_hash_table_iter_next(&cache_iter, NULL, (gpointer *)&peer)) {
         if (peer->last_seen == membership) {
             continue;           /* seen in this membership, keep it */
         }
 
         if (peer->state == NULL) {
             crm_info("State of node %s[%u] is still unknown",
                      peer->uname, peer->id);
             continue;
         }
 
         /*
          * Handing our iterator to crm_update_peer_state_iter() lets it
          * drop the node from crm_peer_cache without invalidating the
          * iteration in progress
          */
         crm_update_peer_state_iter(__FUNCTION__, peer, CRM_NODE_LOST, membership, &cache_iter);
     }
 }
 
 /*!
  * \brief Ask the fencer to kick a cluster member
  *
  * \param[in] nodeid  ID of the node to kick
  * \param[in] uname   Name of the node to kick
  * \param[in] unused  Ignored
  *
  * \return Result of stonith_api_kick()
  */
 int
 crm_terminate_member(int nodeid, const char *uname, void *unused)
 {
     /* Always use the synchronous, non-mainloop version */
     int rc = stonith_api_kick(nodeid, uname, 120, TRUE);
 
     return rc;
 }
 
 /*!
  * \brief Ask the fencer to kick a cluster member (non-mainloop variant)
  *
  * \param[in] nodeid      ID of the node to kick
  * \param[in] uname       Name of the node to kick
  * \param[in] connection  Ignored
  *
  * \return Result of stonith_api_kick()
  */
 int
 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
 {
     int rc = stonith_api_kick(nodeid, uname, 120, TRUE);
 
     return rc;
 }
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
index 990a97fef3..7e844287f1 100644
--- a/lib/common/ipc.c
+++ b/lib/common/ipc.c
@@ -1,1274 +1,1274 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <sys/param.h>
 
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <grp.h>
 
 #include <errno.h>
 #include <fcntl.h>
 #include <bzlib.h>
 
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/ipc.h>
 #include <crm/common/ipcs.h>
 
 #define PCMK_IPC_VERSION 1
 
 /* Header placed in front of the payload of every message built by crm_ipc_prepare() */
 struct crm_ipc_response_header {
     /* libqb header; crm_ipc_prepare() fills in its size and id fields */
     struct qb_ipc_response_header qb;
     /* Payload length including the terminating NUL (1 + strlen of the XML text) */
     uint32_t size_uncompressed;
     /* Length of the compressed payload; left at 0 when the payload is sent as plain text */
     uint32_t size_compressed;
     /* Bitmask: crm_ipc_compressed plus whatever crm_ipc_flags the sender OR'ed in */
     uint32_t flags;
     uint8_t  version; /* Protect against version changes for anyone that might bother to statically link us */
 };
 
 /* sizeof(struct crm_ipc_response_header); stays 0 until crm_ipc_init() has run */
 static int hdr_offset = 0;
 /* Result of pick_ipc_buffer(0); stays 0 until crm_ipc_init() has run */
 static unsigned int ipc_buffer_max = 0;
 /* Forward declaration: the definition lives with the client-side code below */
 static unsigned int pick_ipc_buffer(unsigned int max);
 
 /*!
  * \internal
  * \brief Lazily initialise the file-level header size and buffer limit
  *
  * Each value is computed only while it is still 0, so repeated calls are
  * harmless.
  */
 static inline void
 crm_ipc_init(void)
 {
     if (!hdr_offset) {
         hdr_offset = sizeof(struct crm_ipc_response_header);
     }
 
     if (!ipc_buffer_max) {
         ipc_buffer_max = pick_ipc_buffer(0);
     }
 }
 
 /*!
  * \brief Report the IPC buffer size used when the caller requests none
  *
  * \return Value of pick_ipc_buffer(0)
  */
 unsigned int
 crm_ipc_default_buffer_size(void)
 {
     unsigned int default_size = pick_ipc_buffer(0);
 
     return default_size;
 }
 
 /*!
  * \internal
  * \brief Build a reference string of the form "<custom1>-<custom2>-<time>-<counter>"
  *
  * \param[in] custom1  First component ("_empty_" is used if NULL)
  * \param[in] custom2  Second component ("_empty_" is used if NULL)
  *
  * \return Newly allocated string (caller must free), or NULL on failure
  *
  * \note The counter is a function-local static that is incremented for each
  *       reference successfully generated.
  */
 static char *
 generateReference(const char *custom1, const char *custom2)
 {
     static unsigned int ref_counter = 0;
     const char *local_cust1 = (custom1 != NULL)? custom1 : "_empty_";
     const char *local_cust2 = (custom2 != NULL)? custom2 : "_empty_";
     unsigned long now = (unsigned long) time(NULL);
     char *reference = NULL;
     int reference_len = 0;
 
     /* Let snprintf() measure the exact length instead of guessing it */
     reference_len = snprintf(NULL, 0, "%s-%s-%lu-%u",
                              local_cust1, local_cust2, now, ref_counter);
     if (reference_len < 0) {
         return NULL;
     }
 
     reference = calloc(1, (size_t) reference_len + 1);
     if (reference != NULL) {
         snprintf(reference, (size_t) reference_len + 1, "%s-%s-%lu-%u",
                  local_cust1, local_cust2, now, ref_counter++);
     }
 
     return reference;
 }
 
 /*!
  * \brief Create a CRM request message
  *
  * \param[in] task       Value for the task field
  * \param[in] msg_data   If not NULL, XML added to the request as its data
  * \param[in] host_to    If not NULL or empty, destination host
  * \param[in] sys_to     Destination subsystem
  * \param[in] sys_from   Originating subsystem
  * \param[in] uuid_from  If not NULL, combined with \p sys_from to form the
  *                       sender field
  * \param[in] origin     Value for the origin field
  *
  * \return Newly created request XML
  */
 xmlNode *
 create_request_adv(const char *task, xmlNode * msg_data,
                    const char *host_to, const char *sys_to,
                    const char *sys_from, const char *uuid_from, const char *origin)
 {
     xmlNode *request = NULL;
     char *sender = NULL;
     char *reference = generateReference(task, sys_from);
 
     if (uuid_from == NULL && sys_from == NULL) {
         crm_err("No sys from specified");
 
     } else if (uuid_from == NULL) {
         sender = strdup(sys_from);
 
     } else {
         sender = generate_hash_key(sys_from, uuid_from);
     }
 
     /* host_from will get set for us if necessary by CRMd when routed */
     request = create_xml_node(NULL, __FUNCTION__);
     crm_xml_add(request, F_CRM_ORIGIN, origin);
     crm_xml_add(request, F_TYPE, T_CRM);
     crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET);
     crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST);
     crm_xml_add(request, F_CRM_REFERENCE, reference);
     crm_xml_add(request, F_CRM_TASK, task);
     crm_xml_add(request, F_CRM_SYS_TO, sys_to);
     crm_xml_add(request, F_CRM_SYS_FROM, sender);
 
     /* HOSTTO will be ignored if it is to the DC anyway. */
     if (host_to != NULL && host_to[0] != '\0') {
         crm_xml_add(request, F_CRM_HOST_TO, host_to);
     }
 
     if (msg_data != NULL) {
         add_message_xml(request, F_CRM_DATA, msg_data);
     }
 
     /* Both strings have been added to the request above and are no longer needed */
     free(sender);
     free(reference);
 
     return request;
 }
 
 /*!
  * \brief Create a CRM response to a request
  *
  * The reference and task are taken over from the request, while its
  * "from" and "to" subsystems are swapped.
  *
  * \param[in] original_request   Request being answered
  * \param[in] xml_response_data  If not NULL, XML added to the reply as its
  *                               data (via add_message_xml())
  * \param[in] origin             Value for the origin field
  *
  * \return Newly created reply XML, or NULL on error
  */
 xmlNode *
 create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const char *origin)
 {
     const char *req_host = crm_element_value(original_request, F_CRM_HOST_FROM);
     const char *req_sys_from = crm_element_value(original_request, F_CRM_SYS_FROM);
     const char *req_sys_to = crm_element_value(original_request, F_CRM_SYS_TO);
     const char *req_type = crm_element_value(original_request, F_CRM_MSG_TYPE);
     const char *req_task = crm_element_value(original_request, F_CRM_TASK);
     const char *req_reference = crm_element_value(original_request, F_CRM_REFERENCE);
     xmlNode *reply = NULL;
 
     if (req_type == NULL) {
         crm_err("Cannot create new_message, no message type in original message");
         CRM_ASSERT(req_type != NULL);
         return NULL;
     }
 
     reply = create_xml_node(NULL, __FUNCTION__);
     if (reply == NULL) {
         crm_err("Cannot create new_message, malloc failed");
         return NULL;
     }
 
     crm_xml_add(reply, F_CRM_ORIGIN, origin);
     crm_xml_add(reply, F_TYPE, T_CRM);
     crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET);
     crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE);
     crm_xml_add(reply, F_CRM_REFERENCE, req_reference);
     crm_xml_add(reply, F_CRM_TASK, req_task);
 
     /* since this is a reply, we reverse the from and to */
     crm_xml_add(reply, F_CRM_SYS_TO, req_sys_from);
     crm_xml_add(reply, F_CRM_SYS_FROM, req_sys_to);
 
     /* HOSTTO will be ignored if it is to the DC anyway. */
     if (req_host != NULL && req_host[0] != '\0') {
         crm_xml_add(reply, F_CRM_HOST_TO, req_host);
     }
 
     if (xml_response_data != NULL) {
         add_message_xml(reply, F_CRM_DATA, xml_response_data);
     }
 
     return reply;
 }
 
 /* Libqb based IPC */
 
 /* Server... */
 
 /* Table of connected clients, created by crm_client_init() with direct
  * (pointer) hashing. crm_client_new() stores each client under its
  * qb_ipcs_connection_t pointer; crm_client_destroy() removes entries that
  * have no IPC connection by their id instead.
  */
 GHashTable *client_connections = NULL;
 
 /*!
  * \brief Look up the client object belonging to an IPC connection
  *
  * \param[in] c  libqb server-side connection
  *
  * \return Matching client, or NULL if there is none (or no table yet)
  */
 crm_client_t *
 crm_client_get(qb_ipcs_connection_t * c)
 {
     if (client_connections == NULL) {
         crm_trace("No client found for %p", c);
         return NULL;
     }
 
     return g_hash_table_lookup(client_connections, c);
 }
 
 /*!
  * \brief Find a client by its id string
  *
  * \param[in] id  Client id to search for
  *
  * \return First client whose id matches exactly, or NULL if none does
  *
  * \note This is a linear scan over all entries of client_connections.
  */
 crm_client_t *
 crm_client_get_by_id(const char *id)
 {
     if (client_connections && id) {
         GHashTableIter iter;
         gpointer key;
         crm_client_t *candidate;
 
         for (g_hash_table_iter_init(&iter, client_connections);
              g_hash_table_iter_next(&iter, &key, (gpointer *) & candidate); ) {
 
             if (strcmp(candidate->id, id) == 0) {
                 return candidate;
             }
         }
     }
 
     crm_trace("No client found with id=%s", id);
     return NULL;
 }
 
 /*!
  * \brief Get a printable label for a client
  *
  * \param[in] c  Client (may be NULL)
  *
  * \return "null" for a NULL client; otherwise the client's name, falling
  *         back to its id, falling back to "unknown"
  */
 const char *
 crm_client_name(crm_client_t * c)
 {
     if (c == NULL) {
         return "null";
     }
     if (c->name != NULL) {
         return c->name;
     }
     if (c->id != NULL) {
         return c->id;
     }
     return "unknown";
 }
 
 /*!
  * \brief Create the client table if it does not exist yet
  */
 void
 crm_client_init(void)
 {
     if (client_connections != NULL) {
         return;                 /* already set up */
     }
 
     crm_trace("Creating client hash table");
     client_connections = g_hash_table_new(g_direct_hash, g_direct_equal);
 }
 
 /*!
  * \brief Destroy the client table, complaining if clients are still in it
  */
 void
 crm_client_cleanup(void)
 {
     int remaining = 0;
 
     if (client_connections == NULL) {
         return;
     }
 
     remaining = g_hash_table_size(client_connections);
     if (remaining != 0) {
         crm_err("Exiting with %d active connections", remaining);
     }
 
     g_hash_table_destroy(client_connections);
     client_connections = NULL;
 }
 
 /*!
  * \brief Disconnect every connection of an IPC service
  *
  * \param[in] service  libqb IPC service whose connections should be dropped
  */
 void
 crm_client_disconnect_all(qb_ipcs_service_t *service)
 {
     qb_ipcs_connection_t *current = qb_ipcs_connection_first_get(service);
 
     while (current != NULL) {
         /* Fetch the successor first, before the current one is disconnected */
         qb_ipcs_connection_t *following = qb_ipcs_connection_next_get(service, current);
 
         /* There really shouldn't be anyone connected at this point */
         crm_notice("Disconnecting client %p, pid=%d...", current, crm_ipcs_client_pid(current));
         qb_ipcs_disconnect(current);
         qb_ipcs_connection_unref(current);
 
         current = following;
     }
 }
 
 /*!
  * \brief Create and register the client object for a new IPC connection
  *
  * Sets the ownership and mode of the connection (when both the daemon
  * group and the client's group are non-zero), allocates the client object
  * and stores it in client_connections keyed by \p c.
  *
  * \param[in] c           libqb server-side connection
  * \param[in] uid_client  User ID of the connecting process
  * \param[in] gid_client  Group ID of the connecting process
  *
  * \return Newly allocated client, or NULL if \p c is NULL or memory could
  *         not be allocated
  */
 crm_client_t *
 crm_client_new(qb_ipcs_connection_t * c, uid_t uid_client, gid_t gid_client)
 {
     static uid_t uid_server = 0;
     static gid_t gid_cluster = 0;
 
     crm_client_t *client = NULL;
 
     CRM_LOG_ASSERT(c);
     if (c == NULL) {
         return NULL;
     }
 
     /* Look the daemon group up again on every call until it is found */
     if (gid_cluster == 0) {
         uid_server = getuid();
         if(crm_user_lookup(CRM_DAEMON_USER, NULL, &gid_cluster) < 0) {
             static bool have_error = FALSE;
             if(have_error == FALSE) {
                 /* Warn only once */
                 crm_warn("Could not find group for user %s", CRM_DAEMON_USER);
                 have_error = TRUE;
             }
         }
     }
 
     if(gid_cluster != 0 && gid_client != 0) {
         uid_t best_uid = -1; /* Passing -1 to chown(2) means don't change */
 
         if(uid_client == 0 || uid_server == 0) { /* Someone is privileged, but the other may not be */
             best_uid = QB_MAX(uid_client, uid_server);
             crm_trace("Allowing user %u to clean up after disconnect", best_uid);
         }
 
         crm_trace("Giving access to group %u", gid_cluster);
         qb_ipcs_connection_auth_set(c, best_uid, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
     }
 
     crm_client_init();
 
     /* TODO: Do our own auth checking, return NULL if unauthorized */
     client = calloc(1, sizeof(crm_client_t));
     if (client == NULL) {
         /* Fail the connection instead of dereferencing NULL below */
         crm_err("Could not allocate memory for client of connection %p", c);
         return NULL;
     }
 
     client->ipcs = c;
     client->kind = CRM_CLIENT_IPC;
     client->pid = crm_ipcs_client_pid(c);
 
     client->id = crm_generate_uuid();
 
     crm_debug("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid_client, gid_client, client->pid, client->id);
 
 #if ENABLE_ACL
     client->user = uid2username(uid_client);
 #endif
 
     g_hash_table_insert(client_connections, c, client);
     return client;
 }
 
 /*!
  * \brief Unregister a client and free everything it owns
  *
  * Removes the client from client_connections, cancels its timers, drops
  * all events still queued for it and frees the object.
  *
  * \param[in] c  Client to destroy (NULL is ignored)
  */
 void
 crm_client_destroy(crm_client_t * c)
 {
     if (c == NULL) {
         return;
     }
 
     if (client_connections) {
         if (c->ipcs == NULL) {
             /* No IPC connection: the entry is removed by id instead */
             crm_trace("Destroying remote connection %p (%d remaining)",
                       c, crm_hash_table_size(client_connections) - 1);
             g_hash_table_remove(client_connections, c->id);
 
         } else {
             crm_trace("Destroying %p/%p (%d remaining)",
                       c, c->ipcs, crm_hash_table_size(client_connections) - 1);
             g_hash_table_remove(client_connections, c->ipcs);
         }
     }
 
     if (c->event_timer) {
         g_source_remove(c->event_timer);
     }
 
     crm_debug("Destroying %d events", g_list_length(c->event_queue));
     while (c->event_queue != NULL) {
         /* Each queued event is a two-element iovec: header and payload */
         struct iovec *pending = c->event_queue->data;
 
         c->event_queue = g_list_remove(c->event_queue, pending);
         free(pending[0].iov_base);
         free(pending[1].iov_base);
         free(pending);
     }
 
     free(c->id);
     free(c->name);
     free(c->user);
 
     if (c->remote != NULL) {
         if (c->remote->auth_timeout) {
             g_source_remove(c->remote->auth_timeout);
         }
         free(c->remote->buffer);
         free(c->remote);
     }
 
     free(c);
 }
 
 /*!
  * \brief Get the process ID on the other end of an IPC connection
  *
  * \param[in] c  libqb server-side connection
  *
  * \return client_pid as reported by qb_ipcs_connection_stats_get(); the
  *         field is preset to 0 before the call
  */
 int
 crm_ipcs_client_pid(qb_ipcs_connection_t * c)
 {
     struct qb_ipcs_connection_stats conn_stats;
 
     /* Preset so that 0 is returned if the stats call leaves it untouched */
     conn_stats.client_pid = 0;
     qb_ipcs_connection_stats_get(c, &conn_stats, 0);
 
     return conn_stats.client_pid;
 }
 
 /*!
  * \brief Extract the XML from a message received by an IPC server
  *
  * \param[in]  c      Client that sent the message
  * \param[in]  data   Raw message: struct crm_ipc_response_header followed
  *                    by the (possibly bzip2-compressed) XML text
  * \param[in]  size   Not used by this function
  * \param[out] id     If not NULL, set to the message id from the header
  * \param[out] flags  If not NULL, set to the flags from the header
  *
  * \return Parsed XML, or NULL if the message version is unsupported or
  *         decompression failed
  */
 xmlNode *
 crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t * flags)
 {
     struct crm_ipc_response_header *header = data;
     char *payload = ((char *)data) + sizeof(struct crm_ipc_response_header);
     char *inflated = NULL;
     xmlNode *parsed = NULL;
 
     if (id) {
         *id = header->qb.id;
     }
     if (flags) {
         *flags = header->flags;
     }
 
     if (is_set(header->flags, crm_ipc_proxied)) {
         /* mark this client as being the endpoint of a proxy connection.
          * Proxy connections responses are sent on the event channel to avoid
          * blocking the proxy daemon (crmd) */
         c->flags |= crm_client_flag_ipc_proxied;
     }
 
     if(header->version > PCMK_IPC_VERSION) {
         crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d",
                 header->version, PCMK_IPC_VERSION);
         return NULL;
     }
 
     if (header->size_compressed) {
         int bz_rc = 0;
         unsigned int inflated_size = 1 + header->size_uncompressed;
 
         inflated = calloc(1, inflated_size);
 
         crm_trace("Decompressing message data %u bytes into %u bytes",
                   header->size_compressed, inflated_size);
 
         bz_rc = BZ2_bzBuffToBuffDecompress(inflated, &inflated_size, payload,
                                            header->size_compressed, 1, 0);
         if (bz_rc != BZ_OK) {
             crm_err("Decompression failed: %s (%d)", bz2_strerror(bz_rc), bz_rc);
             free(inflated);
             return NULL;
         }
 
         /* From here on, work with the decompressed text */
         payload = inflated;
     }
 
     /* The text must be NUL-terminated at its advertised length */
     CRM_ASSERT(payload[header->size_uncompressed - 1] == 0);
 
     crm_trace("Received %.200s", payload);
     parsed = string2xml(payload);
 
     free(inflated);
     return parsed;
 }
 
 ssize_t crm_ipcs_flush_events(crm_client_t * c);
 
 /*!
  * \internal
  * \brief Timer callback that retries sending a client's queued events
  *
  * \param[in] data  Client (crm_client_t *) whose queue should be flushed
  *
  * \return FALSE always
  */
 static gboolean
 crm_ipcs_flush_events_cb(gpointer data)
 {
     crm_client_t *client = data;
 
     /* Clear the timer id first: crm_ipcs_flush_events() returns early while it is set */
     client->event_timer = 0;
     crm_ipcs_flush_events(client);
 
     return FALSE;
 }
 
 /*!
  * \brief Send queued events to an IPC client
  *
  * Sends at most 100 events per call, stopping at the first send error. If
  * events remain queued afterwards, a timer is armed to try again later,
  * unless the backlog exceeds 500 entries, in which case the client is
  * disconnected.
  *
  * \param[in] c  Client whose event queue should be flushed (may be NULL)
  *
  * \return pcmk_ok if \p c is NULL or a retry timer is already pending,
  *         otherwise the result of the last send attempt (0 if nothing was
  *         queued)
  */
 ssize_t
 crm_ipcs_flush_events(crm_client_t * c)
 {
     int sent = 0;
     ssize_t rc = 0;
     int queue_len = 0;
 
     if (c == NULL) {
         return pcmk_ok;
 
     } else if (c->event_timer) {
         /* There is already a timer, wait until it goes off */
         crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer);
         return pcmk_ok;
     }
 
     queue_len = g_list_length(c->event_queue);
     while (c->event_queue && sent < 100) {
         struct crm_ipc_response_header *header = NULL;
         struct iovec *event = c->event_queue->data;
 
         rc = qb_ipcs_event_sendv(c->ipcs, event, 2);
         if (rc < 0) {
             break;
         }
 
         sent++;
         header = event[0].iov_base;
 
         /* rc is an ssize_t and iov_base a void pointer: cast both so the
          * variadic arguments match their conversion specifiers
          */
         if (header->size_compressed) {
             crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent",
                       header->qb.id, c->ipcs, c->pid, (long long) rc);
         } else {
             crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s",
                       header->qb.id, c->ipcs, c->pid, (long long) rc,
                       (char *) (event[1].iov_base));
         }
 
         c->event_queue = g_list_remove(c->event_queue, event);
         free(event[0].iov_base);
         free(event[1].iov_base);
         free(event);
     }
 
     queue_len -= sent;
     if (sent > 0 || c->event_queue) {
         crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)",
                   sent, queue_len, c->ipcs, c->pid,
                   pcmk_strerror(rc < 0 ? rc : 0), (long long) rc);
     }
 
     if (c->event_queue) {
         if (queue_len % 100 == 0 && queue_len > 99) {
             crm_warn("Event queue for %p[%d] has grown to %d", c->ipcs, c->pid, queue_len);
 
         } else if (queue_len > 500) {
             crm_err("Evicting slow client %p[%d]: event queue reached %d entries",
                     c->ipcs, c->pid, queue_len);
             qb_ipcs_disconnect(c->ipcs);
             return rc;
         }
 
         /* Back off longer the more events are waiting */
         c->event_timer = g_timeout_add(1000 + 100 * queue_len, crm_ipcs_flush_events_cb, c);
     }
 
     return rc;
 }
 
 /*!
  * \brief Serialise an XML message into a header/payload iovec pair
  *
  * The payload is sent as plain text when header plus text are smaller than
  * the size limit; otherwise it is compressed.
  *
  * \param[in]  request        Id to store in the message header
  * \param[in]  message        XML to send
  * \param[out] result         Set to a newly allocated two-element iovec on
  *                            success, NULL on failure
  * \param[in]  max_send_size  Size limit in bytes, or 0 for the file-level
  *                            default (ipc_buffer_max)
  *
  * \return Total message size on success, -EMSGSIZE if the message could
  *         not be compressed below the limit
  */
 ssize_t
 crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, uint32_t max_send_size)
 {
     static unsigned int biggest = 0;
     struct iovec *parts;
     unsigned int plain_total = 0;
     char *packed = NULL;
     char *text = dump_xml_unformatted(message);
     struct crm_ipc_response_header *header = calloc(1, sizeof(struct crm_ipc_response_header));
 
     CRM_ASSERT(result != NULL);
 
     crm_ipc_init();
 
     if (max_send_size == 0) {
         max_send_size = ipc_buffer_max;
     }
 
     CRM_LOG_ASSERT(max_send_size != 0);
 
     *result = NULL;
     parts = calloc(2, sizeof(struct iovec));
 
     /* First element: our header */
     parts[0].iov_len = hdr_offset;
     parts[0].iov_base = header;
 
     header->version = PCMK_IPC_VERSION;
     header->size_uncompressed = 1 + strlen(text);
     plain_total = parts[0].iov_len + header->size_uncompressed;
 
     if (plain_total >= max_send_size) {
         unsigned int packed_size = 0;
 
         if (!crm_compress_string(text, header->size_uncompressed, max_send_size,
                                  &packed, &packed_size)) {
 
             crm_log_xml_trace(message, "EMSGSIZE");
             biggest = QB_MAX(header->size_uncompressed, biggest);
 
             crm_err
                 ("Could not compress the message (%u bytes) into less than the configured ipc limit (%u bytes). "
                  "Set PCMK_ipc_buffer to a higher value (%u bytes suggested)",
                  header->size_uncompressed, max_send_size, 4 * biggest);
 
             free(packed);
             free(text);
             free(header);
             free(parts);
 
             return -EMSGSIZE;
         }
 
         /* Second element: the compressed text, which replaces the original */
         header->flags |= crm_ipc_compressed;
         header->size_compressed = packed_size;
 
         parts[1].iov_len = header->size_compressed;
         parts[1].iov_base = packed;
 
         free(text);
 
         biggest = QB_MAX(header->size_compressed, biggest);
 
     } else {
         /* Second element: the plain text, NUL terminator included */
         parts[1].iov_base = text;
         parts[1].iov_len = header->size_uncompressed;
     }
 
     header->qb.size = parts[0].iov_len + parts[1].iov_len;
     header->qb.id = (int32_t)request;    /* Replying to a specific request */
 
     *result = parts;
     CRM_ASSERT(header->qb.size > 0);
     return header->qb.size;
 }
 
 /*!
  * \brief Send a prepared message to an IPC client
  *
  * Events are appended to the client's event queue and the queue is then
  * flushed; anything else is sent directly as a response.
  *
  * \param[in] c      Client to send to
  * \param[in] iov    Two-element iovec as produced by crm_ipc_prepare()
  * \param[in] flags  Group of crm_ipc_flags; with crm_ipc_server_free this
  *                   function takes ownership of \p iov, otherwise an event
  *                   is queued as a copy
  *
  * \return For events, the result of flushing the queue; otherwise the
  *         result of sending the response
  */
 ssize_t
 crm_ipcs_sendv(crm_client_t * c, struct iovec * iov, enum crm_ipc_flags flags)
 {
     static uint32_t next_event_id = 1;
     ssize_t rc;
     struct crm_ipc_response_header *header = iov[0].iov_base;
 
     /* _ALL_ replies to proxied connections need to be sent as events */
     if ((c->flags & crm_client_flag_ipc_proxied)
         && is_not_set(flags, crm_ipc_server_event)) {
 
         flags |= crm_ipc_server_event;
         /* this flag lets us know this was originally meant to be a response.
          * even though we're sending it over the event channel. */
         flags |= crm_ipc_proxied_relay_response;
     }
 
     header->flags |= flags;
 
     if (flags & crm_ipc_server_event) {
         header->qb.id = next_event_id++;   /* We don't really use it, but doesn't hurt to set one */
 
         if (flags & crm_ipc_server_free) {
             crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid);
             c->event_queue = g_list_append(c->event_queue, iov);
 
         } else {
             /* The caller keeps its iovec, so the queue gets a deep copy */
             struct iovec *duplicate = calloc(2, sizeof(struct iovec));
 
             crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid);
             duplicate[0].iov_len = iov[0].iov_len;
             duplicate[0].iov_base = malloc(iov[0].iov_len);
             memcpy(duplicate[0].iov_base, iov[0].iov_base, iov[0].iov_len);
 
             duplicate[1].iov_len = iov[1].iov_len;
             duplicate[1].iov_base = malloc(iov[1].iov_len);
             memcpy(duplicate[1].iov_base, iov[1].iov_base, iov[1].iov_len);
 
             c->event_queue = g_list_append(c->event_queue, duplicate);
         }
 
         rc = crm_ipcs_flush_events(c);
 
     } else {
         CRM_LOG_ASSERT(header->qb.id != 0);     /* Replying to a specific request */
 
         rc = qb_ipcs_response_sendv(c->ipcs, iov, 2);
         if (rc < header->qb.size) {
             crm_notice("Response %d to %p[%d] (%u bytes) failed: %s (%d)",
                        header->qb.id, c->ipcs, c->pid, header->qb.size, pcmk_strerror(rc), rc);
 
         } else {
             crm_trace("Response %d sent, %d bytes to %p[%d]", header->qb.id, rc, c->ipcs, c->pid);
         }
 
         if (flags & crm_ipc_server_free) {
             free(iov[0].iov_base);
             free(iov[1].iov_base);
             free(iov);
         }
 
         /* Use the opportunity to push out pending events; the result is ignored */
         crm_ipcs_flush_events(c);
     }
 
     if (rc == -EPIPE || rc == -ENOTCONN) {
         crm_trace("Client %p disconnected", c->ipcs);
     }
 
     return rc;
 }
 
 /*!
  * \brief Serialise an XML message and send it to an IPC client
  *
  * \param[in] c        Client to send to
  * \param[in] request  Id of the request being answered
  * \param[in] message  XML to send
  * \param[in] flags    Group of crm_ipc_flags (crm_ipc_server_free is added)
  *
  * \return -EDESTADDRREQ if \p c is NULL; otherwise the result of
  *         crm_ipc_prepare() if it was not positive, else the result of
  *         crm_ipcs_sendv()
  */
 ssize_t
 crm_ipcs_send(crm_client_t * c, uint32_t request, xmlNode * message,
               enum crm_ipc_flags flags)
 {
     struct iovec *parts = NULL;
     ssize_t rc = 0;
 
     if(c == NULL) {
         return -EDESTADDRREQ;
     }
     crm_ipc_init();
 
     rc = crm_ipc_prepare(request, message, &parts, ipc_buffer_max);
     if (rc <= 0) {
         free(parts);
         crm_notice("Message to %p[%d] failed: %s (%d)",
                    c->ipcs, c->pid, pcmk_strerror(rc), rc);
         return rc;
     }
 
     /* crm_ipcs_sendv() owns and frees the iovec from here on */
     return crm_ipcs_sendv(c, parts, flags | crm_ipc_server_free);
 }
 
 /*!
  * \brief Acknowledge a client request, if the client asked for a response
  *
  * Nothing is done unless \p flags contains crm_ipc_client_response.
  *
  * \param[in] c         Client to acknowledge (may be NULL)
  * \param[in] request   Id of the request being acknowledged
  * \param[in] flags     Group of crm_ipc_flags from the request
  * \param[in] tag       Element name for the acknowledgement XML
  * \param[in] function  Stored in the "function" attribute of the ack
  * \param[in] line      Stored in the "line" attribute of the ack
  */
 void
 crm_ipcs_send_ack(crm_client_t * c, uint32_t request, uint32_t flags, const char *tag, const char *function,
                   int line)
 {
     if (flags & crm_ipc_client_response) {
         xmlNode *ack = create_xml_node(NULL, tag);
 
         crm_trace("Ack'ing msg from %s (%p)", crm_client_name(c), c);
 
         /* crm_client_name() and crm_ipcs_send() both tolerate a NULL
          * client, so do not dereference one here either
          */
         if (c != NULL) {
             c->request_id = 0;
         }
         crm_xml_add(ack, "function", function);
         crm_xml_add_int(ack, "line", line);
         crm_ipcs_send(c, request, ack, flags);
         free_xml(ack);
     }
 }
 
 /* Client... */
 
 #define MIN_MSG_SIZE    12336   /* sizeof(struct qb_ipc_connection_response) */
 /* Parenthesized so the product stays intact next to higher-precedence operators */
 #define MAX_MSG_SIZE    (128*1024) /* 128k default */
 
 /* Client-side state of one IPC connection, allocated by crm_ipc_new() */
 struct crm_ipc_s {
     /* poll() descriptor: fd is -1 until connected, events is POLLIN */
     struct pollfd pfd;
 
     /* the max size we can send/receive over ipc */
     unsigned int max_buf_size;
     /* Size of the allocated 'buffer' */
     unsigned int buf_size;
     /* Result of the most recent qb_ipcc_event_recv() (negative on error) */
     int msg_size;
     /* Reset to FALSE by crm_ipc_connect() */
     int need_reply;
     /* Receive buffer; holds the header followed by the message text */
     char *buffer;
     /* Copy of the name given to crm_ipc_new(); passed to qb_ipcc_connect() */
     char *name;
     /* NOTE(review): not referenced in the code visible here - confirm meaning */
     uint32_t buffer_flags;
 
     /* libqb connection; NULL when not connected */
     qb_ipcc_connection_t *ipc;
 
 };
 
 /*
  * Choose the IPC buffer size to use, given a caller-requested size 'max'.
  *
  * As changed by this hunk ('+' lines): the process-wide limit global_max is
  * determined once, on the first call. If the PCMK_ipc_buffer environment
  * variable is set and parses to a positive integer, the limit is that value
  * but at least MIN_MSG_SIZE; if it is unset or not positive, the limit is
  * MAX_MSG_SIZE. Every call then returns the larger of 'max' and that limit.
  *
  * Before the change ('-' lines): the cached limit was returned as-is, so
  * 'max' was only considered on the first call and only when the environment
  * variable was unset, and a non-positive environment value produced
  * MIN_MSG_SIZE.
  */
 static unsigned int
 pick_ipc_buffer(unsigned int max)
 {
-    const char *env;
     static unsigned int global_max = 0;
 
-    if (global_max != 0)
-        return global_max;
+    if (global_max == 0) {
+        const char *env = getenv("PCMK_ipc_buffer");
 
-    env = getenv("PCMK_ipc_buffer");
-    if (env) {
-        int env_max = crm_parse_int(env, "0");
+        if (env) {
+            int env_max = crm_parse_int(env, "0");
 
-        global_max = QB_MAX(MIN_MSG_SIZE, env_max);
-    } else {
-        global_max = QB_MAX(MAX_MSG_SIZE, max);
+            global_max = (env_max > 0)? QB_MAX(MIN_MSG_SIZE, env_max) : MAX_MSG_SIZE;
+
+        } else {
+            global_max = MAX_MSG_SIZE;
+        }
     }
 
-    return global_max;
+    return QB_MAX(max, global_max);
 }
 
 /*!
  * \brief Allocate a new (not yet connected) IPC client object
  *
  * \param[in] name      Name of the IPC service to connect to later
  * \param[in] max_size  Requested buffer size; pick_ipc_buffer() decides
  *                      the size actually used
  *
  * \return Newly allocated client (free with crm_ipc_destroy()), or NULL
  *         if memory could not be allocated
  */
 crm_ipc_t *
 crm_ipc_new(const char *name, size_t max_size)
 {
     crm_ipc_t *client = NULL;
 
     client = calloc(1, sizeof(crm_ipc_t));
     if (client == NULL) {
         crm_err("Could not allocate memory for IPC connection to %s", name);
         return NULL;
     }
 
     client->name = strdup(name);
     client->buf_size = pick_ipc_buffer(max_size);
     client->buffer = malloc(client->buf_size);
 
     /* Hand back nothing rather than a half-initialised object */
     if (client->name == NULL || client->buffer == NULL) {
         crm_err("Could not allocate memory for IPC connection to %s", name);
         free(client->buffer);
         free(client->name);
         free(client);
         return NULL;
     }
 
     /* Clients initiating connection pick the max buf size */
     client->max_buf_size = client->buf_size;
 
     client->pfd.fd = -1;
     client->pfd.events = POLLIN;
     client->pfd.revents = 0;
 
     return client;
 }
 
 /*!
  * \brief Connect an IPC client object to its Pacemaker component
  *
  * \param[in] client  Connection instance obtained from crm_ipc_new()
  *
  * \return TRUE on success, FALSE otherwise (in which case errno will be set)
  */
 bool
 crm_ipc_connect(crm_ipc_t * client)
 {
     client->need_reply = FALSE;
 
     client->ipc = qb_ipcc_connect(client->name, client->buf_size);
     if (client->ipc == NULL) {
         crm_debug("Could not establish %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno);
         return FALSE;
     }
 
     client->pfd.fd = crm_ipc_get_fd(client);
     if (client->pfd.fd < 0) {
         crm_debug("Could not obtain file descriptor for %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno);
         return FALSE;
     }
 
     qb_ipcc_context_set(client->ipc, client);
 
 #ifdef HAVE_IPCS_GET_BUFFER_SIZE
     /* Use the buffer size reported by libqb, growing our buffer to match if needed */
     client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc);
     if (client->buf_size < client->max_buf_size) {
         free(client->buffer);
         client->buffer = calloc(1, client->max_buf_size);
         client->buf_size = client->max_buf_size;
     }
 #endif
 
     return TRUE;
 }
 
 /*!
  * \brief Disconnect an IPC client from its server
  *
  * The client object itself stays allocated; only the libqb connection is
  * torn down.
  *
  * \param[in] client  Connection to close (NULL is ignored)
  */
 void
 crm_ipc_close(crm_ipc_t * client)
 {
     if (client) {
         /* One conversion per argument: the format previously had a fourth
          * specifier with no matching argument
          */
         crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc);
 
         if (client->ipc) {
             qb_ipcc_connection_t *ipc = client->ipc;
 
             /* Clear the pointer before disconnecting */
             client->ipc = NULL;
             qb_ipcc_disconnect(ipc);
         }
     }
 }
 
 /*!
  * \brief Free an IPC client object
  *
  * \param[in] client  Connection to free (NULL is ignored)
  *
  * \note A connection that is still active is only reported, not closed.
  */
 void
 crm_ipc_destroy(crm_ipc_t * client)
 {
     if (client == NULL) {
         return;
     }
 
     if (client->ipc && qb_ipcc_is_connected(client->ipc)) {
         crm_notice("Destroying an active IPC connection to %s", client->name);
         /* The next line is basically unsafe
          *
          * If this connection was attached to mainloop and mainloop is active,
          *   the 'disconnected' callback will end up back here and we'll end
          *   up free'ing the memory twice - something that can still happen
          *   even without this if we destroy a connection and it closes before
          *   we call exit
          */
         /* crm_ipc_close(client); */
     }
 
     crm_trace("Destroying IPC connection to %s: %p", client->name, client);
     free(client->buffer);
     free(client->name);
     free(client);
 }
 
 /*!
  * \brief Get the file descriptor of an IPC connection
  *
  * \param[in] client  Connection to query (may be NULL)
  *
  * \return File descriptor on success; otherwise errno is set to EINVAL,
  *         an error is logged and -errno is returned
  */
 int
 crm_ipc_get_fd(crm_ipc_t * client)
 {
     int fd = 0;
 
     if (client == NULL || client->ipc == NULL
         || qb_ipcc_fd_get(client->ipc, &fd) != 0) {
 
         errno = EINVAL;
         crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s",
                    (client? client->name : "unspecified client"));
         return -errno;
     }
 
     return fd;
 }
 
 /*!
  * \brief Check whether an IPC client is currently connected
  *
  * \param[in] client  Connection to check (may be NULL)
  *
  * \return TRUE if libqb reports the connection as connected, else FALSE
  *
  * \note When libqb reports a disconnect, the stored descriptor is set to
  *       -EINVAL so that later calls fail the descriptor check.
  */
 bool
 crm_ipc_connected(crm_ipc_t * client)
 {
     bool connected = FALSE;
 
     if (client == NULL) {
         crm_trace("No client");
         return FALSE;
     }
 
     if (client->ipc == NULL) {
         crm_trace("No connection");
         return FALSE;
     }
 
     if (client->pfd.fd < 0) {
         crm_trace("Bad descriptor");
         return FALSE;
     }
 
     connected = qb_ipcc_is_connected(client->ipc);
     if (!connected) {
         client->pfd.fd = -EINVAL;
     }
     return connected;
 }
 
 /*!
  * \brief Check, without blocking, whether an IPC connection has input
  *
  * \param[in] client  Connection to poll (must not be NULL)
  *
  * \return -ENOTCONN if not connected, otherwise the result of poll() with
  *         a zero timeout
  */
 int
 crm_ipc_ready(crm_ipc_t * client)
 {
     CRM_ASSERT(client != NULL);
 
     if (!crm_ipc_connected(client)) {
         return -ENOTCONN;
     }
 
     /* Clear stale results before polling */
     client->pfd.revents = 0;
     return poll(&(client->pfd), 1, 0);
 }
 
 /*!
  * \internal
  * \brief Decompress the message in a client's buffer, if it is compressed
  *
  * A compressed message is expanded into a freshly allocated buffer that
  * replaces client->buffer; the header is copied over unchanged.
  *
  * \param[in] client  Connection whose buffer holds a received message
  *
  * \return pcmk_ok on success, -ENOMEM if the new buffer could not be
  *         allocated, -EILSEQ if decompression failed
  */
 static int
 crm_ipc_decompress(crm_ipc_t * client)
 {
     struct crm_ipc_response_header *header = (struct crm_ipc_response_header *)(void*)client->buffer;
 
     if (header->size_compressed) {
         int rc = 0;
         unsigned int size_u = 1 + header->size_uncompressed;
         /* never let buf size fall below our max size required for ipc reads. */
         unsigned int new_buf_size = QB_MAX((hdr_offset + size_u), client->max_buf_size);
         char *uncompressed = calloc(1, new_buf_size);
 
         if (uncompressed == NULL) {
             /* Offsetting a NULL buffer below would hand bzip2 a bogus
              * non-NULL destination, so give up here
              */
             crm_err("Could not allocate %u bytes to decompress message", new_buf_size);
             return -ENOMEM;
         }
 
         crm_trace("Decompressing message data %u bytes into %u bytes",
                  header->size_compressed, size_u);
 
         rc = BZ2_bzBuffToBuffDecompress(uncompressed + hdr_offset, &size_u,
                                         client->buffer + hdr_offset, header->size_compressed, 1, 0);
 
         if (rc != BZ_OK) {
             crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc);
             free(uncompressed);
             return -EILSEQ;
         }
 
         /*
          * This assert no longer holds true.  For an identical msg, some clients may
          * require compression, and others may not. If that same msg (event) is sent
          * to multiple clients, it could result in some clients receiving a compressed
          * msg even though compression was not explicitly required for them.
          *
          * CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max);
          */
         CRM_ASSERT(size_u == header->size_uncompressed);
 
         memcpy(uncompressed, client->buffer, hdr_offset);       /* Preserve the header */
         header = (struct crm_ipc_response_header *)(void*)uncompressed;
 
         free(client->buffer);
         client->buf_size = new_buf_size;
         client->buffer = uncompressed;
     }
 
     /* The text must be NUL-terminated at its advertised length */
     CRM_ASSERT(client->buffer[hdr_offset + header->size_uncompressed - 1] == 0);
     return pcmk_ok;
 }
 
 /*
  * Receive one event from the server into client->buffer.
  *
  * Asserts that client, client->ipc and client->buffer are non-NULL.
  *
  * Returns the uncompressed payload size (excluding the header) when a
  * message was received, the error from crm_ipc_decompress() if
  * decompression fails, -EBADMSG if the message's protocol version is newer
  * than PCMK_IPC_VERSION, or -ENOMSG if nothing was received.
  */
 long
 crm_ipc_read(crm_ipc_t * client)
 {
     struct crm_ipc_response_header *header = NULL;
 
     CRM_ASSERT(client != NULL);
     CRM_ASSERT(client->ipc != NULL);
     CRM_ASSERT(client->buffer != NULL);
 
     crm_ipc_init();
 
     /* Start from an empty buffer; receive with a zero timeout */
     client->buffer[0] = 0;
     client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size - 1, 0);
     if (client->msg_size >= 0) {
         int rc = crm_ipc_decompress(client);
 
         if (rc != pcmk_ok) {
             return rc;
         }
 
         /* Re-read the header: decompression may have replaced the buffer */
         header = (struct crm_ipc_response_header *)(void*)client->buffer;
         if(header->version > PCMK_IPC_VERSION) {
             crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d",
                     header->version, PCMK_IPC_VERSION);
             return -EBADMSG;
         }
 
         crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s",
                   client->name, header->qb.id, header->qb.size, client->msg_size,
                   client->buffer + hdr_offset);
 
     } else {
         crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size));
     }
 
     /* Log a lost connection; the return value below is unaffected by this */
     if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) {
         crm_err("Connection to %s failed", client->name);
     }
 
     if (header) {
         /* Data excluding the header */
         return header->size_uncompressed;
     }
     return -ENOMSG;
 }
 
 /*
  * Return a pointer to the payload in the client's receive buffer, i.e. the
  * position just past the response header.  Asserts client is non-NULL.
  */
 const char *
 crm_ipc_buffer(crm_ipc_t * client)
 {
     const size_t header_len = sizeof(struct crm_ipc_response_header);
 
     CRM_ASSERT(client != NULL);
     return client->buffer + header_len;
 }
 
 /*
  * Return the flags field of the response header at the start of the
  * client's receive buffer, or 0 when the client has no buffer.
  * Asserts client is non-NULL.
  */
 uint32_t
 crm_ipc_buffer_flags(crm_ipc_t * client)
 {
     struct crm_ipc_response_header *hdr = NULL;
 
     CRM_ASSERT(client != NULL);
     if (client->buffer != NULL) {
         hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
         return hdr->flags;
     }
     return 0;
 }
 
 /*
  * Return the name stored in the IPC client object.
  * Asserts client is non-NULL.
  */
 const char *
 crm_ipc_name(crm_ipc_t * client)
 {
     const char *name = NULL;
 
     CRM_ASSERT(client != NULL);
     name = client->name;
     return name;
 }
 
 /*
  * Send a two-element iovec and wait (no timeout) for the reply in
  * client->buffer, retrying for as long as libqb returns -EAGAIN and the
  * client is still connected.  Returns the last libqb result.
  */
 static int
 internal_ipc_send_recv(crm_ipc_t * client, const void *iov)
 {
     int result = 0;
 
     for (;;) {
         result = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1);
         if (result != -EAGAIN || !crm_ipc_connected(client)) {
             break;
         }
     }
 
     return result;
 }
 
 /*
  * Send a two-element iovec, retrying while libqb returns -EAGAIN, the
  * deadline (now + 1s + ms_timeout rounded down to seconds) has not passed,
  * and the client is still connected.  Returns the last libqb result.
  */
 static int
 internal_ipc_send_request(crm_ipc_t * client, const void *iov, int ms_timeout)
 {
     const time_t deadline = time(NULL) + 1 + (ms_timeout / 1000);
     int result = 0;
 
     for (;;) {
         result = qb_ipcc_sendv(client->ipc, iov, 2);
         if (result != -EAGAIN) {
             break;
         }
         if (time(NULL) >= deadline) {
             break;
         }
         if (!crm_ipc_connected(client)) {
             break;
         }
     }
 
     return result;
 }
 
 /*
  * Wait for the reply to a specific request.
  *
  * Repeatedly receives (1s per attempt) into client->buffer until a message
  * whose id equals request_id arrives, the client is found disconnected, or
  * the deadline (now + 1s + ms_timeout rounded down to seconds) passes.
  * Replies with an older id are logged and discarded; a newer id is logged
  * and then trips an assertion.
  *
  * Returns the result of the last qb_ipcc_recv() call, or the error from
  * crm_ipc_decompress() if a received message cannot be decompressed.
  */
 static int
 internal_ipc_get_reply(crm_ipc_t * client, int request_id, int ms_timeout)
 {
     time_t timeout = time(NULL) + 1 + (ms_timeout / 1000);
     int rc = 0;
 
     crm_ipc_init();
 
     /* get the reply */
     crm_trace("client %s waiting on reply to msg id %d", client->name, request_id);
     do {
 
         rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000);
         if (rc > 0) {
             struct crm_ipc_response_header *hdr = NULL;
 
             /* Separate name so the receive result in rc is not shadowed */
             int decompress_rc = crm_ipc_decompress(client);
 
             if (decompress_rc != pcmk_ok) {
                 return decompress_rc;
             }
 
             hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
             if (hdr->qb.id == request_id) {
                 /* Got it */
                 break;
             } else if (hdr->qb.id < request_id) {
                 xmlNode *bad = string2xml(crm_ipc_buffer(client));
 
                 crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id);
                 crm_log_xml_notice(bad, "OldIpcReply");
                 /* The parsed copy exists only for logging */
                 free_xml(bad);
 
             } else {
                 xmlNode *bad = string2xml(crm_ipc_buffer(client));
 
                 crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id);
                 crm_log_xml_notice(bad, "ImpossibleReply");
                 free_xml(bad);
                 CRM_ASSERT(hdr->qb.id <= request_id);
             }
         } else if (crm_ipc_connected(client) == FALSE) {
             crm_err("Server disconnected client %s while waiting for msg id %d", client->name,
                     request_id);
             break;
         }
 
     } while (time(NULL) < timeout);
 
     return rc;
 }
 
 /*
  * Send an XML message to the server and optionally collect the reply.
  *
  * client:     connection to send on (NULL yields -ENOTCONN)
  * message:    XML to send; passed to crm_ipc_prepare()
  * flags:      crm_ipc_flags OR'ed into the outgoing header; when
  *             crm_ipc_proxied is set, crm_ipc_client_response is cleared
  * ms_timeout: 0 is replaced by 5000; a positive value uses the
  *             send-then-wait path, otherwise a combined send/receive is used
  *             when a response is requested
  * reply:      if non-NULL and a response was received, set to the parsed
  *             response XML
  *
  * Returns a positive value on success, -ENOTCONN if there is no usable
  * connection, -EALREADY if a previous reply is still outstanding, or the
  * error from crm_ipc_prepare() or the send/receive helpers.
  */
 int
 crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout,
              xmlNode ** reply)
 {
     long rc = 0;
     struct iovec *iov;
     static uint32_t id = 0;     /* request id, shared by all clients in this process */
     static int factor = 8;      /* next tenth-of-limit threshold to warn at */
     struct crm_ipc_response_header *header;
 
     crm_ipc_init();
 
     if (client == NULL) {
         crm_notice("Invalid connection");
         return -ENOTCONN;
 
     } else if (crm_ipc_connected(client) == FALSE) {
         /* Don't even bother */
         crm_notice("Connection to %s closed", client->name);
         return -ENOTCONN;
     }
 
     if (ms_timeout == 0) {
         ms_timeout = 5000;
     }
 
     /* A previous request never got its reply: try once more to drain it
      * before allowing anything new to be sent */
     if (client->need_reply) {
         crm_trace("Trying again to obtain pending reply from %s", client->name);
         rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout);
         if (rc < 0) {
             crm_warn("Sending to %s (%p) is disabled until pending reply is received", client->name,
                      client->ipc);
             return -EALREADY;
 
         } else {
             crm_notice("Lost reply from %s (%p) finally arrived, sending re-enabled", client->name,
                        client->ipc);
             client->need_reply = FALSE;
         }
     }
 
     id++;
     CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */
     rc = crm_ipc_prepare(id, message, &iov, client->max_buf_size);
     if(rc < 0) {
         return rc;
     }
 
     /* iov[0] carries the header, iov[1] the payload (both freed below) */
     header = iov[0].iov_base;
     header->flags |= flags;
 
     if(is_set(flags, crm_ipc_proxied)) {
         /* Don't look for a synchronous response */
         clear_bit(flags, crm_ipc_client_response);
     }
 
     if(header->size_compressed) {
         /* Warn as the compressed size passes successive tenths of the limit */
         if(factor < 10 && (client->max_buf_size / 10) < (rc / factor)) {
             crm_notice("Compressed message exceeds %d0%% of the configured ipc limit (%u bytes), "
                        "consider setting PCMK_ipc_buffer to %u or higher",
                        factor, client->max_buf_size, 2 * client->max_buf_size);
             factor++;
         }
     }
 
     crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg...",
               client->name, header->qb.id, header->qb.size, ms_timeout);
 
     if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) {
 
         rc = internal_ipc_send_request(client, iov, ms_timeout);
 
         if (rc <= 0) {
             crm_trace("Failed to send from client %s request %d with %u bytes...",
                       client->name, header->qb.id, header->qb.size);
             goto send_cleanup;
 
         } else if (is_not_set(flags, crm_ipc_client_response)) {
             crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes...",
                       header->qb.id, client->name, header->qb.size);
 
             goto send_cleanup;
         }
 
         rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout);
         if (rc < 0) {
             /* No reply, for now, disable sending
              *
              * The alternative is to close the connection since we don't know
              * how to detect and discard out-of-sequence replies
              *
              * TODO - implement the above
              */
             client->need_reply = TRUE;
         }
 
     } else {
         rc = internal_ipc_send_recv(client, iov);
     }
 
     if (rc > 0) {
         struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
 
         crm_trace("Received response %d, size=%u, rc=%ld, text: %.200s", hdr->qb.id, hdr->qb.size,
                   rc, crm_ipc_buffer(client));
 
         if (reply) {
             *reply = string2xml(crm_ipc_buffer(client));
         }
 
     } else {
         crm_trace("Response not received: rc=%ld, errno=%d", rc, errno);
     }
 
   send_cleanup:
     /* Report the outcome, then release everything crm_ipc_prepare() built */
     if (crm_ipc_connected(client) == FALSE) {
         crm_notice("Connection to %s closed: %s (%ld)", client->name, pcmk_strerror(rc), rc);
 
     } else if (rc == -ETIMEDOUT) {
         crm_warn("Request %d to %s (%p) failed: %s (%ld) after %dms",
                  header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc, ms_timeout);
         crm_write_blackbox(0, NULL);
 
     } else if (rc <= 0) {
         crm_warn("Request %d to %s (%p) failed: %s (%ld)",
                  header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc);
     }
 
     free(header);
     free(iov[1].iov_base);
     free(iov);
     return rc;
 }
 
 /* Utils */
 
 /*
  * Build a CRM_OP_HELLO request carrying the client's identity and version.
  *
  * All four arguments must be non-NULL, non-empty strings; otherwise an
  * error is logged and NULL is returned.  On success the new request is
  * returned.
  */
 xmlNode *
 create_hello_message(const char *uuid,
                      const char *client_name, const char *major_version, const char *minor_version)
 {
     xmlNode *options = NULL;
     xmlNode *request = NULL;
 
     if (!uuid || uuid[0] == '\0'
         || !client_name || client_name[0] == '\0'
         || !major_version || major_version[0] == '\0'
         || !minor_version || minor_version[0] == '\0') {
         crm_err("Missing fields, Hello message will not be valid.");
         return NULL;
     }
 
     options = create_xml_node(NULL, XML_TAG_OPTIONS);
     crm_xml_add(options, "major_version", major_version);
     crm_xml_add(options, "minor_version", minor_version);
     crm_xml_add(options, "client_name", client_name);
     crm_xml_add(options, "client_uuid", uuid);
 
     crm_trace("creating hello message");
     request = create_request(CRM_OP_HELLO, options, NULL, NULL, client_name, uuid);
 
     /* The temporary options node is released once the request is built */
     free_xml(options);
 
     return request;
 }
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index 6f9ea7f6b5..174f41bf0d 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -1,2223 +1,2224 @@
 /*
  * Copyright (c) 2012 David Vossel <davidvossel@gmail.com>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  *
  */
 
 #include <crm_internal.h>
 
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdarg.h>
 #include <string.h>
 #include <ctype.h>
 
 #include <sys/types.h>
 #include <sys/wait.h>
 
 #include <glib.h>
 #include <dirent.h>
 
 #include <crm/crm.h>
 #include <crm/lrmd.h>
 #include <crm/services.h>
 #include <crm/common/mainloop.h>
 #include <crm/common/ipcs.h>
 #include <crm/msg_xml.h>
 
 #include <crm/stonith-ng.h>
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
 #  undef KEYFILE
 #  include <gnutls/gnutls.h>
 #endif
 
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <arpa/inet.h>
 #include <netdb.h>
 
 #define MAX_TLS_RECV_WAIT 10000
 
 CRM_TRACE_INIT_DATA(lrmd);
 
 static int lrmd_api_disconnect(lrmd_t * lrmd);
 static int lrmd_api_is_connected(lrmd_t * lrmd);
 
 /* IPC proxy functions */
 int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
 static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg);
 void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
 #  define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000    /* 5 seconds */
 gnutls_psk_client_credentials_t psk_cred_s;
 int lrmd_tls_set_key(gnutls_datum_t * key);
 static void lrmd_tls_disconnect(lrmd_t * lrmd);
 static int global_remote_msg_id = 0;
 int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type);
 static void lrmd_tls_connection_destroy(gpointer userdata);
 #endif
 
 /* Per-connection private state hung off lrmd_t->private */
 typedef struct lrmd_private_s {
     /* Transport in use; code in this file switches on CRM_CLIENT_IPC /
      * CRM_CLIENT_TLS */
     enum client_type type;
     /* NOTE(review): presumably the callback token identifying this client
      * to the server - confirm against the connect code */
     char *token;
     /* Mainloop I/O source; cleared by the connection-destroy callbacks */
     mainloop_io_t *source;
 
     /* IPC parameters */
     crm_ipc_t *ipc;
 
     /* Remote (TLS) connection state: session and receive buffer */
     crm_remote_t *remote;
 
     /* Extra TLS parameters */
     /* Copied into the remote_nodename field of every event handed to the
      * callback */
     char *remote_nodename;
 #ifdef HAVE_GNUTLS_GNUTLS_H
     char *server;
     int port;
     gnutls_psk_client_credentials_t psk_cred_c;
 
     /* while the async connection is occurring, this is the id
      * of the connection timeout timer. */
     int async_timer;
     int sock;
     /* since tls requires a round trip across the network for a
      * request/reply, there are times where we just want to be able
      * to send a request from the client and not wait around (or even care
      * about) what the reply is. */
     int expected_late_replies;
     /* Notifications (xmlNode) received while waiting for a reply, queued
      * for later dispatch */
     GList *pending_notify;
     /* Trigger set when there are queued notifications or buffered data */
     crm_trigger_t *process_notify;
 #endif
 
     /* Client-supplied handler invoked for each event */
     lrmd_event_callback callback;
 
     /* Internal IPC proxy msg passing for remote guests */
     void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg);
     void *proxy_callback_userdata;
     /* NOTE(review): presumably the protocol version reported by the peer -
      * confirm where it is set */
     char *peer_version;
 } lrmd_private_t;
 
 /*
  * Append a copy of value to the end of a singly linked list.
  *
  * head may be NULL, in which case the new node becomes the list.
  * Returns the (possibly new) head of the list.
  */
 static lrmd_list_t *
 lrmd_list_add(lrmd_list_t * head, const char *value)
 {
     lrmd_list_t *tail = head;
     lrmd_list_t *node = calloc(1, sizeof(lrmd_list_t));
 
     node->val = strdup(value);
 
     if (head == NULL) {
         /* Empty list: the new node is the head */
         return node;
     }
 
     while (tail->next != NULL) {
         tail = tail->next;
     }
     tail->next = node;
 
     return head;
 }
 
 /*
  * Free every node of a list built by lrmd_list_add(), including each
  * node's value string.  A NULL head is a no-op.
  */
 void
 lrmd_list_freeall(lrmd_list_t * head)
 {
     lrmd_list_t *next = NULL;
 
     for (; head != NULL; head = next) {
         /* Grab the successor before the node goes away */
         next = head->next;
         free((char *)head->val);
         free(head);
     }
 }
 
 /*
  * Append a copy of a key/value pair to the end of a singly linked list.
  *
  * head may be NULL, in which case the new node becomes the list.
  * Returns the (possibly new) head of the list.
  */
 lrmd_key_value_t *
 lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value)
 {
     lrmd_key_value_t *tail = head;
     lrmd_key_value_t *node = calloc(1, sizeof(lrmd_key_value_t));
 
     node->key = strdup(key);
     node->value = strdup(value);
 
     if (head == NULL) {
         /* Empty list: the new node is the head */
         return node;
     }
 
     while (tail->next != NULL) {
         tail = tail->next;
     }
     tail->next = node;
 
     return head;
 }
 
 /*
  * Free every node of a key/value list, including each node's key and
  * value strings.  A NULL head is a no-op.
  */
 void
 lrmd_key_value_freeall(lrmd_key_value_t * head)
 {
     lrmd_key_value_t *next = NULL;
 
     for (; head != NULL; head = next) {
         /* Grab the successor before the node goes away */
         next = head->next;
         free(head->key);
         free(head->value);
         free(head);
     }
 }
 
 /*
  * GHFunc-style helper: store duplicates of key and value (both treated as
  * strings) in the hash table passed as user_data.
  */
 static void
 dup_attr(gpointer key, gpointer value, gpointer user_data)
 {
     GHashTable *target = user_data;
 
     g_hash_table_replace(target, strdup(key), strdup(value));
 }
 
 /*
  * Create a deep copy of an event.
  *
  * Scalar members are copied wholesale; every string member and the params
  * table are then replaced with freshly allocated duplicates so the copy
  * shares no pointers with the original.  Returns the new event.
  */
 lrmd_event_data_t *
 lrmd_copy_event(lrmd_event_data_t * event)
 {
     lrmd_event_data_t *dup = calloc(1, sizeof(lrmd_event_data_t));
 
     /* Bitwise copy first: this carries over all the integer members, and
      * leaves any NULL string/params pointers NULL in the copy. */
     memcpy(dup, event, sizeof(lrmd_event_data_t));
 
     /* Replace each non-NULL borrowed pointer with a private duplicate */
     if (event->rsc_id != NULL) {
         dup->rsc_id = strdup(event->rsc_id);
     }
     if (event->op_type != NULL) {
         dup->op_type = strdup(event->op_type);
     }
     if (event->user_data != NULL) {
         dup->user_data = strdup(event->user_data);
     }
     if (event->output != NULL) {
         dup->output = strdup(event->output);
     }
     if (event->exit_reason != NULL) {
         dup->exit_reason = strdup(event->exit_reason);
     }
     if (event->remote_nodename != NULL) {
         dup->remote_nodename = strdup(event->remote_nodename);
     }
 
     if (event->params != NULL) {
         dup->params = g_hash_table_new_full(crm_str_hash,
                                             g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
 
         if (dup->params != NULL) {
             g_hash_table_foreach(event->params, dup_attr, dup->params);
         }
     }
 
     return dup;
 }
 
 /*
  * Release an event together with its string members and params table.
  * A NULL event is a no-op.
  */
 void
 lrmd_free_event(lrmd_event_data_t * event)
 {
     if (event == NULL) {
         return;
     }
 
     if (event->params != NULL) {
         g_hash_table_destroy(event->params);
     }
 
     /* The string members are declared const, hence the casts */
     free((char *)event->remote_nodename);
     free((char *)event->exit_reason);
     free((char *)event->output);
     free((char *)event->user_data);
     free((char *)event->op_type);
     free((char *)event->rsc_id);
 
     free(event);
 }
 
 /*
  * Turn one incoming XML message into an event and hand it to the client's
  * callback.
  *
  * Messages carrying an F_LRMD_IPC_SESSION attribute are routed to the proxy
  * dispatcher instead.  Messages are dropped when no callback is registered
  * or when the operation type is not recognised.
  *
  * Always returns 1.
  */
 static int
 lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
 {
     const char *type;
     const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION);
     lrmd_private_t *native = lrmd->private;
     lrmd_event_data_t event = { 0, };
 
     if (proxy_session != NULL) {
         /* this is proxy business */
         lrmd_internal_proxy_dispatch(lrmd, msg);
         return 1;
     } else if (!native->callback) {
         /* no callback set */
         crm_trace("notify event received but client has not set callback");
         return 1;
     }
 
     /* Fields common to every event type.  The event lives on the stack and
      * its string members are taken directly from msg / native. */
     event.remote_nodename = native->remote_nodename;
     type = crm_element_value(msg, F_LRMD_OPERATION);
     crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id);
     event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID);
 
     if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) {
         event.type = lrmd_event_register;
     } else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) {
         event.type = lrmd_event_unregister;
     } else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) {
         /* Completed resource action: pull out timings, result and output */
         crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout);
         crm_element_value_int(msg, F_LRMD_RSC_INTERVAL, &event.interval);
         crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay);
         crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc);
         crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status);
         crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted);
 
         /* NOTE(review): the (int *) casts assume these members are
          * int-sized - confirm against the lrmd_event_data_t definition */
         crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run);
         crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange);
         crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time);
         crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time);
 
         event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION);
         event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
         event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
         event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON);
         event.type = lrmd_event_exec_complete;
 
         event.params = xml2list(msg);
     } else if (crm_str_eq(type, LRMD_OP_NEW_CLIENT, TRUE)) {
         event.type = lrmd_event_new_client;
     } else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) {
         event.type = lrmd_event_poke;
     } else {
         /* Unrecognised operation: nothing to report */
         return 1;
     }
 
     crm_trace("op %s notify event received", type);
     native->callback(&event);
 
     /* params is the only member allocated here, so the only one released */
     if (event.params) {
         g_hash_table_destroy(event.params);
     }
     return 1;
 }
 
 /*
  * IPC dispatch callback: parse the received buffer as XML and pass it to
  * lrmd_dispatch_internal().
  *
  * Returns 1 without parsing when no client callback is registered;
  * otherwise returns lrmd_dispatch_internal()'s result.
  */
 static int
 lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
 {
     lrmd_t *lrmd = userdata;
     lrmd_private_t *priv = lrmd->private;
     int result = 1;
 
     if (priv->callback) {
         xmlNode *parsed = string2xml(buffer);
 
         result = lrmd_dispatch_internal(lrmd, parsed);
         free_xml(parsed);
     }
 
     return result;
 }
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
 /*
  * Destroy-notify adapter: free an xmlNode passed as an untyped pointer
  * (used as the element destructor for the pending_notify list).
  */
 static void
 lrmd_free_xml(gpointer userdata)
 {
     xmlNode *xml = userdata;
 
     free_xml(xml);
 }
 
 /*
  * Return TRUE when the remote connection has a TLS session object,
  * FALSE otherwise.
  */
 static int
 lrmd_tls_connected(lrmd_t * lrmd)
 {
     lrmd_private_t *priv = lrmd->private;
 
     return priv->remote->tls_session ? TRUE : FALSE;
 }
 
 /*
  * Process everything currently available on the TLS connection.
  *
  * First delivers any notifications queued while an earlier call was waiting
  * for a reply, then reads from the connection (if readable) and dispatches
  * every complete message found in the buffer.
  *
  * Returns 0 if there is no TLS session or the server disconnected while
  * reading (in which case the connection is torn down), 1 otherwise.
  */
 static int
 lrmd_tls_dispatch(gpointer userdata)
 {
     lrmd_t *lrmd = userdata;
     lrmd_private_t *native = lrmd->private;
     xmlNode *xml = NULL;
     int rc = 0;
     int disconnected = 0;
 
     if (lrmd_tls_connected(lrmd) == FALSE) {
         crm_trace("tls dispatch triggered after disconnect");
         return 0;
     }
 
     crm_trace("tls_dispatch triggered");
 
     /* First check if there are any pending notifies to process that came
      * while we were waiting for replies earlier. */
     if (native->pending_notify) {
         GList *iter = NULL;
 
         crm_trace("Processing pending notifies");
         for (iter = native->pending_notify; iter; iter = iter->next) {
             lrmd_dispatch_internal(lrmd, iter->data);
         }
         g_list_free_full(native->pending_notify, lrmd_free_xml);
         native->pending_notify = NULL;
     }
 
     /* Next read the current buffer and see if there are any messages to handle. */
     rc = crm_remote_ready(native->remote, 0);
     if (rc == 0) {
         /* nothing to read, see if any full messages are already in buffer. */
         xml = crm_remote_parse_buffer(native->remote);
     } else if (rc < 0) {
         disconnected = 1;
     } else {
         crm_remote_recv(native->remote, -1, &disconnected);
         xml = crm_remote_parse_buffer(native->remote);
     }
     /* Drain every complete message currently in the buffer */
     while (xml) {
         const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
         if (safe_str_eq(msg_type, "notify")) {
             lrmd_dispatch_internal(lrmd, xml);
         } else if (safe_str_eq(msg_type, "reply")) {
             /* Replies seen here belong to requests nobody waited for */
             if (native->expected_late_replies > 0) {
                 native->expected_late_replies--;
             } else {
                 int reply_id = 0;
                 crm_element_value_int(xml, F_LRMD_CALLID, &reply_id);
                 /* if this happens, we want to know about it */
                 crm_err("Got outdated reply %d", reply_id);
             }
         }
         free_xml(xml);
         xml = crm_remote_parse_buffer(native->remote);
     }
 
     /* Tear down only after buffered messages have been handled */
     if (disconnected) {
         crm_info("Server disconnected while reading remote server msg.");
         lrmd_tls_disconnect(lrmd);
         return 0;
     }
     return 1;
 }
 #endif
 
 /*
  * Not used with mainloop.
  *
  * Check whether the connection has something to dispatch.  For IPC this is
  * crm_ipc_ready()'s result; for TLS it is 1 when notifications are already
  * queued, otherwise crm_remote_ready()'s result with a zero timeout.  An
  * unsupported connection type is logged and yields 0.  The timeout
  * argument is not consulted.
  */
 int
 lrmd_poll(lrmd_t * lrmd, int timeout)
 {
     lrmd_private_t *priv = lrmd->private;
 
     if (priv->type == CRM_CLIENT_IPC) {
         return crm_ipc_ready(priv->ipc);
     }
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
     if (priv->type == CRM_CLIENT_TLS) {
         return priv->pending_notify ? 1 : crm_remote_ready(priv->remote, 0);
     }
 #endif
 
     crm_err("Unsupported connection type: %d", priv->type);
     return 0;
 }
 
 /*
  * Not used with mainloop.
  *
  * Dispatch every message currently available on the connection.
  *
  * Asserts lrmd is non-NULL.  Returns FALSE (after logging) if the
  * connection is found closed afterwards, TRUE otherwise.
  */
 bool
 lrmd_dispatch(lrmd_t * lrmd)
 {
     lrmd_private_t *private = NULL;
 
     CRM_ASSERT(lrmd != NULL);
 
     private = lrmd->private;
     switch (private->type) {
         case CRM_CLIENT_IPC:
             /* crm_ipc_ready() returns a negative value on error (e.g.
              * -ENOTCONN); only a positive result means data is waiting.
              * Testing for plain non-zero would keep looping on errors. */
             while (crm_ipc_ready(private->ipc) > 0) {
                 if (crm_ipc_read(private->ipc) > 0) {
                     const char *msg = crm_ipc_buffer(private->ipc);
 
                     lrmd_ipc_dispatch(msg, strlen(msg), lrmd);
                 }
             }
             break;
 #ifdef HAVE_GNUTLS_GNUTLS_H
         case CRM_CLIENT_TLS:
             lrmd_tls_dispatch(lrmd);
             break;
 #endif
         default:
             crm_err("Unsupported connection type: %d", private->type);
     }
 
     if (lrmd_api_is_connected(lrmd) == FALSE) {
         crm_err("Connection closed");
         return FALSE;
     }
 
     return TRUE;
 }
 
 /*
  * Build an "lrmd_command" request.
  *
  * token:   callback token to embed (must be non-NULL)
  * op:      operation name stored under F_LRMD_OPERATION
  * data:    optional payload attached under F_LRMD_CALLDATA
  * options: call options stored under F_LRMD_CALLOPTS
  *
  * Returns the new request, or NULL if token is NULL or the node cannot be
  * created.
  */
 static xmlNode *
 lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options)
 {
     xmlNode *op_msg = NULL;
 
     /* Validate the token before allocating, so a failed check cannot leak
      * a freshly created node */
     CRM_CHECK(token != NULL, return NULL);
 
     op_msg = create_xml_node(NULL, "lrmd_command");
     CRM_CHECK(op_msg != NULL, return NULL);
 
     crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command");
 
     crm_xml_add(op_msg, F_TYPE, T_LRMD);
     crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token);
     crm_xml_add(op_msg, F_LRMD_OPERATION, op);
     crm_trace("Sending call options: %.8lx, %d", (long)options, options);
     crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options);
 
     if (data != NULL) {
         add_message_xml(op_msg, F_LRMD_CALLDATA, data);
     }
 
     return op_msg;
 }
 
 /*
  * Destroy callback for the IPC connection: forget the IPC and mainloop
  * handles, then tell the client (if it registered a callback) that the
  * connection is gone via an lrmd_event_disconnect event.
  */
 static void
 lrmd_ipc_connection_destroy(gpointer userdata)
 {
     lrmd_t *lrmd = userdata;
     lrmd_private_t *priv = lrmd->private;
     lrmd_event_data_t disconnect_event = { 0, };
 
     crm_info("IPC connection destroyed");
 
     /* Prevent these from being cleaned up in lrmd_api_disconnect() */
     priv->source = NULL;
     priv->ipc = NULL;
 
     if (priv->callback == NULL) {
         return;
     }
 
     disconnect_event.type = lrmd_event_disconnect;
     disconnect_event.remote_nodename = priv->remote_nodename;
     priv->callback(&disconnect_event);
 }
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
 /*
  * Destroy callback for the TLS connection: shut down and free the TLS
  * session, credentials, socket, notify trigger, queued notifications and
  * receive buffer, reset the related fields, then report an
  * lrmd_event_disconnect event to the client callback (if any).
  */
 static void
 lrmd_tls_connection_destroy(gpointer userdata)
 {
     lrmd_t *lrmd = userdata;
     lrmd_private_t *native = lrmd->private;
 
     crm_info("TLS connection destroyed");
 
     if (native->remote->tls_session) {
         /* Say goodbye to the peer before releasing the session */
         gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR);
         gnutls_deinit(*native->remote->tls_session);
         gnutls_free(native->remote->tls_session);
     }
     if (native->psk_cred_c) {
         gnutls_psk_free_client_credentials(native->psk_cred_c);
     }
     /* A sock value of 0 is treated as "no socket" here */
     if (native->sock) {
         close(native->sock);
     }
     if (native->process_notify) {
         mainloop_destroy_trigger(native->process_notify);
         native->process_notify = NULL;
     }
     if (native->pending_notify) {
         g_list_free_full(native->pending_notify, lrmd_free_xml);
         native->pending_notify = NULL;
     }
 
     /* Reset everything released above so later checks see it as gone */
     free(native->remote->buffer);
     native->remote->buffer = NULL;
     native->source = 0;
     native->sock = 0;
     native->psk_cred_c = NULL;
     native->remote->tls_session = NULL;
     native->sock = 0;
 
     if (native->callback) {
         lrmd_event_data_t event = { 0, };
         event.remote_nodename = native->remote_nodename;
         event.type = lrmd_event_disconnect;
         native->callback(&event);
     }
     return;
 }
 
 /*
  * Stamp a message with its remote message id and type, then send it over
  * the given remote session.
  *
  * Returns crm_remote_send()'s result; a negative result is also logged.
  */
 int
 lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type)
 {
     int sent = -1;
 
     crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id);
     crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type);
 
     sent = crm_remote_send(session, msg);
     if (sent < 0) {
         crm_err("Failed to send remote lrmd tls msg, rc = %d", sent);
     }
 
     return sent;
 }
 
 /*
  * Wait for the reply to a request sent over the TLS connection.
  *
  * lrmd:              connection to read from
  * total_timeout:     overall wait in milliseconds; values <= 0 or above
  *                    MAX_TLS_RECV_WAIT are replaced by MAX_TLS_RECV_WAIT
  * expected_reply_id: remote message id the reply must carry
  * disconnected:      set to TRUE when the wait times out or nothing
  *                    parseable could be received; may also be set by
  *                    crm_remote_recv()
  *
  * Notifications that arrive while waiting are queued on pending_notify for
  * later dispatch.  Messages of an unexpected type, and replies with the
  * wrong id, are discarded.
  *
  * Returns the reply (owned by the caller), or NULL on failure.
  */
 static xmlNode *
 lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected)
 {
     lrmd_private_t *native = lrmd->private;
     xmlNode *xml = NULL;
     time_t start = time(NULL);
     const char *msg_type = NULL;
     int reply_id = 0;
     int remaining_timeout = 0;
 
     /* A timeout of 0 here makes no sense.  We have to wait a period of time
      * for the response to come back.  If -1 or 0, default to 10 seconds. */
     if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) {
         total_timeout = MAX_TLS_RECV_WAIT;
     }
 
     while (!xml) {
 
         xml = crm_remote_parse_buffer(native->remote);
         if (!xml) {
             /* read some more off the tls buffer if we still have time left.
              *
              * Always derive the remaining time from the total budget and the
              * time elapsed since the start; subtracting the elapsed time
              * from an already-reduced value would count it repeatedly. */
             remaining_timeout = total_timeout - (int)((time(NULL) - start) * 1000);
             if (remaining_timeout <= 0) {
                 crm_err("Never received the expected reply during the timeout period, disconnecting.");
                 *disconnected = TRUE;
                 return NULL;
             }
 
             crm_remote_recv(native->remote, remaining_timeout, disconnected);
             xml = crm_remote_parse_buffer(native->remote);
             if (!xml) {
                 crm_err("Unable to receive expected reply, disconnecting.");
                 *disconnected = TRUE;
                 return NULL;
             } else if (*disconnected) {
                 /* The caller only gets NULL, so release what was parsed */
                 free_xml(xml);
                 return NULL;
             }
         }
 
         CRM_ASSERT(xml != NULL);
 
         crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id);
         msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
 
         if (!msg_type) {
             crm_err("Empty msg type received while waiting for reply");
             free_xml(xml);
             xml = NULL;
         } else if (safe_str_eq(msg_type, "notify")) {
             /* got a notify while waiting for reply, trigger the notify to be processed later */
             crm_info("queueing notify");
             native->pending_notify = g_list_append(native->pending_notify, xml);
             if (native->process_notify) {
                 crm_info("notify trigger set.");
                 mainloop_set_trigger(native->process_notify);
             }
             /* Ownership moved to the pending list; keep waiting */
             xml = NULL;
         } else if (safe_str_neq(msg_type, "reply")) {
             /* msg isn't a reply, make some noise */
             crm_err("Expected a reply, got %s", msg_type);
             free_xml(xml);
             xml = NULL;
         } else if (reply_id != expected_reply_id) {
             if (native->expected_late_replies > 0) {
                 native->expected_late_replies--;
             } else {
                 crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
             }
             free_xml(xml);
             xml = NULL;
         }
     }
 
     /* More data may still be buffered; make sure it gets dispatched */
     if (native->remote->buffer && native->process_notify) {
         mainloop_set_trigger(native->process_notify);
     }
 
     return xml;
 }
 
 /*
  * Send a request over the TLS connection under the next global remote
  * message id (ids restart at 1 once the counter is no longer positive).
  *
  * Returns pcmk_ok on success.  On failure the connection is torn down and
  * -ENOTCONN is returned.
  */
 static int
 lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg)
 {
     lrmd_private_t *priv = lrmd->private;
     int sent = 0;
 
     if (++global_remote_msg_id <= 0) {
         global_remote_msg_id = 1;
     }
 
     sent = lrmd_tls_send_msg(priv->remote, msg, global_remote_msg_id, "request");
     if (sent > 0) {
         return pcmk_ok;
     }
 
     crm_err("Remote lrmd send failed, disconnecting");
     lrmd_tls_disconnect(lrmd);
     return -ENOTCONN;
 }
 
 /*
  * Send a request over TLS and wait for its reply.
  *
  * Returns -1 when there is no TLS session, the send error if sending
  * fails, -ENOTCONN if the server disconnected while waiting (the
  * connection is then torn down), -ECOMM if no reply arrived, and 0
  * otherwise.  The received reply is stored in *reply when reply is
  * non-NULL, and freed otherwise.
  */
 static int
 lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
 {
     xmlNode *response = NULL;
     int peer_gone = 0;
     int rc = 0;
 
     if (!lrmd_tls_connected(lrmd)) {
         return -1;
     }
 
     rc = lrmd_tls_send(lrmd, msg);
     if (rc < 0) {
         return rc;
     }
 
     response = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &peer_gone);
 
     if (peer_gone) {
         crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ",
                 global_remote_msg_id);
         lrmd_tls_disconnect(lrmd);
         rc = -ENOTCONN;
     } else if (response == NULL) {
         crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ",
                 global_remote_msg_id, timeout);
         rc = -ECOMM;
     }
 
     if (reply == NULL) {
         /* Nobody wants the reply */
         free_xml(response);
     } else {
         *reply = response;
     }
 
     return rc;
 }
 #endif
 
 /*
  * \internal
  * \brief Send an XML request and wait for its reply, on whichever transport
  *        this connection uses
  *
  * \param[in]  lrmd     Connection to use
  * \param[in]  msg      XML request to send
  * \param[in]  timeout  Passed through to the transport's send routine
  * \param[out] reply    Passed through to the transport's send routine
  *
  * \return Result of the transport's send routine, or -1 for an unsupported
  *         connection type
  */
 static int
 lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
 {
     lrmd_private_t *priv = lrmd->private;
 
     if (priv->type == CRM_CLIENT_IPC) {
         return crm_ipc_send(priv->ipc, msg, crm_ipc_client_response, timeout, reply);
     }
 #ifdef HAVE_GNUTLS_GNUTLS_H
     if (priv->type == CRM_CLIENT_TLS) {
         return lrmd_tls_send_recv(lrmd, msg, timeout, reply);
     }
 #endif
 
     crm_err("Unsupported connection type: %d", priv->type);
     return -1;
 }
 
 /*
  * \internal
  * \brief Send an XML request without waiting for a reply
  *
  * \param[in] lrmd  Connection to use
  * \param[in] msg   XML request to send
  *
  * \return Result of the transport's send routine, or -1 for an unsupported
  *         connection type
  */
 static int
 lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg)
 {
     lrmd_private_t *priv = lrmd->private;
     int rc = -1;
 
     if (priv->type == CRM_CLIENT_IPC) {
         rc = crm_ipc_send(priv->ipc, msg, crm_ipc_flags_none, 0, NULL);
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
     } else if (priv->type == CRM_CLIENT_TLS) {
         rc = lrmd_tls_send(lrmd, msg);
         if (rc == pcmk_ok) {
             /* The request/reply protocol has to behave like libqb, so a
              * reply will still arrive later even though nobody waits for
              * it; count it so it can be recognized when it shows up. */
             priv->expected_late_replies++;
         }
 #endif
 
     } else {
         crm_err("Unsupported connection type: %d", priv->type);
     }
 
     return rc;
 }
 
 /*
  * \internal
  * \brief Report whether the connection to the lrmd is currently up
  *
  * \param[in] lrmd  Connection to check
  *
  * \return Result of the transport-specific connectivity check, or 0 for an
  *         unsupported connection type
  */
 static int
 lrmd_api_is_connected(lrmd_t * lrmd)
 {
     lrmd_private_t *priv = lrmd->private;
 
     if (priv->type == CRM_CLIENT_IPC) {
         return crm_ipc_connected(priv->ipc);
     }
 #ifdef HAVE_GNUTLS_GNUTLS_H
     if (priv->type == CRM_CLIENT_TLS) {
         return lrmd_tls_connected(lrmd);
     }
 #endif
 
     crm_err("Unsupported connection type: %d", priv->type);
     return 0;
 }
 
 /*
  * \internal
  * \brief Build an lrmd operation request, send it, and optionally collect the reply
  *
  * \param[in]  lrmd          Connection to use
  * \param[in]  op            Name of the operation to request
  * \param[in]  data          XML payload handed to lrmd_create_op()
  * \param[out] output_data   If non-NULL, receives the reply XML on success
  *                           (the caller then owns it)
  * \param[in]  timeout       Timeout in ms (per the original note: defaults to
  *                           1000 if set to 0)
  * \param[in]  options       Call options handed to lrmd_create_op()
  * \param[in]  expect_reply  Whether to wait for a reply
  *                           (TODO we need to reduce usage of this boolean)
  *
  * \return -ENOTCONN if not connected, -EINVAL if no request could be built,
  *         -ECOMM if sending failed, -ENOMSG if the reply is missing or has no
  *         return code; otherwise the return code carried in the reply, or the
  *         result of the send when no reply is expected
  */
 static int
 lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data,
                   int timeout, enum lrmd_call_options options, gboolean expect_reply)
 {
     int rc = pcmk_ok;
     int reply_id = -1;
     lrmd_private_t *native = lrmd->private;
     xmlNode *request = NULL;
     xmlNode *response = NULL;
 
     if (!lrmd_api_is_connected(lrmd)) {
         return -ENOTCONN;
     }
     if (op == NULL) {
         crm_err("No operation specified");
         return -EINVAL;
     }
 
     CRM_CHECK(native->token != NULL,;
         );
     crm_trace("sending %s op to lrmd", op);
 
     request = lrmd_create_op(native->token, op, data, options);
     if (request == NULL) {
         return -EINVAL;
     }
     crm_xml_add_int(request, F_LRMD_TIMEOUT, timeout);
 
     if (!expect_reply) {
         /* Fire and forget: the send result is all there is to report */
         rc = lrmd_send_xml_no_reply(lrmd, request);
         goto done;
     }
 
     rc = lrmd_send_xml(lrmd, request, timeout, &response);
     if (rc < 0) {
         crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc);
         rc = -ECOMM;
         goto done;
     }
     if (response == NULL) {
         rc = -ENOMSG;
         goto done;
     }
 
     rc = pcmk_ok;
     crm_element_value_int(response, F_LRMD_CALLID, &reply_id);
     crm_trace("%s op reply received", op);
     if (crm_element_value_int(response, F_LRMD_RC, &rc) != 0) {
         rc = -ENOMSG;
         goto done;
     }
 
     crm_log_xml_trace(response, "Reply");
 
     if (output_data) {
         /* Ownership moves to the caller, so keep the cleanup below away from it */
         *output_data = response;
         response = NULL;
     }
 
   done:
     if (lrmd_api_is_connected(lrmd) == FALSE) {
         crm_err("LRMD disconnected");
     }
 
     free_xml(request);
     free_xml(response);
     return rc;
 }
 
 /*
  * \internal
  * \brief Send a poke request to the lrmd
  *
  * A reply is waited for only on IPC connections.
  *
  * \param[in] lrmd  Connection to poke
  *
  * \return pcmk_ok unless sending the command yielded a negative code, which
  *         is returned as-is
  */
 static int
 lrmd_api_poke_connection(lrmd_t * lrmd)
 {
     lrmd_private_t *priv = lrmd->private;
     gboolean wait_for_reply = (priv->type == CRM_CLIENT_IPC) ? TRUE : FALSE;
     xmlNode *payload = create_xml_node(NULL, F_LRMD_RSC);
     int rc;
 
     crm_xml_add(payload, F_LRMD_ORIGIN, __FUNCTION__);
     rc = lrmd_send_command(lrmd, LRMD_OP_POKE, payload, NULL, 0, 0, wait_for_reply);
     free_xml(payload);
 
     if (rc < 0) {
         return rc;
     }
     return pcmk_ok;
 }
 
 /*
  * \brief Send a check request carrying the stonith-watchdog-timeout setting
  *
  * A reply is waited for only on IPC connections.
  *
  * \param[in] lrmd  Connection to use
  * \param[in] hash  Table in which "stonith-watchdog-timeout" is looked up
  *
  * \return pcmk_ok unless sending the command yielded a negative code, which
  *         is returned as-is
  */
 int
 remote_proxy_check(lrmd_t * lrmd, GHashTable *hash)
 {
     lrmd_private_t *priv = lrmd->private;
     gboolean wait_for_reply = (priv->type == CRM_CLIENT_IPC) ? TRUE : FALSE;
     xmlNode *payload = create_xml_node(NULL, F_LRMD_OPERATION);
     const char *watchdog_timeout = NULL;
     int rc;
 
     crm_xml_add(payload, F_LRMD_ORIGIN, __FUNCTION__);
 
     watchdog_timeout = g_hash_table_lookup(hash, "stonith-watchdog-timeout");
     crm_xml_add(payload, F_LRMD_WATCHDOG, watchdog_timeout);
 
     rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, payload, NULL, 0, 0, wait_for_reply);
     free_xml(payload);
 
     if (rc < 0) {
         return rc;
     }
     return pcmk_ok;
 }
 
 /*
  * \internal
  * \brief Register this client with the lrmd and record the token it hands back
  *
  * Sends a registration message carrying the client name and protocol version
  * (plus a proxy-provider flag when a proxy callback is set), then validates
  * the reply. On success the registration token and the peer's protocol
  * version are stored in the connection's private data; on any failure the
  * connection is torn down via lrmd_api_disconnect().
  *
  * \param[in] lrmd  Connection to register on
  * \param[in] name  Client name to advertise
  *
  * \return pcmk_ok on success, -ECOMM if the message could not be exchanged,
  *         -EPROTO for a missing, malformed or version-mismatched reply
  */
 static int
 lrmd_handshake(lrmd_t * lrmd, const char *name)
 {
     int rc = pcmk_ok;
     lrmd_private_t *native = lrmd->private;
     xmlNode *reply = NULL;
     xmlNode *hello = create_xml_node(NULL, "lrmd_command");
 
     crm_xml_add(hello, F_TYPE, T_LRMD);
     crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER);
     crm_xml_add(hello, F_LRMD_CLIENTNAME, name);
     crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
 
     /* advertise that we are a proxy provider */
     if (native->proxy_callback) {
         crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true");
     }
 
     rc = lrmd_send_xml(lrmd, hello, -1, &reply);
 
     if (rc < 0) {
         crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc);
         rc = -ECOMM;
     } else if (reply == NULL) {
         crm_err("Did not receive registration reply");
         rc = -EPROTO;
     } else {
         const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION);
         const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION);
         const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID);
 
         crm_element_value_int(reply, F_LRMD_RC, &rc);
 
         if (rc == -EPROTO) {
             crm_err("LRMD protocol mismatch client version %s, server version %s",
                 LRMD_PROTOCOL_VERSION, version);
             crm_log_xml_err(reply, "Protocol Error");
 
         } else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
             crm_err("Invalid registration message: %s", msg_type);
             crm_log_xml_err(reply, "Bad reply");
             rc = -EPROTO;
         } else if (tmp_ticket == NULL) {
             crm_err("No registration token provided");
             crm_log_xml_err(reply, "Bad reply");
             rc = -EPROTO;
         } else {
             crm_trace("Obtained registration token: %s", tmp_ticket);
 
             /* Release anything left over from an earlier registration so a
              * reconnect does not leak the previous copies (free(NULL) is a
              * no-op on the first handshake). */
             free(native->token);
             free(native->peer_version);
 
             native->token = strdup(tmp_ticket);
             native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */
             rc = pcmk_ok;
         }
     }
 
     free_xml(reply);
     free_xml(hello);
 
     if (rc != pcmk_ok) {
         lrmd_api_disconnect(lrmd);
     }
     return rc;
 }
 
 /*
  * \internal
  * \brief Open the IPC connection to the local lrmd
  *
  * \param[in]  lrmd  Connection object to populate
  * \param[out] fd    If non-NULL, connect without a mainloop and store the IPC
  *                   file descriptor here; if NULL, attach the connection to
  *                   the mainloop instead
  *
  * \return pcmk_ok on success, -ENOTCONN if no connection could be made
  */
 static int
 lrmd_ipc_connect(lrmd_t * lrmd, int *fd)
 {
     int rc = pcmk_ok;
     lrmd_private_t *priv = lrmd->private;
 
     static struct ipc_client_callbacks lrmd_callbacks = {
         .dispatch = lrmd_ipc_dispatch,
         .destroy = lrmd_ipc_connection_destroy
     };
 
     crm_info("Connecting to lrmd");
 
     if (fd == NULL) {
         /* Mainloop-driven connection */
         priv->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks);
         priv->ipc = mainloop_get_ipc_client(priv->source);
 
     } else {
         /* No mainloop */
         priv->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0);
         if (priv->ipc != NULL) {
             if (crm_ipc_connect(priv->ipc)) {
                 *fd = crm_ipc_get_fd(priv->ipc);
             } else {
                 crm_perror(LOG_ERR, "Connection to local resource manager failed");
                 rc = -ENOTCONN;
             }
         }
     }
 
     if (priv->ipc == NULL) {
         crm_debug("Could not connect to the LRMD API");
         rc = -ENOTCONN;
     }
 
     return rc;
 }
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
 /*
  * \internal
  * \brief Load the remote authentication key into a gnutls datum
  *
  * The most recently loaded key is kept in a static cache and reused for up to
  * 60 seconds. The cache is not keyed by \p location: while it is fresh, it is
  * returned for any non-NULL location.
  *
  * \param[out] key       Receives a gnutls_malloc()'d copy of the key; on
  *                       success the caller releases key->data with gnutls_free()
  * \param[in]  location  Path of the key file (NULL is rejected)
  *
  * \return 0 on success; -1 if \p location is NULL, the file cannot be opened
  *         or read, it is empty, or memory cannot be allocated
  */
 static int
 set_key(gnutls_datum_t * key, const char *location)
 {
     FILE *stream;
     int read_len = 256;
     int cur_len = 0;
     int buf_len = read_len;
     int failed = 0;
     static char *key_cache = NULL;
     static size_t key_cache_len = 0;
     static time_t key_cache_updated;
 
     if (location == NULL) {
         return -1;
     }
 
     if (key_cache) {
         time_t now = time(NULL);
 
         if ((now - key_cache_updated) < 60) {
             key->data = gnutls_malloc(key_cache_len + 1);
             if (key->data == NULL) {
                 key->size = 0;
                 return -1;
             }
             key->size = key_cache_len;
             memcpy(key->data, key_cache, key_cache_len);
 
             crm_debug("using cached LRMD key");
             return 0;
         } else {
             key_cache_len = 0;
             key_cache_updated = 0;
             free(key_cache);
             key_cache = NULL;
             crm_debug("clearing lrmd key cache");
         }
     }
 
     stream = fopen(location, "r");
     if (!stream) {
         return -1;
     }
 
     key->data = gnutls_malloc(read_len);
     if (key->data == NULL) {
         fclose(stream);
         key->size = 0;
         return -1;
     }
 
     for (;;) {
         int next = fgetc(stream);
 
         if (next == EOF) {
             /* Either end of file or a read error. Stopping on both matters:
              * after an error feof() stays false, so looping on feof() alone
              * would never terminate. ferror() below tells the two apart. */
             break;
         }
 
         if (cur_len == buf_len) {
             /* Buffer is full: grow it by another read_len bytes */
             void *bigger = gnutls_realloc(key->data, buf_len + read_len);
 
             if (bigger == NULL) {
                 failed = 1;
                 break;
             }
             key->data = bigger;
             buf_len += read_len;
         }
 
         key->data[cur_len] = next;
         cur_len++;
     }
     if (ferror(stream)) {
         failed = 1;
     }
     fclose(stream);
 
     if (failed) {
         /* Never hand back a partially read key */
         cur_len = 0;
     }
 
     key->size = cur_len;
     if (!cur_len) {
         gnutls_free(key->data);
         key->data = 0;
         return -1;
     }
 
     if (!key_cache) {
         key_cache = calloc(1, key->size + 1);
         if (key_cache) {
             memcpy(key_cache, key->data, key->size);
 
             key_cache_len = key->size;
             key_cache_updated = time(NULL);
         }
         /* If the cache could not be allocated the key is simply not cached */
     }
 
     return 0;
 }
 
 /*
  * \brief Load the lrmd remote authentication key
  *
  * Tries, in order: the path named by the PCMK_authkey_location environment
  * variable, DEFAULT_REMOTE_KEY_LOCATION, then ALT_REMOTE_KEY_LOCATION.
  *
  * \param[out] key  Receives the key data
  *
  * \return 0 if a key was loaded from any location, -1 otherwise
  */
 int
 lrmd_tls_set_key(gnutls_datum_t * key)
 {
     const char *specific_location = getenv("PCMK_authkey_location");
 
     if (set_key(key, specific_location) == 0) {
         crm_debug("Using custom authkey location %s", specific_location);
         return 0;
     }
 
     if (specific_location) {
         crm_err("No valid lrmd remote key found at %s, trying default location", specific_location);
     }
 
     /* The alternate location is consulted only when the default one fails */
     if ((set_key(key, DEFAULT_REMOTE_KEY_LOCATION) == 0)
         || (set_key(key, ALT_REMOTE_KEY_LOCATION) == 0)) {
         return 0;
     }
 
     crm_err("No valid lrmd remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION);
     return -1;
 }
 
 /*
  * \internal
  * \brief Run crm_gnutls_global_init() the first time this is called
  */
 static void
 lrmd_gnutls_global_init(void)
 {
     static int already_done = 0;
 
     if (already_done) {
         return;
     }
     crm_gnutls_global_init();
     already_done = 1;
 }
 #endif
 
 /*
  * \internal
  * \brief Deliver the outcome of a connection attempt to the client callback
  *
  * Does nothing when no callback has been registered.
  *
  * \param[in] lrmd  Connection the result belongs to
  * \param[in] rc    Result code to report as the event's connection_rc
  */
 static void
 report_async_connection_result(lrmd_t * lrmd, int rc)
 {
     lrmd_private_t *priv = lrmd->private;
     lrmd_event_data_t event = { 0, };
 
     if (!priv->callback) {
         return;
     }
 
     event.type = lrmd_event_connect;
     event.connection_rc = rc;
     event.remote_nodename = priv->remote_nodename;
     priv->callback(&event);
 }
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
 static void
 lrmd_tcp_connect_cb(void *userdata, int sock)
 {
     lrmd_t *lrmd = userdata;
     lrmd_private_t *native = lrmd->private;
     char name[256] = { 0, };
     static struct mainloop_fd_callbacks lrmd_tls_callbacks = {
         .dispatch = lrmd_tls_dispatch,
         .destroy = lrmd_tls_connection_destroy,
     };
     int rc = sock;
     gnutls_datum_t psk_key = { NULL, 0 };
 
     native->async_timer = 0;
 
     if (rc < 0) {
         lrmd_tls_connection_destroy(lrmd);
         crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port);
         report_async_connection_result(lrmd, rc);
         return;
     }
 
     /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon
      * to avoid all blocking code in the client. */
     native->sock = sock;
 
     if (lrmd_tls_set_key(&psk_key) != 0) {
         lrmd_tls_connection_destroy(lrmd);
         return;
     }
 
     gnutls_psk_allocate_client_credentials(&native->psk_cred_c);
     gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW);
     gnutls_free(psk_key.data);
 
     native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c);
 
     if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) {
         crm_warn("Client tls handshake failed for server %s:%d. Disconnecting", native->server,
                  native->port);
         gnutls_deinit(*native->remote->tls_session);
         gnutls_free(native->remote->tls_session);
         native->remote->tls_session = NULL;
         lrmd_tls_connection_destroy(lrmd);
         report_async_connection_result(lrmd, -1);
         return;
     }
 
     crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server,
              native->port);
 
     snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port);
 
     native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd);
     native->source =
         mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks);
 
     rc = lrmd_handshake(lrmd, name);
     report_async_connection_result(lrmd, rc);
 
     return;
 }
 
 /*
  * \internal
  * \brief Start an asynchronous TCP connection to the remote lrmd
  *
  * lrmd_tcp_connect_cb() is registered as the callback for the connection
  * attempt.
  *
  * \param[in] lrmd     Connection object holding the server and port
  * \param[in] timeout  Connection timeout in ms
  *
  * \return 0 if the attempt was started, -1 otherwise
  */
 static int
 lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ )
 {
     lrmd_private_t *priv = lrmd->private;
     int timer_id = 0;
     int sock = 0;
 
     lrmd_gnutls_global_init();
 
     sock = crm_remote_tcp_connect_async(priv->server, priv->port, timeout, &timer_id, lrmd,
                                       lrmd_tcp_connect_cb);
     if (sock == -1) {
         return -1;
     }
 
     priv->sock = sock;
     priv->async_timer = timer_id;
     return 0;
 }
 
 /*
  * \internal
  * \brief Synchronously establish the TLS connection to the remote lrmd
  *
  * Connects the TCP socket, loads the pre-shared key and performs the TLS
  * handshake.
  *
  * \param[in]  lrmd  Connection object holding the server and port
  * \param[out] fd    If non-NULL, receives the socket and the mainloop is not
  *                   involved; if NULL, the socket is added to the mainloop
  *
  * \return pcmk_ok on success, -ENOTCONN if the TCP connection failed, -1 if
  *         the key could not be loaded or the TLS handshake failed
  */
 static int
 lrmd_tls_connect(lrmd_t * lrmd, int *fd)
 {
     static struct mainloop_fd_callbacks lrmd_tls_callbacks = {
         .dispatch = lrmd_tls_dispatch,
         .destroy = lrmd_tls_connection_destroy,
     };
 
     lrmd_private_t *priv = lrmd->private;
     gnutls_datum_t key = { NULL, 0 };
     int sock;
 
     lrmd_gnutls_global_init();
 
     sock = crm_remote_tcp_connect(priv->server, priv->port);
     if (sock < 0) {
         crm_warn("Could not establish remote lrmd connection to %s", priv->server);
         lrmd_tls_connection_destroy(lrmd);
         return -ENOTCONN;
     }
     priv->sock = sock;
 
     if (lrmd_tls_set_key(&key) != 0) {
         lrmd_tls_connection_destroy(lrmd);
         return -1;
     }
 
     /* The key data is released right after the credentials have been set up */
     gnutls_psk_allocate_client_credentials(&priv->psk_cred_c);
     gnutls_psk_set_client_credentials(priv->psk_cred_c, DEFAULT_REMOTE_USERNAME, &key, GNUTLS_PSK_KEY_RAW);
     gnutls_free(key.data);
 
     priv->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, priv->psk_cred_c);
 
     if (crm_initiate_client_tls_handshake(priv->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) {
         crm_err("Session creation for %s:%d failed", priv->server, priv->port);
         gnutls_deinit(*priv->remote->tls_session);
         gnutls_free(priv->remote->tls_session);
         priv->remote->tls_session = NULL;
         lrmd_tls_connection_destroy(lrmd);
         return -1;
     }
 
     crm_info("Remote lrmd client TLS connection established with server %s:%d", priv->server,
              priv->port);
 
     if (fd == NULL) {
         char name[256] = { 0, };
 
         snprintf(name, 128, "remote-lrmd-%s:%d", priv->server, priv->port);
 
         priv->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd);
         priv->source =
             mainloop_add_fd(name, G_PRIORITY_HIGH, priv->sock, lrmd, &lrmd_tls_callbacks);
     } else {
         *fd = sock;
     }
     return pcmk_ok;
 }
 #endif
 
 /*
  * \internal
  * \brief Connect to the lrmd and register with it
  *
  * \param[in]  lrmd  Connection object
  * \param[in]  name  Client name used for registration
  * \param[out] fd    Passed through to the transport-specific connect routine
  *
  * \return Result of lrmd_handshake() if the transport connected; otherwise
  *         the transport's error, or -ENOTCONN for an unsupported type
  */
 static int
 lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd)
 {
     lrmd_private_t *priv = lrmd->private;
     int rc = -ENOTCONN;
 
     if (priv->type == CRM_CLIENT_IPC) {
         rc = lrmd_ipc_connect(lrmd, fd);
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
     } else if (priv->type == CRM_CLIENT_TLS) {
         rc = lrmd_tls_connect(lrmd, fd);
 #endif
 
     } else {
         crm_err("Unsupported connection type: %d", priv->type);
     }
 
     if (rc != pcmk_ok) {
         return rc;
     }
     return lrmd_handshake(lrmd, name);
 }
 
 /*
  * \internal
  * \brief Connect to the lrmd, reporting the outcome through the client callback
  *
  * IPC connections are made synchronously and only success is reported via the
  * callback here. TLS connections are started asynchronously and only an
  * immediate failure to start is reported here.
  *
  * \param[in] lrmd     Connection object (must have a client callback set)
  * \param[in] name     Client name used for registration
  * \param[in] timeout  Timeout passed to the asynchronous TLS connect
  *
  * \return -1 if no callback is set; otherwise the result of starting the
  *         connection (0 for an unsupported connection type)
  */
 static int
 lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout)
 {
     lrmd_private_t *priv = lrmd->private;
     int rc = 0;
 
     if (priv->callback == NULL) {
         crm_err("Async connect not possible, no lrmd client callback set.");
         return -1;
     }
 
     if (priv->type == CRM_CLIENT_IPC) {
         /* fake async connection with ipc.  it should be fast
          * enough that we gain very little from async */
         rc = lrmd_api_connect(lrmd, name, NULL);
         if (rc == 0) {
             report_async_connection_result(lrmd, rc);
         }
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
     } else if (priv->type == CRM_CLIENT_TLS) {
         rc = lrmd_tls_connect_async(lrmd, timeout);
         if (rc != 0) {
             /* connection failed, report rc now */
             report_async_connection_result(lrmd, rc);
         }
 #endif
 
     } else {
         crm_err("Unsupported connection type: %d", priv->type);
     }
 
     return rc;
 }
 
 /*
  * \internal
  * \brief Close the IPC connection to the lrmd
  *
  * \param[in] lrmd  Connection to close
  */
 static void
 lrmd_ipc_disconnect(lrmd_t * lrmd)
 {
     lrmd_private_t *priv = lrmd->private;
     crm_ipc_t *detached = NULL;
 
     if (priv->source != NULL) {
         /* Attached to mainloop */
         mainloop_del_ipc_client(priv->source);
         priv->source = NULL;
         priv->ipc = NULL;
         return;
     }
 
     if (priv->ipc == NULL) {
         return;
     }
 
     /* Not attached to mainloop: clear our reference first, then close and
      * destroy the IPC object ourselves */
     detached = priv->ipc;
     priv->ipc = NULL;
     crm_ipc_close(detached);
     crm_ipc_destroy(detached);
 }
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
 /*
  * \internal
  * \brief Tear down the TLS connection state kept in the lrmd's private data
  *
  * Ends and frees the TLS session, cancels a pending async-connect timer,
  * removes the mainloop source (or closes the raw socket when there is no
  * source), and discards any queued notifications.
  *
  * \param[in] lrmd  Connection to disconnect
  */
 static void
 lrmd_tls_disconnect(lrmd_t * lrmd)
 {
     lrmd_private_t *native = lrmd->private;
 
     /* Say goodbye to the peer, then release the session object */
     if (native->remote->tls_session) {
         gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR);
         gnutls_deinit(*native->remote->tls_session);
         gnutls_free(native->remote->tls_session);
         native->remote->tls_session = 0;
     }
 
     /* Cancel an asynchronous connection attempt that is still pending */
     if (native->async_timer) {
         g_source_remove(native->async_timer);
         native->async_timer = 0;
     }
 
     if (native->source != NULL) {
         /* Attached to mainloop */
         mainloop_del_ipc_client(native->source);
         native->source = NULL;
 
     } else if (native->sock) {
         /* Not attached to mainloop: close the socket directly and clear the
          * stored descriptor so a later call does not close it again */
         close(native->sock);
+        native->sock = 0;
     }
 
     /* Drop notifications that were queued but never dispatched */
     if (native->pending_notify) {
         g_list_free_full(native->pending_notify, lrmd_free_xml);
         native->pending_notify = NULL;
     }
 }
 #endif
 
 /*
  * \internal
  * \brief Disconnect from the lrmd and forget the registration token
  *
  * \param[in] lrmd  Connection to disconnect
  *
  * \return Always 0
  */
 static int
 lrmd_api_disconnect(lrmd_t * lrmd)
 {
     lrmd_private_t *priv = lrmd->private;
 
     crm_info("Disconnecting from %d lrmd service", priv->type);
 
     if (priv->type == CRM_CLIENT_IPC) {
         lrmd_ipc_disconnect(lrmd);
 
 #ifdef HAVE_GNUTLS_GNUTLS_H
     } else if (priv->type == CRM_CLIENT_TLS) {
         lrmd_tls_disconnect(lrmd);
 #endif
 
     } else {
         crm_err("Unsupported connection type: %d", priv->type);
     }
 
     /* The token is released whatever the connection type was */
     free(priv->token);
     priv->token = NULL;
     return 0;
 }
 
 /*
  * \internal
  * \brief Ask the lrmd to register a resource
  *
  * \param[in] lrmd      Connection to use
  * \param[in] rsc_id    Resource id (required)
  * \param[in] class     Resource class (required)
  * \param[in] provider  Resource provider (required when class is "ocf")
  * \param[in] type      Resource type (required)
  * \param[in] options   Call options passed on with the request
  *
  * \return -EINVAL if a required argument is missing, otherwise the result of
  *         sending the registration command
  */
 static int
 lrmd_api_register_rsc(lrmd_t * lrmd,
                       const char *rsc_id,
                       const char *class,
                       const char *provider, const char *type, enum lrmd_call_options options)
 {
     xmlNode *request = NULL;
     int rc = pcmk_ok;
 
     if (!rsc_id || !class || !type
         || (!provider && safe_str_eq(class, "ocf"))) {
         return -EINVAL;
     }
 
     request = create_xml_node(NULL, F_LRMD_RSC);
     crm_xml_add(request, F_LRMD_ORIGIN, __FUNCTION__);
     crm_xml_add(request, F_LRMD_RSC_ID, rsc_id);
     crm_xml_add(request, F_LRMD_CLASS, class);
     crm_xml_add(request, F_LRMD_PROVIDER, provider);
     crm_xml_add(request, F_LRMD_TYPE, type);
 
     rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, request, NULL, 0, options, TRUE);
     free_xml(request);
 
     return rc;
 }
 
 /*
  * \internal
  * \brief Ask the lrmd to unregister a resource
  *
  * \param[in] lrmd     Connection to use
  * \param[in] rsc_id   Id of the resource to unregister
  * \param[in] options  Call options passed on with the request
  *
  * \return Result of sending the unregistration command
  */
 static int
 lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
 {
     xmlNode *request = create_xml_node(NULL, F_LRMD_RSC);
     int rc;
 
     crm_xml_add(request, F_LRMD_ORIGIN, __FUNCTION__);
     crm_xml_add(request, F_LRMD_RSC_ID, rsc_id);
 
     rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, request, NULL, 0, options, TRUE);
     free_xml(request);
 
     return rc;
 }
 
 /*
  * \brief Make a deep copy of a resource info structure
  *
  * \param[in] rsc_info  Structure to copy
  *
  * \return Newly allocated copy (release it with lrmd_free_rsc_info()), or
  *         NULL if \p rsc_info is NULL or the copy could not be allocated
  */
 lrmd_rsc_info_t *
 lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info)
 {
     lrmd_rsc_info_t *copy = NULL;
 
     if (rsc_info == NULL) {
         return NULL;
     }
 
     copy = calloc(1, sizeof(lrmd_rsc_info_t));
     if (copy == NULL) {
         /* Out of memory: report failure rather than dereference NULL */
         return NULL;
     }
 
     copy->id = strdup(rsc_info->id);
     copy->type = strdup(rsc_info->type);
     copy->class = strdup(rsc_info->class);
     /* The provider is the only field treated as optional */
     if (rsc_info->provider) {
         copy->provider = strdup(rsc_info->provider);
     }
 
     return copy;
 }
 
 /*
  * \brief Free a resource info structure and the strings it holds
  *
  * \param[in] rsc_info  Structure to free (NULL is accepted and ignored)
  */
 void
 lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info)
 {
     if (rsc_info != NULL) {
         free(rsc_info->id);
         free(rsc_info->type);
         free(rsc_info->class);
         free(rsc_info->provider);
         free(rsc_info);
     }
 }
 
 /*
  * \internal
  * \brief Query the lrmd for a resource's class, provider and type
  *
  * \param[in] lrmd     Connection to use
  * \param[in] rsc_id   Id of the resource to look up
  * \param[in] options  Call options passed on with the request
  *
  * \return Newly allocated resource info (release it with
  *         lrmd_free_rsc_info()), or NULL if there was no reply, the reply
  *         lacks a class or type (or a provider for class "ocf"), or memory
  *         could not be allocated
  */
 static lrmd_rsc_info_t *
 lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
 {
     lrmd_rsc_info_t *rsc_info = NULL;
     xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
     xmlNode *output = NULL;
     const char *class = NULL;
     const char *provider = NULL;
     const char *type = NULL;
 
     crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
     crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
     lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE);
     free_xml(data);
 
     if (!output) {
         return NULL;
     }
 
     class = crm_element_value(output, F_LRMD_CLASS);
     provider = crm_element_value(output, F_LRMD_PROVIDER);
     type = crm_element_value(output, F_LRMD_TYPE);
 
     if (!class || !type) {
         free_xml(output);
         return NULL;
     } else if (safe_str_eq(class, "ocf") && !provider) {
         free_xml(output);
         return NULL;
     }
 
     rsc_info = calloc(1, sizeof(lrmd_rsc_info_t));
     if (rsc_info != NULL) {
         /* The strings point into the reply, so copy them before it is freed */
         rsc_info->id = strdup(rsc_id);
         rsc_info->class = strdup(class);
         if (provider) {
             rsc_info->provider = strdup(provider);
         }
         rsc_info->type = strdup(type);
     }
 
     free_xml(output);
     return rsc_info;
 }
 
 /*
  * \internal
  * \brief Set the client's event callback
  *
  * \param[in] lrmd      Connection to configure
  * \param[in] callback  Function to store as the connection's callback
  */
 static void
 lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback)
 {
     lrmd_private_t *priv = lrmd->private;
 
     priv->callback = callback;
 }
 
 /*
  * \brief Set the proxy callback and the user data handed to it
  *
  * \param[in] lrmd      Connection to configure
  * \param[in] userdata  Opaque pointer stored for the callback
  * \param[in] callback  Function to store as the proxy callback
  */
 void
 lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg))
 {
     lrmd_private_t *priv = lrmd->private;
 
     priv->proxy_callback_userdata = userdata;
     priv->proxy_callback = callback;
 }
 
 /*
  * \brief Hand an inbound proxy message to the registered proxy callback
  *
  * Does nothing when no proxy callback is set.
  *
  * \param[in] lrmd  Connection the message arrived on
  * \param[in] msg   Message to deliver
  */
 void
 lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg)
 {
     lrmd_private_t *priv = lrmd->private;
 
     if (priv->proxy_callback == NULL) {
         return;
     }
 
     crm_log_xml_trace(msg, "PROXY_INBOUND");
     priv->proxy_callback(lrmd, priv->proxy_callback_userdata, msg);
 }
 
 /*
  * \brief Forward a proxied IPC message to the lrmd without waiting for a reply
  *
  * The message's operation field is set to CRM_OP_IPC_FWD before sending.
  *
  * \param[in]     lrmd  Connection to send on
  * \param[in,out] msg   Message to forward
  *
  * \return -ENOTCONN if \p lrmd is NULL, otherwise the result of the send
  */
 int
 lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg)
 {
     if (!lrmd) {
         return -ENOTCONN;
     }
 
     crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD);
     crm_log_xml_trace(msg, "PROXY_OUTBOUND");
 
     return lrmd_send_xml_no_reply(lrmd, msg);
 }
 
 /*
  * \internal
  * \brief Fetch a fence agent's metadata through the stonith API
  *
  * \param[in]  provider  Agent provider, passed to the stonith metadata call
  * \param[in]  type      Agent type, passed to the stonith metadata call
  * \param[out] output    Receives the metadata text
  *
  * \return pcmk_ok if \p *output is set after the call, -EIO otherwise
  *         (including when no stonith API object could be created)
  */
 static int
 stonith_get_metadata(const char *provider, const char *type, char **output)
 {
     stonith_t *stonith_api = stonith_api_new();
 
     if (stonith_api == NULL) {
         /* Nothing can have been written to *output, so do not let whatever
          * the caller left there decide the result */
         crm_err("Could not create a stonith API object to get metadata for %s", type);
         return -EIO;
     }
 
     stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0);
     stonith_api->cmds->free(stonith_api);
 
     if (*output == NULL) {
         return -EIO;
     }
     return pcmk_ok;
 }
 
 /* printf-style template for the metadata generated for an LSB init script.
  * Its ten %s slots are, in order: agent name, long description, short
  * description, Provides, Required-Start, Required-Stop, Should-Start,
  * Should-Stop, Default-Start and Default-Stop. */
 #define lsb_metadata_template  \
     "<?xml version='1.0'?>\n"                                           \
     "<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n"                 \
     "<resource-agent name='%s' version='0.1'>\n"                        \
     "  <version>1.0</version>\n"                                        \
     "  <longdesc lang='en'>\n"                                          \
     "    %s\n"                                                          \
     "  </longdesc>\n"                                                   \
     "  <shortdesc lang='en'>%s</shortdesc>\n"                           \
     "  <parameters>\n"                                                  \
     "  </parameters>\n"                                                 \
     "  <actions>\n"                                                     \
     "    <action name='meta-data'    timeout='5' />\n"                  \
     "    <action name='start'        timeout='15' />\n"                 \
     "    <action name='stop'         timeout='15' />\n"                 \
     "    <action name='status'       timeout='15' />\n"                 \
     "    <action name='restart'      timeout='15' />\n"                 \
     "    <action name='force-reload' timeout='15' />\n"                 \
     "    <action name='monitor'      timeout='15' interval='15' />\n"   \
     "  </actions>\n"                                                    \
     "  <special tag='LSB'>\n"                                           \
     "    <Provides>%s</Provides>\n"                                     \
     "    <Required-Start>%s</Required-Start>\n"                         \
     "    <Required-Stop>%s</Required-Stop>\n"                           \
     "    <Should-Start>%s</Should-Start>\n"                             \
     "    <Should-Stop>%s</Should-Stop>\n"                               \
     "    <Default-Start>%s</Default-Start>\n"                           \
     "    <Default-Stop>%s</Default-Stop>\n"                             \
     "  </special>\n"                                                    \
     "</resource-agent>\n"
 
 /* Markers delimiting the LSB comment block of an init script */
 #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO"
 #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO"
 /* Line prefixes of the individual LSB header fields */
 #define PROVIDES    "# Provides:"
 #define REQ_START   "# Required-Start:"
 #define REQ_STOP    "# Required-Stop:"
 #define SHLD_START  "# Should-Start:"
 #define SHLD_STOP   "# Should-Stop:"
 #define DFLT_START  "# Default-Start:"
 #define DFLT_STOP   "# Default-Stop:"
 #define SHORT_DSCR  "# Short-Description:"
 #define DESCRIPTION "# Description:"
 
 /* Release a value with xmlFree() and reset the pointer to NULL; does nothing
  * if the pointer is already NULL */
 #define lsb_meta_helper_free_value(m)           \
     do {                                        \
         if ((m) != NULL) {                      \
             xmlFree(m);                         \
             (m) = NULL;                         \
         }                                       \
     } while(0)
 
 /*
  * \internal
  * \brief Extract one LSB header value from a line of an init script
  *
  * The prefix is matched case-insensitively at the start of the line. Only the
  * first match is kept: once \p *value is set, later lines are ignored.
  *
  * \param[in]     line    Line read from LSB init script
  * \param[in/out] value   If still NULL, receives an XML-safe copy of the text
  *                        following the prefix
  * \param[in]     prefix  Header prefix to look for at the start of the line
  *
  * \return TRUE if value was set by this call, FALSE otherwise
  */
 static inline gboolean
 lsb_meta_helper_get_value(const char *line, char **value, const char *prefix)
 {
     size_t prefix_len = 0;
 
     if (*value != NULL) {
         return FALSE;
     }
 
     prefix_len = strlen(prefix);
     if (strncasecmp(line, prefix, prefix_len) != 0) {
         return FALSE;
     }
 
     *value = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(line + prefix_len));
     return TRUE;
 }
 
 static int
 lsb_get_metadata(const char *type, char **output)
 {
     char ra_pathname[PATH_MAX] = { 0, };
     FILE *fp;
     char buffer[1024];
     char *provides = NULL;
     char *req_start = NULL;
     char *req_stop = NULL;
     char *shld_start = NULL;
     char *shld_stop = NULL;
     char *dflt_start = NULL;
     char *dflt_stop = NULL;
     char *s_dscrpt = NULL;
     char *xml_l_dscrpt = NULL;
     int offset = 0;
     int max = 2048;
     char description[max];
 
     if(type[0] == '/') {
         snprintf(ra_pathname, sizeof(ra_pathname), "%s", type);
     } else {
         snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type);
     }
 
     crm_trace("Looking into %s", ra_pathname);
     if (!(fp = fopen(ra_pathname, "r"))) {
         return -errno;
     }
 
     /* Enter into the lsb-compliant comment block */
     while (fgets(buffer, sizeof(buffer), fp)) {
 
         /* Now suppose each of the following eight arguments contain only one line */
         if (lsb_meta_helper_get_value(buffer, &provides, PROVIDES)) {
             continue;
         }
         if (lsb_meta_helper_get_value(buffer, &req_start, REQ_START)) {
             continue;
         }
         if (lsb_meta_helper_get_value(buffer, &req_stop, REQ_STOP)) {
             continue;
         }
         if (lsb_meta_helper_get_value(buffer, &shld_start, SHLD_START)) {
             continue;
         }
         if (lsb_meta_helper_get_value(buffer, &shld_stop, SHLD_STOP)) {
             continue;
         }
         if (lsb_meta_helper_get_value(buffer, &dflt_start, DFLT_START)) {
             continue;
         }
         if (lsb_meta_helper_get_value(buffer, &dflt_stop, DFLT_STOP)) {
             continue;
         }
         if (lsb_meta_helper_get_value(buffer, &s_dscrpt, SHORT_DSCR)) {
             continue;
         }
 
         /* Long description may cross multiple lines */
         if (offset == 0 && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) {
             /* Between # and keyword, more than one space, or a tab
              * character, indicates the continuation line.
              *
              * Extracted from LSB init script standard
              */
             while (fgets(buffer, sizeof(buffer), fp)) {
                 if (!strncmp(buffer, "#  ", 3) || !strncmp(buffer, "#\t", 2)) {
                     buffer[0] = ' ';
                     offset += snprintf(description+offset, max-offset, "%s", buffer);
 
                 } else {
                     fputs(buffer, fp);
                     break;      /* Long description ends */
                 }
             }
             continue;
         }
 
         if (xml_l_dscrpt == NULL && offset > 0) {
             xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(description));
         }
 
         if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) {
             /* Get to the out border of LSB comment block */
             break;
         }
         if (buffer[0] != '#') {
             break;              /* Out of comment block in the beginning */
         }
     }
     fclose(fp);
 
     *output = crm_strdup_printf(lsb_metadata_template, type,
                                 (xml_l_dscrpt == NULL) ? type : xml_l_dscrpt,
                                 (s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides,
                                 (req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop,
                                 (shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop,
                                 (dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? "" : dflt_stop);
 
     lsb_meta_helper_free_value(xml_l_dscrpt);
     lsb_meta_helper_free_value(s_dscrpt);
     lsb_meta_helper_free_value(provides);
     lsb_meta_helper_free_value(req_start);
     lsb_meta_helper_free_value(req_stop);
     lsb_meta_helper_free_value(shld_start);
     lsb_meta_helper_free_value(shld_stop);
     lsb_meta_helper_free_value(dflt_start);
     lsb_meta_helper_free_value(dflt_stop);
 
     crm_trace("Created fake metadata: %d", strlen(*output));
     return pcmk_ok;
 }
 
 #if SUPPORT_NAGIOS
 /*
  * Load the metadata XML file shipped for a nagios plugin.
  *
  * The file NAGIOS_METADATA_DIR/<type>.xml is read in one go into a newly
  * allocated, NUL-terminated buffer.
  *
  * type:   plugin name, i.e. the file name without the ".xml" suffix
  * output: receives the file contents on success (the caller frees it).
  *         It is reset to NULL when the file is empty or the read comes up
  *         short, and is left untouched when the path buffer cannot be
  *         allocated or the file cannot be opened.
  *
  * Returns pcmk_ok on success, -ENOMEM if an allocation fails, -EIO otherwise.
  */
 static int
 nagios_get_metadata(const char *type, char **output)
 {
     int rc = pcmk_ok;
     FILE *file_strm = NULL;
     int start = 0, length = 0, read_len = 0;
     char *metadata_file = NULL;
     int len = 36;               /* head room for the '/', ".xml" and the terminator */
 
     len += strlen(NAGIOS_METADATA_DIR);
     len += strlen(type);
     metadata_file = calloc(1, len);
     CRM_CHECK(metadata_file != NULL, return -ENOMEM);
 
     snprintf(metadata_file, len, "%s/%s.xml", NAGIOS_METADATA_DIR, type);
     file_strm = fopen(metadata_file, "r");
     if (file_strm == NULL) {
         crm_err("Metadata file %s does not exist", metadata_file);
         free(metadata_file);
         return -EIO;
     }
 
     /* see how big the file is */
     start = ftell(file_strm);
     fseek(file_strm, 0L, SEEK_END);
     length = ftell(file_strm);
     /* Rewind to where we began: the offset is 'start', the origin is SEEK_SET */
     fseek(file_strm, start, SEEK_SET);
 
     CRM_ASSERT(length >= 0);
     CRM_ASSERT(start == ftell(file_strm));
 
     if (length <= 0) {
         crm_info("%s was not valid", metadata_file);
         /* NOTE(review): this releases whatever the caller passed in, so it
          * relies on *output being NULL or heap-allocated on entry - confirm
          * against the callers.
          */
         free(*output);
         *output = NULL;
         rc = -EIO;
 
     } else {
         crm_trace("Reading %d bytes from file", length);
         *output = calloc(1, (length + 1));
         if (*output == NULL) {
             /* Never hand a NULL buffer to fread() */
             rc = -ENOMEM;
 
         } else {
             read_len = fread(*output, 1, length, file_strm);
             if (read_len != length) {
                 crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len);
                 free(*output);
                 *output = NULL;
                 rc = -EIO;
             }
         }
     }
 
     fclose(file_strm);
     free(metadata_file);
     return rc;
 }
 #endif
 
 #if SUPPORT_HEARTBEAT
 /* strictly speaking, support for class=heartbeat style scripts
  * does not require "heartbeat support" to be enabled.
  * But since those scripts are part of the "heartbeat" package usually,
  * and are very unlikely to be present in any other deployment,
  * I leave it inside this ifdef.
  *
  * Yes, I know, these are legacy and should die,
  * or at least be rewritten to be a proper OCF style agent.
  * But they exist, and custom scripts following these rules do, too.
  *
  * Taken from the old "glue" lrmd, see
  * http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l49
  * http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l393
  */
 
 /* printf-style template for the metadata generated for heartbeat-class
  * agents.  It takes three %s arguments, in this order: the resource-agent
  * name, the long description and the short description.  Five optional
  * positional parameters ('1' to '5') and a fixed list of actions are always
  * advertised.
  */
 static const char hb_metadata_template[] =
 "<?xml version='1.0'?>\n"
 "<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n"
 "<resource-agent name='%s' version='0.1'>\n"
 "<version>1.0</version>\n"
 "<longdesc lang='en'>\n"
 "%s"
 "</longdesc>\n"
 "<shortdesc lang='en'>%s</shortdesc>\n"
 "<parameters>\n"
 "<parameter name='1' unique='1' required='0'>\n"
 "<longdesc lang='en'>\n"
 "This argument will be passed as the first argument to the "
 "heartbeat resource agent (assuming it supports one)\n"
 "</longdesc>\n"
 "<shortdesc lang='en'>argv[1]</shortdesc>\n"
 "<content type='string' default=' ' />\n"
 "</parameter>\n"
 "<parameter name='2' unique='1' required='0'>\n"
 "<longdesc lang='en'>\n"
 "This argument will be passed as the second argument to the "
 "heartbeat resource agent (assuming it supports one)\n"
 "</longdesc>\n"
 "<shortdesc lang='en'>argv[2]</shortdesc>\n"
 "<content type='string' default=' ' />\n"
 "</parameter>\n"
 "<parameter name='3' unique='1' required='0'>\n"
 "<longdesc lang='en'>\n"
 "This argument will be passed as the third argument to the "
 "heartbeat resource agent (assuming it supports one)\n"
 "</longdesc>\n"
 "<shortdesc lang='en'>argv[3]</shortdesc>\n"
 "<content type='string' default=' ' />\n"
 "</parameter>\n"
 "<parameter name='4' unique='1' required='0'>\n"
 "<longdesc lang='en'>\n"
 "This argument will be passed as the fourth argument to the "
 "heartbeat resource agent (assuming it supports one)\n"
 "</longdesc>\n"
 "<shortdesc lang='en'>argv[4]</shortdesc>\n"
 "<content type='string' default=' ' />\n"
 "</parameter>\n"
 "<parameter name='5' unique='1' required='0'>\n"
 "<longdesc lang='en'>\n"
 "This argument will be passed as the fifth argument to the "
 "heartbeat resource agent (assuming it supports one)\n"
 "</longdesc>\n"
 "<shortdesc lang='en'>argv[5]</shortdesc>\n"
 "<content type='string' default=' ' />\n"
 "</parameter>\n"
 "</parameters>\n"
 "<actions>\n"
 "<action name='start'   timeout='15' />\n"
 "<action name='stop'    timeout='15' />\n"
 "<action name='status'  timeout='15' />\n"
 "<action name='monitor' timeout='15' interval='15' start-delay='15' />\n"
 "<action name='meta-data'  timeout='5' />\n"
 "</actions>\n"
 "<special tag='heartbeat'>\n"
 "</special>\n"
 "</resource-agent>\n";
 
 /*
  * Produce placeholder metadata for a heartbeat-class agent.
  *
  * The agent name is substituted for the name, long description and short
  * description slots of hb_metadata_template.
  *
  * type:   agent name
  * output: receives the generated string (whatever crm_strdup_printf()
  *         returns); the caller frees it
  *
  * Always returns pcmk_ok.
  */
 static int
 heartbeat_get_metadata(const char *type, char **output)
 {
     *output = crm_strdup_printf(hb_metadata_template, type, type, type);
     /* strlen() yields a size_t; cast it so the argument matches %d */
     crm_trace("Created fake metadata: %d", (int) strlen(*output));
     return pcmk_ok;
 }
 #endif
 
 /*
  * Obtain metadata by running the agent's own "meta-data" action.
  *
  * The action is created with a timeout value of 30000, executed
  * synchronously, and whatever it wrote to stdout is copied for the caller.
  *
  * standard, provider, type: identify the agent to run
  * output: receives a copy of the agent's stdout on success (caller frees)
  *
  * Returns pcmk_ok on success, -EIO when the action fails or prints nothing,
  * -ENOMEM when the output cannot be copied.
  */
 static int
 generic_get_metadata(const char *standard, const char *provider, const char *type, char **output)
 {
     int rc = pcmk_ok;
     svc_action_t *action = resources_action_create(type,
                                                    standard,
                                                    provider,
                                                    type,
                                                    "meta-data",
                                                    0,
                                                    30000,
                                                    NULL,
                                                    0);
 
     /* stdout_data is only inspected once the synchronous run has succeeded */
     if (!(services_action_sync(action)) || !action->stdout_data) {
         crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type);
         rc = -EIO;
 
     } else {
         *output = strdup(action->stdout_data);
         if (*output == NULL) {
             /* Do not report success together with a NULL result */
             rc = -ENOMEM;
         }
     }
 
     services_action_free(action);
     return rc;
 }
 
 /*
  * API entry point: fetch the metadata of an agent, dispatching on its class.
  *
  * class and type are mandatory (-EINVAL otherwise).  The "stonith", "lsb"
  * and, when compiled in, "nagios" and "heartbeat" classes have dedicated
  * handlers; every other class goes through generic_get_metadata().
  * The lrmd and options arguments are not used here.
  */
 static int
 lrmd_api_get_metadata(lrmd_t * lrmd,
                       const char *class,
                       const char *provider,
                       const char *type, char **output, enum lrmd_call_options options)
 {
     if ((class == NULL) || (type == NULL)) {
         return -EINVAL;
     }
 
     /* Swap the generic "service" class for whatever
      * resources_find_service_class() reports for this agent
      */
     if (safe_str_eq(class, "service")) {
         class = resources_find_service_class(type);
     }
 
     if (safe_str_eq(class, "stonith")) {
         return stonith_get_metadata(provider, type, output);
     }
 
     if (safe_str_eq(class, "lsb")) {
         return lsb_get_metadata(type, output);
     }
 
 #if SUPPORT_NAGIOS
     if (safe_str_eq(class, "nagios")) {
         return nagios_get_metadata(type, output);
     }
 #endif
 
 #if SUPPORT_HEARTBEAT
     if (safe_str_eq(class, "heartbeat")) {
         return heartbeat_get_metadata(type, output);
     }
 #endif
 
     return generic_get_metadata(class, provider, type, output);
 }
 
 /*
  * API entry point: send an LRMD_OP_RSC_EXEC request for a resource action.
  *
  * The request carries the resource id, action name, user data, interval,
  * timeout and start delay; each entry of params is added beneath an
  * XML_TAG_ATTRS child via hash2smartfield().  The params list is released
  * with lrmd_key_value_freeall() before returning, whatever the outcome.
  *
  * Returns the result of lrmd_send_command().
  */
 static int
 lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval,        /* ms */
               int timeout,      /* ms */
               int start_delay,  /* ms */
               enum lrmd_call_options options, lrmd_key_value_t * params)
 {
     int rc = pcmk_ok;
     lrmd_key_value_t *param = params;
     xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
     xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
 
     crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
     crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
     crm_xml_add(data, F_LRMD_RSC_ACTION, action);
     crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata);
     crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval);
     crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout);
     crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay);
 
     while (param != NULL) {
         hash2smartfield((gpointer) param->key, (gpointer) param->value, args);
         param = param->next;
     }
 
     rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE);
 
     free_xml(data);
     lrmd_key_value_freeall(params);
     return rc;
 }
 
 /*
  * API entry point: send an LRMD_OP_RSC_CANCEL request for the operation
  * identified by resource id, action name and interval.
  *
  * Returns the result of lrmd_send_command().
  */
 static int
 lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval)
 {
     int result = pcmk_ok;
     xmlNode *request = create_xml_node(NULL, F_LRMD_RSC);
 
     crm_xml_add(request, F_LRMD_ORIGIN, __FUNCTION__);
     crm_xml_add(request, F_LRMD_RSC_ACTION, action);
     crm_xml_add(request, F_LRMD_RSC_ID, rsc_id);
     crm_xml_add_int(request, F_LRMD_RSC_INTERVAL, interval);
 
     result = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, request, NULL, 0, 0, TRUE);
 
     free_xml(request);
     return result;
 }
 
 /*
  * Count the agents reported by the stonith API and optionally collect them.
  *
  * resources: when non-NULL, each agent name is appended to this list
  *
  * Returns the number of agents seen (0 when no stonith API object could be
  * created).
  */
 static int
 list_stonith_agents(lrmd_list_t ** resources)
 {
     int count = 0;
     stonith_key_value_t *agents = NULL;
     stonith_key_value_t *entry = NULL;
     stonith_t *st = stonith_api_new();
 
     if (st != NULL) {
         st->cmds->list_agents(st, st_opt_sync_call, NULL, &agents, 0);
         st->cmds->free(st);
     }
 
     entry = agents;
     while (entry != NULL) {
         count++;
         if (resources != NULL) {
             *resources = lrmd_list_add(*resources, entry->value);
         }
         entry = entry->next;
     }
 
     stonith_key_value_freeall(agents, 1, 0);
     return count;
 }
 
 /*
  * API entry point: list the agents of a class (and optionally a provider).
  *
  * resources: list that the agent names are appended to
  * class:     class to list; "stonith" lists only stonith agents, NULL lists
  *            the agents returned by resources_list_agents() and the stonith
  *            agents as well
  * provider:  passed through to resources_list_agents()
  *
  * Returns the number of agents added, or -EPROTONOSUPPORT if there are none.
  */
 static int
 lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class,
                      const char *provider)
 {
     int rc = 0;
 
     if (safe_str_eq(class, "stonith")) {
         rc += list_stonith_agents(resources);
 
     } else {
         GListPtr gIter = NULL;
         GList *agents = resources_list_agents(class, provider);
 
         for (gIter = agents; gIter != NULL; gIter = gIter->next) {
             *resources = lrmd_list_add(*resources, (const char *)gIter->data);
             rc++;
         }
         g_list_free_full(agents, free);
 
         if (!class) {
             rc += list_stonith_agents(resources);
         }
     }
 
     if (rc == 0) {
         /* class may legitimately be NULL here; never pass NULL to %s */
         crm_notice("No agents found for class %s", class ? class : "<NULL>");
         rc = -EPROTONOSUPPORT;
     }
     return rc;
 }
 
 /*
  * Tell whether 'agent' is among the agents that resources_list_agents()
  * returns for the given class and provider.
  *
  * Returns 1 if it is, 0 otherwise.
  */
 static int
 does_provider_have_agent(const char *agent, const char *provider, const char *class)
 {
     int found = 0;
     GList *agents = NULL;
     GListPtr gIter2 = NULL;
 
     agents = resources_list_agents(class, provider);
     for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) {
         if (safe_str_eq(agent, gIter2->data)) {
             found = 1;
             break;              /* one match is enough, skip the rest */
         }
     }
     g_list_free_full(agents, free);
 
     return found;
 }
 
 /*
  * API entry point: list OCF providers.
  *
  * agent:     when non-NULL, only providers that ship this agent are listed
  * providers: list that the provider names are appended to
  *
  * Returns the number of providers added.
  */
 static int
 lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers)
 {
     int count = pcmk_ok;
     GList *all_providers = resources_list_providers("ocf");
     GListPtr item = all_providers;
 
     while (item != NULL) {
         char *name = item->data;
 
         if ((agent == NULL) || does_provider_have_agent(agent, name, "ocf")) {
             *providers = lrmd_list_add(*providers, (const char *)item->data);
             count++;
         }
         item = item->next;
     }
 
     g_list_free_full(all_providers, free);
     return count;
 }
 
 /*
  * API entry point: list the supported resource standards.
  *
  * Everything returned by resources_list_standards() is appended to
  * 'supported'; "stonith" is appended too when at least one stonith agent
  * can be listed.
  *
  * Returns the number of entries added.
  */
 static int
 lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported)
 {
     int count = 0;
     GList *standards = resources_list_standards();
     GListPtr item = standards;
 
     while (item != NULL) {
         *supported = lrmd_list_add(*supported, (const char *)item->data);
         count++;
         item = item->next;
     }
 
     if (list_stonith_agents(NULL) > 0) {
         *supported = lrmd_list_add(*supported, "stonith");
         count++;
     }
 
     g_list_free_full(standards, free);
     return count;
 }
 
 lrmd_t *
 lrmd_api_new(void)
 {
     lrmd_t *new_lrmd = NULL;
     lrmd_private_t *pvt = NULL;
 
     new_lrmd = calloc(1, sizeof(lrmd_t));
     pvt = calloc(1, sizeof(lrmd_private_t));
     pvt->remote = calloc(1, sizeof(crm_remote_t));
     new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t));
 
     pvt->type = CRM_CLIENT_IPC;
     new_lrmd->private = pvt;
 
     new_lrmd->cmds->connect = lrmd_api_connect;
     new_lrmd->cmds->connect_async = lrmd_api_connect_async;
     new_lrmd->cmds->is_connected = lrmd_api_is_connected;
     new_lrmd->cmds->poke_connection = lrmd_api_poke_connection;
     new_lrmd->cmds->disconnect = lrmd_api_disconnect;
     new_lrmd->cmds->register_rsc = lrmd_api_register_rsc;
     new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc;
     new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info;
     new_lrmd->cmds->set_callback = lrmd_api_set_callback;
     new_lrmd->cmds->get_metadata = lrmd_api_get_metadata;
     new_lrmd->cmds->exec = lrmd_api_exec;
     new_lrmd->cmds->cancel = lrmd_api_cancel;
     new_lrmd->cmds->list_agents = lrmd_api_list_agents;
     new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers;
     new_lrmd->cmds->list_standards = lrmd_api_list_standards;
 
     return new_lrmd;
 }
 
 /*
  * Create an lrmd API object of type CRM_CLIENT_TLS for a remote node.
  *
  * nodename, server: at least one must be given; a missing one defaults to
  *                   the other
  * port:             0 selects the PCMK_remote_port environment variable or,
  *                   failing that, DEFAULT_REMOTE_PORT
  *
  * Returns the new object, or NULL when both names are NULL or the build has
  * no GnuTLS support.
  */
 lrmd_t *
 lrmd_remote_api_new(const char *nodename, const char *server, int port)
 {
 #ifdef HAVE_GNUTLS_GNUTLS_H
     lrmd_t *remote_lrmd = lrmd_api_new();
     lrmd_private_t *pvt = remote_lrmd->private;
 
     if ((nodename == NULL) && (server == NULL)) {
         lrmd_api_delete(remote_lrmd);
         return NULL;
     }
 
     pvt->type = CRM_CLIENT_TLS;
     pvt->remote_nodename = strdup(nodename ? nodename : server);
     pvt->server = strdup(server ? server : nodename);
     pvt->port = port;
 
     if (pvt->port == 0) {
         const char *env_port = getenv("PCMK_remote_port");
 
         if (env_port != NULL) {
             pvt->port = atoi(env_port);
         } else {
             pvt->port = DEFAULT_REMOTE_PORT;
         }
     }
 
     return remote_lrmd;
 #else
     crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created");
     return NULL;
 #endif
 
 }
 
 /*
  * Disconnect and release an lrmd API object together with its operations
  * table and private data.  Passing NULL is allowed and does nothing.
  */
 void
 lrmd_api_delete(lrmd_t * lrmd)
 {
     lrmd_private_t *pvt = NULL;
 
     if (lrmd == NULL) {
         return;
     }
 
     lrmd->cmds->disconnect(lrmd);       /* no-op if already disconnected */
     free(lrmd->cmds);
 
     pvt = lrmd->private;
     if (pvt != NULL) {
 #ifdef HAVE_GNUTLS_GNUTLS_H
         free(pvt->server);
 #endif
         free(pvt->remote_nodename);
         free(pvt->remote);
         free(pvt);
     }
 
     free(lrmd);
 }
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 2f2a93a636..5fd7a4b762 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -1,2232 +1,2232 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <crm_internal.h>
 #include <crm/crm.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 #include <crm/common/util.h>
 
 #include <glib.h>
 
 #include <crm/pengine/rules.h>
 #include <crm/pengine/internal.h>
 
 pe_working_set_t *pe_dataset = NULL;
 
 extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root);
 void print_str_str(gpointer key, gpointer value, gpointer user_data);
 gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data);
 void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
                       pe_working_set_t * data_set);
 static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key,
                                          gboolean include_disabled);
 static gboolean is_rsc_baremetal_remote_node(resource_t *rsc, pe_working_set_t * data_set);
 
 /*
  * Decide whether fencing is currently possible.
  *
  * Fencing must be enabled and a stonith resource must exist.  Beyond that it
  * is allowed with quorum, with no-quorum-policy "ignore", or - without
  * quorum - for a given node that is online.
  *
  * node: node that would be fenced; may be NULL
  */
 bool pe_can_fence(pe_working_set_t * data_set, node_t *node)
 {
     if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
         return FALSE; /* Turned off */
     }
 
     if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) {
         return FALSE; /* No devices */
     }
 
     if (is_set(data_set->flags, pe_flag_have_quorum)
         || (data_set->no_quorum_policy == no_quorum_ignore)) {
         return TRUE;
     }
 
     if (node == NULL) {
         return FALSE;
     }
 
     if (node->details->online) {
         crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname);
         return TRUE;
     }
 
     crm_trace("Cannot fence %s", node->details->uname);
     return FALSE;
 }
 
 /*
  * Make a shallow copy of a node.
  *
  * weight, fixed and rsc_discover_mode are copied; the details pointer is
  * shared with the original rather than duplicated.
  *
  * Returns the new node, or NULL if this_node is NULL.
  */
 node_t *
 node_copy(node_t * this_node)
 {
     node_t *copy = NULL;
 
     CRM_CHECK(this_node != NULL, return NULL);
 
     copy = calloc(1, sizeof(node_t));
     CRM_ASSERT(copy != NULL);
 
     crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, copy);
 
     copy->details = this_node->details;
     copy->fixed = this_node->fixed;
     copy->weight = this_node->weight;
     copy->rsc_discover_mode = this_node->rsc_discover_mode;
 
     return copy;
 }
 
 /*
  * Ban every node that is not present in both 'hash' and 'list'.
  *
  * Nodes in 'hash' that are missing from 'list' get a weight of -INFINITY;
  * when merge_scores is set, nodes present in both have the list weight
  * merged into theirs.  Nodes found only in 'list' are copied into 'hash'
  * with a weight of -INFINITY.
  */
 void
 node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores)
 {
     GHashTableIter iter;
     node_t *hashed = NULL;
     GListPtr item = NULL;
 
     /* Pass 1: adjust the nodes that are already in the table */
     g_hash_table_iter_init(&iter, hash);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&hashed)) {
         node_t *listed = pe_find_node_id(list, hashed->details->id);
 
         if (listed == NULL) {
             hashed->weight = -INFINITY;
 
         } else if (merge_scores) {
             hashed->weight = merge_weights(hashed->weight, listed->weight);
         }
     }
 
     /* Pass 2: add banned copies of the nodes that only the list knows about */
     for (item = list; item != NULL; item = item->next) {
         node_t *listed = (node_t *) item->data;
         node_t *known = pe_hash_table_lookup(hash, listed->details->id);
 
         if (known == NULL) {
             node_t *banned = node_copy(listed);
 
             banned->weight = -INFINITY;
             g_hash_table_insert(hash, (gpointer) banned->details->id, banned);
         }
     }
 }
 
 /*
  * Build a hash table holding a copy of every node in 'list', keyed by the
  * node id.  The table is created with g_hash_destroy_str as its value
  * destructor and no key destructor.
  */
 GHashTable *
 node_hash_from_list(GListPtr list)
 {
     GListPtr item = NULL;
     GHashTable *table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str);
 
     for (item = list; item != NULL; item = item->next) {
         node_t *copy = node_copy((node_t *) item->data);
 
         g_hash_table_insert(table, (gpointer) copy->details->id, copy);
     }
 
     return table;
 }
 
 /*
  * Duplicate a list of nodes.
  *
  * list1:  nodes to copy (each one via node_copy())
  * reset:  when set, every copy starts with a weight of 0
  * filter: when set, nodes with a negative weight are left out
  *
  * Entries that cannot be copied are skipped.  The copies are prepended, so
  * the result is in reverse order relative to list1.
  */
 GListPtr
 node_list_dup(GListPtr list1, gboolean reset, gboolean filter)
 {
     GListPtr result = NULL;
     GListPtr gIter = list1;
 
     for (; gIter != NULL; gIter = gIter->next) {
         node_t *new_node = NULL;
         node_t *this_node = (node_t *) gIter->data;
 
         /* A NULL entry has no weight to test; let node_copy() reject it */
         if (filter && this_node != NULL && this_node->weight < 0) {
             continue;
         }
 
         new_node = node_copy(this_node);
         if (new_node == NULL) {
             /* Check before touching the copy, not after */
             continue;
         }
 
         if (reset) {
             new_node->weight = 0;
         }
         result = g_list_prepend(result, new_node);
     }
 
     return result;
 }
 
 /* List comparison callback: order two nodes by strcmp() of their unames */
 gint
 sort_node_uname(gconstpointer a, gconstpointer b)
 {
     const node_t *lhs = a;
     const node_t *rhs = b;
     const char *lhs_name = lhs->details->uname;
     const char *rhs_name = rhs->details->uname;
 
     return strcmp(lhs_name, rhs_name);
 }
 
 /*
  * Print or log node scores, recursing into the children of 'rsc'.
  *
  * level:   0 prints to stdout sorted by node name; any other value logs at
  *          LOG_TRACE on behalf of the file/function/line given
  * rsc:     when non-NULL, its allowed_nodes are dumped instead of 'nodes'
  *          (nothing at all is shown for orphans)
  * comment: prefix for every line of output
  * nodes:   table to dump when no resource is given
  */
 void
 dump_node_scores_worker(int level, const char *file, const char *function, int line,
                         resource_t * rsc, const char *comment, GHashTable * nodes)
 {
     /* This function is called a whole lot, use stack allocated score */
     char score[128];
     int len = sizeof(score);
     GListPtr gIter = NULL;
     GHashTable *hash = (rsc != NULL) ? rsc->allowed_nodes : nodes;
 
     if ((rsc != NULL) && is_set(rsc->flags, pe_rsc_orphan)) {
         /* Don't show the allocation scores for orphans */
         return;
     }
 
     if (level == 0) {
         /* For now we want this in sorted order to keep the regression tests happy */
         GListPtr sorted = g_hash_table_get_values(hash);
 
         sorted = g_list_sort(sorted, sort_node_uname);
 
         for (gIter = sorted; gIter != NULL; gIter = gIter->next) {
             node_t *entry = (node_t *) gIter->data;
 
             score2char_stack(entry->weight, score, len);
 
             if (rsc == NULL) {
                 printf("%s: %s = %s\n", comment, entry->details->uname, score);
             } else {
                 printf("%s: %s allocation score on %s: %s\n",
                        comment, rsc->id, entry->details->uname, score);
             }
         }
 
         g_list_free(sorted);
 
     } else if (hash != NULL) {
         GHashTableIter iter;
         node_t *entry = NULL;
 
         g_hash_table_iter_init(&iter, hash);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
             score2char_stack(entry->weight, score, len);
 
             if (rsc == NULL) {
                 do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment,
                                  entry->details->uname, score);
             } else {
                 do_crm_log_alias(LOG_TRACE, file, function, line,
                                  "%s: %s allocation score on %s: %s", comment, rsc->id,
                                  entry->details->uname, score);
             }
         }
     }
 
     if ((rsc != NULL) && (rsc->children != NULL)) {
         for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
             resource_t *child = (resource_t *) gIter->data;
 
             dump_node_scores_worker(level, file, function, line, child, comment, nodes);
         }
     }
 }
 
 /*
  * Hash table iteration callback: append " key=value" to the string that
  * user_data (a char **) points at, replacing it with a newly allocated one.
  */
 static void
 append_dump_text(gpointer key, gpointer value, gpointer user_data)
 {
     char **dump_text = user_data;
     /* previous text + " " + key + "=" + value + terminator */
     int needed = strlen(*dump_text) + strlen(" ") + strlen(key) + strlen("=") + strlen(value) + 1;
     char *extended = calloc(1, needed);
 
     sprintf(extended, "%s %s=%s", *dump_text, (char *)key, (char *)value);
 
     free(*dump_text);
     *dump_text = extended;
 }
 
 /*
  * Show a node's utilization attributes on one line, in the form
  * "<comment>: <uname> capacity: key=value ...".
  *
  * level: 0 writes the line to stdout, anything else sends it to crm_trace()
  */
 void
 dump_node_capacity(int level, const char *comment, node_t * node)
 {
     const char *uname = node->details->uname;
     int len = strlen(comment) + strlen(": ") + strlen(uname) + strlen(" capacity:") + 1;
     char *dump_text = calloc(1, len);
 
     sprintf(dump_text, "%s: %s capacity:", comment, uname);
 
     /* append_dump_text() reallocates dump_text for every attribute */
     g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text);
 
     if (level != 0) {
         crm_trace("%s", dump_text);
     } else {
         fprintf(stdout, "%s\n", dump_text);
     }
 
     free(dump_text);
 }
 
 /*
  * Show a resource's utilization attributes on one line, in the form
  * "<comment>: <rsc id> utilization on <uname>: key=value ...".
  *
  * level: 0 writes the line to stdout, anything else sends it to crm_trace()
  */
 void
 dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node)
 {
     const char *uname = node->details->uname;
     int len = strlen(comment) + strlen(": ") + strlen(rsc->id) + strlen(" utilization on ")
         + strlen(uname) + strlen(":") + 1;
     char *dump_text = calloc(1, len);
 
     sprintf(dump_text, "%s: %s utilization on %s:", comment, rsc->id, uname);
 
     /* append_dump_text() reallocates dump_text for every attribute */
     g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text);
 
     if (level != 0) {
         crm_trace("%s", dump_text);
     } else {
         fprintf(stdout, "%s\n", dump_text);
     }
 
     free(dump_text);
 }
 
 /*
  * List comparison callback: order resources by descending sort_index.
  * NULL entries compare equal to each other and sort after everything else.
  */
 gint
 sort_rsc_index(gconstpointer a, gconstpointer b)
 {
     const resource_t *lhs = (const resource_t *)a;
     const resource_t *rhs = (const resource_t *)b;
 
     if (a == NULL) {
         return (b == NULL) ? 0 : 1;
     }
     if (b == NULL) {
         return -1;
     }
 
     if (lhs->sort_index > rhs->sort_index) {
         return -1;
     }
     return (lhs->sort_index < rhs->sort_index) ? 1 : 0;
 }
 
 /*
  * List comparison callback: order resources by descending priority.
  * NULL entries compare equal to each other and sort after everything else.
  */
 gint
 sort_rsc_priority(gconstpointer a, gconstpointer b)
 {
     const resource_t *lhs = (const resource_t *)a;
     const resource_t *rhs = (const resource_t *)b;
 
     if (a == NULL) {
         return (b == NULL) ? 0 : 1;
     }
     if (b == NULL) {
         return -1;
     }
 
     if (lhs->priority > rhs->priority) {
         return -1;
     }
     return (lhs->priority < rhs->priority) ? 1 : 0;
 }
 
 /*
  * Find or create the action identified by 'key' and refresh its optional
  * and runnable flags.
  *
  * rsc:         resource the action belongs to, or NULL
  * key:         action key; this function takes ownership and frees it on
  *              every path where it is non-NULL
  * task:        task name, copied into a newly created action
  * on_node:     node the action is for, or NULL; a new action gets its own
  *              copy of it
  * optional:    whether the caller considers the action optional
  * save_action: when set, existing actions are searched first and a newly
  *              created action is recorded in the working set (and in the
  *              resource's action list, if there is one)
  * data_set:    working set
  *
  * Returns the existing or newly allocated action, or NULL if key or task
  * is NULL.
  */
 action_t *
 custom_action(resource_t * rsc, char *key, const char *task,
               node_t * on_node, gboolean optional, gboolean save_action,
               pe_working_set_t * data_set)
 {
     action_t *action = NULL;
     GListPtr possible_matches = NULL;
 
     CRM_CHECK(key != NULL, return NULL);
     CRM_CHECK(task != NULL, free(key); return NULL);
 
     /* Only saved actions are looked up; otherwise a new object is always built */
     if (save_action && rsc != NULL) {
         possible_matches = find_actions(rsc->actions, key, on_node);
     } else if(save_action) {
 #if 0
         action = g_hash_table_lookup(data_set->singletons, key);
 #else
         /* More expensive but takes 'node' into account */
         possible_matches = find_actions(data_set->actions, key, on_node);
 #endif
     }
 
     /* Created on first use; no key or value destructors are registered */
     if(data_set->singletons == NULL) {
         data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);
     }
 
     if (possible_matches != NULL) {
         if (g_list_length(possible_matches) > 1) {
             pe_warn("Action %s for %s on %s exists %d times",
                     task, rsc ? rsc->id : "<NULL>",
                     on_node ? on_node->details->uname : "<NULL>", g_list_length(possible_matches));
         }
 
         /* The first match is the one that gets reused */
         action = g_list_nth_data(possible_matches, 0);
         pe_rsc_trace(rsc, "Found existing action (%d) %s for %s on %s",
                      action->id, task, rsc ? rsc->id : "<NULL>",
                      on_node ? on_node->details->uname : "<NULL>");
         g_list_free(possible_matches);
     }
 
     /* Nothing to reuse: allocate and initialise a new action */
     if (action == NULL) {
         if (save_action) {
             pe_rsc_trace(rsc, "Creating%s action %d: %s for %s on %s %d",
-                         optional ? "" : " manditory", data_set->action_id, key,
+                         optional ? "" : " mandatory", data_set->action_id, key,
                          rsc ? rsc->id : "<NULL>", on_node ? on_node->details->uname : "<NULL>", optional);
         }
 
         action = calloc(1, sizeof(action_t));
         /* Unsaved actions all share id 0 */
         if (save_action) {
             action->id = data_set->action_id++;
         } else {
             action->id = 0;
         }
         action->rsc = rsc;
         CRM_ASSERT(task != NULL);
         action->task = strdup(task);
         if (on_node) {
             action->node = node_copy(on_node);
         }
         /* Private copy: 'key' itself is freed before returning */
         action->uuid = strdup(key);
 
         pe_set_action_bit(action, pe_action_runnable);
         if (optional) {
             pe_rsc_trace(rsc, "Set optional on %s", action->uuid);
             pe_set_action_bit(action, pe_action_optional);
         } else {
             pe_clear_action_bit(action, pe_action_optional);
             pe_rsc_trace(rsc, "Unset optional on %s", action->uuid);
         }
 
 /*
   Implied by calloc()...
   action->actions_before   = NULL;
   action->actions_after    = NULL;
 
   action->pseudo     = FALSE;
   action->dumped     = FALSE;
   action->processed  = FALSE;
   action->seen_count = 0;
 */
 
         action->extra = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free);
 
         action->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free);
 
         if (save_action) {
             data_set->actions = g_list_prepend(data_set->actions, action);
             /* Saved actions without a resource are also indexed by uuid */
             if(rsc == NULL) {
                 g_hash_table_insert(data_set->singletons, action->uuid, action);
             }
         }
 
         if (rsc != NULL) {
             action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE);
 
             unpack_operation(action, action->op_entry, rsc->container, data_set);
 
             if (save_action) {
                 rsc->actions = g_list_prepend(rsc->actions, action);
             }
         }
 
         if (save_action) {
             pe_rsc_trace(rsc, "Action %d created", action->id);
         }
     }
 
     /* Applies to reused actions too: a mandatory request clears the flag */
     if (optional == FALSE) {
         pe_rsc_trace(rsc, "Unset optional on %s", action->uuid);
         pe_clear_action_bit(action, pe_action_optional);
     }
 
     /* The runnable evaluation below is done for resource actions only */
     if (rsc != NULL) {
         enum action_tasks a_task = text2task(action->task);
         int warn_level = LOG_TRACE;
 
         /* Unrunnable saved actions are reported at warning level */
         if (save_action) {
             warn_level = LOG_WARNING;
         }
 
         /* Unpack the operation's attribute sets once per action */
         if (is_set(action->flags, pe_action_have_node_attrs) == FALSE
             && action->node != NULL && action->op_entry != NULL) {
             pe_set_action_bit(action, pe_action_have_node_attrs);
             unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS,
                                        action->node->details->attrs,
                                        action->extra, NULL, FALSE, data_set->now);
         }
 
         /* The first condition that matches decides the runnable state */
         if (is_set(action->flags, pe_action_pseudo)) {
             /* leave untouched */
 
         } else if (action->node == NULL) {
             pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid);
             pe_clear_action_bit(action, pe_action_runnable);
 
         } else if (is_not_set(rsc->flags, pe_rsc_managed)
                    && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL) {
             crm_debug("Action %s (unmanaged)", action->uuid);
             pe_rsc_trace(rsc, "Set optional on %s", action->uuid);
             pe_set_action_bit(action, pe_action_optional);
 /*   			action->runnable = FALSE; */
 
         } else if (action->node->details->online == FALSE) {
             pe_clear_action_bit(action, pe_action_runnable);
             do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)",
                        action->uuid, action->node->details->uname);
             /* A saved stop of a managed resource on an offline node requests fencing */
             if (is_set(action->rsc->flags, pe_rsc_managed)
                 && save_action && a_task == stop_rsc) {
                 pe_fence_node(data_set, action->node, "Node is unclean");
             }
 
         } else if (action->node->details->pending) {
             pe_clear_action_bit(action, pe_action_runnable);
             do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)",
                        action->uuid, action->node->details->uname);
 
         } else if (action->needs == rsc_req_nothing) {
             pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid);
             pe_set_action_bit(action, pe_action_runnable);
 #if 0
             /*
              * No point checking this
              * - if we dont have quorum we can't stonith anyway
              */
         } else if (action->needs == rsc_req_stonith) {
             crm_trace("Action %s requires only stonith", action->uuid);
             action->runnable = TRUE;
 #endif
         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE
                    && data_set->no_quorum_policy == no_quorum_stop) {
             pe_clear_action_bit(action, pe_action_runnable);
             crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid);
 
         } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE
                    && data_set->no_quorum_policy == no_quorum_freeze) {
             pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role));
             /* Blocked if the resource is not active or its next role is greater than its current one */
             if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) {
                 pe_clear_action_bit(action, pe_action_runnable);
                 pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)",
                              action->node->details->uname, action->uuid);
             }
 
         } else {
             pe_rsc_trace(rsc, "Action %s is runnable", action->uuid);
             pe_set_action_bit(action, pe_action_runnable);
         }
 
         /* Reflect saved stop/start actions in the resource's own flags */
         if (save_action) {
             switch (a_task) {
                 case stop_rsc:
                     set_bit(rsc->flags, pe_rsc_stopping);
                     break;
                 case start_rsc:
                     clear_bit(rsc->flags, pe_rsc_starting);
                     if (is_set(action->flags, pe_action_runnable)) {
                         set_bit(rsc->flags, pe_rsc_starting);
                     }
                     break;
                 default:
                     break;
             }
         }
     }
 
     free(key);
     return action;
 }
 
 /*
  * Determine the raw on-fail setting that applies to an action.
  *
  * Returns the value stored in the action's meta table, with two exceptions:
  *  - "standby" on a stop action is rejected (a configuration error is
  *    logged and NULL is returned);
  *  - a demote action without an explicit setting inherits the on-fail value
  *    of the first enabled, recurring "monitor" op with role "Master" that
  *    defines one.
  *
  * The returned string is owned by the meta table or the resource XML.
  */
 static const char *
 unpack_operation_on_fail(action_t * action)
 {
     xmlNode *op_xml = NULL;
     const char *configured = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
 
     if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(configured, "standby")) {
         crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id);
         return NULL;
     }
 
     /* Only a demote without its own setting needs the monitor lookup */
     if (configured != NULL || !safe_str_eq(action->task, CRMD_ACTION_DEMOTE)) {
         return configured;
     }
 
     CRM_CHECK(action->rsc != NULL, return NULL);
 
     for (op_xml = __xml_first_child(action->rsc->ops_xml); op_xml != NULL;
          op_xml = __xml_next_element(op_xml)) {
 
         const char *op_on_fail = NULL;
         const char *op_enabled = NULL;
 
         if (!crm_str_eq((const char *)op_xml->name, "op", TRUE)) {
             continue;
         }
 
         op_on_fail = crm_element_value(op_xml, XML_OP_ATTR_ON_FAIL);
         if (!op_on_fail) {
             continue;
         }
 
         op_enabled = crm_element_value(op_xml, "enabled");
         if (op_enabled && !crm_is_true(op_enabled)) {
             continue;
         }
 
         if (safe_str_neq(crm_element_value(op_xml, "name"), "monitor")
             || safe_str_neq(crm_element_value(op_xml, "role"), "Master")) {
             continue;
         }
 
         if (crm_get_interval(crm_element_value(op_xml, XML_LRM_ATTR_INTERVAL)) <= 0) {
             continue;
         }
 
         /* First qualifying master monitor wins */
         return op_on_fail;
     }
 
     return NULL;
 }
 
 /*
  * Find the configured status (monitor) operation with the smallest
  * non-negative interval.
  *
  * Operations whose "enabled" attribute is not true are skipped unless
  * include_disabled is set. Returns NULL when nothing qualifies; on ties the
  * earliest entry is kept.
  */
 static xmlNode *
 find_min_interval_mon(resource_t * rsc, gboolean include_disabled)
 {
     int shortest = -1;
     xmlNode *best = NULL;
     xmlNode *candidate = NULL;
 
     for (candidate = __xml_first_child(rsc->ops_xml); candidate != NULL;
          candidate = __xml_next_element(candidate)) {
 
         int candidate_interval = 0;
         const char *enabled = NULL;
 
         if (!crm_str_eq((const char *)candidate->name, "op", TRUE)) {
             continue;
         }
 
         enabled = crm_element_value(candidate, "enabled");
         if (!include_disabled && enabled && crm_is_true(enabled) == FALSE) {
             continue;
         }
 
         if (safe_str_neq(crm_element_value(candidate, "name"), RSC_STATUS)) {
             continue;
         }
 
         candidate_interval = crm_get_interval(crm_element_value(candidate, XML_LRM_ATTR_INTERVAL));
         if (candidate_interval < 0) {
             continue;
         }
 
         if (shortest < 0 || candidate_interval < shortest) {
             shortest = candidate_interval;
             best = candidate;
         }
     }
 
     return best;
 }
 
 /*
  * Fill in an action's meta table and derived settings from configuration.
  *
  * In order, this:
  *  1. loads op_defaults, the attributes of xml_obj (if any) and its
  *     meta/instance attribute sets into action->meta;
  *  2. normalises the interval entry (removed when not positive);
  *  3. derives action->needs from "requires" or the resource flags;
  *  4. derives action->on_fail and action->fail_role, applying defaults;
  *  5. normalises the start delay (explicit, or computed from the interval
  *     origin) and the timeout, which has the start delay added to it.
  *
  * action->rsc must be set; xml_obj and container may be NULL.
  */
 void
 unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
                  pe_working_set_t * data_set)
 {
     int value_i = 0;
     unsigned long long interval = 0;
     unsigned long long start_delay = 0;
     char *value_ms = NULL;
     const char *value = NULL;
     const char *field = NULL;
 
     CRM_CHECK(action->rsc != NULL, return);
 
     unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL,
                                action->meta, NULL, FALSE, data_set->now);
 
     if (xml_obj) {
         xmlAttrPtr xIter = NULL;
 
         /* Copy every XML attribute of the op definition into the meta table */
         for (xIter = xml_obj->properties; xIter; xIter = xIter->next) {
             const char *prop_name = (const char *)xIter->name;
             const char *prop_value = crm_element_value(xml_obj, prop_name);
 
             g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value));
         }
     }
 
     unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS,
                                NULL, action->meta, NULL, FALSE, data_set->now);
 
     unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS,
                                NULL, action->meta, NULL, FALSE, data_set->now);
     g_hash_table_remove(action->meta, "id");
 
     field = XML_LRM_ATTR_INTERVAL;
     value = g_hash_table_lookup(action->meta, field);
     if (value != NULL) {
         interval = crm_get_interval(value);
         if (interval > 0) {
             /* The table takes ownership of value_ms */
             value_ms = crm_itoa(interval);
             g_hash_table_replace(action->meta, strdup(field), value_ms);
 
         } else {
             g_hash_table_remove(action->meta, field);
         }
     }
 
     /* Begin compatibility code */
     value = g_hash_table_lookup(action->meta, "requires");
 
     if (safe_str_neq(action->task, RSC_START)
         && safe_str_neq(action->task, RSC_PROMOTE)) {
         action->needs = rsc_req_nothing;
         value = "nothing (not start/promote)";
 
     } else if (safe_str_eq(value, "nothing")) {
         action->needs = rsc_req_nothing;
 
     } else if (safe_str_eq(value, "quorum")) {
         action->needs = rsc_req_quorum;
 
     } else if (safe_str_eq(value, "unfencing")) {
         action->needs = rsc_req_stonith;
         set_bit(action->rsc->flags, pe_rsc_needs_unfencing);
         /* The notice only makes sense when fencing really is disabled */
         if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
             crm_notice("%s requires (un)fencing but fencing is disabled", action->rsc->id);
         }
 
     } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
                && safe_str_eq(value, "fencing")) {
         action->needs = rsc_req_stonith;
         /* NOTE(review): this check cannot succeed because the enclosing
          * branch already requires fencing to be enabled - confirm which of
          * the two conditions was intended. */
         if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
             crm_notice("%s requires fencing but fencing is disabled", action->rsc->id);
         }
         /* End compatibility code */
 
     } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) {
         action->needs = rsc_req_stonith;
         value = "fencing (resource)";
 
     } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) {
         action->needs = rsc_req_quorum;
         value = "quorum (resource)";
 
     } else {
         action->needs = rsc_req_nothing;
         value = "nothing (resource)";
     }
 
     pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->task, value);
 
     value = unpack_operation_on_fail(action);
 
     /* From here on, 'value' doubles as the human-readable description that
      * is traced below; NULL means "fall through to the defaults". */
     if (value == NULL) {
 
     } else if (safe_str_eq(value, "block")) {
         action->on_fail = action_fail_block;
         g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block"));
 
     } else if (safe_str_eq(value, "fence")) {
         action->on_fail = action_fail_fence;
         value = "node fencing";
 
         if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
             crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense");
             action->on_fail = action_fail_stop;
             action->fail_role = RSC_ROLE_STOPPED;
             value = "stop resource";
         }
 
     } else if (safe_str_eq(value, "standby")) {
         action->on_fail = action_fail_standby;
         value = "node standby";
 
     } else if (safe_str_eq(value, "ignore")
                || safe_str_eq(value, "nothing")) {
         action->on_fail = action_fail_ignore;
         value = "ignore";
 
     } else if (safe_str_eq(value, "migrate")) {
         action->on_fail = action_fail_migrate;
         value = "force migration";
 
     } else if (safe_str_eq(value, "stop")) {
         action->on_fail = action_fail_stop;
         action->fail_role = RSC_ROLE_STOPPED;
         value = "stop resource";
 
     } else if (safe_str_eq(value, "restart")) {
         action->on_fail = action_fail_recover;
         value = "restart (and possibly migrate)";
 
     } else if (safe_str_eq(value, "restart-container")) {
         if (container) {
             action->on_fail = action_fail_restart_container;
             value = "restart container (and possibly migrate)";
 
         } else {
             value = NULL;
         }
 
     } else {
         pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value);
         value = NULL;
     }
 
     /* defaults */
     if (value == NULL && container) {
         action->on_fail = action_fail_restart_container;
         value = "restart container (and possibly migrate) (default)";
 
     /* for baremetal remote nodes, ensure that any failure that results in
      * dropping an active connection to a remote node results in fencing of
      * the remote node.
      *
      * There are only two action failures that don't result in fencing.
      * 1. probes - probe failures are expected.
      * 2. start - a start failure indicates that an active connection does not already
      * exist. The user can set op on-fail=fence if they really want to fence start
      * failures. */
     } else if (value == NULL &&
                is_rsc_baremetal_remote_node(action->rsc, data_set) &&
                !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && interval == 0) &&
                 (safe_str_neq(action->task, CRMD_ACTION_START))) {
 
         if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
             value = "fence baremetal remote node (default)";
         } else {
             value = "recover baremetal remote node connection (default)";
         }
         if (action->rsc->remote_reconnect_interval) {
             action->fail_role = RSC_ROLE_STOPPED;
         }
         action->on_fail = action_fail_reset_remote;
 
     } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) {
         if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
             action->on_fail = action_fail_fence;
             value = "resource fence (default)";
 
         } else {
             action->on_fail = action_fail_block;
             value = "resource block (default)";
         }
 
     } else if (value == NULL) {
         action->on_fail = action_fail_recover;
         value = "restart (and possibly migrate) (default)";
     }
 
     pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value);
 
     value = NULL;
     if (xml_obj != NULL) {
         value = g_hash_table_lookup(action->meta, "role_after_failure");
     }
     if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) {
         action->fail_role = text2role(value);
     }
     /* defaults */
     if (action->fail_role == RSC_ROLE_UNKNOWN) {
         if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) {
             action->fail_role = RSC_ROLE_SLAVE;
         } else {
             action->fail_role = RSC_ROLE_STARTED;
         }
     }
     pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task,
                  role2text(action->fail_role));
 
     field = XML_OP_ATTR_START_DELAY;
     value = g_hash_table_lookup(action->meta, field);
     if (value != NULL) {
         value_i = crm_get_msec(value);
         if (value_i < 0) {
             value_i = 0;
         }
         start_delay = value_i;
         value_ms = crm_itoa(value_i);
         g_hash_table_replace(action->meta, strdup(field), value_ms);
 
     } else if (interval > 0 && g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN)) {
         crm_time_t *origin = NULL;
         int interval_s = (interval / 1000);
 
         value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN);
         origin = crm_time_new(value);
 
         if (origin == NULL) {
             crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s",
                            ID(xml_obj), value);
 
         } else if (interval_s <= 0) {
             /* An interval below one second truncates to 0s; stepping the
              * origin by 0 in the loops below would never reach 'now'. */
             crm_trace("Ignoring " XML_OP_ATTR_ORIGIN " for %s: interval is shorter than one second",
                       ID(xml_obj));
             crm_time_free(origin);
 
         } else {
             crm_time_t *delay = NULL;
             int rc = crm_time_compare(origin, data_set->now);
             long long delay_s = 0;
 
             crm_trace("Origin: %s, interval: %d", value, interval_s);
 
             /* If 'origin' is in the future, find the most recent "multiple" that occurred in the past */
             while(rc > 0) {
                 crm_time_add_seconds(origin, -interval_s);
                 rc = crm_time_compare(origin, data_set->now);
             }
 
             /* Now find the first "multiple" that occurs after 'now' */
             while (rc < 0) {
                 crm_time_add_seconds(origin, interval_s);
                 rc = crm_time_compare(origin, data_set->now);
             }
 
             delay = crm_time_calculate_duration(origin, data_set->now);
 
             crm_time_log(LOG_TRACE, "origin", origin,
                          crm_time_log_date | crm_time_log_timeofday |
                          crm_time_log_with_timezone);
             crm_time_log(LOG_TRACE, "now", data_set->now,
                          crm_time_log_date | crm_time_log_timeofday |
                          crm_time_log_with_timezone);
             crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration);
 
             delay_s = crm_time_get_seconds(delay);
 
             CRM_CHECK(delay_s >= 0, delay_s = 0);
             start_delay = delay_s * 1000;
 
             crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj));
             g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY),
                                  crm_itoa(start_delay));
             crm_time_free(origin);
             crm_time_free(delay);
         }
     }
 
     field = XML_ATTR_TIMEOUT;
     value = g_hash_table_lookup(action->meta, field);
     if (value == NULL && xml_obj == NULL && safe_str_eq(action->task, RSC_STATUS) && interval == 0) {
         /* An unconfigured probe borrows the timeout of the shortest-interval monitor */
         xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE);
 
         if (min_interval_mon) {
             value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT);
             pe_rsc_trace(action->rsc,
                          "\t%s uses the timeout value '%s' from the minimum interval monitor",
                          action->uuid, value);
         }
     }
     if (value == NULL) {
         value = pe_pref(data_set->config_hash, "default-action-timeout");
     }
     value_i = crm_get_msec(value);
     if (value_i < 0) {
         value_i = 0;
     }
     /* The stored timeout includes the start delay */
     value_i += start_delay;
     value_ms = crm_itoa(value_i);
     g_hash_table_replace(action->meta, strdup(field), value_ms);
 }
 
 /*
  * Locate the configured <op> element whose generated operation key equals
  * 'key', trying both the resource id and (when set) its clone name.
  *
  * If nothing matches, one more pass is made with a generic key: interval-0
  * "migrate" when the key mentions a migration action, or interval-0
  * "notify" when it contains "_notify_". Operations whose "enabled"
  * attribute is not true are ignored unless include_disabled is set.
  *
  * Returns the first matching element, or NULL.
  */
 static xmlNode *
 find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled)
 {
     int pass = 0;
     char *fallback_key = NULL;
     xmlNode *found = NULL;
 
     for (pass = 0; pass < 2 && found == NULL; pass++) {
         xmlNode *candidate = NULL;
 
         if (pass == 1) {
             /* Second pass only: swap in the generic key, if one applies */
             const char *generic_task = NULL;
 
             if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) {
                 generic_task = "migrate";
 
             } else if (strstr(key, "_notify_")) {
                 generic_task = "notify";
 
             } else {
                 break;
             }
 
             fallback_key = generate_op_key(rsc->id, generic_task, 0);
             key = fallback_key;
         }
 
         for (candidate = __xml_first_child(rsc->ops_xml);
              candidate != NULL && found == NULL;
              candidate = __xml_next_element(candidate)) {
 
             unsigned long long op_interval = 0;
             const char *op_name = NULL;
             const char *op_enabled = NULL;
             char *candidate_key = NULL;
 
             if (!crm_str_eq((const char *)candidate->name, "op", TRUE)) {
                 continue;
             }
 
             op_enabled = crm_element_value(candidate, "enabled");
             if (!include_disabled && op_enabled && crm_is_true(op_enabled) == FALSE) {
                 continue;
             }
 
             op_name = crm_element_value(candidate, "name");
             op_interval = crm_get_interval(crm_element_value(candidate, XML_LRM_ATTR_INTERVAL));
 
             candidate_key = generate_op_key(rsc->id, op_name, op_interval);
             if (safe_str_eq(key, candidate_key)) {
                 found = candidate;
             }
             free(candidate_key);
 
             if (rsc->clone_name) {
                 candidate_key = generate_op_key(rsc->clone_name, op_name, op_interval);
                 if (safe_str_eq(key, candidate_key)) {
                     found = candidate;
                 }
                 free(candidate_key);
             }
         }
     }
 
     free(fallback_key);
     return found;
 }
 
 /*
  * Look up the configured <op> element for an operation key.
  *
  * Delegates to find_rsc_op_entry_helper() with include_disabled set to
  * FALSE, so operations whose "enabled" attribute is not true are ignored.
  * Returns NULL when no entry matches.
  */
 xmlNode *
 find_rsc_op_entry(resource_t * rsc, const char *key)
 {
     return find_rsc_op_entry_helper(rsc, key, FALSE);
 }
 
 /*
  * Trace-log a one-line summary of a node and, when 'details' is set, its
  * attributes and the resources listed as running on it.
  *
  * pre_text is an optional prefix; a NULL node is reported as "<NULL>".
  */
 void
 print_node(const char *pre_text, node_t * node, gboolean details)
 {
     const char *label = (pre_text == NULL) ? "" : pre_text;
     const char *separator = (pre_text == NULL) ? "" : ": ";
     char *attr_prefix = NULL;
     GListPtr item = NULL;
 
     if (node == NULL) {
         crm_trace("%s%s: <NULL>", label, separator);
         return;
     }
 
     CRM_ASSERT(node->details);
     crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)",
               label, separator,
               node->details->online ? "" : "Unavailable/Unclean ",
               node->details->uname, node->weight, node->fixed ? "True" : "False");
 
     if (!details) {
         return;
     }
 
     /* print_str_str() receives this prefix as its user data */
     attr_prefix = strdup("\t\t");
     crm_trace("\t\t===Node Attributes");
     g_hash_table_foreach(node->details->attrs, print_str_str, attr_prefix);
     free(attr_prefix);
 
     crm_trace("\t\t=== Resources");
     for (item = node->details->running_rsc; item != NULL; item = item->next) {
         print_resource(LOG_DEBUG_4, "\t\t", (resource_t *) item->data, FALSE);
     }
 }
 
 /*
  * Hash-table iteration callback: trace-log one string key/value pair.
  * user_data, when not NULL, is a string printed as a prefix.
  */
 void
 print_str_str(gpointer key, gpointer value, gpointer user_data)
 {
     const char *prefix = "";
     const char *separator = "";
 
     if (user_data != NULL) {
         prefix = (char *)user_data;
         separator = ": ";
     }
 
     crm_trace("%s%s %s ==> %s", prefix, separator, (char *)key, (char *)value);
 }
 
 /*
  * Log a resource through its variant-specific print method.
  *
  * A NULL resource is reported as "<NULL>" at log_level - 1. Otherwise the
  * resource's print function is called with pe_print_log, plus
  * pe_print_details when 'details' is set, and a pointer to log_level.
  */
 void
 print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details)
 {
     long print_flags = details ? (pe_print_log | pe_print_details) : pe_print_log;
 
     if (rsc != NULL) {
         rsc->fns->print(rsc, pre_text, print_flags, &log_level);
         return;
     }
 
     do_crm_log(log_level - 1, "%s%s: <NULL>",
                pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
 }
 
 /*
  * Release an action and everything it owns: both ordering lists and their
  * elements, the 'extra' and 'meta' hash tables, the task/uuid/cancel_task
  * strings and the action's node. A NULL action is ignored.
  */
 void
 pe_free_action(action_t * action)
 {
     if (action != NULL) {
         /* The list elements are heap-allocated wrappers owned by the action */
         g_list_free_full(action->actions_before, free);
         g_list_free_full(action->actions_after, free);
 
         if (action->extra != NULL) {
             g_hash_table_destroy(action->extra);
         }
         if (action->meta != NULL) {
             g_hash_table_destroy(action->meta);
         }
 
         free(action->cancel_task);
         free(action->task);
         free(action->uuid);
         free(action->node);
         free(action);
     }
 }
 
 /*
  * Collect the recurring actions from 'input'.
  *
  * An action qualifies when its meta table has an interval other than "0"
  * and it is not a cancel operation. If not_on_node is given, actions with
  * no node, or whose node is not_on_node, are left out.
  *
  * The result is a new list (in reverse encounter order) that references,
  * but does not copy, the actions.
  */
 GListPtr
 find_recurring_actions(GListPtr input, node_t * not_on_node)
 {
     GListPtr matches = NULL;
     GListPtr item = NULL;
 
     CRM_CHECK(input != NULL, return NULL);
 
     for (item = input; item != NULL; item = item->next) {
         action_t *candidate = (action_t *) item->data;
         const char *interval = g_hash_table_lookup(candidate->meta, XML_LRM_ATTR_INTERVAL);
 
         if (interval == NULL || safe_str_eq(interval, "0")) {
             continue;           /* not recurring */
         }
         if (safe_str_eq(CRMD_ACTION_CANCEL, candidate->task)) {
             continue;
         }
 
         if (not_on_node == NULL) {
             crm_trace("(null) Found: %s", candidate->uuid);
 
         } else if (candidate->node == NULL
                    || candidate->node->details == not_on_node->details) {
             continue;
 
         } else {
             crm_trace("Found: %s", candidate->uuid);
         }
 
         matches = g_list_prepend(matches, candidate);
     }
 
     return matches;
 }
 
 /*
  * Translate a task name into its enum value.
  *
  * When a resource is given and either allow_non_atomic is FALSE or the
  * resource is a native one, the "completed" tasks (stopped, started,
  * demoted, promoted) are mapped to the enum value immediately before them.
  */
 enum action_tasks
 get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic)
 {
     enum action_tasks task = text2task(name);
     gboolean fold = FALSE;
 
     if (rsc != NULL && (allow_non_atomic == FALSE || rsc->variant == pe_native)) {
         fold = (task == stopped_rsc || task == started_rsc
                 || task == action_demoted || task == action_promoted);
     }
 
     if (fold) {
         crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id);
         return task - 1;
     }
 
     return task;
 }
 
 /*
  * Return the first action in 'input' that matches the given uuid and/or
  * task (at least one must be supplied) and, when on_node is given, is
  * assigned to that node. Returns NULL if there is no such action.
  */
 action_t *
 find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node)
 {
     GListPtr item = input;
 
     CRM_CHECK(uuid || task, return NULL);
 
     while (item != NULL) {
         action_t *candidate = (action_t *) item->data;
 
         item = item->next;
 
         if (uuid != NULL && safe_str_neq(uuid, candidate->uuid)) {
             continue;
         }
         if (task != NULL && safe_str_neq(task, candidate->task)) {
             continue;
         }
 
         /* No node requested: any uuid/task match will do */
         if (on_node == NULL) {
             return candidate;
         }
 
         if (candidate->node != NULL && on_node->details == candidate->node->details) {
             return candidate;
         }
     }
 
     return NULL;
 }
 
 /*
  * Collect the actions in 'input' whose uuid equals 'key'.
  *
  * With on_node == NULL every key match is returned. Otherwise only actions
  * on that node are returned; note that a matching action that has no node
  * yet is assigned a copy of on_node and included in the result.
  *
  * The result is a new list (in reverse encounter order) that references,
  * but does not copy, the actions.
  */
 GListPtr
 find_actions(GListPtr input, const char *key, node_t * on_node)
 {
     GListPtr matches = NULL;
     GListPtr item = NULL;
 
     CRM_CHECK(key != NULL, return NULL);
 
     for (item = input; item != NULL; item = item->next) {
         action_t *candidate = (action_t *) item->data;
         gboolean wanted = FALSE;
 
         crm_trace("Matching %s against %s", key, candidate->uuid);
         if (safe_str_neq(key, candidate->uuid)) {
             continue;
         }
 
         if (on_node == NULL) {
             wanted = TRUE;
 
         } else if (candidate->node == NULL) {
             /* Side effect: claim the unassigned action for the requested node */
             crm_trace("While looking for %s action on %s, "
                       "found an unallocated one.  Assigning"
                       " it to the requested node...", key, on_node->details->uname);
 
             candidate->node = node_copy(on_node);
             wanted = TRUE;
 
         } else {
             wanted = (on_node->details == candidate->node->details);
         }
 
         if (wanted) {
             matches = g_list_prepend(matches, candidate);
         }
     }
 
     return matches;
 }
 
 /*
  * Collect the actions in 'input' whose uuid equals 'key' and whose node has
  * the same id as on_node.
  *
  * Unlike find_actions(), nothing is returned when on_node is NULL, and
  * actions without a node are skipped rather than assigned.
  *
  * The result is a new list (in reverse encounter order) that references,
  * but does not copy, the actions.
  */
 GListPtr
 find_actions_exact(GListPtr input, const char *key, node_t * on_node)
 {
     GListPtr gIter = input;
     GListPtr result = NULL;
 
     CRM_CHECK(key != NULL, return NULL);
 
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         crm_trace("Matching %s against %s", key, action->uuid);
         if (safe_str_neq(key, action->uuid)) {
             crm_trace("Key mismatch: %s vs. %s", key, action->uuid);
             continue;
 
         } else if (on_node == NULL || action->node == NULL) {
             crm_trace("on_node=%p, action->node=%p", on_node, action->node);
             continue;
 
         } else if (safe_str_eq(on_node->details->id, action->node->details->id)) {
             result = g_list_prepend(result, action);
 
         } else {
             /* Only report a mismatch when the node ids really differ */
             crm_trace("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id);
         }
     }
 
     return result;
 }
 
 /*
  * Merge 'score' into the weight that 'rsc' (and, recursively, each of its
  * children) holds for 'node' in its allowed_nodes table.
  *
  * If the node is not in the table yet, a copy of it is inserted first; the
  * copy starts from the weight node_copy() gives it and then receives the
  * score exactly once. 'tag' is only used for tracing.
  */
 static void
 resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag)
 {
     node_t *match = NULL;
 
     if (rsc->children) {
         GListPtr gIter = rsc->children;
 
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child_rsc = (resource_t *) gIter->data;
 
             resource_node_score(child_rsc, node, score, tag);
         }
     }
 
     pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score);
     match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
     if (match == NULL) {
         /* Do not merge the score here as well: the statement below applies
          * it for both new and existing entries, so doing both would count
          * it twice for new ones. */
         match = node_copy(node);
         g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match);
     }
     match->weight = merge_weights(match->weight, score);
 }
 
 /*
  * Apply a location score for a resource.
  *
  * The score goes to 'node' when one is given; otherwise to every node of
  * the working set when data_set is given; otherwise to every node already
  * in the resource's allowed_nodes table.
  *
  * Applying -INFINITY without a specific node also drops the resource's
  * current allocation.
  */
 void
 resource_location(resource_t * rsc, node_t * node, int score, const char *tag,
                   pe_working_set_t * data_set)
 {
     if (node != NULL) {
         resource_node_score(rsc, node, score, tag);
 
     } else if (data_set != NULL) {
         GListPtr item = NULL;
 
         for (item = data_set->nodes; item != NULL; item = item->next) {
             resource_node_score(rsc, (node_t *) item->data, score, tag);
         }
 
     } else {
         GHashTableIter iter;
         node_t *allowed = NULL;
 
         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&allowed)) {
             resource_node_score(rsc, allowed, score, tag);
         }
     }
 
     if (node == NULL && score == -INFINITY && rsc->allocated_to) {
         crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname);
         free(rsc->allocated_to);
         rsc->allocated_to = NULL;
     }
 }
 
 /*
  * Helper for sort_op_by_callid(): free the two decoded transition UUIDs,
  * trace the comparison result together with the reason 'why', and return
  * 'an_int' from the enclosing function.
  *
  * Relies on the locals a_uuid, b_uuid, a_xml_id, b_xml_id, a_call_id and
  * b_call_id being in scope at the point of use.
  */
 #define sort_return(an_int, why) do {					\
 	free(a_uuid);						\
 	free(b_uuid);						\
 	crm_trace("%s (%d) %c %s (%d) : %s",				\
 		  a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=',	\
 		  b_xml_id, b_call_id, why);				\
 	return an_int;							\
     } while(0)
 
 /*
  * Comparison callback ordering two operation-history XML entries.
  *
  * Returns a negative value when 'a' sorts before 'b', a positive value when
  * it sorts after, and 0 when no order can be established. The criteria, in
  * order, are:
  *  - identical XML ids: reported as duplicates, 0;
  *  - both call ids are -1 (pending): 0;
  *  - differing call ids: the lower call id sorts first;
  *  - equal call ids: ordered by the last-rc-change value;
  *  - otherwise (one entry is pending): ordered using the transition magic.
  *
  * Every exit goes through sort_return(), which frees a_uuid/b_uuid.
  */
 gint
 sort_op_by_callid(gconstpointer a, gconstpointer b)
 {
     int a_call_id = -1;
     int b_call_id = -1;
 
     /* Filled in by decode_transition_magic(); released by sort_return() */
     char *a_uuid = NULL;
     char *b_uuid = NULL;
 
     const xmlNode *xml_a = a;
     const xmlNode *xml_b = b;
 
     const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID);
     const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID);
 
     if (safe_str_eq(a_xml_id, b_xml_id)) {
         /* We have duplicate lrm_rsc_op entries in the status
          *    section which is unlikely to be a good thing
          *    - we can handle it easily enough, but we need to get
          *    to the bottom of why it's happening.
          */
         pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
         sort_return(0, "duplicate");
     }
 
     /* The call ids keep their -1 initial value if the attribute is absent */
     crm_element_value_const_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
     crm_element_value_const_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
 
     if (a_call_id == -1 && b_call_id == -1) {
         /* both are pending ops so it doesn't matter since
          *   stops are never pending
          */
         sort_return(0, "pending");
 
     } else if (a_call_id >= 0 && a_call_id < b_call_id) {
         sort_return(-1, "call id");
 
     } else if (b_call_id >= 0 && a_call_id > b_call_id) {
         sort_return(1, "call id");
 
     } else if (b_call_id >= 0 && a_call_id == b_call_id) {
         /*
          * The op and last_failed_op are the same
          * Order on last-rc-change
          */
         int last_a = -1;
         int last_b = -1;
 
         crm_element_value_const_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
         crm_element_value_const_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
 
         crm_trace("rc-change: %d vs %d", last_a, last_b);
         if (last_a >= 0 && last_a < last_b) {
             sort_return(-1, "rc-change");
 
         } else if (last_b >= 0 && last_a > last_b) {
             sort_return(1, "rc-change");
         }
         sort_return(0, "rc-change");
 
     } else {
         /* One of the inputs is a pending operation
          * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
          */
 
         int a_id = -1;
         int b_id = -1;
         int dummy = -1;         /* sink for the decoded fields that are not used here */
 
         const char *a_magic = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC);
         const char *b_magic = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC);
 
         CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
         if(!decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy)) {
             sort_return(0, "bad magic a");
         }
         if(!decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy)) {
             sort_return(0, "bad magic b");
         }
         /* try to determine the relative age of the operation...
          * some pending operations (ie. a start) may have been superseded
          *   by a subsequent stop
          *
          * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
          */
         if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) {
             /*
              * some of the logic in here may be redundant...
              *
              * if the UUID from the TE doesn't match then one better
              *   be a pending operation.
              * pending operations don't survive between elections and joins
              *   because we query the LRM directly
              */
 
             if (b_call_id == -1) {
                 sort_return(-1, "transition + call");
 
             } else if (a_call_id == -1) {
                 sort_return(1, "transition + call");
             }
 
         } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
             sort_return(-1, "transition");
 
         } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
             sort_return(1, "transition");
         }
     }
 
     /* we should never end up here */
     /* CRM_CHECK(FALSE, ...) always runs its failure action, so this is the
      * function's final return even though no plain 'return' follows. */
     CRM_CHECK(FALSE, sort_return(0, "default"));
 
 }
 
 /*
  * Return the time, in seconds since the epoch, that calculations should
  * treat as "now".
  *
  * With a working set, its 'now' is used (and created on first use);
  * without one, the current wall-clock time is returned.
  */
 time_t
 get_effective_time(pe_working_set_t * data_set)
 {
     if (data_set == NULL) {
         crm_trace("Defaulting to 'now'");
         return time(NULL);
     }
 
     if (data_set->now == NULL) {
         crm_trace("Recording a new 'now'");
         data_set->now = crm_time_new(NULL);
     }
 
     return crm_time_get_seconds_since_epoch(data_set->now);
 }
 
 /*
  * State passed as user data to get_failcount_by_prefix() while it is run
  * over a table of attributes.
  */
 struct fail_search {
     /* Resource the totals are for; the callback compares it against the
      * uber parent of each matched resource */
     resource_t *rsc;
     /* Working set whose resource list is used to resolve matched names */
     pe_working_set_t * data_set;
 
     /* Running sum of the scores of matching "fail-count-" attributes */
     int count;
     /* Value of the most recently visited matching "last-failure-" attribute */
     long long last;
     /* Substring that an attribute name must contain to be considered */
     char *key;
 };
 
 /* g_hash_table_foreach() callback: fold one node attribute into the
  * struct fail_search passed as user_data.
  *
  * key_p   - attribute name
  * value   - attribute value (string)
  *
  * An attribute is only considered when its name contains the search key and
  * the resource named by the text from that key onwards has the searched
  * resource as its uber parent (the search's rsc is already the uber parent).
  * That second test keeps unrelated resources with similar name prefixes from
  * being counted.  "last-failure-" attributes overwrite the recorded time,
  * "fail-count-" attributes are added to the running count.
  */
 static void
 get_failcount_by_prefix(gpointer key_p, gpointer value, gpointer user_data)
 {
     const char *attr_name = key_p;
     struct fail_search *ctx = user_data;
     const char *rsc_name = strstr(attr_name, ctx->key);
     resource_t *top = NULL;
 
     if (rsc_name != NULL) {
         top = uber_parent(pe_find_resource(ctx->data_set->resources, rsc_name));
     }
 
     if (top == NULL || top != ctx->rsc) {
         return;
     }
 
     if (strncmp(attr_name, "last-failure-", strlen("last-failure-")) == 0) {
         ctx->last = crm_int_helper(value, NULL);
 
     } else if (strncmp(attr_name, "fail-count-", strlen("fail-count-")) == 0) {
         ctx->count += char2score(value);
     }
 }
 
 /* Convenience form of get_failcount_full(): asks for the effective fail
  * count and supplies no specific operation XML.
  */
 int
 get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set)
 {
     xmlNode *any_op = NULL;
     bool want_effective = TRUE;
 
     return get_failcount_full(node, rsc, last_failure, want_effective, any_op, data_set);
 }
 
 /* Decide whether the recorded operation lrm_op_xml counts as a failure of
  * the configured operation conf_op_xml for resource rsc_id.
  *
  * The two entries must first agree on operation name and interval.  The
  * recorded entry then matches when its ID is either
  *   - the resource's "last_failure" operation key, or
  *   - the key of the configured operation itself, provided the recorded
  *     return code differs from the expected one.
  *
  * Returns FALSE if any argument is NULL.
  */
 static gboolean
 is_matched_failure(const char * rsc_id, xmlNode * conf_op_xml, xmlNode * lrm_op_xml)
 {
     const char *wanted_name = NULL;
     const char *seen_task = NULL;
     const char *seen_id = NULL;
     int wanted_interval = 0;
     int seen_interval = 0;
     char *op_key = NULL;
     gboolean failed = FALSE;
 
     if (!rsc_id || !conf_op_xml || !lrm_op_xml) {
         return FALSE;
     }
 
     wanted_name = crm_element_value(conf_op_xml, "name");
     wanted_interval = crm_get_msec(crm_element_value(conf_op_xml, "interval"));
     seen_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK);
     crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_INTERVAL, &seen_interval);
 
     if (!safe_str_eq(wanted_name, seen_task) || wanted_interval != seen_interval) {
         return FALSE;
     }
 
     seen_id = ID(lrm_op_xml);
 
     /* A last_failure record always counts */
     op_key = generate_op_key(rsc_id, "last_failure", 0);
     failed = safe_str_eq(op_key, seen_id);
     free(op_key);
     if (failed) {
         return TRUE;
     }
 
     /* Otherwise the entry must be the operation itself, with an unexpected rc */
     op_key = generate_op_key(rsc_id, wanted_name, wanted_interval);
     if (safe_str_eq(op_key, seen_id)) {
         int rc = 0;
         int target_rc = get_target_rc(lrm_op_xml);
 
         crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc);
         failed = (rc != target_rc);
     }
     free(op_key);
 
     return failed;
 }
 
 /* Report whether rsc has an operation configured with on-fail="block" for
  * which a matching failure (see is_matched_failure()) exists.
  *
  * node     - node whose recorded operations are searched when xml_op is NULL
  * rsc      - resource whose configuration (rsc->xml) is inspected
  * xml_op   - if non-NULL, the only recorded operation that is tested
  * data_set - working set; its input is searched when xml_op is NULL
  *
  * Returns TRUE as soon as one configured on-fail=block operation matches.
  *
  * NOTE(review): node is dereferenced whenever xml_op is NULL, so callers
  * appear to need a non-NULL node in that case - confirm against callers.
  */
 static gboolean
 block_failure(node_t * node, resource_t * rsc, xmlNode * xml_op, pe_working_set_t * data_set)
 {
     /* presumably the resource ID without any clone instance suffix */
     char *xml_name = clone_strip(rsc->id);
     char *xpath = crm_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", xml_name);
     xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath);
     gboolean should_block = FALSE;
 
     free(xpath);
 
     if (xpathObj) {
         int max = numXpathResults(xpathObj);
         int lpc = 0;
 
         /* One iteration per configured operation with on-fail=block */
         for (lpc = 0; lpc < max; lpc++) {
             xmlNode *pref = getXpathResult(xpathObj, lpc);
 
             if (xml_op) {
                 /* The caller named the recorded operation to test */
                 should_block = is_matched_failure(xml_name, pref, xml_op);
                 if (should_block) {
                     break;
                 }
 
             } else {
                 /* No specific operation given: look up every recorded
                  * operation of this resource on this node that has the same
                  * name and interval as the configured one */
                 const char *conf_op_name = NULL;
                 int conf_op_interval = 0;
                 char *lrm_op_xpath = NULL;
                 xmlXPathObject *lrm_op_xpathObj = NULL;
 
                 conf_op_name = crm_element_value(pref, "name");
                 conf_op_interval = crm_get_msec(crm_element_value(pref, "interval"));
 
                 lrm_op_xpath = crm_strdup_printf("//node_state[@uname='%s']"
                                                "//lrm_resource[@id='%s']"
                                                "/lrm_rsc_op[@operation='%s'][@interval='%d']",
                                                node->details->uname, xml_name,
                                                conf_op_name, conf_op_interval);
                 lrm_op_xpathObj = xpath_search(data_set->input, lrm_op_xpath);
 
                 free(lrm_op_xpath);
 
                 if (lrm_op_xpathObj) {
                     int max2 = numXpathResults(lrm_op_xpathObj);
                     int lpc2 = 0;
 
                     for (lpc2 = 0; lpc2 < max2; lpc2++) {
                         xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj, lpc2);
 
                         should_block = is_matched_failure(xml_name, pref, lrm_op_xml);
                         if (should_block) {
                             break;
                         }
                     }
                 }
                 /* Released before leaving the outer loop so the inner search
                  * result is freed on every path */
                 freeXpathObject(lrm_op_xpathObj);
 
                 if (should_block) {
                     break;
                 }
             }
         }
     }
 
     free(xml_name);
     freeXpathObject(xpathObj);
 
     return should_block;
 }
 
 /* Return the fail count of rsc on node, as recorded in the node's
  * attributes.
  *
  * node         - node whose attribute table (node->details->attrs) is read
  * rsc          - resource of interest
  * last_failure - if non-NULL, receives the recorded last-failure value, but
  *                only when both the count and that value are non-zero
  * effective    - when true, a count whose last failure is older than
  *                rsc->failure_timeout (relative to get_effective_time()) is
  *                reported as 0
  * xml_op       - passed through to block_failure()
  * data_set     - working set
  *
  * Side effect: rsc->failure_timeout is reset to 0 when block_failure()
  * reports a conflicting on-fail=block operation.
  */
 int
 get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure,
                    bool effective, xmlNode * xml_op, pe_working_set_t * data_set)
 {
     char *key = NULL;
     const char *value = NULL;
     /* Positional initializer: rsc, data_set, count, last, key */
     struct fail_search search = { rsc, data_set, 0, 0, NULL };
 
     /* Optimize the "normal" case */
     key = crm_concat("fail-count", rsc->clone_name ? rsc->clone_name : rsc->id, '-');
     value = g_hash_table_lookup(node->details->attrs, key);
     search.count = char2score(value);
     /* NOTE(review): value may be NULL here and is passed to a %s format */
     crm_trace("%s = %s", key, value);
     free(key);
 
     if (value) {
         /* Exact attribute found: fetch the matching last-failure attribute */
         key = crm_concat("last-failure", rsc->clone_name ? rsc->clone_name : rsc->id, '-');
         value = g_hash_table_lookup(node->details->attrs, key);
         search.last = crm_int_helper(value, NULL);
         free(key);
 
         /* This block is still relevant once we omit anonymous instance numbers
          * because stopped clones won't have clone_name set
          */
     } else if (is_not_set(rsc->flags, pe_rsc_unique)) {
         /* No exact attribute: scan every attribute of the node and let
          * get_failcount_by_prefix() accumulate the ones that belong to the
          * same uber parent */
         search.rsc = uber_parent(rsc);
         search.key = clone_strip(rsc->id);
 
         g_hash_table_foreach(node->details->attrs, get_failcount_by_prefix, &search);
         free(search.key);
         search.key = NULL;
     }
 
     if (search.count != 0 && search.last != 0 && last_failure) {
         *last_failure = search.last;
     }
 
     if(search.count && rsc->failure_timeout) {
         /* Never time-out if blocking failures are configured */
         if (block_failure(node, rsc, xml_op, data_set)) {
             pe_warn("Setting %s.failure-timeout=%d conflicts with on-fail=block: ignoring timeout", rsc->id, rsc->failure_timeout);
             rsc->failure_timeout = 0;
 #if 0
             /* A good idea? */
         } else if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
             /* In this case, stop.on-fail defaults to block in unpack_operation() */
             rsc->failure_timeout = 0;
 #endif
         }
     }
 
     /* Expire the count when the caller asked for the effective value and the
      * last failure is older than the (possibly just cleared) timeout */
     if (effective && search.count != 0 && search.last != 0 && rsc->failure_timeout) {
         if (search.last > 0) {
             time_t now = get_effective_time(data_set);
 
             if (now > (search.last + rsc->failure_timeout)) {
                 crm_debug("Failcount for %s on %s has expired (limit was %ds)",
                           search.rsc->id, node->details->uname, rsc->failure_timeout);
                 search.count = 0;
             }
         }
     }
 
     if (search.count != 0) {
         char *score = score2char(search.count);
 
         crm_info("%s has failed %s times on %s", search.rsc->id, score, node->details->uname);
         free(score);
     }
 
     return search.count;
 }
 
 /* Fail count of rsc on node plus the fail counts of every resource listed
  * in rsc->fillers (the resources inside it when rsc is a container).
  *
  * If last_failure is non-NULL it is raised to the latest last-failure time
  * seen among the fillers whenever that is later than the value it holds.
  */
 int
 get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
     int total = get_failcount(node, rsc, last_failure, data_set);
 
     if (rsc->fillers == NULL) {
         /* Not a container: nothing further to add */
         return total;
     }
 
     item = rsc->fillers;
     while (item != NULL) {
         resource_t *inner = (resource_t *) item->data;
         time_t inner_last = 0;
 
         total += get_failcount(node, inner, &inner_last, data_set);
 
         if (last_failure != NULL && inner_last > *last_failure) {
             *last_failure = inner_last;
         }
         item = item->next;
     }
 
     if (total != 0) {
         char *printable = score2char(total);
 
         crm_info("Container %s and the resources within it have failed %s times on %s",
                  rsc->id, printable, node->details->uname);
         free(printable);
     }
 
     return total;
 }
 
 /* Look up the target-role meta attribute of rsc and, if it names a role
  * that should be enforced, store it in *role.
  *
  * Returns TRUE when *role was set.  Returns FALSE (leaving *role untouched)
  * when the attribute is absent, is "started" or "default", cannot be parsed,
  * names a role above "started" on a resource that is not part of a
  * master/slave resource, or names a role above "slave" on one that is.
  */
 gboolean
 get_target_role(resource_t * rsc, enum rsc_role_e * role)
 {
     const char *configured = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
     enum rsc_role_e parsed = RSC_ROLE_UNKNOWN;
 
     CRM_CHECK(role != NULL, return FALSE);
 
     if (configured == NULL) {
         return FALSE;
     }
     if (safe_str_eq("started", configured) || safe_str_eq("default", configured)) {
         return FALSE;
     }
 
     parsed = text2role(configured);
     if (parsed == RSC_ROLE_UNKNOWN) {
         crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, configured);
         return FALSE;
     }
 
     if (parsed > RSC_ROLE_STARTED) {
         if (uber_parent(rsc)->variant != pe_master) {
             crm_config_err("%s is not part of a master/slave resource, a %s of '%s' makes no sense",
                            rsc->id, XML_RSC_ATTR_TARGET_ROLE, configured);
             return FALSE;
         }
 
         if (parsed > RSC_ROLE_SLAVE) {
             /* This is what we'd do anyway, just leave the default to avoid
              * messing up the placement algorithm */
             return FALSE;
         }
     }
 
     *role = parsed;
     return TRUE;
 }
 
 /* Record that lh_action must be ordered before rh_action.
  *
  * A wrapper for rh_action is prepended to lh_action->actions_after and a
  * wrapper for lh_action is prepended to rh_action->actions_before, both
  * tagged with the given ordering type.
  *
  * Returns FALSE without changing anything when order is pe_order_none, when
  * either action is NULL, or when lh_action already has an "after" entry for
  * rh_action whose type shares a bit with order.  Returns TRUE once both
  * entries have been added.
  */
 gboolean
 order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order)
 {
     GListPtr gIter = NULL;
     action_wrapper_t *after_entry = NULL;
     action_wrapper_t *before_entry = NULL;
 
     if (order == pe_order_none) {
         return FALSE;
     }
 
     if (lh_action == NULL || rh_action == NULL) {
         return FALSE;
     }
 
     crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid);
 
     /* Ensure we never create a dependency on ourselves... it has happened */
     CRM_ASSERT(lh_action != rh_action);
 
     /* Filter dups, otherwise update_action_states() has too much work to do */
     for (gIter = lh_action->actions_after; gIter != NULL; gIter = gIter->next) {
         action_wrapper_t *after = (action_wrapper_t *) gIter->data;
 
         if (after->action == rh_action && (after->type & order)) {
             return FALSE;
         }
     }
 
     /* Allocate both halves before touching either list, and fail through
      * the assertion mechanism instead of dereferencing NULL, so the two
      * lists can never end up describing only one side of the ordering */
     after_entry = calloc(1, sizeof(action_wrapper_t));
     before_entry = calloc(1, sizeof(action_wrapper_t));
     CRM_ASSERT(after_entry != NULL && before_entry != NULL);
 
     after_entry->action = rh_action;
     after_entry->type = order;
     lh_action->actions_after = g_list_prepend(lh_action->actions_after, after_entry);
 
 /* 	order |= pe_order_implies_then; */
 /* 	order ^= pe_order_implies_then; */
 
     before_entry->action = lh_action;
     before_entry->type = order;
     rh_action->actions_before = g_list_prepend(rh_action->actions_before, before_entry);
 
     return TRUE;
 }
 
 /* Return the pseudo action called name, creating it if it is not found in
  * data_set->singletons.
  *
  * A newly created action is flagged as both pseudo and runnable.
  */
 action_t *
 get_pseudo_op(const char *name, pe_working_set_t * data_set)
 {
     action_t *pseudo = NULL;
 
     if (data_set->singletons != NULL) {
         pseudo = g_hash_table_lookup(data_set->singletons, name);
     }
 
     if (pseudo != NULL) {
         return pseudo;
     }
 
     pseudo = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
     set_bit(pseudo->flags, pe_action_pseudo);
     set_bit(pseudo->flags, pe_action_runnable);
 
     return pseudo;
 }
 
 /* Release a ticket_t: its state table (if any), its id string and the
  * structure itself.  ticket_new() registers this as the value destructor of
  * the tickets table.
  */
 void
 destroy_ticket(gpointer data)
 {
     ticket_t *doomed = (ticket_t *) data;
     GHashTable *state = doomed->state;
 
     if (state != NULL) {
         g_hash_table_destroy(state);
     }
 
     free(doomed->id);
     free(doomed);
 }
 
 /* Return the ticket called ticket_id from data_set->tickets, creating and
  * registering it first if it does not exist yet.
  *
  * The tickets table itself is created on first use.  A new ticket starts
  * out not granted, not in standby, with last_granted set to -1 and an empty
  * state table.
  *
  * Returns NULL when ticket_id is NULL or empty, or when memory for a new
  * ticket cannot be allocated.  The returned ticket is owned by the table.
  */
 ticket_t *
 ticket_new(const char *ticket_id, pe_working_set_t * data_set)
 {
     ticket_t *ticket = NULL;
     char *table_key = NULL;
 
     if (ticket_id == NULL || ticket_id[0] == '\0') {
         return NULL;
     }
 
     if (data_set->tickets == NULL) {
         data_set->tickets =
             g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket);
     }
 
     ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
     if (ticket != NULL) {
         return ticket;
     }
 
     ticket = calloc(1, sizeof(ticket_t));
     if (ticket == NULL) {
         crm_err("Cannot allocate ticket '%s'", ticket_id);
         return NULL;
     }
 
     crm_trace("Creating ticket entry for %s", ticket_id);
 
     /* The ticket and the table each own a separate copy of the name; make
      * sure both exist before anything is inserted */
     ticket->id = strdup(ticket_id);
     table_key = strdup(ticket_id);
     if (ticket->id == NULL || table_key == NULL) {
         crm_err("Cannot allocate ticket '%s'", ticket_id);
         free(table_key);
         free(ticket->id);
         free(ticket);
         return NULL;
     }
 
     ticket->granted = FALSE;
     ticket->last_granted = -1;
     ticket->standby = FALSE;
     ticket->state = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                           g_hash_destroy_str, g_hash_destroy_str);
 
     g_hash_table_insert(data_set->tickets, table_key, ticket);
 
     return ticket;
 }
 
 static void
 filter_parameters(xmlNode * param_set, const char *param_string, bool need_present)
 {
     int len = 0;
     char *name = NULL;
     char *match = NULL;
 
     if (param_set == NULL) {
         return;
     }
 
     if (param_set) {
         xmlAttrPtr xIter = param_set->properties;
 
         while (xIter) {
             const char *prop_name = (const char *)xIter->name;
 
             xIter = xIter->next;
             name = NULL;
             len = strlen(prop_name) + 3;
 
             name = malloc(len);
             if(name) {
                 sprintf(name, " %s ", prop_name);
                 name[len - 1] = 0;
                 match = strstr(param_string, name);
             }
 
             if (need_present && match == NULL) {
                 crm_trace("%s not found in %s", prop_name, param_string);
                 xml_remove_prop(param_set, prop_name);
 
             } else if (need_present == FALSE && match) {
                 crm_trace("%s found in %s", prop_name, param_string);
                 xml_remove_prop(param_set, prop_name);
             }
             free(name);
         }
     }
 }
 
 /* Compare the parameter digests recorded in the operation history entry
  * xml_op with digests recalculated from the current configuration of rsc on
  * node.
  *
  * The result is cached in node->details->digest_cache under the operation's
  * ID; a cached entry is returned as-is on later calls.  The returned
  * structure stays in that cache, so callers should not free it.
  *
  * data->rc ends up as one of:
  *   RSC_DIGEST_RESTART - a restart digest was recorded and differs
  *   RSC_DIGEST_UNKNOWN - no full digest was recorded in xml_op
  *   RSC_DIGEST_ALL     - the full digest was recorded and differs
  *   RSC_DIGEST_MATCH   - otherwise
  *
  * NOTE(review): the calloc() result below is used without a NULL check.
  */
 op_digest_cache_t *
 rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
                       pe_working_set_t * data_set)
 {
     op_digest_cache_t *data = NULL;
 
     GHashTable *local_rsc_params = NULL;
 
     action_t *action = NULL;
     char *key = NULL;
 
     int interval = 0;
     const char *op_id = ID(xml_op);
     const char *interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
     const char *digest_all;
     const char *digest_restart;
     const char *secure_list;
     const char *restart_list;
     const char *op_version;
 
     /* Reuse an earlier result for the same operation on this node */
     data = g_hash_table_lookup(node->details->digest_cache, op_id);
     if (data) {
         return data;
     }
 
     data = calloc(1, sizeof(op_digest_cache_t));
 
     /* Digests and parameter lists as recorded when the operation ran */
     digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
     digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
 
     secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE);
     restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART);
 
     op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
 
     /* key is freed in custom_action */
     interval = crm_parse_int(interval_s, "0");
     key = generate_op_key(rsc->id, task, interval);
     /* Temporary action, used only for its extra/meta tables; released with
      * pe_free_action() at the end of this function */
     action = custom_action(rsc, key, task, node, TRUE, FALSE, data_set);
     key = NULL;
 
     /* Assemble the full current parameter set from the node-specific
      * resource attributes, the action's extra parameters, the resource
      * parameters and the action's meta attributes */
     local_rsc_params = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                              g_hash_destroy_str, g_hash_destroy_str);
     get_rsc_attributes(local_rsc_params, rsc, node, data_set);
     data->params_all = create_xml_node(NULL, XML_TAG_PARAMS);
     g_hash_table_foreach(local_rsc_params, hash2field, data->params_all);
     g_hash_table_foreach(action->extra, hash2field, data->params_all);
     g_hash_table_foreach(rsc->parameters, hash2field, data->params_all);
     g_hash_table_foreach(action->meta, hash2metafield, data->params_all);
     filter_action_parameters(data->params_all, op_version);
 
     data->digest_all_calc = calculate_operation_digest(data->params_all, op_version);
 
     /* Secure digest: only for sanitized working sets, computed over the
      * parameters that are NOT named in the recorded secure list */
     if (secure_list && is_set(data_set->flags, pe_flag_sanitized)) {
         data->params_secure = copy_xml(data->params_all);
 
         /* Always true here; the enclosing condition already requires it */
         if (secure_list) {
             filter_parameters(data->params_secure, secure_list, FALSE);
         }
         data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version);
     }
 
     /* Restart digest: computed over only the parameters named in the
      * recorded restart list (all parameters if no list was recorded) */
     if (digest_restart) {
         data->params_restart = copy_xml(data->params_all);
 
         if (restart_list) {
             filter_parameters(data->params_restart, restart_list, TRUE);
         }
         data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version);
     }
 
     /* A restart-digest mismatch takes precedence over the full digest */
     data->rc = RSC_DIGEST_MATCH;
     if (digest_restart && strcmp(data->digest_restart_calc, digest_restart) != 0) {
         data->rc = RSC_DIGEST_RESTART;
 
     } else if (digest_all == NULL) {
         /* it is unknown what the previous op digest was */
         data->rc = RSC_DIGEST_UNKNOWN;
 
     } else if (strcmp(digest_all, data->digest_all_calc) != 0) {
         data->rc = RSC_DIGEST_ALL;
     }
 
     /* The cache takes its own copy of the ID as key and keeps data */
     g_hash_table_insert(node->details->digest_cache, strdup(op_id), data);
     g_hash_table_destroy(local_rsc_params);
     pe_free_action(action);
 
     return data;
 }
 
 /* Name to show for rsc: the ID from its XML when the pe_rsc_unique flag is
  * not set, otherwise rsc->id.
  */
 const char *rsc_printable_id(resource_t *rsc)
 {
     return is_not_set(rsc->flags, pe_rsc_unique) ? ID(rsc->xml) : rsc->id;
 }
 
 /* TRUE when rsc is flagged as a remote node and the node of the same name
  * in data_set->nodes satisfies is_baremetal_remote_node().
  *
  * FALSE for a NULL rsc, a resource that is not a remote node, or when no
  * node with the resource's ID is found.
  */
 gboolean
 is_rsc_baremetal_remote_node(resource_t *rsc, pe_working_set_t * data_set)
 {
     node_t *remote = NULL;
 
     if (rsc == NULL || rsc->is_remote_node == FALSE) {
         return FALSE;
     }
 
     remote = pe_find_node(data_set->nodes, rsc->id);
 
     return (remote != NULL) ? is_baremetal_remote_node(remote) : FALSE;
 }
 
 /* TRUE for a remote node that either has no remote_rsc recorded or whose
  * remote_rsc has no container.
  */
 gboolean
 is_baremetal_remote_node(node_t *node)
 {
     if (!is_remote_node(node)) {
         return FALSE;
     }
 
     if (node->details->remote_rsc == NULL) {
         return TRUE;
     }
 
     return (node->details->remote_rsc->container == NULL) ? TRUE : FALSE;
 }
 
 /* TRUE for a remote node whose remote_rsc is set and has a container. */
 gboolean
 is_container_remote_node(node_t *node)
 {
     if (!is_remote_node(node)) {
         return FALSE;
     }
 
     if (node->details->remote_rsc == NULL) {
         return FALSE;
     }
 
     return node->details->remote_rsc->container ? TRUE : FALSE;
 }
 
 /* TRUE when node is non-NULL and its type is node_remote. */
 gboolean
 is_remote_node(node_t *node)
 {
     return (node != NULL && node->details->type == node_remote) ? TRUE : FALSE;
 }
 
 /* Return the first entry of rsc->fillers that is flagged as a remote node,
  * or NULL if there is none.
  *
  * Always NULL when the working set does not have the
  * pe_flag_have_remote_nodes flag set.
  */
 resource_t *
 rsc_contains_remote_node(pe_working_set_t * data_set, resource_t *rsc)
 {
     GListPtr item = NULL;
 
     if (!is_set(data_set->flags, pe_flag_have_remote_nodes)) {
         return NULL;
     }
 
     /* An empty (NULL) list simply skips the loop */
     for (item = rsc->fillers; item != NULL; item = item->next) {
         resource_t *inner = (resource_t *) item->data;
 
         if (inner->is_remote_node) {
             return inner;
         }
     }
 
     return NULL;
 }
 
 /* TRUE when the agent described by xml has class "ocf", provider
  * "pacemaker" and type "remote".
  */
 gboolean
 xml_contains_remote_node(xmlNode *xml)
 {
     const char *class = crm_element_value(xml, XML_AGENT_ATTR_CLASS);
     const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER);
     const char *agent = crm_element_value(xml, XML_ATTR_TYPE);
 
     if (!safe_str_eq(agent, "remote")) {
         return FALSE;
     }
     if (!safe_str_eq(provider, "pacemaker")) {
         return FALSE;
     }
 
     return safe_str_eq(class, "ocf") ? TRUE : FALSE;
 }
 
 /* Clear flag on rsc and then, depth first, on every resource reachable
  * through the children lists.
  */
 void
 clear_bit_recursive(resource_t * rsc, unsigned long long flag)
 {
     GListPtr item = NULL;
 
     clear_bit(rsc->flags, flag);
 
     item = rsc->children;
     while (item != NULL) {
         clear_bit_recursive((resource_t *) item->data, flag);
         item = item->next;
     }
 }
 
 /* Set flag on rsc and then, depth first, on every resource reachable
  * through the children lists.
  */
 void
 set_bit_recursive(resource_t * rsc, unsigned long long flag)
 {
     GListPtr item = NULL;
 
     set_bit(rsc->flags, flag);
 
     item = rsc->children;
     while (item != NULL) {
         set_bit_recursive((resource_t *) item->data, flag);
         item = item->next;
     }
 }
 
 /* Return the fencing action for node, creating it if it is not found in
  * data_set->singletons.
  *
  * op       - fencing operation to request; NULL selects
  *            data_set->stonith_action
  * optional - passed to custom_action() for a new action; when FALSE the
  *            pe_action_optional bit is cleared on the action that is
  *            returned, whether it was new or already existed
  *
  * The action key has the form "<CRM_OP_FENCE>-<uname>-<op>".  A new action
  * records the target's name and id and the operation in its meta table.
  */
 action_t *
 pe_fence_op(node_t * node, const char *op, bool optional, pe_working_set_t * data_set)
 {
     const char *fence_action = (op != NULL) ? op : data_set->stonith_action;
     char *op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, fence_action);
     action_t *fence = NULL;
 
     if (data_set->singletons != NULL) {
         fence = g_hash_table_lookup(data_set->singletons, op_key);
     }
 
     if (fence != NULL) {
         /* Existing action: the key was only needed for the lookup */
         free(op_key);
 
     } else {
         /* The key is handed over to custom_action(), so it is not freed here */
         fence = custom_action(NULL, op_key, CRM_OP_FENCE, node, optional, TRUE, data_set);
 
         add_hash_param(fence->meta, XML_LRM_ATTR_TARGET, node->details->uname);
         add_hash_param(fence->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
         add_hash_param(fence->meta, "stonith_action", fence_action);
     }
 
     if (!optional) {
         crm_trace("%s is no longer optional", fence->uuid);
         pe_clear_action_bit(fence, pe_action_optional);
     }
 
     return fence;
 }
 
 /* Schedule an "on" fencing action (unfencing) where required.
  *
  * Nothing happens unless the working set has pe_flag_enable_unfencing set,
  * or when rsc is given but is not a fence device.
  *
  * If node is online, clean and not shutting down, a mandatory "on" fencing
  * action is obtained for it and, when dependency is given, ordered before
  * dependency with pe_order_optional.  Otherwise, if rsc is given, the same
  * is attempted for every online, clean, not-shutting-down node in
  * rsc->allowed_nodes.
  */
 void
 trigger_unfencing(
     resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set)
 {
     GHashTableIter iter;
     node_t *candidate = NULL;
 
     if (is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
         /* No resources require it */
         return;
     }
 
     if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) {
         /* Wasn't a stonith device */
         return;
     }
 
     if (node != NULL
         && node->details->online
         && !node->details->unclean
         && !node->details->shutdown) {
         action_t *unfence = pe_fence_op(node, "on", FALSE, data_set);
 
         crm_notice("Unfencing %s: %s", node->details->uname, reason);
         if (dependency != NULL) {
             order_actions(unfence, dependency, pe_order_optional);
         }
         return;
     }
 
     if (rsc == NULL) {
         return;
     }
 
     /* No usable node was given: consider every node the resource may run on */
     g_hash_table_iter_init(&iter, rsc->allowed_nodes);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&candidate)) {
         if (candidate->details->online
             && !candidate->details->unclean
             && !candidate->details->shutdown) {
             trigger_unfencing(rsc, candidate, reason, dependency, data_set);
         }
     }
 }
 
 /* Make sure the tag called tag_name in tags lists obj_ref.
  *
  * The tag is created and inserted into tags if it does not exist yet, and
  * obj_ref is appended to its refs list unless an equal reference is already
  * there.
  *
  * Returns FALSE when an argument is NULL/missing or the tag cannot be
  * allocated, TRUE otherwise (including when the reference already existed).
  */
 gboolean
 add_tag_ref(GHashTable * tags, const char * tag_name,  const char * obj_ref)
 {
     tag_t *entry = NULL;
     GListPtr item = NULL;
 
     CRM_CHECK(tags && tag_name && obj_ref, return FALSE);
 
     entry = g_hash_table_lookup(tags, tag_name);
     if (entry == NULL) {
         entry = calloc(1, sizeof(tag_t));
         if (entry == NULL) {
             return FALSE;
         }
         entry->id = strdup(tag_name);
         entry->refs = NULL;
         g_hash_table_insert(tags, strdup(tag_name), entry);
     }
 
     for (item = entry->refs; item != NULL; item = item->next) {
         if (crm_str_eq((const char *) item->data, obj_ref, TRUE)) {
             /* Already referenced: nothing to add */
             return TRUE;
         }
     }
 
     entry->refs = g_list_append(entry->refs, strdup(obj_ref));
     crm_trace("Added: tag=%s ref=%s", entry->id, obj_ref);
 
     return TRUE;
 }
diff --git a/mcp/pacemaker.sysconfig b/mcp/pacemaker.sysconfig
index 4b1fdff12a..28b8588ab6 100644
--- a/mcp/pacemaker.sysconfig
+++ b/mcp/pacemaker.sysconfig
@@ -1,96 +1,96 @@
 # For non-systemd based systems, prefix export to each enabled line
 
 # Turn on special handling for CMAN clusters in the init script
 # Without this, fenced (and by inference, cman) cannot reliably be made to shut down
 # PCMK_STACK=cman
 
 #==#==# Variables that control logging
 
 # Enable debug logging globally or per-subsystem
 # Multiple subsystems may be listed, separated by commas
 # eg. PCMK_debug=crmd,pengine
 # PCMK_debug=yes|no|crmd|pengine|cib|stonith-ng|attrd|pacemakerd
 
 # Send INFO (and higher) messages to the named log file
 # Additional messages may also appear here depending on any configured debug and trace settings
 # By default Pacemaker will inherit the logfile specified in corosync.conf
 # PCMK_logfile=/var/log/pacemaker.log
 
 # Specify an alternate syslog target for NOTICE (and higher) messages
 # Use 'none' to disable - not recommended
 # The default value is 'daemon'
 # PCMK_logfacility=none|daemon|user|local0|local1|local2|local3|local4|local5|local6|local7
 
 # Send all messages up-to-and-including the configured priority to syslog
 # A value of 'info' will be far too verbose for most installations and 'debug' is almost certain to send you blind
 # The default value is 'notice'
 # PCMK_logpriority=emerg|alert|crit|error|warning|notice|info|debug
 
 # Log all messages from a comma-separated list of functions
 # PCMK_trace_functions=function1,function2,function3
 
 # Log all messages from a comma-separated list of files (no path)
 # Supports wildcards eg. PCMK_trace_files=prefix*.c
 # PCMK_trace_files=file.c,other.h
 
 # Log all messages matching comma-separated list of formats
 # PCMK_trace_formats="Sent delete %d"
 
 # Log all messages from a comma-separated list of tags
 # PCMK_trace_tags=tag1,tag2
 
 # Dump the blackbox whenever the message at function and line is printed
 # eg. PCMK_trace_blackbox=te_graph_trigger:223,unpack_clone:81
 # PCMK_trace_blackbox=fn:line,fn2:line2,...
 
 # Enable blackbox logging globally or per-subsystem
 # The blackbox contains a rolling buffer of all logs (including info+debug+trace)
 # and is written after a crash, assertion failure and/or when SIGTRAP is received
 #
 # The blackbox recorder can also be enabled for Pacemaker daemons at runtime by 
 # sending SIGUSR1 (or SIGTRAP), and disabled by sending SIGUSR2
 #
 # Multiple subsystems may be listed, separated by commas
 # eg. PCMK_blackbox=crmd,pengine
 # PCMK_blackbox=yes|no|crmd|pengine|cib|stonith-ng|attrd|pacemakerd
 
 #==#==# Advanced use only
 
 # Enable this for compatibility with older corosync-based clusters (prior to 2.0),
 # which also used the node's uname as its uuid
 # PCMK_uname_is_uuid=no
 
 # Specify an alternate location for RNG schemas and XSL transforms
 # Mostly only useful for developer testing
 # PCMK_schema_directory=/some/path
 
 # Enable this to reboot this machine whenever a process (subsystem) fails
 # PCMK_fail_fast=no
 
 #==#==# Pacemaker Remote
 # Use a custom directory for finding the authkey.
 # PCMK_authkey_location=/etc/pacemaker/authkey
 #
 # Specify a custom port for Pacemaker Remote connections
 # PCMK_remote_port=3121
 
 #==#==# IPC
 
 # Force use of a particular class of IPC connection
 # PCMK_ipc_type=shared-mem|socket|posix|sysv
 
 # Specify an IPC buffer size in bytes
-# Useful when connecting to really big clusters that exceed the default 20k buffer
-# PCMK_ipc_buffer=20480
+# Useful when connecting to really big clusters that exceed the default 128k buffer
+# PCMK_ipc_buffer=131072
 
 #==#==# Profiling and memory leak testing
 
 # Variables for running child daemons under valgrind and/or checking for memory problems
 # G_SLICE=always-malloc
 # MALLOC_PERTURB_=221 # or 0
 # MALLOC_CHECK_=3     # or 0,1,2
 # PCMK_valgrind_enabled=yes
 # PCMK_valgrind_enabled=cib,crmd
 # PCMK_callgrind_enabled=yes
 # PCMK_callgrind_enabled=cib,crmd
 # VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all"
diff --git a/pacemaker.spec.in b/pacemaker.spec.in
index b372b61eb6..578795fde5 100644
--- a/pacemaker.spec.in
+++ b/pacemaker.spec.in
@@ -1,591 +1,592 @@
 %global gname haclient
 %global uname hacluster
 %global pcmk_docdir %{_docdir}/%{name}
 
 %global specversion 1
 %global commit HEAD
 %global shortcommit %(c=%{commit}; echo ${c:0:7})
 %global github_owner ClusterLabs
 
 # Turn off the auto compilation of python files not in the site-packages directory
 # Needed so that the -devel package is multilib compliant
 %global __os_install_post %(echo '%{__os_install_post}' | sed -e 's!/usr/lib[^[:space:]]*/brp-python-bytecompile[[:space:]].*$!!g')
 
 %global rawhide  %(test ! -e /etc/yum.repos.d/fedora-rawhide.repo; echo $?)
 %global cs_version %(pkg-config corosync --modversion  | awk -F . '{print $1}')
 # It has to be eventually decided whether to use Python2 or Python3
 %global py_site    %{?python_sitearch}%{!?python_sitearch:%(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")}
 
 # https://fedoraproject.org/wiki/EPEL:Packaging?rd=Packaging:EPEL#The_.25license_tag
 %{!?_licensedir:%global license %doc}
 
 # Conditionals
 # Invoke "rpmbuild --without <feature>" or "rpmbuild --with <feature>"
 # to disable or enable specific features
 
 # Legacy stonithd fencing agents
 %bcond_with stonithd
 
 # Build with/without support for profiling tools
 %bcond_with profiling
 
 # Build with/without support for performing coverage analysis
 %bcond_with coverage
 
 # We generate docs using Publican, Asciidoc and Inkscape, but they're not available everywhere
 %bcond_without doc
 
 # Use a different versioning scheme
 %bcond_with pre_release
 
 # Ship an Upstart job file
 %bcond_with upstart_job
 
 # Turn off cman support on platforms that normally ship with it
 %bcond_without cman
 
 %if %{with profiling}
 # This disables -debuginfo package creation and also the stripping of binaries/libraries
 # Useful if you want sane profiling data
 %global debug_package %{nil}
 %endif
 
 %if %{with pre_release}
 %global pcmk_release 0.%{specversion}.%{shortcommit}.git
 %else
 %global pcmk_release %{specversion}
 %endif
 
 Name:          pacemaker
 Summary:       Scalable High-Availability cluster resource manager
 Version:       1.1.14
 Release:       %{pcmk_release}%{?dist}
 License:       GPLv2+ and LGPLv2+
 Url:           http://www.clusterlabs.org
 Group:         System Environment/Daemons
 
 Source0:       https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz
 BuildRoot:     %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 AutoReqProv:   on
 Requires:      python
 Requires:      resource-agents
 Requires:      %{name}-libs = %{version}-%{release}
 Requires:      %{name}-cluster-libs = %{version}-%{release}
 Requires:      %{name}-cli = %{version}-%{release}
 
 %if %{defined systemd_requires}
 %systemd_requires
 %endif
 
 %if 0%{?rhel} > 0
 ExclusiveArch: i386 i686 x86_64
 %endif
 
 
 # Required for core functionality (python-devel depends on python)
 BuildRequires: automake autoconf libtool pkgconfig libtool-ltdl-devel
 BuildRequires: pkgconfig(glib-2.0) libxml2-devel libxslt-devel libuuid-devel
 BuildRequires: python-devel bzip2-devel pam-devel
 
 # Required for agent_config.h which specifies the correct scratch directory
 BuildRequires: resource-agents
 
 # We need reasonably recent versions of libqb
 BuildRequires: libqb-devel > 0.11.0
 Requires:      libqb > 0.11.0
 
 # Enables optional functionality
 BuildRequires: ncurses-devel openssl-devel libselinux-devel docbook-style-xsl
 BuildRequires: bison byacc flex help2man gnutls-devel pkgconfig(dbus-1)
 
 %if %{defined _unitdir}
 BuildRequires: systemd-devel
 %endif
 
 %if %{with cman}
 
 %if 0%{?fedora} > 0
 %if 0%{?fedora} < 17
 BuildRequires: clusterlib-devel
 %endif
 %endif
 
 %if 0%{?rhel} > 0
 %if 0%{?rhel} < 7
 BuildRequires: clusterlib-devel
 %endif
 %endif
 
 %endif
 
 Requires:      corosync
 BuildRequires: corosynclib-devel
 
 %if %{with stonithd}
 BuildRequires: cluster-glue-libs-devel
 %endif
 
 %if !%{rawhide}
 # More often than not, inkscape is busted on rawhide, don't even bother
 
 %if %{with doc}
 %ifarch %{ix86} x86_64
 BuildRequires: publican inkscape asciidoc
 %endif
 %endif
 
 %endif
 
 %description
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 It supports more than 16 node clusters with significant capabilities
 for managing resources and dependencies.
 
 It will run scripts at initialization, when machines go up or down,
 when related resources fail and can be configured to periodically check
 resource health.
 
 Available rpmbuild rebuild options:
   --with(out) : cman stonithd doc coverage profiling pre_release upstart_job
 
 %package cli
 License:       GPLv2+ and LGPLv2+
 Summary:       Command line tools for controlling Pacemaker clusters
 Group:         System Environment/Daemons
 Requires:      %{name}-libs = %{version}-%{release}
 Requires:      perl-TimeDate
 
 %description cli
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-cli package contains command line tools that can be used
 to query and control the cluster from machines that may, or may not,
 be part of the cluster.
 
 %package -n %{name}-libs
 License:       GPLv2+ and LGPLv2+
 Summary:       Core Pacemaker libraries
 Group:         System Environment/Daemons
 
 %description -n %{name}-libs
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-libs package contains shared libraries needed for cluster
 nodes and those just running the CLI tools.
 
 %package -n %{name}-cluster-libs
 License:       GPLv2+ and LGPLv2+
 Summary:       Cluster Libraries used by Pacemaker
 Group:         System Environment/Daemons
 Requires:      %{name}-libs = %{version}-%{release}
 
 %description -n %{name}-cluster-libs
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-cluster-libs package contains cluster-aware shared
 libraries needed for nodes that will form part of the cluster nodes.
 
 %package remote
 License:       GPLv2+ and LGPLv2+
 Summary:       Pacemaker remote daemon for non-cluster nodes
 Group:         System Environment/Daemons
 Requires:      %{name}-libs = %{version}-%{release}
 Requires:      %{name}-cli = %{version}-%{release}
 Requires:      resource-agents
 %if %{defined systemd_requires}
 %systemd_requires
 %endif
 
 %description remote
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-remote package contains the Pacemaker Remote daemon
 which is capable of extending pacemaker functionality to remote
 nodes not running the full corosync/cluster stack.
 
 %package -n %{name}-libs-devel
 License:       GPLv2+ and LGPLv2+
 Summary:       Pacemaker development package
 Group:         Development/Libraries
 Requires:      %{name}-cts = %{version}-%{release}
 Requires:      %{name}-libs = %{version}-%{release}
 Requires:      %{name}-cluster-libs = %{version}-%{release}
 Requires:      libtool-ltdl-devel libqb-devel libuuid-devel
 Requires:      libxml2-devel libxslt-devel bzip2-devel glib2-devel
 Requires:      corosynclib-devel
 
 %description -n %{name}-libs-devel
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 The %{name}-libs-devel package contains headers and shared libraries
 for developing tools for Pacemaker.
 
 %package       cts
 License:       GPLv2+ and LGPLv2+
 Summary:       Test framework for cluster-related technologies like Pacemaker
 Group:         System Environment/Daemons
 Requires:      python
 Requires:      %{name}-libs = %{version}-%{release}
 %if %{defined systemd_requires}
 Requires:      systemd-python
 %endif
 
 %description   cts
 Test framework for cluster-related technologies like Pacemaker
 
 %package       doc
 License:       GPLv2+ and LGPLv2+
 Summary:       Documentation for Pacemaker
 Group:         Documentation
 
 %description   doc
 Documentation for Pacemaker.
 
 Pacemaker is an advanced, scalable High-Availability cluster resource
 manager for Corosync, CMAN and/or Linux-HA.
 
 %prep
 %setup -q -n %{name}-%{commit}
 
 # Force the local time
 #
 # 'git' sets the file date to the date of the last commit.
 # This can result in files having been created in the future
 # when building on machines in timezones 'behind' the one the
 # commit occurred in - which seriously confuses 'make'
 find . -exec touch \{\} \;
 
 %build
 ./autogen.sh
 
 # RHEL <= 5 does not support --docdir
 docdir=%{pcmk_docdir} %{configure}                 \
         %{?with_profiling:   --with-profiling}     \
         %{?with_coverage:    --with-coverage}      \
         %{!?with_cman:       --without-cman}       \
         --without-heartbeat			   \
         --with-initdir=%{_initrddir}               \
         --localstatedir=%{_var}                    \
         --with-version=%{version}-%{release}
 
 %if 0%{?suse_version} >= 1200
 # Fedora handles rpath removal automagically
 sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
 sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
 %endif
 
 make %{_smp_mflags} V=1 docdir=%{pcmk_docdir} all
 
 %install
 rm -rf %{buildroot}
 make DESTDIR=%{buildroot} docdir=%{pcmk_docdir} V=1 install
 
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig
 mkdir -p ${RPM_BUILD_ROOT}%{_var}/lib/pacemaker/cores
 install -m 644 mcp/pacemaker.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/pacemaker
 install -m 644 tools/crm_mon.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/crm_mon
 
 %if %{with upstart_job}
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/init
 install -m 644 mcp/pacemaker.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.conf
 install -m 644 mcp/pacemaker.combined.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.combined.conf
 install -m 644 tools/crm_mon.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/crm_mon.conf
 %endif
 
 # Scripts that should be executable
 chmod a+x %{buildroot}/%{_datadir}/pacemaker/tests/cts/CTSlab.py
 
 # These are not actually scripts
 find %{buildroot} -name '*.xml' -type f -print0 | xargs -0 chmod a-x
 find %{buildroot} -name '*.xsl' -type f -print0 | xargs -0 chmod a-x
 find %{buildroot} -name '*.rng' -type f -print0 | xargs -0 chmod a-x
 find %{buildroot} -name '*.dtd' -type f -print0 | xargs -0 chmod a-x
 
 # Don't package static libs
 find %{buildroot} -name '*.a' -type f -print0 | xargs -0 rm -f
 find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f
 
 # Do not package these either
 rm -f %{buildroot}/%{_libdir}/service_crm.so
 
 # Don't ship init scripts for systemd based platforms
 %if %{defined _unitdir}
 rm -f %{buildroot}/%{_initrddir}/pacemaker
 rm -f %{buildroot}/%{_initrddir}/pacemaker_remote
 %endif
 
 %if %{with coverage}
 GCOV_BASE="%{buildroot}/%{_var}/lib/pacemaker/gcov"
 mkdir -p "$GCOV_BASE"
 # Copy every gcov notes file (*.gcno) found under the current directory into
 # GCOV_BASE, recreating its relative directory there.  All expansions are
 # quoted and the path is read with "IFS= read -r" so that names containing
 # whitespace or backslashes are copied intact.
 find . -name '*.gcno' -type f | while IFS= read -r F ; do
         D=$(dirname "$F")
         mkdir -p "${GCOV_BASE}/$D"
         cp "$F" "${GCOV_BASE}/$D"
 done
 %endif
 
 %clean
 rm -rf %{buildroot}
 
 %if %{defined _unitdir}
 
 %post
 %systemd_post pacemaker.service
 
 %preun
 %systemd_preun pacemaker.service
 
 %postun
 %systemd_postun_with_restart pacemaker.service
 
 %post remote
 %systemd_post pacemaker_remote.service
 
 %preun remote
 %systemd_preun pacemaker_remote.service
 
 %postun remote
 %systemd_postun_with_restart pacemaker_remote.service
 
 %post cli
 %systemd_post crm_mon.service
 
 %preun cli
 %systemd_preun crm_mon.service
 
 %postun cli
 %systemd_postun_with_restart crm_mon.service
 
 %else
 
 %post
 /sbin/chkconfig --add pacemaker || :
 
 %preun
 /sbin/service pacemaker stop || :
 if [ $1 -eq 0 ]; then
     # Package removal, not upgrade
     /sbin/chkconfig --del pacemaker || :
 fi
 
 %post remote
 /sbin/chkconfig --add pacemaker_remote || :
 
 %preun remote
 /sbin/service pacemaker_remote stop &>/dev/null || :
 if [ $1 -eq 0 ]; then
     # Package removal, not upgrade
     /sbin/chkconfig --del pacemaker_remote || :
 fi
 
 %endif
 
 %pre -n %{name}-libs
 
 getent group %{gname} >/dev/null || groupadd -r %{gname} -g 189
 getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u 189 -s /sbin/nologin -c "cluster user" %{uname}
 exit 0
 
 %post -n %{name}-libs -p /sbin/ldconfig
 
 %postun -n %{name}-libs -p /sbin/ldconfig
 
 %post -n %{name}-cluster-libs -p /sbin/ldconfig
 
 %postun -n %{name}-cluster-libs -p /sbin/ldconfig
 
 %files
 ###########################################################
 %defattr(-,root,root)
 
 %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker
 %{_sbindir}/pacemakerd
 
 %if %{defined _unitdir}
 %{_unitdir}/pacemaker.service
 %else
 %{_initrddir}/pacemaker
 %endif
 
 %exclude %{_libexecdir}/pacemaker/lrmd_test
 %exclude %{_sbindir}/pacemaker_remoted
 %{_libexecdir}/pacemaker/*
 
 %{_sbindir}/crm_attribute
 %{_sbindir}/crm_master
 %{_sbindir}/crm_node
 %{_sbindir}/fence_legacy
 %{_sbindir}/fence_pcmk
 %{_sbindir}/stonith_admin
 
 %doc %{_mandir}/man7/crmd.*
 %doc %{_mandir}/man7/pengine.*
 %doc %{_mandir}/man7/stonithd.*
 %doc %{_mandir}/man7/ocf_pacemaker_controld.*
 %doc %{_mandir}/man7/ocf_pacemaker_o2cb.*
 %doc %{_mandir}/man7/ocf_pacemaker_remote.*
 %doc %{_mandir}/man8/crm_attribute.*
 %doc %{_mandir}/man8/crm_node.*
 %doc %{_mandir}/man8/crm_master.*
 %doc %{_mandir}/man8/fence_pcmk.*
 %doc %{_mandir}/man8/fence_legacy.*
 %doc %{_mandir}/man8/pacemakerd.*
 %doc %{_mandir}/man8/stonith_admin.*
 
 %license COPYING
 %doc AUTHORS
 %doc ChangeLog
 
-%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker
 %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cib
-%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cores
 %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/pengine
-%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/blackbox
 /usr/lib/ocf/resource.d/pacemaker/controld
 /usr/lib/ocf/resource.d/pacemaker/o2cb
 /usr/lib/ocf/resource.d/pacemaker/remote
 /usr/lib/ocf/resource.d/.isolation
 
 %if "%{?cs_version}" != "UNKNOWN"
 %if 0%{?cs_version} < 2
 %{_libexecdir}/lcrso/pacemaker.lcrso
 %endif
 %endif
 
 %if %{with upstart_job}
 %config(noreplace) %{_sysconfdir}/init/pacemaker.conf
 %config(noreplace) %{_sysconfdir}/init/pacemaker.combined.conf
 %endif
 
 %files cli
 %defattr(-,root,root)
 
 %config(noreplace) %{_sysconfdir}/logrotate.d/pacemaker
 %config(noreplace) %{_sysconfdir}/sysconfig/crm_mon
 
 %if %{defined _unitdir}
 %{_unitdir}/crm_mon.service
 %endif
 
 %if %{with upstart_job}
 %config(noreplace) %{_sysconfdir}/init/crm_mon.conf
 %endif
 
 %{_sbindir}/attrd_updater
 %{_sbindir}/cibadmin
 %{_sbindir}/crm_diff
 %{_sbindir}/crm_error
 %{_sbindir}/crm_failcount
 %{_sbindir}/crm_mon
 %{_sbindir}/crm_resource
 %{_sbindir}/crm_standby
 %{_sbindir}/crm_verify
 %{_sbindir}/crmadmin
 %{_sbindir}/iso8601
 %{_sbindir}/crm_shadow
 %{_sbindir}/crm_simulate
 %{_sbindir}/crm_report
 %{_sbindir}/crm_ticket
 %exclude %{_datadir}/pacemaker/tests
 %{_datadir}/pacemaker
 %{_datadir}/snmp/mibs/PCMK-MIB.txt
 
 %exclude /usr/lib/ocf/resource.d/pacemaker/controld
 %exclude /usr/lib/ocf/resource.d/pacemaker/o2cb
 %exclude /usr/lib/ocf/resource.d/pacemaker/remote
 
 %dir /usr/lib/ocf
 %dir /usr/lib/ocf/resource.d
 /usr/lib/ocf/resource.d/pacemaker
 
 %doc %{_mandir}/man7/*
 %exclude %{_mandir}/man7/crmd.*
 %exclude %{_mandir}/man7/pengine.*
 %exclude %{_mandir}/man7/stonithd.*
 %exclude %{_mandir}/man7/ocf_pacemaker_controld.*
 %exclude %{_mandir}/man7/ocf_pacemaker_o2cb.*
 %exclude %{_mandir}/man7/ocf_pacemaker_remote.*
 %doc %{_mandir}/man8/*
 %exclude %{_mandir}/man8/crm_attribute.*
 %exclude %{_mandir}/man8/crm_node.*
 %exclude %{_mandir}/man8/crm_master.*
 %exclude %{_mandir}/man8/fence_pcmk.*
 %exclude %{_mandir}/man8/fence_legacy.*
 %exclude %{_mandir}/man8/pacemakerd.*
 %exclude %{_mandir}/man8/pacemaker_remoted.*
 %exclude %{_mandir}/man8/stonith_admin.*
 
 %license COPYING
 %doc AUTHORS
 %doc ChangeLog
 
+%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker
+%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/blackbox
+%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cores
+
 %files -n %{name}-libs
 %defattr(-,root,root)
 
 %{_libdir}/libcib.so.*
 %{_libdir}/liblrmd.so.*
 %{_libdir}/libcrmservice.so.*
 %{_libdir}/libcrmcommon.so.*
 %{_libdir}/libpe_status.so.*
 %{_libdir}/libpe_rules.so.*
 %{_libdir}/libpengine.so.*
 %{_libdir}/libstonithd.so.*
 %{_libdir}/libtransitioner.so.*
 %license COPYING.LIB
 %doc AUTHORS
 
 %files -n %{name}-cluster-libs
 %defattr(-,root,root)
 %{_libdir}/libcrmcluster.so.*
 %license COPYING.LIB
 %doc AUTHORS
 
 %files remote
 %defattr(-,root,root)
 
 %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker
 %if %{defined _unitdir}
 %{_unitdir}/pacemaker_remote.service
 %else
 %{_initrddir}/pacemaker_remote
 %endif
 
 %{_sbindir}/pacemaker_remoted
 %{_mandir}/man8/pacemaker_remoted.*
 %license COPYING
 %doc AUTHORS
 
 %files doc
 %defattr(-,root,root)
 %doc %{pcmk_docdir}
 
 %files cts
 %defattr(-,root,root)
 %{py_site}/cts
 %{_datadir}/pacemaker/tests/cts
 %{_libexecdir}/pacemaker/lrmd_test
 %license COPYING
 %doc AUTHORS
 
 %files -n %{name}-libs-devel
 %defattr(-,root,root)
 %exclude %{_datadir}/pacemaker/tests/cts
 %{_datadir}/pacemaker/tests
 %{_includedir}/pacemaker
 %{_libdir}/*.so
 %if %{with coverage}
 %{_var}/lib/pacemaker/gcov
 %endif
 %{_libdir}/pkgconfig/*.pc
 %license COPYING.LIB
 %doc AUTHORS
 
 %changelog
 
diff --git a/pengine/clone.c b/pengine/clone.c
index 59ec5f22b2..7d2e07a9b7 100644
--- a/pengine/clone.c
+++ b/pengine/clone.c
@@ -1,1632 +1,1632 @@
 /* 
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  * 
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  * 
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <crm/msg_xml.h>
 #include <allocate.h>
 #include <utils.h>
 #include <allocate.h>
 
 #define VARIANT_CLONE 1
 #include <lib/pengine/variant.h>
 
 gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set);
 static void append_parent_colocation(resource_t * rsc, resource_t * child, gboolean all);
 
 /*
  * Two-argument comparison callback: orders two resources by strcmp() of
  * their ids.  Both pointers must be non-NULL (checked with CRM_ASSERT).
  */
 static gint
 sort_rsc_id(gconstpointer a, gconstpointer b)
 {
     const resource_t *resource1 = a;
     const resource_t *resource2 = b;
     gint order = 0;
 
     CRM_ASSERT(resource1 != NULL);
     CRM_ASSERT(resource2 != NULL);
 
     order = strcmp(resource1->id, resource2->id);
     return order;
 }
 
 /*
  * Look up 'node' (by node->details->id) in the allowed_nodes table of the
  * parent of 'rsc'.  Returns the parent's entry, or NULL when 'node' is NULL
  * or the lookup finds nothing.
  */
 static node_t *
 parent_node_instance(const resource_t * rsc, node_t * node)
 {
     if (node == NULL) {
         return NULL;
     }
     return pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id);
 }
 
 /*
  * Report whether 'rsc' or any resource reachable through its children
  * lists has the pe_rsc_failed flag set.
  */
 static gboolean
 did_fail(const resource_t * rsc)
 {
     GListPtr item = NULL;
 
     if (is_set(rsc->flags, pe_rsc_failed)) {
         return TRUE;
     }
 
     /* Recurse into each child; stop at the first failure found */
     item = rsc->children;
     while (item != NULL) {
         if (did_fail((resource_t *) item->data)) {
             return TRUE;
         }
         item = item->next;
     }
     return FALSE;
 }
 
 /*
  * Comparison callback used to order two clone instances for allocation.
  *
  * a, b      - the two instances (resource_t); both must be non-NULL
  * data_set  - not referenced anywhere in this function body
  *
  * Returns -1, 0 or 1 from the criteria below, or the strcmp() of the two
  * resource ids when every criterion ties.
  * NOTE(review): -1 is returned on the branches where resource1 is the
  * preferred instance, so presumably negative means "resource1 sorts first"
  * - confirm against the caller.
  */
 gint
 sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set)
 {
     int rc = 0;
     node_t *node1 = NULL;
     node_t *node2 = NULL;
 
     gboolean can1 = TRUE;
     gboolean can2 = TRUE;
 
     const resource_t *resource1 = (const resource_t *)a;
     const resource_t *resource2 = (const resource_t *)b;
 
     CRM_ASSERT(resource1 != NULL);
     CRM_ASSERT(resource2 != NULL);
 
     /* allocation order:
      *  - active instances
      *  - instances running on nodes with the least copies
      *  - active instances on nodes that can't support them or are to be fenced
      *  - failed instances
      *  - inactive instances
      */
 
     /* Both active: the instance whose running_on list is shorter gets -1 */
     if (resource1->running_on && resource2->running_on) {
         if (g_list_length(resource1->running_on) < g_list_length(resource2->running_on)) {
             crm_trace("%s < %s: running_on", resource1->id, resource2->id);
             return -1;
 
         } else if (g_list_length(resource1->running_on) > g_list_length(resource2->running_on)) {
             crm_trace("%s > %s: running_on", resource1->id, resource2->id);
             return 1;
         }
     }
 
     /* Take the first running_on entry of each instance as its current node */
     if (resource1->running_on) {
         node1 = resource1->running_on->data;
     }
     if (resource2->running_on) {
         node2 = resource2->running_on->data;
     }
 
     /* A current node that is missing from the instance's own allowed_nodes,
      * or has a negative weight there, is treated as unavailable: the node
      * pointer is cleared and the matching can flag is set to FALSE.
      */
     if (node1) {
         node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id);
 
         if (match == NULL || match->weight < 0) {
             crm_trace("%s: current location is unavailable", resource1->id);
             node1 = NULL;
             can1 = FALSE;
         }
     }
 
     if (node2) {
         node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id);
 
         if (match == NULL || match->weight < 0) {
             crm_trace("%s: current location is unavailable", resource2->id);
             node2 = NULL;
             can2 = FALSE;
         }
     }
 
     /* Only one of the two had its current location rejected above */
     if (can1 != can2) {
         if (can1) {
             crm_trace("%s < %s: availability of current location", resource1->id, resource2->id);
             return -1;
         }
         crm_trace("%s > %s: availability of current location", resource1->id, resource2->id);
         return 1;
     }
 
     /* Higher priority gets -1; the trace text describes the priority
      * relation, not the value returned
      */
     if (resource1->priority < resource2->priority) {
         crm_trace("%s < %s: priority", resource1->id, resource2->id);
         return 1;
 
     } else if (resource1->priority > resource2->priority) {
         crm_trace("%s > %s: priority", resource1->id, resource2->id);
         return -1;
     }
 
     /* Neither instance has a usable current node: report a tie */
     if (node1 == NULL && node2 == NULL) {
         crm_trace("%s == %s: not active", resource1->id, resource2->id);
         return 0;
     }
 
     /* Exactly one instance has a usable current node: that one gets -1 */
     if (node1 != node2) {
         if (node1 == NULL) {
             crm_trace("%s > %s: active", resource1->id, resource2->id);
             return 1;
         } else if (node2 == NULL) {
             crm_trace("%s < %s: active", resource1->id, resource2->id);
             return -1;
         }
     }
 
     /* can1/can2 are reused here for the result of can_run_resources() */
     can1 = can_run_resources(node1);
     can2 = can_run_resources(node2);
     if (can1 != can2) {
         if (can1) {
             crm_trace("%s < %s: can", resource1->id, resource2->id);
             return -1;
         }
         crm_trace("%s > %s: can", resource1->id, resource2->id);
         return 1;
     }
 
     /* From here on node1/node2 refer to the entries in the parent's
      * allowed_nodes table (NULL when the parent has no entry for the node)
      */
     node1 = parent_node_instance(resource1, node1);
     node2 = parent_node_instance(resource2, node2);
     if (node1 != NULL && node2 == NULL) {
         crm_trace("%s < %s: not allowed", resource1->id, resource2->id);
         return -1;
     } else if (node1 == NULL && node2 != NULL) {
         crm_trace("%s > %s: not allowed", resource1->id, resource2->id);
         return 1;
     }
 
     /* After the two branches above this only matches when both are NULL */
     if (node1 == NULL || node2 == NULL) {
         crm_trace("%s == %s: not allowed", resource1->id, resource2->id);
         return 0;
     }
 
     /* The instance whose parent node entry has the lower count gets -1 */
     if (node1->count < node2->count) {
         crm_trace("%s < %s: count", resource1->id, resource2->id);
         return -1;
 
     } else if (node1->count > node2->count) {
         crm_trace("%s > %s: count", resource1->id, resource2->id);
         return 1;
     }
 
     /* can1/can2 are reused again, now as "has failed" flags; a failed
      * instance gets 1
      */
     can1 = did_fail(resource1);
     can2 = did_fail(resource2);
     if (can1 != can2) {
         if (can1) {
             crm_trace("%s > %s: failed", resource1->id, resource2->id);
             return 1;
         }
         crm_trace("%s < %s: failed", resource1->id, resource2->id);
         return -1;
     }
 
     /* Final tie-breaker before the id comparison: build one node table per
      * instance, fold the parent's colocation constraints into it with
      * native_merge_weights(), and compare the resulting weights.
      */
     if (node1 && node2) {
         int lpc = 0;
         int max = 0;
         node_t *n = NULL;
         GListPtr gIter = NULL;
         GListPtr list1 = NULL;
         GListPtr list2 = NULL;
         /* No key destructor is registered; values are released with
          * g_hash_destroy_str when the tables are destroyed
          */
         GHashTable *hash1 =
             g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str);
         GHashTable *hash2 =
             g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str);
 
         /* Seed each table with a copy of the instance's first running_on node,
          * keyed by that copy's details->id
          */
         n = node_copy(resource1->running_on->data);
         g_hash_table_insert(hash1, (gpointer) n->details->id, n);
 
         n = node_copy(resource2->running_on->data);
         g_hash_table_insert(hash2, (gpointer) n->details->id, n);
 
         /* resource1: constraints from the parent's rsc_cons list (merged
          * with flags 0), then from its rsc_cons_lhs list (merged with
          * pe_weights_positive)
          */
         for (gIter = resource1->parent->rsc_cons; gIter; gIter = gIter->next) {
             rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
             crm_trace("Applying %s to %s", constraint->id, resource1->id);
 
             hash1 = native_merge_weights(constraint->rsc_rh, resource1->id, hash1,
                                          constraint->node_attribute,
                                          (float)constraint->score / INFINITY, 0);
         }
 
         for (gIter = resource1->parent->rsc_cons_lhs; gIter; gIter = gIter->next) {
             rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
             crm_trace("Applying %s to %s", constraint->id, resource1->id);
 
             hash1 = native_merge_weights(constraint->rsc_lh, resource1->id, hash1,
                                          constraint->node_attribute,
                                          (float)constraint->score / INFINITY, pe_weights_positive);
         }
 
         /* resource2: same two passes, accumulating into hash2 */
         for (gIter = resource2->parent->rsc_cons; gIter; gIter = gIter->next) {
             rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
             crm_trace("Applying %s to %s", constraint->id, resource2->id);
 
             hash2 = native_merge_weights(constraint->rsc_rh, resource2->id, hash2,
                                          constraint->node_attribute,
                                          (float)constraint->score / INFINITY, 0);
         }
 
         for (gIter = resource2->parent->rsc_cons_lhs; gIter; gIter = gIter->next) {
             rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
             crm_trace("Applying %s to %s", constraint->id, resource2->id);
 
             hash2 = native_merge_weights(constraint->rsc_lh, resource2->id, hash2,
                                          constraint->node_attribute,
                                          (float)constraint->score / INFINITY, pe_weights_positive);
         }
 
         /* Current location score */
         node1 = g_list_nth_data(resource1->running_on, 0);
         node1 = g_hash_table_lookup(hash1, node1->details->id);
 
         node2 = g_list_nth_data(resource2->running_on, 0);
         node2 = g_hash_table_lookup(hash2, node2->details->id);
 
         /* NOTE(review): when node1's weight is lower AND negative, rc is -1
          * (same as when it is higher) and the trace says '>' - confirm this
          * asymmetry is intended
          */
         if (node1->weight < node2->weight) {
             if (node1->weight < 0) {
                 crm_trace("%s > %s: current score", resource1->id, resource2->id);
                 rc = -1;
                 goto out;
 
             } else {
                 crm_trace("%s < %s: current score", resource1->id, resource2->id);
                 rc = 1;
                 goto out;
             }
 
         } else if (node1->weight > node2->weight) {
             crm_trace("%s > %s: current score", resource1->id, resource2->id);
             rc = -1;
             goto out;
         }
 
         /* All location scores */
         list1 = g_hash_table_get_values(hash1);
         list2 = g_hash_table_get_values(hash2);
 
         /* Sort each value list with sort_node_weight, passing the instance's
          * first running_on node as the user data
          */
         list1 =
             g_list_sort_with_data(list1, sort_node_weight,
                                   g_list_nth_data(resource1->running_on, 0));
         list2 =
             g_list_sort_with_data(list2, sort_node_weight,
                                   g_list_nth_data(resource2->running_on, 0));
         /* Walk both sorted lists in parallel up to the longer length */
         max = g_list_length(list1);
         if (max < g_list_length(list2)) {
             max = g_list_length(list2);
         }
 
         for (; lpc < max; lpc++) {
             node1 = g_list_nth_data(list1, lpc);
             node2 = g_list_nth_data(list2, lpc);
             /* The list that runs out first loses: rc is 1 when list1 is
              * exhausted, -1 when list2 is
              */
             if (node1 == NULL) {
                 crm_trace("%s < %s: colocated score NULL", resource1->id, resource2->id);
                 rc = 1;
                 break;
 
             } else if (node2 == NULL) {
                 crm_trace("%s > %s: colocated score NULL", resource1->id, resource2->id);
                 rc = -1;
                 break;
             }
 
             /* First position where the weights differ decides; the higher
              * weight gets -1
              */
             if (node1->weight < node2->weight) {
                 crm_trace("%s < %s: colocated score", resource1->id, resource2->id);
                 rc = 1;
                 break;
 
             } else if (node1->weight > node2->weight) {
                 crm_trace("%s > %s: colocated score", resource1->id, resource2->id);
                 rc = -1;
                 break;
             }
         }
 
         /* Order by reverse uname - same as sort_node_weight() does? */
         /* Cleanup point for every exit from this section; list1/list2 are
          * still NULL when reached via 'goto out' from the current-score check
          */
   out:
         g_hash_table_destroy(hash1);    /* Free mem */
         g_hash_table_destroy(hash2);    /* Free mem */
         g_list_free(list1);
         g_list_free(list2);
 
         if (rc != 0) {
             return rc;
         }
     }
 
     /* Everything tied: fall back to comparing ids.  The trace prints '>'
      * for any rc that is not negative, including 0.
      */
     rc = strcmp(resource1->id, resource2->id);
     crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id);
     return rc;
 }
 
 /*
  * Decide whether clone instance 'rsc' may be placed on 'node'.
  *
  * Returns the parent's allowed_nodes entry for 'node' when that entry has a
  * non-negative weight and its count is still below the parent's
  * clone_node_max.  In every other case NULL is returned and, if 'node' is
  * non-NULL, common_update_score() is called for it with -INFINITY.
  */
 static node_t *
 can_run_instance(resource_t * rsc, node_t * node)
 {
     node_t *parent_entry = NULL;
     clone_variant_data_t *clone_data = NULL;
 
     if (can_run_resources(node) == FALSE || is_set(rsc->flags, pe_rsc_orphan)) {
         goto bail;
     }
 
     parent_entry = parent_node_instance(rsc, node);
     get_clone_variant_data(clone_data, rsc->parent);
 
     if (parent_entry == NULL) {
         crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname);
         goto bail;
     }
 
     if (parent_entry->weight < 0) {
         /* Apply the parent's negative weight first; bail then adds -INFINITY */
         common_update_score(rsc, node->details->id, parent_entry->weight);
         pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.",
                      rsc->id, node->details->uname);
         goto bail;
     }
 
     if (parent_entry->count < clone_data->clone_node_max) {
         pe_rsc_trace(rsc, "%s can run on %s: %d", rsc->id, node->details->uname, parent_entry->count);
         return parent_entry;
     }
 
     pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)",
                  rsc->id, node->details->uname, parent_entry->count, clone_data->clone_node_max);
 
   bail:
     if (node != NULL) {
         common_update_score(rsc, node->details->id, -INFINITY);
     }
     return NULL;
 }
 
 /*
  * Allocate a single clone instance, optionally requiring a preferred node.
  *
  * rsc        - the instance to allocate (must be non-NULL)
  * prefer     - node the caller wants, or NULL for no preference
  * all_coloc  - forwarded to append_parent_colocation()
  * data_set   - forwarded to rsc->cmds->allocate()
  *
  * Returns the node chosen by rsc->cmds->allocate(), or NULL when nothing was
  * chosen, when 'prefer' is unavailable, or when a node other than 'prefer'
  * was picked (in which case allowed_nodes is restored from a copy taken
  * before allocation and the instance is deallocated again).
  */
 static node_t *
 color_instance(resource_t * rsc, node_t * prefer, gboolean all_coloc, pe_working_set_t * data_set)
 {
     node_t *chosen = NULL;
     GHashTable *saved_nodes = NULL;
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "Processing %s %d", rsc->id, all_coloc);
 
     if (is_not_set(rsc->flags, pe_rsc_provisional)) {
         /* Not provisional: hand back whatever rsc->fns->location() reports */
         return rsc->fns->location(rsc, NULL, FALSE);
     }
     if (is_set(rsc->flags, pe_rsc_allocating)) {
         pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
         return NULL;
     }
 
     /* Only include positive colocation preferences of dependent resources
      * if not every node will get a copy of the clone
      */
     append_parent_colocation(rsc->parent, rsc, all_coloc);
 
     if (prefer != NULL) {
         node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
 
         if (local_prefer == NULL || local_prefer->weight < 0) {
             pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id,
                          prefer->details->uname);
             return NULL;
         }
     }
 
     if (rsc->allowed_nodes != NULL) {
         GHashTableIter iter;
         node_t *try_node = NULL;
 
         /* Called for its side effect on the instance's node scores */
         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&try_node)) {
             can_run_instance(rsc, try_node);
         }
     }
 
     /* Keep a copy of allowed_nodes so a failed pre-allocation can be undone */
     saved_nodes = node_hash_dup(rsc->allowed_nodes);
     chosen = rsc->cmds->allocate(rsc, prefer, data_set);
     if (chosen != NULL) {
         node_t *parent_entry =
             pe_hash_table_lookup(rsc->parent->allowed_nodes, chosen->details->id);
 
         if (prefer != NULL && chosen->details != prefer->details) {
             crm_notice("Pre-allocation failed: got %s instead of %s",
                        chosen->details->uname, prefer->details->uname);
             g_hash_table_destroy(rsc->allowed_nodes);
             rsc->allowed_nodes = saved_nodes;
             native_deallocate(rsc);
             chosen = NULL;
             saved_nodes = NULL;    /* now owned by rsc; must not be destroyed below */
 
         } else if (parent_entry != NULL) {
             parent_entry->count++;
 
         } else if (is_set(rsc->flags, pe_rsc_managed)) {
             /* what to do? we can't enforce per-node limits in this case */
             crm_config_err("%s not found in %s (list=%d)",
                            chosen->details->id, rsc->parent->id,
                            g_hash_table_size(rsc->parent->allowed_nodes));
         }
     }
 
     if (saved_nodes != NULL) {
         g_hash_table_destroy(saved_nodes);
     }
     return chosen;
 }
 
 /*
  * Copy selected colocation constraints from a parent (rsc) onto one of its
  * children (child) by prepending them to the child's own constraint lists.
  *
  * From rsc->rsc_cons, a constraint is copied when 'all' is set, its score is
  * negative, or its score is exactly INFINITY.  From rsc->rsc_cons_lhs, a
  * constraint is copied when 'all' is set or its score is negative.
  */
 static void
 append_parent_colocation(resource_t * rsc, resource_t * child, gboolean all)
 {
     GListPtr item = NULL;
 
     for (item = rsc->rsc_cons; item != NULL; item = item->next) {
         rsc_colocation_t *colocation = (rsc_colocation_t *) item->data;
 
         /* Skip finite, non-negative scores unless everything was requested */
         if (!all && colocation->score >= 0 && colocation->score != INFINITY) {
             continue;
         }
         child->rsc_cons = g_list_prepend(child->rsc_cons, colocation);
     }
 
     for (item = rsc->rsc_cons_lhs; item != NULL; item = item->next) {
         rsc_colocation_t *colocation = (rsc_colocation_t *) item->data;
 
         /* Skip non-negative scores unless everything was requested */
         if (!all && colocation->score >= 0) {
             continue;
         }
         child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, colocation);
     }
 }
 
 /*
  * Allocate the instances (children) of a clone to nodes.
  *
  * Works in two passes: first it tries to keep instances on the node they are
  * currently running on ("pre-allocation"), then it allocates whatever
  * provisional instances remain, banning (-INFINITY) any instance beyond
  * clone_max.
  *
  * rsc      - the clone resource
  * prefer   - only forwarded to the allocate() call of resources listed in
  *            rsc->rsc_cons
  * data_set - working set
  *
  * Always returns NULL; the outcome is recorded on the children and in the
  * clone's flags (pe_rsc_provisional and pe_rsc_allocating are cleared).
  */
 node_t *
 clone_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set)
 {
     GHashTableIter iter;
     GListPtr nIter = NULL;
     GListPtr gIter = NULL;
     GListPtr nodes = NULL;
     node_t *node = NULL;
 
     int allocated = 0;          /* instances successfully placed so far */
     int loop_max = 0;           /* cap on pre-allocation attempts per node */
     int clone_max = 0;          /* countdown of nodes still eligible for pre-allocation */
     int available_nodes = 0;    /* allowed nodes for which can_run_resources() is true */
     clone_variant_data_t *clone_data = NULL;
 
     get_clone_variant_data(clone_data, rsc);
 
     if (is_not_set(rsc->flags, pe_rsc_provisional)) {
         /* Already allocated */
         return NULL;
 
     } else if (is_set(rsc->flags, pe_rsc_allocating)) {
         pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
         return NULL;
     }
 
     /* Mark the clone as in progress so re-entrant calls hit the loop check above */
     set_bit(rsc->flags, pe_rsc_allocating);
     pe_rsc_trace(rsc, "Processing %s", rsc->id);
 
     /* this information is used by sort_clone_instance() when deciding in which 
      * order to allocate clone instances
      */
     gIter = rsc->rsc_cons;
     for (; gIter != NULL; gIter = gIter->next) {
         rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
         pe_rsc_trace(rsc, "%s: Coloring %s first", rsc->id, constraint->rsc_rh->id);
         constraint->rsc_rh->cmds->allocate(constraint->rsc_rh, prefer, data_set);
     }
 
     /* Fold the weights of the resources in rsc_cons_lhs into this clone's
      * allowed nodes, scaled by constraint score / INFINITY
      */
     gIter = rsc->rsc_cons_lhs;
     for (; gIter != NULL; gIter = gIter->next) {
         rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
         rsc->allowed_nodes =
             constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes,
                                                     constraint->node_attribute,
                                                     (float)constraint->score / INFINITY,
                                                     (pe_weights_rollback | pe_weights_positive));
     }
 
     dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes);
 
     /* count now tracks the number of clones currently allocated */
     g_hash_table_iter_init(&iter, rsc->allowed_nodes);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
         node->count = 0;
         if (can_run_resources(node)) {
             available_nodes++;
         }
     }
 
     /* loop_max is clone_max / available_nodes (integer division), but at least 1 */
     clone_max = clone_data->clone_max;
     if(available_nodes) {
         loop_max = clone_data->clone_max / available_nodes;
     }
     if (loop_max < 1) {
         loop_max = 1;
     }
 
     rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set);
 
     /* Pre-allocate as many instances as we can to their current location
      * First pre-sort the list of nodes by their placement score
      */
     nodes = g_hash_table_get_values(rsc->allowed_nodes);
     nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL);
 
     for(nIter = nodes; nIter; nIter = nIter->next) {
         int lpc;
 
         node = nIter->data;
 
         /* Stop once clone_max usable nodes have been considered */
         if(clone_max <= 0) {
             break;
         }
 
         if (can_run_resources(node) == FALSE || node->weight < 0) {
             pe_rsc_trace(rsc, "Not Pre-allocatiing %s", node->details->uname);
             continue;
         }
 
         clone_max--;
         pe_rsc_trace(rsc, "Pre-allocating %s (%d remaining)", node->details->uname, clone_max);
         /* Each pass places at most one instance on this node; passes are
          * bounded by the overall limit, the per-node limit and loop_max
          */
         for (lpc = 0;
              allocated < clone_data->clone_max
              && node->count < clone_data->clone_node_max
              && lpc < clone_data->clone_node_max && lpc < loop_max; lpc++) {
             for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
                 resource_t *child = (resource_t *) gIter->data;
 
                 /* Candidates: still-provisional, non-failed instances that are active somewhere */
                 if (child->running_on && is_set(child->flags, pe_rsc_provisional)
                     && is_not_set(child->flags, pe_rsc_failed)) {
                     /* Only the first entry of running_on is considered */
                     node_t *child_node = child->running_on->data;
 
                     if (child_node->details == node->details
                         && color_instance(child, node, clone_data->clone_max < available_nodes,
                                           data_set)) {
                         pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id,
                                      node->details->uname);
                         allocated++;
                         break;
                     }
                 }
             }
         }
     }
 
     pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, clone_data->clone_max);
     /* Frees only the list cells; the node objects are not freed here */
     g_list_free(nodes);
 
     /* Second pass: place (or ban) every instance that is still provisional */
     for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
         resource_t *child = (resource_t *) gIter->data;
 
         if (g_list_length(child->running_on) > 0) {
             node_t *child_node = child->running_on->data;
             node_t *local_node = parent_node_instance(child, child->running_on->data);
 
             /* Diagnostic only: the instance is still processed below */
             if (local_node == NULL) {
                 crm_err("%s is running on %s which isn't allowed",
                         child->id, child_node->details->uname);
             }
         }
 
         if (is_not_set(child->flags, pe_rsc_provisional)) {
             /* Already handled during pre-allocation: nothing to do */
         } else if (allocated >= clone_data->clone_max) {
             pe_rsc_debug(rsc, "Child %s not allocated - limit reached", child->id);
             resource_location(child, NULL, -INFINITY, "clone_color:limit_reached", data_set);
 
         } else if (color_instance(child, NULL, clone_data->clone_max < available_nodes, data_set)) {
             allocated++;
         }
     }
 
     pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d",
                  allocated, rsc->id, clone_data->clone_max);
 
     clear_bit(rsc->flags, pe_rsc_provisional);
     clear_bit(rsc->flags, pe_rsc_allocating);
 
     pe_rsc_trace(rsc, "Done allocating %s", rsc->id);
     return NULL;
 }
 
 /*
  * Accumulate, across a resource and all of its descendants, whether any
  * leaf resource is active, being started or being stopped.
  *
  * The three output flags are only ever set to TRUE, never cleared, so a
  * caller can fold several resources into the same variables.  They must be
  * non-NULL whenever a leaf resource (one without children) is reached.
  */
 static void
 clone_update_pseudo_status(resource_t * rsc, gboolean * stopping, gboolean * starting,
                            gboolean * active)
 {
     GListPtr item = NULL;
 
     if (rsc->children != NULL) {
         /* Not a leaf: the answer comes entirely from the descendants */
         for (item = rsc->children; item != NULL; item = item->next) {
             clone_update_pseudo_status((resource_t *) item->data, stopping, starting, active);
         }
         return;
     }
 
     CRM_ASSERT(active != NULL);
     CRM_ASSERT(starting != NULL);
     CRM_ASSERT(stopping != NULL);
 
     if (rsc->running_on != NULL) {
         *active = TRUE;
     }
 
     for (item = rsc->actions; item != NULL; item = item->next) {
         action_t *op = (action_t *) item->data;
 
         /* Both answers are already known: nothing more to learn */
         if (*starting && *stopping) {
             return;
         }
 
         if (is_set(op->flags, pe_action_optional)) {
             pe_rsc_trace(rsc, "Skipping optional: %s", op->uuid);
             continue;
         }
 
         if (is_set(op->flags, pe_action_pseudo) == FALSE
             && is_set(op->flags, pe_action_runnable) == FALSE) {
             pe_rsc_trace(rsc, "Skipping unrunnable: %s", op->uuid);
             continue;
         }
 
         if (safe_str_eq(RSC_STOP, op->task)) {
             pe_rsc_trace(rsc, "Stopping due to: %s", op->uuid);
             *stopping = TRUE;
             continue;
         }
 
         if (!safe_str_eq(RSC_START, op->task)) {
             continue;
         }
 
         /* A start only counts when it is actually runnable */
         if (is_set(op->flags, pe_action_runnable) == FALSE) {
             pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d",
                          op->uuid, is_set(op->flags, pe_action_runnable),
                          is_set(op->flags, pe_action_pseudo));
             continue;
         }
 
         pe_rsc_trace(rsc, "Starting due to: %s", op->uuid);
         pe_rsc_trace(rsc, "%s run=%d, pseudo=%d",
                      op->uuid, is_set(op->flags, pe_action_runnable),
                      is_set(op->flags, pe_action_pseudo));
         *starting = TRUE;
     }
 }
 
 /*
  * Look up the action(s) of a resource that match a key.
  *
  * rsc         - resource whose action list is searched
  * key         - action key handed to find_actions()
  * active_only - when TRUE, only actions without pe_action_optional set are
  *               considered
  * list        - optional out-parameter; when non-NULL it receives the list
  *               of considered actions (the filtered list if active_only is
  *               TRUE, otherwise every match).  Ownership of that list passes
  *               to the caller, who must release it with g_list_free().
  *
  * Returns the matching action if exactly one was considered, else NULL.
  */
 static action_t *
 find_rsc_action(resource_t * rsc, const char *key, gboolean active_only, GListPtr * list)
 {
     action_t *match = NULL;
     GListPtr possible = NULL;
     GListPtr active = NULL;
 
     possible = find_actions(rsc->actions, key, NULL);
 
     if (active_only) {
         GListPtr gIter = possible;
 
         for (; gIter != NULL; gIter = gIter->next) {
             action_t *op = (action_t *) gIter->data;
 
             if (is_set(op->flags, pe_action_optional) == FALSE) {
                 active = g_list_prepend(active, op);
             }
         }
 
         /* Exactly one element */
         if (active && active->next == NULL) {
             match = active->data;
         }
 
         if (list) {
             *list = active;
             active = NULL;
         }
 
     } else {
         /* Exactly one element */
         if (possible && possible->next == NULL) {
             match = possible->data;
         }
 
         /* Hand over the unfiltered list only in this branch.  Doing it
          * unconditionally would overwrite the filtered list stored above
          * and leak it.
          */
         if (list) {
             *list = possible;
             possible = NULL;
         }
     }
 
     /* g_list_free() accepts NULL (the empty list), so no guards are needed */
     g_list_free(possible);
     g_list_free(active);
 
     return match;
 }
 
 /*
  * For a clone with clone_data->ordered set, chain the start and stop actions
  * of its instances: each instance's start is ordered after the previous
  * instance's start, and stops are chained in the opposite direction.
  *
  * Instances are first sorted by sort_rsc_id so the chain is deterministic.
  * Instances for which find_rsc_action() yields no single action are simply
  * left out of the chain.  Does nothing for clones that are not ordered.
  */
 static void
 child_ordering_constraints(resource_t * rsc, pe_working_set_t * data_set)
 {
     const gboolean active_only = TRUE;  /* change to false to get the old behavior */
     action_t *prev_stop = NULL;
     action_t *prev_start = NULL;
     GListPtr item = NULL;
     clone_variant_data_t *clone_data = NULL;
 
     get_clone_variant_data(clone_data, rsc);
 
     if (clone_data->ordered == FALSE) {
         return;
     }
 
     /* we have to maintain a consistent sorted child list when building order constraints */
     rsc->children = g_list_sort(rsc->children, sort_rsc_id);
 
     for (item = rsc->children; item != NULL; item = item->next) {
         resource_t *instance = (resource_t *) item->data;
         action_t *this_stop = NULL;
         action_t *this_start = NULL;
         char *lookup = NULL;
 
         lookup = stop_key(instance);
         this_stop = find_rsc_action(instance, lookup, active_only, NULL);
         free(lookup);
 
         lookup = start_key(instance);
         this_start = find_rsc_action(instance, lookup, active_only, NULL);
         free(lookup);
 
         if (this_stop != NULL) {
             if (prev_stop != NULL) {
                 /* child/child relative stop */
                 order_actions(this_stop, prev_stop, pe_order_optional);
             }
             prev_stop = this_stop;
         }
 
         if (this_start != NULL) {
             if (prev_start != NULL) {
                 /* child/child relative start */
                 order_actions(prev_start, this_start, pe_order_optional);
             }
             prev_start = this_start;
         }
     }
 }
 
 /*
  * Create the actions for a clone: first the actions of every instance, then
  * the clone-level start/started and stop/stopped actions, plus the
  * notification boundaries around them.
  *
  * rsc      - the clone resource
  * data_set - working set
  */
 void
 clone_create_actions(resource_t * rsc, pe_working_set_t * data_set)
 {
     gboolean child_active = FALSE;
     gboolean child_starting = FALSE;
     gboolean child_stopping = FALSE;
     gboolean allow_dependent_migrations = TRUE;
 
     action_t *stop = NULL;
     action_t *stopped = NULL;
 
     action_t *start = NULL;
     action_t *started = NULL;
 
     GListPtr gIter = rsc->children;
     clone_variant_data_t *clone_data = NULL;
 
     get_clone_variant_data(clone_data, rsc);
 
     pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
 
     /* Let each instance create its own actions, then summarise what the
      * instances are doing (active / starting / stopping)
      */
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
         gboolean starting = FALSE;
         gboolean stopping = FALSE;
 
         child_rsc->cmds->create_actions(child_rsc, data_set);
         clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active);
         /* An instance that both stops and starts rules out
          * pe_action_migrate_runnable on the clone's stop (see below)
          */
         if (stopping && starting) {
             allow_dependent_migrations = FALSE;
         }
 
         child_stopping |= stopping;
         child_starting |= starting;
     }
 
     /* start */
     /* NOTE(review): !child_starting is presumably the "optional" argument - confirm */
     start = start_action(rsc, NULL, !child_starting);
     started = custom_action(rsc, started_key(rsc),
                             RSC_STARTED, NULL, !child_starting, TRUE, data_set);
 
     update_action_flags(start, pe_action_pseudo | pe_action_runnable);
     update_action_flags(started, pe_action_pseudo);
     started->priority = INFINITY;
 
     /* "started" is only marked runnable if some instance is or will be active */
     if (child_active || child_starting) {
         update_action_flags(started, pe_action_runnable);
     }
 
     child_ordering_constraints(rsc, data_set);
     /* Notification boundaries are created at most once per clone */
     if (clone_data->start_notify == NULL) {
         clone_data->start_notify =
             create_notification_boundaries(rsc, RSC_START, start, started, data_set);
     }
 
     /* stop */
     stop = stop_action(rsc, NULL, !child_stopping);
     stopped = custom_action(rsc, stopped_key(rsc),
                             RSC_STOPPED, NULL, !child_stopping, TRUE, data_set);
 
     stopped->priority = INFINITY;
     update_action_flags(stop, pe_action_pseudo | pe_action_runnable);
     if (allow_dependent_migrations) {
         update_action_flags(stop, pe_action_migrate_runnable);
     }
     update_action_flags(stopped, pe_action_pseudo | pe_action_runnable);
     if (clone_data->stop_notify == NULL) {
         clone_data->stop_notify =
             create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set);
 
         /* When both sets of boundaries exist, order the stop notifications'
          * post_done action relative to the start notifications' pre action
          */
         if (clone_data->stop_notify && clone_data->start_notify) {
             order_actions(clone_data->stop_notify->post_done, clone_data->start_notify->pre,
                           pe_order_optional);
         }
     }
 }
 
 /*
  * Create the ordering constraints that are internal to a clone: between the
  * clone's own pseudo actions, and between the clone and each of its
  * instances.
  *
  * rsc      - the clone (or master) resource
  * data_set - working set
  */
 void
 clone_internal_constraints(resource_t * rsc, pe_working_set_t * data_set)
 {
     resource_t *last_rsc = NULL;        /* previously processed instance, for ordered clones */
     GListPtr gIter;
     clone_variant_data_t *clone_data = NULL;
 
     get_clone_variant_data(clone_data, rsc);
 
     pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id);
     /* Clone-level pairs: stopped/start, start/started, stop/stopped */
     new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set);
     new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set);
     new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set);
 
     /* Masters additionally get demoted/stop and started/promote pairs */
     if (rsc->variant == pe_master) {
         new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set);
         new_rsc_order(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set);
     }
 
     if (clone_data->ordered) {
         /* we have to maintain a consistent sorted child list when building order constraints */
         rsc->children = g_list_sort(rsc->children, sort_rsc_id);
     }
     for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         /* The instance builds its own internal constraints first */
         child_rsc->cmds->internal_constraints(child_rsc, data_set);
 
         /* Start side: clone start / instance start / clone started */
         order_start_start(rsc, child_rsc, pe_order_runnable_left | pe_order_implies_first_printed);
         new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed,
                       data_set);
         /* Ordered clones also chain each instance's start to the previous instance's */
         if (clone_data->ordered && last_rsc) {
             order_start_start(last_rsc, child_rsc, pe_order_optional);
         }
 
         /* Stop side: clone stop / instance stop / clone stopped */
         order_stop_stop(rsc, child_rsc, pe_order_implies_first_printed);
         new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed,
                       data_set);
         /* Note the swapped argument order compared with the start chain above */
         if (clone_data->ordered && last_rsc) {
             order_stop_stop(child_rsc, last_rsc, pe_order_optional);
         }
 
         last_rsc = child_rsc;
     }
 }
 
 /*
  * Assign a node to a resource via native_assign_node().
  *
  * If the resource has children, the assignment is applied to each direct
  * child instead of to the resource itself; otherwise it is applied to the
  * resource.  'node' and 'force' are passed through unchanged.
  */
 static void
 assign_node(resource_t * rsc, node_t * node, gboolean force)
 {
     GListPtr item = rsc->children;
 
     if (item == NULL) {
         /* No children: act on the resource itself */
         native_assign_node(rsc, NULL, node, force);
         return;
     }
 
     for (; item != NULL; item = item->next) {
         native_assign_node((resource_t *) item->data, NULL, node, force);
     }
 }
 
 /*
  * Find an instance of 'rsc' that is located on 'local_node'.
  *
  * local_child - the resource we are looking for a partner for (used for
  *               logging only)
  * local_node  - node the partner must be located on
  * rsc         - resource whose children are searched
  * filter      - if not RSC_ROLE_UNKNOWN, only children whose state() equals
  *               this role are considered
  * current     - passed to the children's state() and location() methods
  *
  * Children for which is_set_recursive(child, pe_rsc_block, TRUE) is true are
  * treated as having no location and therefore never match.
  *
  * Returns the first matching child, or NULL if there is none or local_node
  * is NULL.
  */
 static resource_t *
 find_compatible_child_by_node(resource_t * local_child, node_t * local_node, resource_t * rsc,
                               enum rsc_role_e filter, gboolean current)
 {
     GListPtr gIter = NULL;
 
     if (local_node == NULL) {
         crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id);
         return NULL;
     }
 
     crm_trace("Looking for compatible child from %s for %s on %s",
               local_child->id, rsc->id, local_node->details->uname);
 
     gIter = rsc->children;
     for (; gIter != NULL; gIter = gIter->next) {
         /* Scoped to the iteration so that a blocked child starts from NULL
          * instead of inheriting the previous child's node and being matched
          * by mistake
          */
         node_t *node = NULL;
         resource_t *child_rsc = (resource_t *) gIter->data;
         enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current);
 
         if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
             /* We only want instances that haven't failed */
             node = child_rsc->fns->location(child_rsc, NULL, current);
         }
 
         if (filter != RSC_ROLE_UNKNOWN && next_role != filter) {
             crm_trace("Filtered %s", child_rsc->id);
             continue;
         }
 
         if (node && local_node && node->details == local_node->details) {
             crm_trace("Pairing %s with %s on %s",
                       local_child->id, child_rsc->id, node->details->uname);
             return child_rsc;
 
         } else if (node) {
             crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname,
                       local_node->details->uname);
 
         } else {
             crm_trace("%s - not allocated %d", child_rsc->id, current);
         }
     }
 
     crm_trace("Can't pair %s with %s", local_child->id, rsc->id);
     return NULL;
 }
 
 /*
  * Find an instance of 'rsc' that can be paired with 'local_child'.
  *
  * If local_child already has a location (per its location() method), only
  * that node is tried.  Otherwise every node in local_child->allowed_nodes is
  * tried in sort_node_weight order and the first hit wins.
  *
  * Returns the paired instance, or NULL if none was found.
  */
 resource_t *
 find_compatible_child(resource_t * local_child, resource_t * rsc, enum rsc_role_e filter,
                       gboolean current)
 {
     resource_t *match = NULL;
     GListPtr candidates = NULL;
     GListPtr item = NULL;
     node_t *assigned = local_child->fns->location(local_child, NULL, current);
 
     if (assigned != NULL) {
         return find_compatible_child_by_node(local_child, assigned, rsc, filter, current);
     }
 
     candidates = g_hash_table_get_values(local_child->allowed_nodes);
     candidates = g_list_sort_with_data(candidates, sort_node_weight, NULL);
 
     for (item = candidates; item != NULL; item = item->next) {
         match = find_compatible_child_by_node(local_child, (node_t *) item->data, rsc, filter,
                                               current);
         if (match != NULL) {
             break;
         }
     }
 
     if (match == NULL) {
         pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id);
     }
 
     /* Only the list cells are released; the nodes belong to the hash table */
     g_list_free(candidates);
     return match;
 }
 
 /*
  * Apply a colocation constraint to every instance of rsc_lh by calling each
  * child's rsc_colocation_lh() method.
  *
  * An error is logged on every call, because this path is not expected to be
  * used (see below).  Returns without doing anything if rsc_lh or rsc_rh is
  * NULL.
  */
 void
 clone_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
 {
     /* -- Never called --
      *
      * Instead we add the colocation constraints to the child and call from there
      */
 
     GListPtr gIter = NULL;
 
     CRM_CHECK(FALSE, crm_err("This functionality is not thought to be used. Please report a bug."));
     CRM_CHECK(rsc_lh, return);
     CRM_CHECK(rsc_rh, return);
 
     /* rsc_lh->children is only read here, after the NULL checks above */
     for (gIter = rsc_lh->children; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         child_rsc->cmds->rsc_colocation_lh(child_rsc, rsc_rh, constraint);
     }
 }
 
 /*
  * Apply a colocation constraint in which the clone (rsc_rh) is the
  * right-hand side and rsc_lh is a native resource.
  *
  * Depending on the situation this
  *  - does nothing while rsc_rh is still provisional,
  *  - pairs rsc_lh with a single compatible instance (interleaved clones),
  *  - for scores >= INFINITY, passes the nodes of all non-blocked, located
  *    instances to node_list_exclude() for rsc_lh's allowed nodes, or
  *  - otherwise forwards the constraint to every instance of rsc_rh.
  */
 void
 clone_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
 {
     GListPtr gIter = NULL;
     gboolean do_interleave = FALSE;
     clone_variant_data_t *clone_data = NULL;
     clone_variant_data_t *clone_data_lh = NULL;
 
     CRM_CHECK(constraint != NULL, return);
     CRM_CHECK(rsc_lh != NULL, pe_err("rsc_lh was NULL for %s", constraint->id); return);
     CRM_CHECK(rsc_rh != NULL, pe_err("rsc_rh was NULL for %s", constraint->id); return);
     CRM_CHECK(rsc_lh->variant == pe_native, return);
 
     get_clone_variant_data(clone_data, constraint->rsc_rh);
     pe_rsc_trace(rsc_rh, "Processing constraint %s: %s -> %s %d",
                  constraint->id, rsc_lh->id, rsc_rh->id, constraint->score);
 
     /* Interleaving is decided from the constraint's own lh resource, which
      * may be a clone even though the rsc_lh argument is native
      */
     if (constraint->rsc_lh->variant >= pe_clone) {
 
         get_clone_variant_data(clone_data_lh, constraint->rsc_lh);
         if (clone_data_lh->interleave
             && clone_data->clone_node_max != clone_data_lh->clone_node_max) {
             crm_config_err("Cannot interleave " XML_CIB_TAG_INCARNATION " %s and %s because"
                            " they do not support the same number of" " resources per node",
                            constraint->rsc_lh->id, constraint->rsc_rh->id);
 
             /* only the LHS side needs to be labeled as interleave */
         } else if (clone_data_lh->interleave) {
             do_interleave = TRUE;
         }
     }
 
     if (is_set(rsc_rh->flags, pe_rsc_provisional)) {
         pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id);
         return;
 
     } else if (do_interleave) {
         resource_t *rh_child = NULL;
 
         rh_child = find_compatible_child(rsc_lh, rsc_rh, RSC_ROLE_UNKNOWN, FALSE);
 
         if (rh_child) {
             pe_rsc_debug(rsc_rh, "Pairing %s with %s", rsc_lh->id, rh_child->id);
             rsc_lh->cmds->rsc_colocation_lh(rsc_lh, rh_child, constraint);
 
         } else if (constraint->score >= INFINITY) {
             /* Score >= INFINITY and no partner: assign rsc_lh to no node */
             crm_notice("Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id);
             assign_node(rsc_lh, NULL, TRUE);
 
         } else {
             pe_rsc_debug(rsc_rh, "Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id);
         }
 
         return;
 
     } else if (constraint->score >= INFINITY) {
         GListPtr rhs = NULL;
 
         /* Collect the nodes of every instance that has a location and is not blocked */
         gIter = rsc_rh->children;
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *child_rsc = (resource_t *) gIter->data;
             node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
 
             if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
                 rhs = g_list_prepend(rhs, chosen);
             }
         }
 
         /* NOTE(review): presumably restricts rsc_lh to the collected nodes -
          * confirm against node_list_exclude()
          */
         node_list_exclude(rsc_lh->allowed_nodes, rhs, FALSE);
         g_list_free(rhs);
         return;
     }
 
     /* Remaining case (score below INFINITY, no interleave): let each instance handle it */
     gIter = rsc_rh->children;
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint);
     }
 }
 
 /*
  * Work out which task of a clone's instances a clone-level action refers to.
  *
  * For "notify"/"notified" actions the task being notified about is parsed
  * out of action->uuid; for everything else action->task is used directly.
  * The task name is resolved with get_complex_task() against the clone's
  * first child.
  *
  * Returns the resolved task, or no_action if a notify key could not be
  * parsed.
  *
  * NOTE(review): action->rsc->children is dereferenced unchecked, so the
  * clone is assumed to have at least one child - confirm with callers.
  */
 static enum action_tasks
 clone_child_action(action_t * action)
 {
     enum action_tasks result = no_action;
     resource_t *child = (resource_t *) action->rsc->children->data;
 
     if (safe_str_eq(action->task, "notify")
         || safe_str_eq(action->task, "notified")) {
 
         /* Find the action we're notifying about instead */
 
         int stop = 0;           /* index of the right-most '_' in the key */
         char *key = action->uuid;
         int lpc = strlen(key);
 
         /* Scan backwards (index 0 is never examined): the text between the
          * last two underscores is taken as the task name
          */
         for (; lpc > 0; lpc--) {
             if (key[lpc] == '_' && stop == 0) {
                 stop = lpc;
 
             } else if (key[lpc] == '_') {
                 char *task_mutable = NULL;
 
                 /* Step past the underscore, copy the tail and cut it at 'stop' */
                 lpc++;
                 /* NOTE(review): the strdup() result is not checked for NULL */
                 task_mutable = strdup(key + lpc);
                 task_mutable[stop - lpc] = 0;
 
                 crm_trace("Extracted action '%s' from '%s'", task_mutable, key);
                 result = get_complex_task(child, task_mutable, TRUE);
                 free(task_mutable);
                 break;
             }
         }
 
     } else {
         result = get_complex_task(child, action->task, TRUE);
     }
     return result;
 }
 
 /*
  * Compute the flags of a clone-level action from the equivalent actions of
  * the clone's instances.
  *
  * Starts from optional | runnable | pseudo and then
  *  - clears "optional" as soon as one instance's action is not optional
  *    (this is also cleared on the action itself), and
  *  - clears "runnable" if no instance's action is runnable (on the action
  *    itself only when node is NULL).
  *
  * action - the clone-level action
  * node   - node to evaluate for; may be NULL
  *
  * Returns the computed flags.
  */
 enum pe_action_flags
 clone_action_flags(action_t * action, node_t * node)
 {
     GListPtr gIter = NULL;
     gboolean any_runnable = FALSE;
     gboolean check_runnable = TRUE;     /* never changed below, so the check always runs */
     enum action_tasks task = clone_child_action(action);
     enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
     const char *task_s = task2text(task);
 
     gIter = action->rsc->children;
     for (; gIter != NULL; gIter = gIter->next) {
         action_t *child_action = NULL;
         resource_t *child = (resource_t *) gIter->data;
 
         /* For children that themselves have children, the lookup is not limited to a node */
         child_action =
             find_first_action(child->actions, NULL, task_s, child->children ? NULL : node);
         pe_rsc_trace(child, "Checking for %s in %s on %s", task_s, child->id,
                      node ? node->details->uname : "none");
         if (child_action) {
             enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
 
             if (is_set(flags, pe_action_optional)
                 && is_set(child_flags, pe_action_optional) == FALSE) {
-                pe_rsc_trace(child, "%s is manditory because of %s", action->uuid,
+                pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid,
                              child_action->uuid);
                 flags = crm_clear_bit(__FUNCTION__, action->rsc->id, flags, pe_action_optional);
                 pe_clear_action_bit(action, pe_action_optional);
             }
             if (is_set(child_flags, pe_action_runnable)) {
                 any_runnable = TRUE;
             }
 
         } else {
 
             /* No matching action: trace everything the child does have, for debugging */
             GListPtr gIter2 = child->actions;
 
             for (; gIter2 != NULL; gIter2 = gIter2->next) {
                 action_t *op = (action_t *) gIter2->data;
 
                 pe_rsc_trace(child, "%s on %s (%s)", op->uuid,
                              op->node ? op->node->details->uname : "none", op->task);
             }
         }
     }
 
     if (check_runnable && any_runnable == FALSE) {
         pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid);
         flags = crm_clear_bit(__FUNCTION__, action->rsc->id, flags, pe_action_runnable);
         /* The action itself is only updated for the node-independent query */
         if (node == NULL) {
             pe_clear_action_bit(action, pe_action_runnable);
         }
     }
 
     return flags;
 }
 
 /*
  * Apply an ordering between two clones instance by instance (interleaved).
  *
  * Each instance of then->rsc is paired with a compatible instance of
  * first->rsc via find_compatible_child(), and the ordering is created
  * between the matching actions of that pair.
  *
  * first, then - the clone-level actions being ordered
  * node        - node used when looking up the instances' actions
  * flags       - not used by this function
  * filter      - forwarded to the instance's update_actions()
  * type        - ordering type
  *
  * Returns the accumulated pe_graph_flags describing what changed.
  */
 static enum pe_graph_flags
 clone_update_actions_interleave(action_t * first, action_t * then, node_t * node,
                                 enum pe_action_flags flags, enum pe_action_flags filter,
                                 enum pe_ordering type)
 {
     gboolean current = FALSE;
     resource_t *first_child = NULL;
     GListPtr gIter = then->rsc->children;
     enum pe_graph_flags changed = pe_graph_none;        /*pe_graph_disable */
 
     enum action_tasks task = clone_child_action(first);
     const char *first_task = task2text(task);
 
     /* Fix this - lazy */
     /* For stopped/demoted actions, pairing uses current = TRUE */
     if (strstr(first->uuid, "_stopped_0") || strstr(first->uuid, "_demoted_0")) {
         current = TRUE;
     }
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *then_child = (resource_t *) gIter->data;
 
         CRM_ASSERT(then_child != NULL);
         first_child = find_compatible_child(then_child, first->rsc, RSC_ROLE_UNKNOWN, current);
         if (first_child == NULL && current) {
             crm_trace("Ignore");
 
         } else if (first_child == NULL) {
             crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid,
                       then->uuid);
 
             /* Me no like this hack - but what else can we do?
              *
              * If there is no-one active or about to be active
              *   on the same node as then_child, then they must
              *   not be allowed to start
              */
             if (type & (pe_order_runnable_left | pe_order_implies_then) /* Mandatory */ ) {
                 pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id);
                 assign_node(then_child, NULL, TRUE);
                 /* TODO - set changed correctly? */
             }
 
         } else {
             action_t *first_action = NULL;
             action_t *then_action = NULL;
 
             pe_rsc_debug(then->rsc, "Pairing %s with %s", first_child->id, then_child->id);
 
             first_action = find_first_action(first_child->actions, NULL, first_task, node);
             then_action = find_first_action(then_child->actions, NULL, then->task, node);
 
             /* A missing 'first' action is only expected for orphans */
             CRM_CHECK(first_action != NULL || is_set(first_child->flags, pe_rsc_orphan),
                       crm_err("No action found for %s in %s (first)", first_task, first_child->id));
 
             /* We're only interested if 'then' is neither stopping nor being demoted */ 
             if (then_action == NULL && is_not_set(then_child->flags, pe_rsc_orphan)
                 && crm_str_eq(then->task, RSC_STOP, TRUE) == FALSE 
                 && crm_str_eq(then->task, RSC_DEMOTE, TRUE) == FALSE) {
                 crm_err("Internal error: No action found for %s in %s (then)", then->task,
                         then_child->id);
             }
 
             /* Nothing to order unless both sides of the pair have an action */
             if (first_action == NULL || then_action == NULL) {
                 continue;
             }
             if (order_actions(first_action, then_action, type)) {
                 crm_debug("Created constraint for %s -> %s", first_action->uuid, then_action->uuid);
                 changed |= (pe_graph_updated_first | pe_graph_updated_then);
             }
             /* Let the 'then' instance react, using the 'first' instance's
              * own action flags rather than the 'flags' argument
              */
             changed |=
                 then_child->cmds->update_actions(first_action, then_action, node,
                                                  first_child->cmds->action_flags(first_action,
                                                                                  node), filter,
                                                  type);
         }
     }
     return changed;
 }
 
 /*
  * Update the ordering between 'first' and 'then' when clone resources are
  * involved.
  *
  * When the two actions belong to different resources whose variants are both
  * >= pe_clone, the interleave setting is read from the variant data of
  * first->rsc if 'then' is a stop or demote (matched on its uuid), otherwise
  * from then->rsc.  With interleave set, the work is delegated to
  * clone_update_actions_interleave(); otherwise, if then->rsc is set,
  * native_update_actions() is applied to the pair and every runnable child
  * action of then->rsc that matches then->task is updated against 'first'.
  *
  * Returns the accumulated pe_graph_flags reported by the calls made.
  */
 enum pe_graph_flags
 clone_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags,
                      enum pe_action_flags filter, enum pe_ordering type)
 {
     /* Only used for the trace message below */
     const char *rsc = "none";
     gboolean interleave = FALSE;
     enum pe_graph_flags changed = pe_graph_none;
 
     if (first->rsc != then->rsc
         && first->rsc && first->rsc->variant >= pe_clone
         && then->rsc && then->rsc->variant >= pe_clone) {
         clone_variant_data_t *clone_data = NULL;
 
         /* For stop/demote the setting of 'first' decides, otherwise that of 'then' */
         if (strstr(then->uuid, "_stop_0") || strstr(then->uuid, "_demote_0")) {
             get_clone_variant_data(clone_data, first->rsc);
             rsc = first->rsc->id;
         } else {
             get_clone_variant_data(clone_data, then->rsc);
             rsc = then->rsc->id;
         }
         interleave = clone_data->interleave;
     }
 
     crm_trace("Interleave %s -> %s: %s (based on %s)",
               first->uuid, then->uuid, interleave ? "yes" : "no", rsc);
 
     if (interleave) {
         changed = clone_update_actions_interleave(first, then, node, flags, filter, type);
 
     } else if (then->rsc) {
         GListPtr gIter = then->rsc->children;
 
         /* Handle the resource's own action first, then each child's */
         changed |= native_update_actions(first, then, node, flags, filter, type);
 
         for (; gIter != NULL; gIter = gIter->next) {
             enum pe_graph_flags child_changed = pe_graph_none;
             GListPtr lpc = NULL;
             resource_t *child = (resource_t *) gIter->data;
             action_t *child_action = find_first_action(child->actions, NULL, then->task, node);
 
             if (child_action) {
                 enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
 
                 /* Only runnable child actions are updated against 'first' */
                 if (is_set(child_flags, pe_action_runnable)) {
                                      
                     child_changed |=
                         child->cmds->update_actions(first, child_action, node, flags, filter, type);
                 }
                 changed |= child_changed;
                 /* The child action changed: re-run update_action() on every
                  * action recorded in its actions_after list
                  */
                 if (child_changed & pe_graph_updated_then) {
                    for (lpc = child_action->actions_after; lpc != NULL; lpc = lpc->next) {
                         action_wrapper_t *other = (action_wrapper_t *) lpc->data;
                         update_action(other->action);
                     }
                 }
             }
         }
     }
 
     return changed;
 }
 
 /*
  * Apply a location constraint to a clone: first to the clone resource itself
  * via native_rsc_location(), then to each of its children through the
  * child's own rsc_location method.
  */
 void
 clone_rsc_location(resource_t * rsc, rsc_to_node_t * constraint)
 {
     GListPtr instances = rsc->children;
     GListPtr item = NULL;
 
     pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id);
 
     native_rsc_location(rsc, constraint);
 
     for (item = instances; item != NULL; item = item->next) {
         resource_t *instance = (resource_t *) item->data;
 
         instance->cmds->rsc_location(instance, constraint);
     }
 }
 
 /*
  * Expand a clone into the transition graph.
  *
  * The action flags of every action of the clone are evaluated, notification
  * data is collected/expanded and notifications are created for each of the
  * start, stop, promote and demote notify entries that is set, then every
  * child and finally the clone itself is expanded.  All four notify entries
  * are freed and reset to NULL before returning.
  */
 void
 clone_expand(resource_t * rsc, pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
     clone_variant_data_t *clone_data = NULL;
 
     get_clone_variant_data(clone_data, rsc);
 
     for (item = rsc->actions; item != NULL; item = item->next) {
         action_t *op = (action_t *) item->data;
 
         rsc->cmds->action_flags(op, NULL);
     }
 
     if (clone_data->start_notify != NULL) {
         collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify);
         expand_notification_data(clone_data->start_notify, data_set);
         create_notifications(rsc, clone_data->start_notify, data_set);
     }
 
     if (clone_data->stop_notify != NULL) {
         collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify);
         expand_notification_data(clone_data->stop_notify, data_set);
         create_notifications(rsc, clone_data->stop_notify, data_set);
     }
 
     if (clone_data->promote_notify != NULL) {
         collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify);
         expand_notification_data(clone_data->promote_notify, data_set);
         create_notifications(rsc, clone_data->promote_notify, data_set);
     }
 
     if (clone_data->demote_notify != NULL) {
         collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify);
         expand_notification_data(clone_data->demote_notify, data_set);
         create_notifications(rsc, clone_data->demote_notify, data_set);
     }
 
     /* With the notifications created, the children can be expanded */
     for (item = rsc->children; item != NULL; item = item->next) {
         resource_t *instance = (resource_t *) item->data;
 
         instance->cmds->expand(instance, data_set);
     }
 
     native_expand(rsc, data_set);
 
     /* The notifications are part of the graph now; release the notify data */
     free_notification_data(clone_data->demote_notify);
     clone_data->demote_notify = NULL;
 
     free_notification_data(clone_data->stop_notify);
     clone_data->stop_notify = NULL;
 
     free_notification_data(clone_data->start_notify);
     clone_data->start_notify = NULL;
 
     free_notification_data(clone_data->promote_notify);
     clone_data->promote_notify = NULL;
 }
 
 /*
  * Collect the nodes on which a resource is known.
  *
  * For a resource with children the lookup recurses into every child;
  * otherwise the values of rsc->known_on are used.  When 'list' is non-NULL,
  * each node found is prepended to *list unless pe_find_node_id() already
  * finds its id there.
  *
  * Returns the node when exactly one was found, NULL otherwise.
  */
 node_t *
 rsc_known_on(resource_t * rsc, GListPtr * list)
 {
     GListPtr item = NULL;
     GListPtr found = NULL;
     node_t *only = NULL;
 
     if (rsc->children != NULL) {
         for (item = rsc->children; item != NULL; item = item->next) {
             rsc_known_on((resource_t *) item->data, &found);
         }
 
     } else if (rsc->known_on != NULL) {
         found = g_hash_table_get_values(rsc->known_on);
     }
 
     if (found != NULL && g_list_length(found) == 1) {
         only = found->data;
     }
 
     if (list != NULL) {
         for (item = found; item != NULL; item = item->next) {
             node_t *node = (node_t *) item->data;
 
             if (*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) {
                 *list = g_list_prepend(*list, node);
             }
         }
     }
 
     /* Only the list cells are released here, not the nodes they point to */
     g_list_free(found);
     return only;
 }
 
 /*
  * Return the first child of 'rsc' whose rsc_known_on() list contains a node
  * sharing its details with 'node', or NULL if there is no such child.
  */
 static resource_t *
 find_instance_on(resource_t * rsc, node_t * node)
 {
     GListPtr item = NULL;
 
     for (item = rsc->children; item != NULL; item = item->next) {
         resource_t *instance = (resource_t *) item->data;
         GListPtr known_nodes = NULL;
         GListPtr probe = NULL;
         gboolean matched = FALSE;
 
         rsc_known_on(instance, &known_nodes);
 
         for (probe = known_nodes; probe != NULL && matched == FALSE; probe = probe->next) {
             node_t *known = (node_t *) probe->data;
 
             matched = (node->details == known->details);
         }
 
         /* The temporary list is released whether or not a match was found */
         g_list_free(known_nodes);
 
         if (matched) {
             return instance;
         }
     }
 
     return NULL;
 }
 
 /*
  * Create probe actions for a clone on 'node'.
  *
  * The children are sorted with sort_rsc_id first.  A clone without children
  * yields FALSE.  With exclusive_discover set, a node that is allowed but not
  * marked discover_exclusive is removed from allowed_nodes and FALSE is
  * returned.
  *
  * For a clone that is not pe_rsc_unique and has clone_node_max == 1 only one
  * child is probed, chosen in this order: the child found by
  * find_instance_on(), the first child whose location() matches the node,
  * else the first child.  The result of that child's create_probe is returned.
  *
  * In every other case each child is probed, and TRUE is returned if any
  * child's create_probe returned TRUE.
  */
 gboolean
 clone_create_probe(resource_t * rsc, node_t * node, action_t * complete,
                    gboolean force, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     gboolean any_created = FALSE;
     clone_variant_data_t *clone_data = NULL;
 
     CRM_ASSERT(rsc);
     get_clone_variant_data(clone_data, rsc);
 
     rsc->children = g_list_sort(rsc->children, sort_rsc_id);
     if (rsc->children == NULL) {
         pe_warn("Clone %s has no children", rsc->id);
         return FALSE;
     }
 
     if (rsc->exclusive_discover) {
         node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
         if (allowed && allowed->rsc_discover_mode != discover_exclusive) {
             /* exclusive discover is enabled and this node is not marked
              * as a node this resource should be discovered on
              *
              * remove the node from allowed_nodes so that the
              * notification contains only nodes that we might ever run
              * on
              */
             g_hash_table_remove(rsc->allowed_nodes, node->details->id);
 
             /* Bit of a shortcut - might as well take it */
             return FALSE;
         }
     }
 
     if (is_not_set(rsc->flags, pe_rsc_unique)
         && clone_data->clone_node_max == 1) {
         /* only look for one copy */
         resource_t *child = NULL;
 
         /* Try whoever we probed last time */
         child = find_instance_on(rsc, node);
         if (child) {
             return child->cmds->create_probe(child, node, complete, force, data_set);
         }
 
         /* Try whoever we plan on starting there */
         gIter = rsc->children;
         for (; gIter != NULL; gIter = gIter->next) {
             node_t *local_node = NULL;
             resource_t *child_rsc = (resource_t *) gIter->data;
 
             CRM_ASSERT(child_rsc);
             local_node = child_rsc->fns->location(child_rsc, NULL, FALSE);
             if (local_node == NULL) {
                 continue;
             }
 
             if (local_node->details == node->details) {
                 return child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set);
             }
         }
 
         /* Fall back to the first clone instance */
         CRM_ASSERT(rsc->children);
         child = rsc->children->data;
         return child->cmds->create_probe(child, node, complete, force, data_set);
     }
 
     /* Every path of the single-copy case above returns, so from here on all
      * children are probed; no early exit is needed in this loop.
      */
     for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         if (child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set)) {
             any_created = TRUE;
         }
     }
 
     return any_created;
 }
 
 /*
  * Add the clone's meta attributes to 'xml': the unique and notify flags as
  * "true"/"false" and the clone_max and clone_node_max values as integers,
  * each under the name produced by crm_meta_name().
  */
 void
 clone_append_meta(resource_t * rsc, xmlNode * xml)
 {
     char *meta_key = NULL;
     const char *unique_s = NULL;
     const char *notify_s = NULL;
     clone_variant_data_t *clone_data = NULL;
 
     get_clone_variant_data(clone_data, rsc);
 
     unique_s = is_set(rsc->flags, pe_rsc_unique) ? "true" : "false";
     meta_key = crm_meta_name(XML_RSC_ATTR_UNIQUE);
     crm_xml_add(xml, meta_key, unique_s);
     free(meta_key);
 
     notify_s = is_set(rsc->flags, pe_rsc_notify) ? "true" : "false";
     meta_key = crm_meta_name(XML_RSC_ATTR_NOTIFY);
     crm_xml_add(xml, meta_key, notify_s);
     free(meta_key);
 
     meta_key = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX);
     crm_xml_add_int(xml, meta_key, clone_data->clone_max);
     free(meta_key);
 
     meta_key = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX);
     crm_xml_add_int(xml, meta_key, clone_data->clone_node_max);
     free(meta_key);
 }
 
 /*
  * Clone variant of merge_weights: forwards all arguments unchanged to
  * rsc_merge_weights() and returns its result.
  */
 GHashTable *
 clone_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr,
                     float factor, enum pe_weights flags)
 {
     GHashTable *merged = rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags);
 
     return merged;
 }
diff --git a/pengine/group.c b/pengine/group.c
index 15c058fc1f..7c5d5b4f55 100644
--- a/pengine/group.c
+++ b/pengine/group.c
@@ -1,517 +1,517 @@
 /* 
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  * 
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  * 
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <pengine.h>
 #include <crm/msg_xml.h>
 
 #include <allocate.h>
 #include <utils.h>
 
 #define VARIANT_GROUP 1
 #include <lib/pengine/variant.h>
 
 /*
  * Allocate a group by allocating each of its children in turn.
  *
  * Returns rsc->allocated_to when the group is no longer provisional.
  * Returns NULL when a dependency loop is detected (the group is already
  * being allocated), when the group has no first child, or when the group is
  * not colocated.  Otherwise returns the first non-NULL node returned by a
  * child's allocate method (NULL if none returned one).
  */
 node_t *
 group_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set)
 {
     node_t *node = NULL;
     node_t *group_node = NULL;
     GListPtr gIter = NULL;
     group_variant_data_t *group_data = NULL;
 
     get_group_variant_data(group_data, rsc);
 
     if (is_not_set(rsc->flags, pe_rsc_provisional)) {
         return rsc->allocated_to;
     }
     pe_rsc_trace(rsc, "Processing %s", rsc->id);
     if (is_set(rsc->flags, pe_rsc_allocating)) {
         pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
         return NULL;
     }
 
     if (group_data->first_child == NULL) {
         /* Nothing to allocate */
         clear_bit(rsc->flags, pe_rsc_provisional);
         return NULL;
     }
 
     set_bit(rsc->flags, pe_rsc_allocating);
     rsc->role = group_data->first_child->role;
 
     /* Hand the group's rsc_cons list over to the first child and its
      * rsc_cons_lhs list over to the last child; the group's own lists are
      * emptied afterwards.
      */
     group_data->first_child->rsc_cons =
         g_list_concat(group_data->first_child->rsc_cons, rsc->rsc_cons);
     rsc->rsc_cons = NULL;
 
     group_data->last_child->rsc_cons_lhs =
         g_list_concat(group_data->last_child->rsc_cons_lhs, rsc->rsc_cons_lhs);
     rsc->rsc_cons_lhs = NULL;
 
     dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__,
                      rsc->allowed_nodes);
 
     gIter = rsc->children;
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         node = child_rsc->cmds->allocate(child_rsc, prefer, data_set);
         /* Remember the first non-NULL node returned by a child */
         if (group_node == NULL) {
             group_node = node;
         }
     }
 
     rsc->next_role = group_data->first_child->next_role;
     clear_bit(rsc->flags, pe_rsc_allocating);
     clear_bit(rsc->flags, pe_rsc_provisional);
 
     if (group_data->colocated) {
         return group_node;
     }
     return NULL;
 }
 
 void group_update_pseudo_status(resource_t * parent, resource_t * child);
 
 /*
  * Create the actions of a group.
  *
  * Each child creates its own actions and is then passed to
  * group_update_pseudo_status().  Afterwards the group's own start, started,
  * stop and stopped actions are created and flagged as pseudo and runnable.
  * If the "stateful" meta attribute is true, demote, demoted, promote and
  * promoted actions are created and flagged the same way.
  */
 void
 group_create_actions(resource_t * rsc, pe_working_set_t * data_set)
 {
     action_t *op = NULL;
     const char *value = NULL;
     GListPtr gIter = rsc->children;
 
     pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         child_rsc->cmds->create_actions(child_rsc, data_set);
         group_update_pseudo_status(rsc, child_rsc);
     }
 
     /* The group's own start/started/stop/stopped actions */
     op = start_action(rsc, NULL, TRUE /* !group_data->child_starting */ );
     set_bit(op->flags, pe_action_pseudo | pe_action_runnable);
 
     op = custom_action(rsc, started_key(rsc),
                        RSC_STARTED, NULL, TRUE /* !group_data->child_starting */ , TRUE, data_set);
     set_bit(op->flags, pe_action_pseudo | pe_action_runnable);
 
     op = stop_action(rsc, NULL, TRUE /* !group_data->child_stopping */ );
     set_bit(op->flags, pe_action_pseudo | pe_action_runnable);
 
     op = custom_action(rsc, stopped_key(rsc),
                        RSC_STOPPED, NULL, TRUE /* !group_data->child_stopping */ , TRUE, data_set);
     set_bit(op->flags, pe_action_pseudo | pe_action_runnable);
 
     /* Promotion-related actions exist only when "stateful" is true */
     value = g_hash_table_lookup(rsc->meta, "stateful");
     if (crm_is_true(value)) {
         op = custom_action(rsc, demote_key(rsc), RSC_DEMOTE, NULL, TRUE, TRUE, data_set);
         set_bit(op->flags, pe_action_pseudo);
         set_bit(op->flags, pe_action_runnable);
         op = custom_action(rsc, demoted_key(rsc), RSC_DEMOTED, NULL, TRUE, TRUE, data_set);
         set_bit(op->flags, pe_action_pseudo);
         set_bit(op->flags, pe_action_runnable);
 
         op = custom_action(rsc, promote_key(rsc), RSC_PROMOTE, NULL, TRUE, TRUE, data_set);
         set_bit(op->flags, pe_action_pseudo);
         set_bit(op->flags, pe_action_runnable);
         op = custom_action(rsc, promoted_key(rsc), RSC_PROMOTED, NULL, TRUE, TRUE, data_set);
         set_bit(op->flags, pe_action_pseudo);
         set_bit(op->flags, pe_action_runnable);
     }
 }
 
 /*
  * Record on the parent group whether 'child' has a mandatory, runnable stop
  * or start action, by setting child_stopping / child_starting in the
  * group's variant data.  Nothing is done for unordered groups, or once both
  * markers are already set.
  */
 void
 group_update_pseudo_status(resource_t * parent, resource_t * child)
 {
     GListPtr actions = child->actions;
     GListPtr item = NULL;
     group_variant_data_t *group_data = NULL;
 
     get_group_variant_data(group_data, parent);
 
     /* If this group is not ordered, then leave the meta-actions as optional */
     if (group_data->ordered == FALSE) {
         return;
     }
 
     if (group_data->child_stopping && group_data->child_starting) {
         return;
     }
 
     for (item = actions; item != NULL; item = item->next) {
         action_t *action = (action_t *) item->data;
 
         /* Only mandatory actions that are runnable are of interest */
         if (is_set(action->flags, pe_action_optional)
             || is_set(action->flags, pe_action_runnable) == FALSE) {
             continue;
         }
 
         if (safe_str_eq(RSC_STOP, action->task)) {
             group_data->child_stopping = TRUE;
             pe_rsc_trace(action->rsc, "Based on %s the group is stopping", action->uuid);
 
         } else if (safe_str_eq(RSC_START, action->task)) {
             group_data->child_starting = TRUE;
             pe_rsc_trace(action->rsc, "Based on %s the group is starting", action->uuid);
         }
     }
 }
 
 /*
  * Create the constraints internal to a group.
  *
  * The group's own pseudo-actions are ordered against each other, then for
  * every child: the child's own internal constraints are created, the child
  * is colocated with its predecessor (colocated groups only), and the
  * child's start/stop actions are ordered relative to the group's actions
  * and, for ordered groups, relative to the previous child.  When the
  * top-level parent is a master resource the same orderings are created for
  * promote/demote.  Finally, for ordered groups, the last child's stop (and
  * demote) is ordered relative to the group's.
  */
 void
 group_internal_constraints(resource_t * rsc, pe_working_set_t * data_set)
 {
     GListPtr gIter = rsc->children;
     resource_t *last_rsc = NULL;        /* previous child in the list */
     resource_t *last_active = NULL;     /* most recent child with running_on set */
     resource_t *top = uber_parent(rsc);
     group_variant_data_t *group_data = NULL;
 
     get_group_variant_data(group_data, rsc);
 
     /* Orderings among the group's own actions */
     new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set);
     new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set);
     new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set);
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
         /* Ordering flag sets used for this child's constraints below */
         int stop = pe_order_none;
         int stopped = pe_order_implies_then_printed;
         int start = pe_order_implies_then | pe_order_runnable_left;
         int started =
             pe_order_runnable_left | pe_order_implies_then | pe_order_implies_then_printed;
 
         child_rsc->cmds->internal_constraints(child_rsc, data_set);
 
         if (last_rsc == NULL) {
             /* First child: ordered groups use different stop/stopped flags */
             if (group_data->ordered) {
                 stop |= pe_order_optional;
                 stopped = pe_order_implies_then;
             }
 
         } else if (group_data->colocated) {
             /* Every later child is colocated with its predecessor */
             rsc_colocation_new("group:internal_colocation", NULL, INFINITY,
                                child_rsc, last_rsc, NULL, NULL, data_set);
         }
 
         if (top->variant == pe_master) {
             new_rsc_order(rsc, RSC_DEMOTE, child_rsc, RSC_DEMOTE,
                           stop | pe_order_implies_first_printed, data_set);
 
             new_rsc_order(child_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, stopped, data_set);
 
             new_rsc_order(child_rsc, RSC_PROMOTE, rsc, RSC_PROMOTED, started, data_set);
 
             new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE,
                           pe_order_implies_first_printed, data_set);
 
         }
 
         /* Group start/stop before the child's; child's before started/stopped */
         order_start_start(rsc, child_rsc, pe_order_implies_first_printed);
         order_stop_stop(rsc, child_rsc, stop | pe_order_implies_first_printed);
 
         new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, stopped, data_set);
 
         new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, started, data_set);
 
         if (group_data->ordered == FALSE) {
             /* Unordered group: children are ordered only against the group */
             order_start_start(rsc, child_rsc, start | pe_order_implies_first_printed);
             if (top->variant == pe_master) {
                 new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE,
                               start | pe_order_implies_first_printed, data_set);
             }
 
         } else if (last_rsc != NULL) {
             /* Ordered group, not the first child: chain to the previous child */
             child_rsc->restart_type = pe_restart_restart;
 
             order_start_start(last_rsc, child_rsc, start);
             order_stop_stop(child_rsc, last_rsc, pe_order_optional | pe_order_restart);
 
             if (top->variant == pe_master) {
                 new_rsc_order(last_rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start, data_set);
                 new_rsc_order(child_rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE, pe_order_optional,
                               data_set);
             }
 
         } else {
             /* If anyone in the group is starting, then
              *  pe_order_implies_then will cause _everyone_ in the group
              *  to be sent a start action
              * But this is safe since starting something that is already
              *  started is required to be "safe"
              */
             int flags = pe_order_none;
 
             order_start_start(rsc, child_rsc, flags);
             if (top->variant == pe_master) {
                 new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, flags, data_set);
             }
 
         }
 
         /* Look for partially active groups
          * Make sure they still shut down in sequence
          */
         if (child_rsc->running_on) {
             if (group_data->ordered
                 && last_rsc
                 && last_rsc->running_on == NULL && last_active && last_active->running_on) {
                 order_stop_stop(child_rsc, last_active, pe_order_optional);
             }
             last_active = child_rsc;
         }
 
         last_rsc = child_rsc;
     }
 
     /* After the loop last_rsc is the final child (NULL for an empty group) */
     if (group_data->ordered && last_rsc != NULL) {
         int stop_stop_flags = pe_order_implies_then;
         int stop_stopped_flags = pe_order_optional;
 
         order_stop_stop(rsc, last_rsc, stop_stop_flags);
         new_rsc_order(last_rsc, RSC_STOP, rsc, RSC_STOPPED, stop_stopped_flags, data_set);
 
         if (top->variant == pe_master) {
             new_rsc_order(rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE, stop_stop_flags, data_set);
             new_rsc_order(last_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, stop_stopped_flags, data_set);
         }
     }
 }
 
 /*
  * Apply a colocation constraint in which the group is the left-hand side.
  *
  * A colocated group forwards the constraint to its first child only.  A
  * non-colocated group rejects scores >= INFINITY with a configuration error
  * and otherwise forwards the constraint to every child.  A NULL rsc_lh or
  * rsc_rh is reported and the function returns without doing anything else.
  */
 void
 group_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
 {
     GListPtr gIter = NULL;
     group_variant_data_t *group_data = NULL;
 
     if (rsc_lh == NULL) {
         pe_err("rsc_lh was NULL for %s", constraint->id);
         return;
 
     } else if (rsc_rh == NULL) {
         pe_err("rsc_rh was NULL for %s", constraint->id);
         return;
     }
 
     gIter = rsc_lh->children;
     pe_rsc_trace(rsc_lh, "Processing constraints from %s", rsc_lh->id);
 
     get_group_variant_data(group_data, rsc_lh);
 
     if (group_data->colocated) {
         /* NOTE(review): first_child is not checked for NULL here - confirm
          * that an empty colocated group cannot reach this point
          */
         group_data->first_child->cmds->rsc_colocation_lh(group_data->first_child, rsc_rh,
                                                          constraint);
         return;
 
     } else if (constraint->score >= INFINITY) {
-        crm_config_err("%s: Cannot perform manditory colocation"
+        crm_config_err("%s: Cannot perform mandatory colocation"
                        " between non-colocated group and %s", rsc_lh->id, rsc_rh->id);
         return;
     }
 
     /* Non-colocated group with a score below INFINITY: apply to each child */
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         child_rsc->cmds->rsc_colocation_lh(child_rsc, rsc_rh, constraint);
     }
 }
 
 /*
  * Apply a colocation constraint in which the group is the right-hand side.
  *
  * rsc_lh must be of variant pe_native, otherwise CRM_CHECK fails and the
  * function returns.  Nothing is done while the group is still provisional.
  * For a colocated group with a first child, the constraint is forwarded to
  * the last child when the score is >= INFINITY and to the first child
  * otherwise.  A non-colocated group rejects scores >= INFINITY with a
  * configuration error and otherwise forwards the constraint to every child.
  */
 void
 group_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
 {
     GListPtr gIter = rsc_rh->children;
     group_variant_data_t *group_data = NULL;
 
     get_group_variant_data(group_data, rsc_rh);
     CRM_CHECK(rsc_lh->variant == pe_native, return);
 
     pe_rsc_trace(rsc_rh, "Processing RH of constraint %s", constraint->id);
     print_resource(LOG_DEBUG_3, "LHS", rsc_lh, TRUE);
 
     if (is_set(rsc_rh->flags, pe_rsc_provisional)) {
         return;
 
     } else if (group_data->colocated && group_data->first_child) {
         if (constraint->score >= INFINITY) {
             /* Ensure RHS is _fully_ up before can start LHS */
             group_data->last_child->cmds->rsc_colocation_rh(rsc_lh, group_data->last_child,
                                                             constraint);
         } else {
             /* A partially active RHS is fine */
             group_data->first_child->cmds->rsc_colocation_rh(rsc_lh, group_data->first_child,
                                                              constraint);
         }
 
         return;
 
     } else if (constraint->score >= INFINITY) {
-        crm_config_err("%s: Cannot perform manditory colocation with"
+        crm_config_err("%s: Cannot perform mandatory colocation with"
                        " non-colocated group: %s", rsc_lh->id, rsc_rh->id);
         return;
     }
 
     /* Score below INFINITY and not handled above: apply to each child */
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint);
     }
 }
 
 /*
  * Compute the flags of a group action from the matching child actions.
  *
  * The result starts as optional | runnable | pseudo.  For each child, the
  * action matching the task returned by get_complex_task() is looked up on
  * 'node':
  *  - a mandatory child action makes the group action mandatory (cleared in
  *    the returned flags and on 'action' itself);
  *  - a non-runnable child action, when the child's task name differs from
  *    action->task, makes the group action non-runnable (cleared in the
  *    returned flags and on 'action' itself);
  *  - a missing child action, for tasks other than stop and demote, clears
  *    runnable in the returned flags only.
  */
 enum pe_action_flags
 group_action_flags(action_t * action, node_t * node)
 {
     GListPtr gIter = NULL;
     enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
 
     for (gIter = action->rsc->children; gIter != NULL; gIter = gIter->next) {
         resource_t *child = (resource_t *) gIter->data;
         enum action_tasks task = get_complex_task(child, action->task, TRUE);
         const char *task_s = task2text(task);
         action_t *child_action = find_first_action(child->actions, NULL, task_s, node);
 
         if (child_action) {
             enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
 
             if (is_set(flags, pe_action_optional)
                 && is_set(child_flags, pe_action_optional) == FALSE) {
-                pe_rsc_trace(action->rsc, "%s is manditory because of %s", action->uuid,
+                pe_rsc_trace(action->rsc, "%s is mandatory because of %s", action->uuid,
                              child_action->uuid);
                 clear_bit(flags, pe_action_optional);
                 pe_clear_action_bit(action, pe_action_optional);
             }
             /* Runnable is only inherited when the child's task differs from
              * the group action's own task
              */
             if (safe_str_neq(task_s, action->task)
                 && is_set(flags, pe_action_runnable)
                 && is_set(child_flags, pe_action_runnable) == FALSE) {
                 pe_rsc_trace(action->rsc, "%s is not runnable because of %s", action->uuid,
                              child_action->uuid);
                 clear_bit(flags, pe_action_runnable);
                 pe_clear_action_bit(action, pe_action_runnable);
             }
 
         } else if (task != stop_rsc && task != action_demote) {
             /* Missing child action: only the returned flags are changed here */
             pe_rsc_trace(action->rsc, "%s is not runnable because of %s (not found in %s)",
                          action->uuid, task_s, child->id);
             clear_bit(flags, pe_action_runnable);
         }
     }
 
     return flags;
 }
 
 /*
  * Update the ordering between 'first' and a group action 'then'.
  *
  * native_update_actions() is applied to the pair itself, then every child
  * of then->rsc that has an action matching then->task on 'node' is updated
  * against 'first' through the child's own update_actions method.
  *
  * then->rsc must not be NULL (asserted before it is dereferenced).
  *
  * Returns the accumulated pe_graph_flags reported by the calls made.
  */
 enum pe_graph_flags
 group_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags,
                      enum pe_action_flags filter, enum pe_ordering type)
 {
     GListPtr gIter = NULL;
     enum pe_graph_flags changed = pe_graph_none;
 
     /* Validate then->rsc before reading its children */
     CRM_ASSERT(then->rsc != NULL);
     gIter = then->rsc->children;
 
     changed |= native_update_actions(first, then, node, flags, filter, type);
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child = (resource_t *) gIter->data;
         action_t *child_action = find_first_action(child->actions, NULL, then->task, node);
 
         if (child_action) {
             changed |= child->cmds->update_actions(first, child_action, node, flags, filter, type);
         }
     }
 
     return changed;
 }
 
 /*
  * Apply a location constraint to a group and its children.
  *
  * The constraint is applied to the group itself and then to each child.
  * For a colocated group, once the first child has been processed the
  * constraint's node_list_rh is temporarily swapped for the copy made by
  * node_list_dup(..., TRUE, FALSE) (presumably a copy with the scores reset -
  * confirm against node_list_dup).  The original list is restored and the
  * copy freed before returning.
  */
 void
 group_rsc_location(resource_t * rsc, rsc_to_node_t * constraint)
 {
     GListPtr member = rsc->children;
     GListPtr original_nodes = constraint->node_list_rh;
     GListPtr zeroed_nodes = node_list_dup(original_nodes, TRUE, FALSE);
     gboolean first_member = TRUE;
     group_variant_data_t *group_data = NULL;
 
     get_group_variant_data(group_data, rsc);
 
     pe_rsc_debug(rsc, "Processing rsc_location %s for %s", constraint->id, rsc->id);
 
     native_rsc_location(rsc, constraint);
 
     while (member != NULL) {
         resource_t *child_rsc = (resource_t *) member->data;
 
         child_rsc->cmds->rsc_location(child_rsc, constraint);
 
         /* After the first member of a colocated group, switch to the copy */
         if (first_member && group_data->colocated) {
             first_member = FALSE;
             constraint->node_list_rh = zeroed_nodes;
         }
 
         member = member->next;
     }
 
     /* Put the caller's list back before releasing the temporary copy */
     constraint->node_list_rh = original_nodes;
     g_list_free_full(zeroed_nodes, free);
 }
 
 /*
  * Expand a group into the transition graph: the group itself is expanded
  * with native_expand(), then each child through its own expand method.
  *
  * A NULL 'rsc' fails the CRM_CHECK and the function returns before 'rsc' is
  * dereferenced.
  */
 void
 group_expand(resource_t * rsc, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
     /* Must come first: everything below dereferences rsc */
     CRM_CHECK(rsc != NULL, return);
     gIter = rsc->children;
 
     pe_rsc_trace(rsc, "Processing actions from %s", rsc->id);
 
     native_expand(rsc, data_set);
 
     for (; gIter != NULL; gIter = gIter->next) {
         resource_t *child_rsc = (resource_t *) gIter->data;
 
         child_rsc->cmds->expand(child_rsc, data_set);
     }
 }
 
 /*
  * Merge the node weights of a group into 'nodes'.
  *
  * If the group is already flagged pe_rsc_merging, a dependency loop is
  * assumed and 'nodes' is returned unchanged.  Otherwise the first child's
  * merge_weights method is applied, followed by native_merge_weights() for
  * the left-hand resource of every constraint in the group's rsc_cons_lhs
  * list, using constraint->score / INFINITY as the factor.
  *
  * Returns the resulting node table.
  */
 GHashTable *
 group_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr,
                     float factor, enum pe_weights flags)
 {
     GListPtr gIter = rsc->rsc_cons_lhs;
     group_variant_data_t *group_data = NULL;
 
     get_group_variant_data(group_data, rsc);
 
     if (is_set(rsc->flags, pe_rsc_merging)) {
         pe_rsc_info(rsc, "Breaking dependency loop with %s at %s", rsc->id, rhs);
         return nodes;
     }
 
     /* Guards against re-entering this group while it is being merged */
     set_bit(rsc->flags, pe_rsc_merging);
 
     /* NOTE(review): first_child is dereferenced without a NULL check -
      * confirm that an empty group cannot reach this point
      */
     nodes =
         group_data->first_child->cmds->merge_weights(group_data->first_child, rhs, nodes, attr,
                                                      factor, flags);
 
     for (; gIter != NULL; gIter = gIter->next) {
         rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
         nodes = native_merge_weights(constraint->rsc_lh, rsc->id, nodes,
                                      constraint->node_attribute,
                                      (float)constraint->score / INFINITY, flags);
     }
 
     clear_bit(rsc->flags, pe_rsc_merging);
     return nodes;
 }
 
 /*
  * Group variant of append_meta: intentionally empty, nothing is added to
  * 'xml' for groups.
  */
 void
 group_append_meta(resource_t * rsc, xmlNode * xml)
 {
 }
diff --git a/pengine/native.c b/pengine/native.c
index 4a81def421..6bcadb8b13 100644
--- a/pengine/native.c
+++ b/pengine/native.c
@@ -1,3333 +1,3333 @@
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <pengine.h>
 #include <crm/pengine/rules.h>
 #include <crm/msg_xml.h>
 #include <allocate.h>
 #include <utils.h>
 #include <crm/services.h>
 
 /* #define DELETE_THEN_REFRESH 1  // The crmd will remove the resource from the CIB itself, making this redundant */
 #define INFINITY_HACK   (INFINITY * -100)
 
 #define VARIANT_NATIVE 1
 #include <lib/pengine/variant.h>
 
 /* Forward declarations of non-static helpers used by this file */
 gboolean update_action(action_t * then);
 
 /* Colocation handling for the right-hand resource; judging by the names,
  * the "must" and "mustnot" variants cover positive and negative
  * colocation respectively -- confirm against the definitions */
 void native_rsc_colocation_rh_must(resource_t * rsc_lh, gboolean update_lh,
                                    resource_t * rsc_rh, gboolean update_rh);
 
 void native_rsc_colocation_rh_mustnot(resource_t * rsc_lh, gboolean update_lh,
                                       resource_t * rsc_rh, gboolean update_rh);
 
 /* Creation of recurring operations, per resource and per configured <op> */
 void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set);
 void RecurringOp(resource_t * rsc, action_t * start, node_t * node,
                  xmlNode * operation, pe_working_set_t * data_set);
 void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node,
                        pe_working_set_t * data_set);
 void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node,
                          xmlNode * operation, pe_working_set_t * data_set);
 void pe_post_notify(resource_t * rsc, node_t * node, action_t * op,
                     notify_data_t * n_data, pe_working_set_t * data_set);
 
 /* Role-transition handlers.  StopRsc, StartRsc, DemoteRsc, PromoteRsc,
  * RoleError and NullOp are the entries of rsc_action_matrix below;
  * DeleteRsc has the same signature but does not appear in that table. */
 gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set);
 gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
 gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
 gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
 gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional,
                     pe_working_set_t * data_set);
 gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
 gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
 
 /* *INDENT-OFF* */
 /*
  * Role transition table, indexed [current role][target role] as labelled
  * on the rows and columns below.  Each entry is the role to move to next;
  * for example the Stopped row maps a Master target to RSC_ROLE_SLAVE.
  */
 enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
 /* Current State */
 /*       Next State:    Unknown 	  Stopped	     Started	        Slave	          Master */
     /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, },
     /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE,   RSC_ROLE_SLAVE, },
     /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE,   RSC_ROLE_MASTER, },
     /* Slave */	  { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE,   RSC_ROLE_MASTER, },
     /* Master */  { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE,   RSC_ROLE_SLAVE,   RSC_ROLE_SLAVE,   RSC_ROLE_MASTER, },
 };
 
 /*
  * Handler table with the same [current role][target role] indexing: each
  * entry is the function invoked for that transition.  RoleError marks
  * combinations listed as errors and NullOp those needing no action
  * (going by the handler names -- confirm against their definitions).
  */
 gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = {
 /* Current State */
 /*       Next State:       Unknown	Stopped		Started		Slave		Master */
     /* Unknown */	{ RoleError,	StopRsc,	RoleError,	RoleError,	RoleError,  },
     /* Stopped */	{ RoleError,	NullOp,		StartRsc,	StartRsc,	RoleError,  },
     /* Started */	{ RoleError,	StopRsc,	NullOp,		NullOp,		PromoteRsc, },
     /* Slave */	        { RoleError,	StopRsc,	StopRsc, 	NullOp,		PromoteRsc, },
     /* Master */	{ RoleError,	DemoteRsc,	DemoteRsc,	DemoteRsc,	NullOp,     },
 };
 /* *INDENT-ON* */
 
 static action_t * get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current);
 
 /*
  * Choose a node for a provisional resource and hand the decision to
  * native_assign_node().
  *
  *  1. If 'prefer' is an allowed node with a non-negative weight that can
  *     run resources, use it.
  *  2. Otherwise sort the allowed nodes with sort_node_weight() and take the
  *     first entry.
  *  3. If the node the resource currently runs on has the same weight as
  *     that entry (and can run resources), keep the resource where it is.
  *
  * rsc:      resource to place
  * prefer:   optional preferred node; may be adjusted by process_utilization()
  * data_set: working set, passed to process_utilization()
  *
  * Returns TRUE/FALSE according to whether rsc->allocated_to is set when the
  * resource is no longer provisional, otherwise the result of
  * native_assign_node().
  */
 static gboolean
 native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set)
 {
     GListPtr nodes = NULL;
     node_t *chosen = NULL;
 
     int lpc = 0;
     int multiple = 0;
     int length = 0;
     gboolean result = FALSE;
 
     process_utilization(rsc, &prefer, data_set);
 
     length = g_hash_table_size(rsc->allowed_nodes);
 
     if (is_not_set(rsc->flags, pe_rsc_provisional)) {
         return rsc->allocated_to ? TRUE : FALSE;
     }
 
     if (prefer) {
         chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
         if (chosen == NULL) {
             pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", prefer->details->uname,
                          rsc->id);
 
         } else if (chosen->weight < 0) {
             pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", chosen->details->uname,
                          rsc->id);
             chosen = NULL;
 
         } else if (can_run_resources(chosen) == FALSE) {
             /* A preferred node that cannot run resources must be rejected,
              * not silently kept as the choice */
             pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", chosen->details->uname,
                          rsc->id);
             chosen = NULL;
 
         } else {
             pe_rsc_trace(rsc,
                          "Using preferred node %s for %s instead of choosing from %d candidates",
                          chosen->details->uname, rsc->id, length);
         }
     }
 
     if (chosen == NULL && rsc->allowed_nodes) {
         nodes = g_hash_table_get_values(rsc->allowed_nodes);
         nodes = g_list_sort_with_data(nodes, sort_node_weight, g_list_nth_data(rsc->running_on, 0));
 
         chosen = g_list_nth_data(nodes, 0);
         pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates",
                      chosen ? chosen->details->uname : "<none>", rsc->id, length);
 
         if (chosen && chosen->weight > 0 && can_run_resources(chosen)) {
             node_t *running = g_list_nth_data(rsc->running_on, 0);
 
             if (running && can_run_resources(running) == FALSE) {
                 pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
                              rsc->id, running->details->uname);
                 running = NULL;
             }
 
             /* Only look for ties when there is a usable current node to
              * favour; entry 0 is the initial choice, so start at 1 */
             for (lpc = 1; lpc < length && running; lpc++) {
                 node_t *tmp = g_list_nth_data(nodes, lpc);
 
                 if (tmp->weight == chosen->weight) {
                     multiple++;
                     if (tmp->details == running->details) {
                         /* prefer the existing node if scores are equal */
                         chosen = tmp;
                     }
                 }
             }
         }
     }
 
     if (multiple > 1) {
         int log_level = LOG_INFO;
         char score[33];
 
         score2char_stack(chosen->weight, score, sizeof(score));
 
         if (chosen->weight >= INFINITY) {
             log_level = LOG_WARNING;
         }
 
         do_crm_log(log_level, "%d nodes with equal score (%s) for"
                    " running %s resources.  Chose %s.",
                    multiple, score, rsc->id, chosen->details->uname);
     }
 
     result = native_assign_node(rsc, nodes, chosen, FALSE);
     g_list_free(nodes);
     return result;
 }
 
 /*
  * Return the best weight among the nodes in 'list' whose 'attr' attribute
  * equals 'value'.  Nodes that cannot run resources count as -INFINITY, and
  * -INFINITY is also the result when no node matches.  A NULL attr means
  * "#" XML_ATTR_UNAME.
  */
 static int
 node_list_attr_score(GHashTable * list, const char *attr, const char *value)
 {
     const char *winner = NULL;
     int top = -INFINITY;
     node_t *candidate = NULL;
     GHashTableIter cursor;
 
     if (attr == NULL) {
         attr = "#" XML_ATTR_UNAME;
     }
 
     g_hash_table_iter_init(&cursor, list);
     while (g_hash_table_iter_next(&cursor, NULL, (void **)&candidate)) {
         const char *found = NULL;
         int weight = can_run_resources(candidate) ? candidate->weight : -INFINITY;
 
         /* Once a match exists, only a strictly better weight can replace it */
         if (weight <= top && winner != NULL) {
             continue;
         }
 
         found = g_hash_table_lookup(candidate->details->attrs, attr);
         if (safe_str_eq(value, found)) {
             top = weight;
             winner = candidate->details->uname;
         }
     }
 
     if (safe_str_neq(attr, "#" XML_ATTR_UNAME)) {
         crm_info("Best score for %s=%s was %s with %d",
                  attr, value, winner ? winner : "<none>", top);
     }
 
     return top;
 }
 
 /*
  * Adjust the weight of every node in list1 by the (scaled) best score of
  * the nodes in list2 that share its 'attr' value.
  *
  * list1:         table of nodes whose weights are updated in place
  * list2:         table of nodes supplying the scores (see node_list_attr_score())
  * attr:          node attribute used for matching; NULL means "#" XML_ATTR_UNAME
  * factor:        multiplier applied to the score from list2
  * only_positive: when set, a merge that would turn a positive weight negative
  *                marks the node with INFINITY_HACK instead, and a zero weight
  *                is left untouched
  */
 static void
 node_hash_update(GHashTable * list1, GHashTable * list2, const char *attr, float factor,
                  gboolean only_positive)
 {
     int score = 0;
     int new_score = 0;
     GHashTableIter iter;
     node_t *node = NULL;
 
     if (attr == NULL) {
         attr = "#" XML_ATTR_UNAME;
     }
 
     g_hash_table_iter_init(&iter, list1);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
         CRM_LOG_ASSERT(node != NULL);
         if (node == NULL) {
             continue;
         }
 
         score = node_list_attr_score(list2, attr, g_hash_table_lookup(node->details->attrs, attr));
         new_score = merge_weights(factor * score, node->weight);
 
         if (factor < 0 && score < 0) {
             /* Negative preference for a node with a negative score
              * should not become a positive preference
              *
              * TODO - Decide if we want to filter only if weight == -INFINITY
              *
              */
             crm_trace("%s: Filtering %d + %f*%d (factor * score)",
                       node->details->uname, node->weight, factor, score);
 
         } else if (node->weight == INFINITY_HACK) {
             /* Already marked by an earlier merge; leave the marker alone */
             crm_trace("%s: Filtering %d + %f*%d (node < 0)",
                       node->details->uname, node->weight, factor, score);
 
         } else if (only_positive && new_score < 0 && node->weight > 0) {
             /* Trace first so the message shows the original weight rather
              * than the INFINITY_HACK marker */
             crm_trace("%s: Filtering %d + %f*%d (score > 0)",
                       node->details->uname, node->weight, factor, score);
             node->weight = INFINITY_HACK;
 
         } else if (only_positive && new_score < 0 && node->weight == 0) {
             crm_trace("%s: Filtering %d + %f*%d (score == 0)",
                       node->details->uname, node->weight, factor, score);
 
         } else {
             crm_trace("%s: %d + %f*%d", node->details->uname, node->weight, factor, score);
             node->weight = new_score;
         }
     }
 }
 
 /*
  * Build a new node table from the values of 'hash' by way of a temporary
  * list handed to node_hash_from_list().  The temporary list is freed;
  * the returned table belongs to the caller.
  */
 GHashTable *
 node_hash_dup(GHashTable * hash)
 {
     GHashTable *copy = NULL;
     GListPtr values = g_hash_table_get_values(hash);
 
     /* Hack! Round-trip through a list instead of copying the table directly */
     copy = node_hash_from_list(values);
     g_list_free(values);
 
     return copy;
 }
 
 /*
  * merge_weights implementation for native resources: forwards every
  * argument unchanged to rsc_merge_weights() and returns its result.
  */
 GHashTable *
 native_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr,
                      float factor, enum pe_weights flags)
 {
     GHashTable *merged = rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags);
 
     return merged;
 }
 
 /*
  * Fold the node preferences of 'rsc' (and of resources colocated with it)
  * into the score table 'nodes' and return the resulting table.
  *
  * rsc:    resource whose allowed_nodes (or children, for a group) supply scores
  * rhs:    identifier used in log messages and passed on to nested calls
  * nodes:  incoming score table.  On the normal path it is destroyed and a new
  *         table is returned; on loop detection or rollback it is returned
  *         untouched.  May be NULL when pe_weights_init is set.
  * attr:   node attribute used to match nodes (see node_hash_update())
  * factor: multiplier applied to scores; its sign is propagated to nested
  *         constraints through 'multiplier'
  * flags:  pe_weights_* bits (init, positive, rollback, forward) selecting the
  *         behaviour described inline below
  */
 GHashTable *
 rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr,
                   float factor, enum pe_weights flags)
 {
     GHashTable *work = NULL;
     int multiplier = 1;
 
     if (factor < 0) {
         multiplier = -1;
     }
 
     /* pe_rsc_merging is set for the duration of this call, so re-entering
      * for the same resource means the constraints form a loop */
     if (is_set(rsc->flags, pe_rsc_merging)) {
         pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", rhs, rsc->id);
         return nodes;
     }
 
     set_bit(rsc->flags, pe_rsc_merging);
 
     if (is_set(flags, pe_weights_init)) {
         /* Initial call: start from this resource's own scores instead of
          * combining with 'nodes' */
         if (rsc->variant == pe_group && rsc->children) {
             GListPtr last = rsc->children;
 
             while (last->next != NULL) {
                 last = last->next;
             }
 
             /* For a group the starting scores come from its last child */
             pe_rsc_trace(rsc, "Merging %s as a group %p %p", rsc->id, rsc->children, last);
             work = rsc_merge_weights(last->data, rhs, NULL, attr, factor, flags);
 
         } else {
             work = node_hash_dup(rsc->allowed_nodes);
         }
         clear_bit(flags, pe_weights_init);
 
     } else if (rsc->variant == pe_group && rsc->children) {
         GListPtr iter = rsc->children;
 
         pe_rsc_trace(rsc, "%s: Combining scores from %d children of %s", rhs, g_list_length(iter), rsc->id);
         work = node_hash_dup(nodes);
         /* NOTE(review): the loop condition stops before the last child, so
          * the final member is not merged here -- confirm this is intended */
         for(iter = rsc->children; iter->next != NULL; iter = iter->next) {
             work = rsc_merge_weights(iter->data, rhs, work, attr, factor, flags);
         }
 
     } else {
         pe_rsc_trace(rsc, "%s: Combining scores from %s", rhs, rsc->id);
         work = node_hash_dup(nodes);
         node_hash_update(work, rsc->allowed_nodes, attr, factor,
                          is_set(flags, pe_weights_positive));
     }
 
     /* With rollback requested, discard the merge if it left no node able to
      * run anything and hand back the caller's table unchanged */
     if (is_set(flags, pe_weights_rollback) && can_run_any(work) == FALSE) {
         pe_rsc_info(rsc, "%s: Rolling back scores from %s", rhs, rsc->id);
         g_hash_table_destroy(work);
         clear_bit(rsc->flags, pe_rsc_merging);
         return nodes;
     }
 
     if (can_run_any(work)) {
         GListPtr gIter = NULL;
 
         /* Choose which constraint list to follow: rsc_cons when merging
          * "forward", otherwise rsc_cons_lhs (taken from the last child
          * for a group) */
         if (is_set(flags, pe_weights_forward)) {
             gIter = rsc->rsc_cons;
             crm_trace("Checking %d additional colocation constraints", g_list_length(gIter));
 
         } else if(rsc->variant == pe_group && rsc->children) {
             GListPtr last = rsc->children;
 
             while (last->next != NULL) {
                 last = last->next;
             }
 
             gIter = ((resource_t*)last->data)->rsc_cons_lhs;
             crm_trace("Checking %d additional optional group colocation constraints from %s",
                       g_list_length(gIter), ((resource_t*)last->data)->id);
 
         } else {
             gIter = rsc->rsc_cons_lhs;
             crm_trace("Checking %d additional optional colocation constraints %s", g_list_length(gIter), rsc->id);
         }
 
         for (; gIter != NULL; gIter = gIter->next) {
             resource_t *other = NULL;
             rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
             if (is_set(flags, pe_weights_forward)) {
                 other = constraint->rsc_rh;
             } else {
                 other = constraint->rsc_lh;
             }
 
             /* Nested merges always carry pe_weights_rollback, so a
              * constraint that would leave no runnable node is skipped */
             pe_rsc_trace(rsc, "Applying %s (%s)", constraint->id, other->id);
             work = rsc_merge_weights(other, rhs, work, constraint->node_attribute,
                                      multiplier * (float)constraint->score / INFINITY, flags|pe_weights_rollback);
             dump_node_scores(LOG_TRACE, NULL, rhs, work);
         }
 
     }
 
     if (is_set(flags, pe_weights_positive)) {
         node_t *node = NULL;
         GHashTableIter iter;
 
         /* Turn the INFINITY_HACK markers left by node_hash_update() back
          * into a weight of 1 */
         g_hash_table_iter_init(&iter, work);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
             if (node->weight == INFINITY_HACK) {
                 node->weight = 1;
             }
         }
     }
 
     /* The caller's table has been superseded by 'work' */
     if (nodes) {
         g_hash_table_destroy(nodes);
     }
 
     clear_bit(rsc->flags, pe_rsc_merging);
     return work;
 }
 
 /*
  * Allocate a native resource to a node.
  *
  * rsc:      resource to allocate
  * prefer:   optional preferred node, passed to native_choose_node() and to the
  *           parent's allocate method when allocation is escalated
  * data_set: working set (cluster flags, node list, quorum policy)
  *
  * Returns rsc->allocated_to, which may be NULL; NULL is also returned when a
  * dependency loop is detected (pe_rsc_allocating already set).
  */
 node_t *
 native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
     int alloc_details = scores_log_level + 1;
 
     if (rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) {
         /* never allocate children on their own */
         pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id,
                      rsc->parent->id);
         rsc->parent->cmds->allocate(rsc->parent, prefer, data_set);
     }
 
     /* Already decided (possibly by the parent allocation just above) */
     if (is_not_set(rsc->flags, pe_rsc_provisional)) {
         return rsc->allocated_to;
     }
 
     if (is_set(rsc->flags, pe_rsc_allocating)) {
         pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
         return NULL;
     }
 
     set_bit(rsc->flags, pe_rsc_allocating);
     print_resource(alloc_details, "Allocating: ", rsc, FALSE);
     dump_node_scores(alloc_details, rsc, "Pre-allloc", rsc->allowed_nodes);
 
     /* Phase 1: constraints where rsc is the dependent side.  Allocate each
      * right-hand resource first, then apply the colocation to rsc. */
     for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
         rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
         GHashTable *archive = NULL;
         resource_t *rsc_rh = constraint->rsc_rh;
 
         pe_rsc_trace(rsc, "%s: Pre-Processing %s (%s, %d, %s)",
                      rsc->id, constraint->id, rsc_rh->id,
                      constraint->score, role2text(constraint->role_lh));
         /* Keep a copy of the scores for master-role constraints and for
          * finite negative scores, so they can be restored below */
         if (constraint->role_lh >= RSC_ROLE_MASTER
             || (constraint->score < 0 && constraint->score > -INFINITY)) {
             archive = node_hash_dup(rsc->allowed_nodes);
         }
         rsc_rh->cmds->allocate(rsc_rh, NULL, data_set);
         rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint);
         if (archive && can_run_any(rsc->allowed_nodes) == FALSE) {
             pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, rsc_rh->id);
             g_hash_table_destroy(rsc->allowed_nodes);
             rsc->allowed_nodes = archive;
             archive = NULL;
         }
         if (archive) {
             g_hash_table_destroy(archive);
         }
     }
 
     dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes);
 
     /* Phase 2: constraints where rsc is the right-hand side.  Merge the
      * preferences of the dependent resources into rsc's scores, with
      * rollback if a merge would leave nowhere to run. */
     for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
         rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
 
         rsc->allowed_nodes =
             constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes,
                                                     constraint->node_attribute,
                                                     (float)constraint->score / INFINITY,
                                                     pe_weights_rollback);
     }
 
     print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE);
     if (rsc->next_role == RSC_ROLE_STOPPED) {
         pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id);
         /* make sure it doesn't come up again */
         resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set);
 
     } else if(rsc->next_role > rsc->role
               && is_set(data_set->flags, pe_flag_have_quorum) == FALSE
               && data_set->no_quorum_policy == no_quorum_freeze) {
         /* Without quorum and with policy "freeze", the role may not rise */
         crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze",
                    rsc->id, role2text(rsc->role), role2text(rsc->next_role));
         rsc->next_role = rsc->role;
     }
 
     dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__,
                      rsc->allowed_nodes);
     /* Fencing enabled but no fencing resource configured: treat the
      * resource as unmanaged */
     if (is_set(data_set->flags, pe_flag_stonith_enabled)
         && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
         clear_bit(rsc->flags, pe_rsc_managed);
     }
 
     if (is_not_set(rsc->flags, pe_rsc_managed)) {
         const char *reason = NULL;
         node_t *assign_to = NULL;
 
         /* Unmanaged resources stay in their current role, on the first
          * node they are running on (or nowhere if inactive) */
         rsc->next_role = rsc->role;
         if (rsc->running_on == NULL) {
             reason = "inactive";
         } else if (rsc->role == RSC_ROLE_MASTER) {
             assign_to = rsc->running_on->data;
             reason = "master";
         } else if (is_set(rsc->flags, pe_rsc_failed)) {
             assign_to = rsc->running_on->data;
             reason = "failed";
         } else {
             assign_to = rsc->running_on->data;
             reason = "active";
         }
         pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id,
                     assign_to ? assign_to->details->uname : "'nowhere'", reason);
         native_assign_node(rsc, NULL, assign_to, TRUE);
 
     } else if (is_set(data_set->flags, pe_flag_stop_everything)) {
         pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id);
         native_assign_node(rsc, NULL, NULL, TRUE);
 
     } else if (is_set(rsc->flags, pe_rsc_provisional)
                && native_choose_node(rsc, prefer, data_set)) {
         pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id,
                      rsc->allocated_to->details->uname);
 
     } else if (rsc->allocated_to == NULL) {
         if (is_not_set(rsc->flags, pe_rsc_orphan)) {
             pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
         } else if (rsc->running_on != NULL) {
             pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
         }
 
     } else {
         pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id,
                      rsc->allocated_to->details->uname);
     }
 
     clear_bit(rsc->flags, pe_rsc_allocating);
     print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE);
 
     /* A resource that represents a remote node also drives the state of
      * the node entry that shares its id */
     if (rsc->is_remote_node) {
         node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
 
         CRM_ASSERT(remote_node != NULL);
         if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) {
             crm_trace("Setting remote node %s to ONLINE", remote_node->details->id);
             remote_node->details->online = TRUE;
             /* We shouldn't consider an unseen remote-node unclean if we are going
              * to try and connect to it. Otherwise we get an unnecessary fence */
             if (remote_node->details->unseen == TRUE) {
                 remote_node->details->unclean = FALSE;
             }
 
         } else {
             crm_trace("Setting remote node %s to SHUTDOWN.  next role = %s, allocated=%s",
                 remote_node->details->id, role2text(rsc->next_role), rsc->allocated_to ? "true" : "false");
             remote_node->details->shutdown = TRUE;
         }
     }
 
     return rsc->allocated_to;
 }
 
 /*
  * Report whether the resource configures more than one <op> with the given
  * name and interval.  An <op> without an interval attribute is compared as
  * "0".  Every occurrence after the first is logged as a configuration
  * error.
  *
  * Returns TRUE if at least one duplicate was found, FALSE otherwise.
  */
 static gboolean
 is_op_dup(resource_t * rsc, const char *name, const char *interval)
 {
     xmlNode *op = NULL;
     const char *first_id = NULL;
     gboolean found_dup = FALSE;
 
     CRM_ASSERT(rsc);
 
     for (op = __xml_first_child(rsc->ops_xml); op != NULL; op = __xml_next_element(op)) {
         const char *op_interval = NULL;
 
         if (crm_str_eq((const char *)op->name, "op", TRUE) == FALSE) {
             continue;
         }
 
         if (safe_str_neq(crm_element_value(op, "name"), name)) {
             continue;
         }
 
         op_interval = crm_element_value(op, XML_LRM_ATTR_INTERVAL);
         if (op_interval == NULL) {
             op_interval = "0";
         }
 
         if (safe_str_neq(op_interval, interval)) {
             continue;
         }
 
         if (first_id != NULL) {
             /* A second (or later) op with the same name and interval */
             crm_config_err("Operation %s is a duplicate of %s", ID(op), first_id);
             crm_config_err
                 ("Do not use the same (name, interval) combination more than once per resource");
             found_dup = TRUE;
             continue;
         }
 
         first_id = ID(op);
     }
 
     return found_dup;
 }
 
 /*
  * Create (or cancel) the recurring action described by one <op> element
  * for a resource that is expected to be active.
  *
  * rsc:       resource owning the operation
  * start:     the resource's start action, or NULL; its optional/runnable
  *            flags decide whether the recurring action is optional/runnable
  * node:      node the action is created for, or NULL
  * operation: the <op> XML element being processed
  * data_set:  working set passed on to action and ordering creation
  *
  * Nothing is created for operations with role="Stopped", a zero interval,
  * a duplicated (name, interval) pair, a stop/start/demote/promote name, or
  * when find_rsc_op_entry() reports no entry for the key.
  */
 void
 RecurringOp(resource_t * rsc, action_t * start, node_t * node,
             xmlNode * operation, pe_working_set_t * data_set)
 {
     char *key = NULL;
     const char *name = NULL;
     const char *value = NULL;
     const char *interval = NULL;
     const char *node_uname = NULL;
 
     unsigned long long interval_ms = 0;
     action_t *mon = NULL;
     gboolean is_optional = TRUE;
     GListPtr possible_matches = NULL;
 
     /* Only process for the operations without role="Stopped" */
     value = crm_element_value(operation, "role");
     if (value && text2role(value) == RSC_ROLE_STOPPED) {
         return;
     }
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s",
                  ID(operation), rsc->id, role2text(rsc->next_role),
                  node ? node->details->uname : "n/a");
 
     if (node != NULL) {
         node_uname = node->details->uname;
     }
 
     interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
     interval_ms = crm_get_interval(interval);
 
     /* An interval of zero means the operation is not recurring */
     if (interval_ms == 0) {
         return;
     }
 
     name = crm_element_value(operation, "name");
     if (is_op_dup(rsc, name, interval)) {
         return;
     }
 
     if (safe_str_eq(name, RSC_STOP)
         || safe_str_eq(name, RSC_START)
         || safe_str_eq(name, RSC_DEMOTE)
         || safe_str_eq(name, RSC_PROMOTE)
         ) {
         crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name);
         return;
     }
 
     key = generate_op_key(rsc->id, name, interval_ms);
     if (find_rsc_op_entry(rsc, key) == NULL) {
         /* disabled */
         free(key);
         return;
     }
 
     /* The recurring action inherits its optional status from the start
      * action; with no start action it begins as optional */
     if (start != NULL) {
         pe_rsc_trace(rsc, "Marking %s %s due to %s",
-                     key, is_set(start->flags, pe_action_optional) ? "optional" : "manditory",
+                     key, is_set(start->flags, pe_action_optional) ? "optional" : "mandatory",
                      start->uuid);
         is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional);
     } else {
         pe_rsc_trace(rsc, "Marking %s optional", key);
         is_optional = TRUE;
     }
 
     /* start a monitor for an already active resource */
     possible_matches = find_actions_exact(rsc->actions, key, node);
     if (possible_matches == NULL) {
         is_optional = FALSE;
-        pe_rsc_trace(rsc, "Marking %s manditory: not active", key);
+        pe_rsc_trace(rsc, "Marking %s mandatory: not active", key);
 
     } else {
         GListPtr gIter = NULL;
 
         /* An existing action flagged for rescheduling must be re-issued */
         for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) {
             action_t *op = (action_t *) gIter->data;
 
             if (is_set(op->flags, pe_action_reschedule)) {
                 is_optional = FALSE;
                 break;
             }
         }
         g_list_free(possible_matches);
     }
 
     /* The operation does not apply to the role the resource is heading
      * for: either it names a different role, or it names none and the
      * resource will be master */
     if ((rsc->next_role == RSC_ROLE_MASTER && value == NULL)
         || (value != NULL && text2role(value) != rsc->next_role)) {
         int log_level = LOG_DEBUG_2;
         const char *result = "Ignoring";
 
         if (is_optional) {
             char *local_key = strdup(key);
 
             log_level = LOG_INFO;
             result = "Cancelling";
             /* its running : cancel it */
 
             mon = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set);
 
             /* Rewrite the new action into a cancel of the named operation */
             free(mon->task);
             free(mon->cancel_task);
             mon->task = strdup(RSC_CANCEL);
             mon->cancel_task = strdup(name);
             add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval);
             add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name);
 
             /* local_key is reused below for the ordering key; presumably
              * custom_action() took ownership of the strdup'ed copy --
              * TODO confirm */
             local_key = NULL;
 
             /* Pick the role change the cancel has to be ordered before */
             switch (rsc->role) {
                 case RSC_ROLE_SLAVE:
                 case RSC_ROLE_STARTED:
                     if (rsc->next_role == RSC_ROLE_MASTER) {
                         local_key = promote_key(rsc);
 
                     } else if (rsc->next_role == RSC_ROLE_STOPPED) {
                         local_key = stop_key(rsc);
                     }
 
                     break;
                 case RSC_ROLE_MASTER:
                     local_key = demote_key(rsc);
                     break;
                 default:
                     break;
             }
 
             if (local_key) {
                 custom_action_order(rsc, NULL, mon, rsc, local_key, NULL,
                                     pe_order_runnable_left, data_set);
             }
 
             mon = NULL;
         }
 
         do_crm_log(log_level, "%s action %s (%s vs. %s)",
                    result, key, value ? value : role2text(RSC_ROLE_SLAVE),
                    role2text(rsc->next_role));
 
         free(key);
         key = NULL;
         return;
     }
 
     mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set);
     /* From here on 'key' aliases the action's own uuid; presumably the
      * original string now belongs to custom_action() -- TODO confirm */
     key = mon->uuid;
     if (is_optional) {
         pe_rsc_trace(rsc, "%s\t   %s (optional)", crm_str(node_uname), mon->uuid);
     }
 
     /* The recurring action cannot run without a runnable start action or
      * without an online, clean node */
     if (start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) {
         pe_rsc_debug(rsc, "%s\t   %s (cancelled : start un-runnable)", crm_str(node_uname),
                      mon->uuid);
         update_action_flags(mon, pe_action_runnable | pe_action_clear);
 
     } else if (node == NULL || node->details->online == FALSE || node->details->unclean) {
         pe_rsc_debug(rsc, "%s\t   %s (cancelled : no node available)", crm_str(node_uname),
                      mon->uuid);
         update_action_flags(mon, pe_action_runnable | pe_action_clear);
 
     } else if (is_set(mon->flags, pe_action_optional) == FALSE) {
         pe_rsc_info(rsc, " Start recurring %s (%llus) for %s on %s", mon->task, interval_ms / 1000,
                     rsc->id, crm_str(node_uname));
     }
 
     /* For a resource that will be master, the expected result of the
      * action is PCMK_OCF_RUNNING_MASTER */
     if (rsc->next_role == RSC_ROLE_MASTER) {
         char *running_master = crm_itoa(PCMK_OCF_RUNNING_MASTER);
 
         add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master);
         free(running_master);
     }
 
     /* Order the recurring action after start, and after promote or demote
      * when the master role is involved */
     if (node == NULL || is_set(rsc->flags, pe_rsc_managed)) {
         custom_action_order(rsc, start_key(rsc), NULL,
                             NULL, strdup(key), mon,
                             pe_order_implies_then | pe_order_runnable_left, data_set);
 
         if (rsc->next_role == RSC_ROLE_MASTER) {
             custom_action_order(rsc, promote_key(rsc), NULL,
                                 rsc, NULL, mon,
                                 pe_order_optional | pe_order_runnable_left, data_set);
 
         } else if (rsc->role == RSC_ROLE_MASTER) {
             custom_action_order(rsc, demote_key(rsc), NULL,
                                 rsc, NULL, mon,
                                 pe_order_optional | pe_order_runnable_left, data_set);
         }
     }
 }
 
 /*
  * Run RecurringOp() for every <op> child of the resource's operations XML.
  * Nothing is done when the resource has pe_rsc_maintenance set or when the
  * given node is flagged as being in maintenance.
  */
 void
 Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set)
 {
     xmlNode *op = NULL;
 
     if (is_not_set(rsc->flags, pe_rsc_maintenance) == FALSE) {
         return;
     }
 
     if (node != NULL && node->details->maintenance != FALSE) {
         return;
     }
 
     op = __xml_first_child(rsc->ops_xml);
     while (op != NULL) {
         if (crm_str_eq((const char *)op->name, "op", TRUE)) {
             RecurringOp(rsc, start, node, op, data_set);
         }
         op = __xml_next_element(op);
     }
 }
 
 /*
  * Schedule one recurring operation that is configured with role="Stopped".
  *
  * rsc        resource the operation belongs to
  * start      not referenced in this function body
  * node       node the resource is going to run on, or NULL
  * operation  the <op> XML element being processed
  * data_set   working set; its node list is iterated below
  *
  * The function returns without scheduling anything when the resource is
  * not flagged pe_rsc_unique, when the operation's role is not Stopped,
  * when the interval is 0, when is_op_dup() reports a duplicate, when the
  * operation name is stop/start/demote/promote, or when find_rsc_op_entry()
  * finds no entry for the generated key.
  *
  * Otherwise it (1) cancels a matching action on 'node', if one exists, and
  * (2) creates the operation on every other node in data_set->nodes with an
  * expected return code of PCMK_OCF_NOT_RUNNING.
  */
 void
 RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node,
                     xmlNode * operation, pe_working_set_t * data_set)
 {
     char *key = NULL;
     const char *name = NULL;
     const char *role = NULL;
     const char *interval = NULL;
     const char *node_uname = NULL;
 
     unsigned long long interval_ms = 0;
     GListPtr possible_matches = NULL;
     GListPtr gIter = NULL;
 
     /* TODO: Support of non-unique clone */
     if (is_set(rsc->flags, pe_rsc_unique) == FALSE) {
         return;
     }
 
     /* Only process for the operations with role="Stopped" */
     role = crm_element_value(operation, "role");
     if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) {
         return;
     }
 
     pe_rsc_trace(rsc,
                  "Creating recurring actions %s for %s in role %s on nodes where it'll not be running",
                  ID(operation), rsc->id, role2text(rsc->next_role));
 
     if (node != NULL) {
         node_uname = node->details->uname;
     }
 
     interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
     interval_ms = crm_get_interval(interval);
 
     /* an interval of 0 means the operation is not recurring */
     if (interval_ms == 0) {
         return;
     }
 
     name = crm_element_value(operation, "name");
     if (is_op_dup(rsc, name, interval)) {
         return;
     }
 
     /* these action names are rejected as recurring operations */
     if (safe_str_eq(name, RSC_STOP)
         || safe_str_eq(name, RSC_START)
         || safe_str_eq(name, RSC_DEMOTE)
         || safe_str_eq(name, RSC_PROMOTE)
         ) {
         crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name);
         return;
     }
 
     key = generate_op_key(rsc->id, name, interval_ms);
     if (find_rsc_op_entry(rsc, key) == NULL) {
         /* disabled */
         free(key);
         return;
     }
 
     /* if the monitor exists on the node where the resource will be running, cancel it */
     if (node != NULL) {
         possible_matches = find_actions_exact(rsc->actions, key, node);
         if (possible_matches) {
             action_t *cancel_op = NULL;
             char *local_key = strdup(key);
 
             g_list_free(possible_matches);
 
             /* presumably custom_action() takes ownership of local_key, since
              * the pointer is only cleared (not freed) below - TODO confirm */
             cancel_op = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set);
 
             /* replace the task strings so the action is a cancel of 'name' */
             free(cancel_op->task);
             free(cancel_op->cancel_task);
             cancel_op->task = strdup(RSC_CANCEL);
             cancel_op->cancel_task = strdup(name);
             add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, interval);
             add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, name);
 
             local_key = NULL;
 
             if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) {
                 /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */
                 /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */
                 custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL,
                                     pe_order_runnable_left, data_set);
             }
 
             pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s",
                         key, role, role2text(rsc->next_role), crm_str(node_uname));
         }
     }
 
     /* create the operation on every node except the one named by 'node' */
     for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
         node_t *stop_node = (node_t *) gIter->data;
         const char *stop_node_uname = stop_node->details->uname;
         gboolean is_optional = TRUE;
         /* NOTE(review): probe_is_optional is never changed from TRUE in this function */
         gboolean probe_is_optional = TRUE;
         gboolean stop_is_optional = TRUE;
         action_t *stopped_mon = NULL;
         char *rc_inactive = NULL;
         /* NOTE(review): probe_complete_ops is never assigned in this function */
         GListPtr probe_complete_ops = NULL;
         GListPtr stop_ops = NULL;
         GListPtr local_gIter = NULL;
         char *stop_op_key = NULL;
 
         if (node_uname && safe_str_eq(stop_node_uname, node_uname)) {
             continue;
         }
 
         pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s",
                      ID(operation), rsc->id, crm_str(stop_node_uname));
 
         /* start a monitor for an already stopped resource */
         possible_matches = find_actions_exact(rsc->actions, key, stop_node);
         if (possible_matches == NULL) {
-            pe_rsc_trace(rsc, "Marking %s manditory on %s: not active", key,
+            pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key,
                          crm_str(stop_node_uname));
             is_optional = FALSE;
         } else {
             pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key,
                          crm_str(stop_node_uname));
             is_optional = TRUE;
             g_list_free(possible_matches);
         }
 
         stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set);
 
         /* the operation is expected to report "not running" on this node */
         rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING);
         add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive);
         free(rc_inactive);
 
         /* for managed resources, order each probe on this node before the operation */
         if (is_set(rsc->flags, pe_rsc_managed)) {
             char *probe_key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0);
             GListPtr probes = find_actions(rsc->actions, probe_key, stop_node);
             GListPtr pIter = NULL;
 
             for (pIter = probes; pIter != NULL; pIter = pIter->next) {
                 action_t *probe = (action_t *) pIter->data;
 
                 order_actions(probe, stopped_mon, pe_order_runnable_left);
                 crm_trace("%s then %s on %s\n", probe->uuid, stopped_mon->uuid, stop_node->details->uname);
             }
 
             g_list_free(probes);
             free(probe_key);
         }
 
         if (probe_complete_ops) {
             g_list_free(probe_complete_ops);
         }
 
         stop_op_key = stop_key(rsc);
         stop_ops = find_actions_exact(rsc->actions, stop_op_key, stop_node);
 
         /* inspect every stop action scheduled for this node */
         for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) {
             action_t *stop = (action_t *) local_gIter->data;
 
             if (is_set(stop->flags, pe_action_optional) == FALSE) {
                 stop_is_optional = FALSE;
             }
 
             /* an unrunnable stop makes the operation unrunnable as well */
             if (is_set(stop->flags, pe_action_runnable) == FALSE) {
                 crm_debug("%s\t   %s (cancelled : stop un-runnable)",
                           crm_str(stop_node_uname), stopped_mon->uuid);
                 update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear);
             }
 
             if (is_set(rsc->flags, pe_rsc_managed)) {
                 custom_action_order(rsc, strdup(stop_op_key), stop,
                                     NULL, strdup(key), stopped_mon,
                                     pe_order_implies_then | pe_order_runnable_left, data_set);
             }
 
         }
 
         if (stop_ops) {
             g_list_free(stop_ops);
         }
         free(stop_op_key);
 
         /* unmanaged resource with no mandatory stop: make the operation optional */
         if (is_optional == FALSE && probe_is_optional && stop_is_optional
             && is_set(rsc->flags, pe_rsc_managed) == FALSE) {
             pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged",
                          key, crm_str(stop_node_uname));
             update_action_flags(stopped_mon, pe_action_optional);
         }
 
         if (is_set(stopped_mon->flags, pe_action_optional)) {
             pe_rsc_trace(rsc, "%s\t   %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid);
         }
 
         /* the operation cannot run on a node that is offline or unclean */
         if (stop_node->details->online == FALSE || stop_node->details->unclean) {
             pe_rsc_debug(rsc, "%s\t   %s (cancelled : no node available)",
                          crm_str(stop_node_uname), stopped_mon->uuid);
             update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear);
         }
 
         if (is_set(stopped_mon->flags, pe_action_runnable)
             && is_set(stopped_mon->flags, pe_action_optional) == FALSE) {
             crm_notice(" Start recurring %s (%llus) for %s on %s", stopped_mon->task,
                        interval_ms / 1000, rsc->id, crm_str(stop_node_uname));
         }
     }
 
     free(key);
 }
 
 /*
  * Hand every <op> child of the resource's operation definitions to
  * RecurringOp_Stopped().
  *
  * Nothing is scheduled when the resource carries the maintenance flag, or
  * when a node is supplied and that node is in maintenance.
  */
 void
 Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set)
 {
     xmlNode *op_xml = NULL;
 
     if (!is_not_set(rsc->flags, pe_rsc_maintenance)) {
         return;
     }
     if (node != NULL && node->details->maintenance != FALSE) {
         return;
     }
 
     op_xml = __xml_first_child(rsc->ops_xml);
     while (op_xml != NULL) {
         /* only <op> elements describe operations; skip anything else */
         if (crm_str_eq((const char *)op_xml->name, "op", TRUE)) {
             RecurringOp_Stopped(rsc, start, node, op_xml, data_set);
         }
         op_xml = __xml_next_element(op_xml);
     }
 }
 
 /*
  * Create the migrate_to / migrate_from actions for moving rsc from
  * 'current' to 'chosen', and order them relative to probe, stop and start.
  *
  * When rsc->partial_migration_target is set, only migrate_from is created.
  * Both 'current' and 'chosen' are dereferenced unconditionally, so callers
  * must pass non-NULL nodes.
  */
 static void
 handle_migration_actions(resource_t * rsc, node_t *current, node_t *chosen, pe_working_set_t * data_set)
 {
     action_t *migrate_to = NULL;
     action_t *migrate_from = NULL;
     action_t *start = NULL;
     action_t *stop = NULL;
     gboolean partial = rsc->partial_migration_target ? TRUE : FALSE;
 
     pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s",
     rsc->id, current->details->id, chosen->details->id, partial ? "TRUE" : "FALSE");
     start = start_action(rsc, chosen, TRUE);
     stop = stop_action(rsc, current, TRUE);
 
     /* migrate_to is only created when no partial migration is in progress */
     if (partial == FALSE) {
         migrate_to = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, current, TRUE, TRUE, data_set);
     }
 
     migrate_from = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, chosen, TRUE, TRUE, data_set);
 
     if ((migrate_to && migrate_from) || (migrate_from && partial)) {
 
         set_bit(start->flags, pe_action_migrate_runnable);
         set_bit(stop->flags, pe_action_migrate_runnable);
 
         update_action_flags(start, pe_action_pseudo);       /* easier than trying to delete it from the graph */
 
         /* order probes before migrations */
         if (partial) {
             set_bit(migrate_from->flags, pe_action_migrate_runnable);
             migrate_from->needs = start->needs;
 
             custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL,
                                 rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional, data_set);
 
         } else {
             set_bit(migrate_from->flags, pe_action_migrate_runnable);
             set_bit(migrate_to->flags, pe_action_migrate_runnable);
             migrate_to->needs = start->needs;
 
             /* probe -> migrate_to -> migrate_from */
             custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL,
                                 rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_optional, data_set);
             custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL,
                                 rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set);
         }
 
         /* migrate_from is ordered before both the stop and the start */
         custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL,
                             rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set);
         custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL,
                             rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional | pe_order_implies_first_migratable | pe_order_pseudo_left, data_set);
 
     }
 
     if (migrate_to) {
         add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname);
         add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname);
 
         /* pcmk remote connections don't require pending to be recorded in cib.
          * We can optimize cib writes by only setting PENDING for non pcmk remote
          * connection resources */
         if (rsc->is_remote_node == FALSE) {
             /* migrate_to takes place on the source node, but can
              * have an effect on the target node depending on how
              * the agent is written. Because of this, we have to maintain
              * a record that the migrate_to occurred, in case the source node
              * loses membership while the migrate_to action is still in-flight. */
             add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true");
         }
     }
 
     if (migrate_from) {
         add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname);
         add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname);
     }
 }
 
 /*
  * Create the actions needed to take a primitive resource from rsc->role to
  * rsc->next_role on its allocated node (rsc->allocated_to).
  *
  * In order, the function: fixes up an unknown next_role, forces a stop for
  * every dangling migration, handles a resource that is active on more than
  * one node, decides whether a stop is required, walks the role state
  * matrices (down, back up, then to the target role), schedules recurring
  * operations, and finally hands off to handle_migration_actions() when a
  * migration is still allowed.
  */
 void
 native_create_actions(resource_t * rsc, pe_working_set_t * data_set)
 {
     action_t *start = NULL;
     node_t *chosen = NULL;
     node_t *current = NULL;
     gboolean need_stop = FALSE;
     gboolean is_moving = FALSE;
     gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? TRUE : FALSE;
 
     GListPtr gIter = NULL;
     int num_active_nodes = 0;
     enum rsc_role_e role = RSC_ROLE_UNKNOWN;
     enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
 
     CRM_ASSERT(rsc);
     chosen = rsc->allocated_to;
     /* an unknown next_role becomes Started if a node was allocated, else Stopped */
     if (chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) {
         rsc->next_role = RSC_ROLE_STARTED;
         pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role));
 
     } else if (rsc->next_role == RSC_ROLE_UNKNOWN) {
         rsc->next_role = RSC_ROLE_STOPPED;
         pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role));
     }
 
     pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc,
                  role2text(rsc->role), role2text(rsc->next_role));
 
     /* default 'current' to the first node the resource is running on */
     if (rsc->running_on) {
         current = rsc->running_on->data;
     }
 
     /* count active nodes; prefer the partial migration source as 'current' */
     for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
         node_t *n = (node_t *) gIter->data;
         if (rsc->partial_migration_source &&
             (n->details == rsc->partial_migration_source->details)) {
             current = rsc->partial_migration_source;
         }
         num_active_nodes++;
     }
 
     for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) {
         /* NOTE(review): this local shadows the function-level 'current' */
         node_t *current = (node_t *) gIter->data;
 
         action_t *stop = stop_action(rsc, current, FALSE);
 
         set_bit(stop->flags, pe_action_dangle);
         pe_rsc_trace(rsc, "Forcing a cleanup of %s on %s", rsc->id, current->details->uname);
 
         if (is_set(data_set->flags, pe_flag_remove_after_stop)) {
             DeleteRsc(rsc, current, FALSE, data_set);
         }
     }
 
     if (num_active_nodes > 1) {
 
         if (num_active_nodes == 2
             && chosen
             && rsc->partial_migration_target
             && rsc->partial_migration_source
             && (current->details == rsc->partial_migration_source->details)
             && (chosen->details == rsc->partial_migration_target->details)) {
             /* Here the chosen node is still the migration target from a partial
              * migration. Attempt to continue the migration instead of recovering
              * by stopping the resource everywhere and starting it on a single node. */
             pe_rsc_trace(rsc,
                          "Will attempt to continue with a partial migration to target %s from %s",
                          rsc->partial_migration_target->details->id,
                          rsc->partial_migration_source->details->id);
         } else {
             const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE);
             const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
 
             if(rsc->partial_migration_target && rsc->partial_migration_source) {
                 crm_notice("Resource %s can no longer migrate to %s. Stopping on %s too", rsc->id,
                            rsc->partial_migration_target->details->uname,
                            rsc->partial_migration_source->details->uname);
 
             } else {
                 pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s",
                             rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type));
                 crm_warn("See %s for more information.",
                          "http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active");
             }
 
             if (rsc->recovery_type == recovery_stop_start) {
                 need_stop = TRUE;
             }
 
             /* If by chance a partial migration is in process,
              * but the migration target is not chosen still, clear all
              * partial migration data.  */
             rsc->partial_migration_source = rsc->partial_migration_target = NULL;
             allow_migrate = FALSE;
         }
     }
 
     if (is_set(rsc->flags, pe_rsc_start_pending)) {
         start = start_action(rsc, chosen, TRUE);
         set_bit(start->flags, pe_action_print_always);
     }
 
     /* decide whether the resource has to be stopped first */
     if (current && chosen && current->details != chosen->details) {
         pe_rsc_trace(rsc, "Moving %s", rsc->id);
         is_moving = TRUE;
         need_stop = TRUE;
 
     } else if (is_set(rsc->flags, pe_rsc_failed)) {
         pe_rsc_trace(rsc, "Recovering %s", rsc->id);
         need_stop = TRUE;
 
     } else if (is_set(rsc->flags, pe_rsc_block)) {
         pe_rsc_trace(rsc, "Block %s", rsc->id);
         need_stop = TRUE;
 
     } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) {
         /* Recovery of a promoted resource */
         start = start_action(rsc, chosen, TRUE);
         if (is_set(start->flags, pe_action_optional) == FALSE) {
             pe_rsc_trace(rsc, "Forced start %s", rsc->id);
             need_stop = TRUE;
         }
     }
 
     pe_rsc_trace(rsc, "Creating actions for %s: %s->%s", rsc->id,
                  role2text(rsc->role), role2text(rsc->next_role));
 
     role = rsc->role;
     /* Potentially optional steps on bringing the resource down and back up to the same level */
     while (role != RSC_ROLE_STOPPED) {
         next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
         pe_rsc_trace(rsc, "Down: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role),
                      rsc->id, need_stop ? " required" : "");
         if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) {
             break;
         }
         role = next_role;
     }
 
 
     /* bring the resource back up to its current role, unless it is blocked */
     while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) {
         next_role = rsc_state_matrix[role][rsc->role];
         pe_rsc_trace(rsc, "Up:   Executing: %s->%s (%s)%s", role2text(role), role2text(next_role),
                      rsc->id, need_stop ? " required" : "");
         if (rsc_action_matrix[role][next_role] (rsc, chosen, !need_stop, data_set) == FALSE) {
             break;
         }
         role = next_role;
     }
     role = rsc->role;
 
     /* Required steps from this role to the next */
     while (role != rsc->next_role) {
         next_role = rsc_state_matrix[role][rsc->next_role];
         /* NOTE(review): the format has three %s conversions but four
          * arguments follow it, so rsc->id is not printed */
         pe_rsc_trace(rsc, "Role: Executing: %s->%s = (%s)", role2text(role),
                      role2text(rsc->next_role), role2text(next_role), rsc->id);
         if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) {
             break;
         }
         role = next_role;
     }
 
     /* schedule recurring operations, depending on blocked / active / inactive */
     if(is_set(rsc->flags, pe_rsc_block)) {
         pe_rsc_trace(rsc, "No monitor additional ops for blocked resource");
 
     } else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) {
         pe_rsc_trace(rsc, "Monitor ops for active resource");
         start = start_action(rsc, chosen, TRUE);
         Recurring(rsc, start, chosen, data_set);
         Recurring_Stopped(rsc, start, chosen, data_set);
     } else {
         pe_rsc_trace(rsc, "Monitor ops for in-active resource");
         Recurring_Stopped(rsc, NULL, NULL, data_set);
     }
 
     /* If we are stuck in a partial migration where the target
      * of the partial migration no longer matches the chosen target,
      * a full stop/start is required */
     if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) {
         pe_rsc_trace(rsc, "Not allowing partial migration to continue. %s", rsc->id);
         allow_migrate = FALSE;
 
     /* 'current' is only dereferenced below when is_moving is TRUE, and
      * is_moving is only set above when both current and chosen are non-NULL */
     } else if (is_moving == FALSE ||
                is_not_set(rsc->flags, pe_rsc_managed) ||
                is_set(rsc->flags, pe_rsc_failed) ||
                is_set(rsc->flags, pe_rsc_start_pending) ||
                (current->details->unclean == TRUE) ||
                rsc->next_role < RSC_ROLE_STARTED) {
 
         allow_migrate = FALSE;
     }
 
     if (allow_migrate) {
         handle_migration_actions(rsc, current, chosen, data_set);
     }
 }
 
 /*
  * Set the weight of every allowed node that has a remote_rsc to -INFINITY,
  * so that rsc is kept off those nodes.
  */
 static void
 rsc_avoids_remote_nodes(resource_t *rsc)
 {
     GHashTableIter allowed_iter;
     gpointer value = NULL;
 
     g_hash_table_iter_init(&allowed_iter, rsc->allowed_nodes);
     while (g_hash_table_iter_next(&allowed_iter, NULL, &value)) {
         node_t *candidate = (node_t *) value;
 
         if (candidate->details->remote_rsc == NULL) {
             continue;
         }
         candidate->weight = -INFINITY;
     }
 }
 
 /*
  * Create the implicit ordering and placement constraints of a primitive
  * resource: stop before start, demote/promote ordering, unfencing,
  * the ALL_STOPPED pseudo action, utilization ("load stopped") ordering,
  * container handling, and keeping stonith / remote connection resources
  * off remote nodes.
  *
  * For unmanaged resources the function returns after the unfencing step.
  */
 void
 native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set)
 {
     /* This function is on the critical path and worth optimizing as much as possible */
 
     resource_t *top = uber_parent(rsc);
     int type = pe_order_optional | pe_order_implies_then | pe_order_restart;
     gboolean is_stonith = is_set(rsc->flags, pe_rsc_fence_device);
 
     /* stop before start */
     custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL,
                         rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, type, data_set);
 
     /* demote before stop, start before promote */
     if (top->variant == pe_master || rsc->role > RSC_ROLE_SLAVE) {
         custom_action_order(rsc, generate_op_key(rsc->id, RSC_DEMOTE, 0), NULL,
                             rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL,
                             pe_order_implies_first_master, data_set);
 
         custom_action_order(rsc, generate_op_key(rsc->id, RSC_START, 0), NULL,
                             rsc, generate_op_key(rsc->id, RSC_PROMOTE, 0), NULL,
                             pe_order_runnable_left, data_set);
     }
 
     if (is_stonith == FALSE
         && is_set(data_set->flags, pe_flag_enable_unfencing)
         && is_set(rsc->flags, pe_rsc_needs_unfencing)
         && is_not_set(rsc->flags, pe_rsc_have_unfencing)) {
         /* Check if the node needs to be unfenced first */
         node_t *node = NULL;
         GHashTableIter iter;
 
         if(rsc != top) {
             /* Only create these constraints once, rsc is almost certainly cloned */
             clear_bit_recursive(top, pe_rsc_have_unfencing);
         }
 
         /* for each allowed node: stop(ped) -> unfence -> start */
         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
             action_t *unfence = pe_fence_op(node, "on", TRUE, data_set);
 
             custom_action_order(top, generate_op_key(top->id, top == rsc?RSC_STOP:RSC_STOPPED, 0), NULL,
                                 NULL, strdup(unfence->uuid), unfence,
                                 pe_order_optional, data_set);
 
             crm_debug("Stopping %s prior to unfencing %s", top->id, unfence->uuid);
 
             custom_action_order(NULL, strdup(unfence->uuid), unfence,
                                 top, generate_op_key(top->id, RSC_START, 0), NULL,
                                 pe_order_implies_then_on_node, data_set);
         }
     }
 
     if (is_not_set(rsc->flags, pe_rsc_managed)) {
         pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id);
         return;
     }
 
     /* the resource's stop is ordered before the ALL_STOPPED pseudo action */
     {
         action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
 
         custom_action_order(rsc, stop_key(rsc), NULL,
                             NULL, strdup(all_stopped->task), all_stopped,
                             pe_order_implies_then | pe_order_runnable_left, data_set);
     }
 
     if (g_hash_table_size(rsc->utilization) > 0
         && safe_str_neq(data_set->placement_strategy, "default")) {
         GHashTableIter iter;
         node_t *next = NULL;
         GListPtr gIter = NULL;
 
         pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s",
                      rsc->id, data_set->placement_strategy);
 
         /* stop -> "load stopped" pseudo action, for every node rsc runs on */
         for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
             node_t *current = (node_t *) gIter->data;
 
             char *load_stopped_task = crm_concat(LOAD_STOPPED, current->details->uname, '_');
             action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set);
 
             if (load_stopped->node == NULL) {
                 load_stopped->node = node_copy(current);
                 update_action_flags(load_stopped, pe_action_optional | pe_action_clear);
             }
 
             /* load_stopped_task is passed without a copy and not freed here;
              * presumably custom_action_order() takes ownership - TODO confirm */
             custom_action_order(rsc, stop_key(rsc), NULL,
                                 NULL, load_stopped_task, load_stopped, pe_order_load, data_set);
         }
 
         /* "load stopped" -> start / migrate_to, for every allowed node */
         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&next)) {
             char *load_stopped_task = crm_concat(LOAD_STOPPED, next->details->uname, '_');
             action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set);
 
             if (load_stopped->node == NULL) {
                 load_stopped->node = node_copy(next);
                 update_action_flags(load_stopped, pe_action_optional | pe_action_clear);
             }
 
             custom_action_order(NULL, strdup(load_stopped_task), load_stopped,
                                 rsc, start_key(rsc), NULL, pe_order_load, data_set);
 
             custom_action_order(NULL, strdup(load_stopped_task), load_stopped,
                                 rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL,
                                 pe_order_load, data_set);
 
             free(load_stopped_task);
         }
     }
 
     if (rsc->container) {
         resource_t *remote_rsc = NULL;
 
         /* find out if the container is associated with remote node connection resource */
         if (rsc->container->is_remote_node) {
             remote_rsc = rsc->container;
         } else if (rsc->is_remote_node == FALSE) {
             remote_rsc = rsc_contains_remote_node(data_set, rsc->container);
         }
 
         /* if the container is a remote-node, force the resource within the container
          * instead of colocating the resource with the container. */
         if (remote_rsc) {
             GHashTableIter iter;
             node_t *node = NULL;
             g_hash_table_iter_init(&iter, rsc->allowed_nodes);
             while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
                 if (node->details->remote_rsc != remote_rsc) {
                     node->weight = -INFINITY;
                 }
             }
         } else {
 
             /* container start -> rsc start, rsc stop -> container stop,
              * plus a mandatory colocation of rsc with its container */
             crm_trace("Generating order and colocation rules for rsc %s with container %s", rsc->id, rsc->container->id);
             custom_action_order(rsc->container, generate_op_key(rsc->container->id, RSC_START, 0), NULL,
                                 rsc, generate_op_key(rsc->id, RSC_START, 0), NULL,
                                 pe_order_implies_then | pe_order_runnable_left, data_set);
 
             custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL,
                                 rsc->container, generate_op_key(rsc->container->id, RSC_STOP, 0), NULL,
                                 pe_order_implies_first, data_set);
 
             rsc_colocation_new("resource-with-containter", NULL, INFINITY, rsc, rsc->container, NULL,
                                NULL, data_set);
         }
     }
 
     if (rsc->is_remote_node || is_stonith) {
         /* don't allow remote nodes to run stonith devices
          * or remote connection resources.*/
         rsc_avoids_remote_nodes(rsc);
     }
 
     /* If this rsc is a remote connection resource associated
      * with a container ( which will most likely be a virtual guest )
      * do not allow the container to live on any remote-nodes.
      * remote-nodes managing nested remote-nodes should not be allowed. */
     if (rsc->is_remote_node && rsc->container) {
         rsc_avoids_remote_nodes(rsc->container);
     }
 }
 
 /*
  * Left-hand-side entry point for a colocation constraint on a primitive.
  *
  * Validates both resources, then delegates to the right-hand resource's
  * rsc_colocation_rh() method.  An error is logged and nothing is done when
  * either resource is missing.
  *
  * rsc_lh      resource being placed
  * rsc_rh      resource it is colocated with
  * constraint  the colocation constraint; must not be NULL
  */
 void
 native_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
 {
     if (rsc_lh == NULL) {
         pe_err("rsc_lh was NULL for %s", constraint->id);
         return;
 
     /* rsc_rh itself is dereferenced below, so it has to be checked too,
      * not only the copy stored in the constraint */
     } else if (rsc_rh == NULL || constraint->rsc_rh == NULL) {
         pe_err("rsc_rh was NULL for %s", constraint->id);
         return;
     }
 
     pe_rsc_trace(rsc_lh, "Processing colocation constraint between %s and %s", rsc_lh->id,
                  rsc_rh->id);
 
     rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint);
 }
 
 /*
  * Decide how (and whether) a colocation constraint applies to rsc_lh.
  *
  * rsc_lh      resource being placed
  * rsc_rh      resource it is colocated with
  * constraint  the colocation constraint
  * preview     when TRUE, the "already allocated" checks are skipped
  *
  * Returns influence_nothing when the constraint must be ignored,
  * influence_rsc_priority when it should adjust rsc_lh's priority, and
  * influence_rsc_location when it should adjust rsc_lh's node weights.
  */
 enum filter_colocation_res
 filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh,
                              rsc_colocation_t * constraint, gboolean preview)
 {
     if (constraint->score == 0) {
         return influence_nothing;
     }
 
     /* rh side must be allocated before we can process constraint */
     if (preview == FALSE && is_set(rsc_rh->flags, pe_rsc_provisional)) {
         return influence_nothing;
     }
 
     if ((constraint->role_lh >= RSC_ROLE_SLAVE) &&
         rsc_lh->parent &&
         rsc_lh->parent->variant == pe_master && is_not_set(rsc_lh->flags, pe_rsc_provisional)) {
 
         /* LH and RH resources have already been allocated, place the correct
          * priority on LH rsc for the given multistate resource role */
         return influence_rsc_priority;
     }
 
     if (preview == FALSE && is_not_set(rsc_lh->flags, pe_rsc_provisional)) {
         /* error check: both sides are allocated, so only report violations
          * of mandatory (+/-INFINITY) constraints */
         struct node_shared_s *details_lh;
         struct node_shared_s *details_rh;
 
         if ((constraint->score > -INFINITY) && (constraint->score < INFINITY)) {
             return influence_nothing;
         }
 
         details_rh = rsc_rh->allocated_to ? rsc_rh->allocated_to->details : NULL;
         details_lh = rsc_lh->allocated_to ? rsc_lh->allocated_to->details : NULL;
 
         if (constraint->score == INFINITY && details_lh != details_rh) {
             crm_err("%s and %s are both allocated"
                     " but to different nodes: %s vs. %s",
                     rsc_lh->id, rsc_rh->id,
                     details_lh ? details_lh->uname : "n/a", details_rh ? details_rh->uname : "n/a");
 
         } else if (constraint->score == -INFINITY && details_lh == details_rh) {
             crm_err("%s and %s are both allocated"
                     " but to the SAME node: %s",
                     rsc_lh->id, rsc_rh->id, details_rh ? details_rh->uname : "n/a");
         }
 
         return influence_nothing;
     }
 
     /* positive score: skip when a role filter does not match the next role */
     if (constraint->score > 0
         && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) {
         crm_trace("LH: Skipping constraint: \"%s\" state filter nextrole is %s",
                   role2text(constraint->role_lh), role2text(rsc_lh->next_role));
         return influence_nothing;
     }
 
     if (constraint->score > 0
         && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) {
         crm_trace("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh));
         /* return the enum value like every other "skip" branch, not FALSE */
         return influence_nothing;
     }
 
     /* negative score: skip when a role filter matches the next role */
     if (constraint->score < 0
         && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) {
         crm_trace("LH: Skipping -ve constraint: \"%s\" state filter",
                   role2text(constraint->role_lh));
         return influence_nothing;
     }
 
     if (constraint->score < 0
         && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) {
         crm_trace("RH: Skipping -ve constraint: \"%s\" state filter",
                   role2text(constraint->role_rh));
         return influence_nothing;
     }
 
     return influence_rsc_location;
 }
 
 /*
  * Adjust rsc_lh's priority according to a colocation constraint, based on
  * where rsc_lh and rsc_rh have been allocated.
  *
  * The two allocated nodes are compared on constraint->node_attribute
  * ("#id" when none is given).  Nothing happens unless both resources are
  * allocated.
  */
 static void
 influence_priority(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
 {
     const char *attr_name = constraint->node_attribute ? constraint->node_attribute : "#id";
     const char *value_lh = NULL;
     const char *value_rh = NULL;
     int adjustment = 0;
 
     if (rsc_lh->allocated_to == NULL || rsc_rh->allocated_to == NULL) {
         return;
     }
 
     value_lh = g_hash_table_lookup(rsc_lh->allocated_to->details->attrs, attr_name);
     value_rh = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attr_name);
 
     if (!safe_str_eq(value_lh, value_rh)) {
         /* attribute values differ: a mandatory master colocation rules
          * rsc_lh out completely, anything else leaves it untouched */
         if (constraint->role_lh == RSC_ROLE_MASTER && constraint->score == INFINITY) {
             rsc_lh->priority = -INFINITY;
         }
         return;
     }
 
     /* a right-hand role filter must match rsc_rh's next role */
     if (constraint->role_rh && rsc_rh->next_role != constraint->role_rh) {
         return;
     }
 
     /* a constraint on the slave role is applied with the opposite sign */
     if (constraint->role_lh == RSC_ROLE_SLAVE) {
         adjustment = -1 * constraint->score;
     } else {
         adjustment = constraint->score;
     }
 
     rsc_lh->priority = merge_weights(adjustment, rsc_lh->priority);
 }
 
 /*
  * Apply a colocation constraint to the allowed-node weights of rsc_lh.
  *
  * The adjustment is made on a copy of rsc_lh->allowed_nodes.  The copy
  * replaces the original table when the constraint is mandatory
  * (score <= -INFINITY or >= INFINITY) or when can_run_any() accepts the copy;
  * otherwise the copy is discarded and the "rolling back" message is logged.
  *
  * Nodes are compared with rsc_rh's node on the constraint's node attribute
  * ("#id" when the constraint names none).
  */
 static void
 colocation_match(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
 {
     const char *attr_name = "#id";
     const char *target_value = NULL;
     gboolean rh_placed = FALSE;
     gboolean mandatory = FALSE;
 
     GHashTable *scratch = NULL;
     GHashTableIter cursor;
     node_t *candidate = NULL;
 
     if (constraint->node_attribute != NULL) {
         attr_name = constraint->node_attribute;
     }
 
     if (rsc_rh->allocated_to) {
         rh_placed = TRUE;
         target_value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attr_name);
 
     } else if (constraint->score < 0) {
         /* Anti-colocation with something that is not running: nothing to do */
         return;
     }
 
     scratch = node_hash_dup(rsc_lh->allowed_nodes);
 
     g_hash_table_iter_init(&cursor, scratch);
     while (g_hash_table_iter_next(&cursor, NULL, (void **)&candidate)) {
         const char *candidate_value = g_hash_table_lookup(candidate->details->attrs, attr_name);
 
         if (rh_placed && safe_str_eq(candidate_value, target_value)) {
             /* Matching node: only non-mandatory scores are merged in */
             if (constraint->score < INFINITY) {
                 pe_rsc_trace(rsc_lh, "%s: %s.%s += %d", constraint->id, rsc_lh->id,
                              candidate->details->uname, constraint->score);
                 candidate->weight = merge_weights(constraint->score, candidate->weight);
             }
             continue;
         }
 
         if (rh_placed == FALSE || constraint->score >= INFINITY) {
             pe_rsc_trace(rsc_lh, "%s: %s.%s -= %d (%s)", constraint->id, rsc_lh->id,
                          candidate->details->uname, constraint->score,
                          rh_placed ? "failed" : "unallocated");
             candidate->weight = merge_weights(-constraint->score, candidate->weight);
         }
     }
 
     mandatory = (constraint->score <= -INFINITY) || (constraint->score >= INFINITY);
 
     if (can_run_any(scratch) || mandatory) {
         /* Commit: the adjusted copy becomes the resource's node table */
         g_hash_table_destroy(rsc_lh->allowed_nodes);
         rsc_lh->allowed_nodes = scratch;
         return;
     }
 
     {
         static char score_text[33];
 
         score2char_stack(constraint->score, score_text, sizeof(score_text));
 
         pe_rsc_info(rsc_lh, "%s: Rolling back scores from %s (%d, %s)",
                     rsc_lh->id, rsc_rh->id, rh_placed, score_text);
     }
 
     if (scratch) {
         g_hash_table_destroy(scratch);
     }
 }
 
 /*
  * Apply a colocation constraint in which rsc_rh is the right-hand resource.
  *
  * filter_colocation_constraint() decides what the constraint may influence:
  * the priority of rsc_lh, its node weights, or nothing at all.
  */
 void
 native_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
 {
     enum filter_colocation_res outcome;
 
     CRM_ASSERT(rsc_lh);
     CRM_ASSERT(rsc_rh);
 
     outcome = filter_colocation_constraint(rsc_lh, rsc_rh, constraint, FALSE);
     pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d, filter=%d)",
                  constraint->score >= 0 ? "" : "Anti-",
                  rsc_lh->id, rsc_rh->id, constraint->id, constraint->score, outcome);
 
     if (outcome == influence_rsc_priority) {
         influence_priority(rsc_lh, rsc_rh, constraint);
 
     } else if (outcome == influence_rsc_location) {
         pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d)",
                      constraint->score >= 0 ? "" : "Anti-",
                      rsc_lh->id, rsc_rh->id, constraint->id, constraint->score);
         colocation_match(rsc_lh, rsc_rh, constraint);
     }
     /* influence_nothing, or any other value: the constraint has no effect */
 }
 
 /*
  * Tell whether a ticket constraint applies to rsc_lh in its current role.
  *
  * Returns TRUE when the constraint has no role restriction
  * (RSC_ROLE_UNKNOWN) or its role equals rsc_lh->role; FALSE otherwise.
  */
 static gboolean
 filter_rsc_ticket(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket)
 {
     if (rsc_ticket->role_lh == RSC_ROLE_UNKNOWN) {
         return TRUE;
     }
 
     if (rsc_ticket->role_lh == rsc_lh->role) {
         return TRUE;
     }
 
     pe_rsc_trace(rsc_lh, "LH: Skipping constraint: \"%s\" state filter",
                  role2text(rsc_ticket->role_lh));
     return FALSE;
 }
 
 /*
  * Apply a ticket constraint to rsc_lh.
  *
  * Nothing is done while the ticket is granted and not in standby.  For a
  * resource with children the constraint is applied to each child instead.
  * Otherwise:
  *  - ticket not granted and the resource is running somewhere: act according
  *    to the constraint's loss policy (stop, demote, fence or freeze);
  *  - ticket not granted and the resource is not running, or ticket in
  *    standby: ban the resource everywhere (-INFINITY), unless the constraint
  *    targets the master role and the loss policy is not "stop".
  */
 void
 rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set)
 {
     gboolean ban_whole_rsc = FALSE;
 
     if (rsc_ticket == NULL) {
         pe_err("rsc_ticket was NULL");
         return;
     }
 
     if (rsc_lh == NULL) {
         pe_err("rsc_lh was NULL for %s", rsc_ticket->id);
         return;
     }
 
     if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) {
         return;
     }
 
     if (rsc_lh->children) {
         GListPtr child_iter = NULL;
 
         pe_rsc_trace(rsc_lh, "Processing ticket dependencies from %s", rsc_lh->id);
 
         child_iter = rsc_lh->children;
         while (child_iter != NULL) {
             rsc_ticket_constraint((resource_t *) child_iter->data, rsc_ticket, data_set);
             child_iter = child_iter->next;
         }
         return;
     }
 
     pe_rsc_trace(rsc_lh, "%s: Processing ticket dependency on %s (%s, %s)",
                  rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id,
                  role2text(rsc_ticket->role_lh));
 
     /* Shared condition of the "not running" and "standby" branches below */
     ban_whole_rsc = (rsc_ticket->role_lh != RSC_ROLE_MASTER)
                     || (rsc_ticket->loss_policy == loss_ticket_stop);
 
     if (rsc_ticket->ticket->granted == FALSE && rsc_lh->running_on != NULL) {
         switch (rsc_ticket->loss_policy) {
             case loss_ticket_stop:
                 resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set);
                 break;
 
             case loss_ticket_demote:
                 /* Promotion score will be set to -INFINITY in master_promotion_order() */
                 if (rsc_ticket->role_lh != RSC_ROLE_MASTER) {
                     resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set);
                 }
                 break;
 
             case loss_ticket_fence:
                 {
                     GListPtr node_iter = NULL;
 
                     if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) {
                         return;
                     }
 
                     resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set);
 
                     /* Fence every node the resource is currently active on */
                     node_iter = rsc_lh->running_on;
                     while (node_iter != NULL) {
                         pe_fence_node(data_set, (node_t *) node_iter->data,
                                       "deadman ticket lost");
                         node_iter = node_iter->next;
                     }
                 }
                 break;
 
             case loss_ticket_freeze:
                 if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) {
                     return;
                 }
                 if (rsc_lh->running_on != NULL) {
                     clear_bit(rsc_lh->flags, pe_rsc_managed);
                     set_bit(rsc_lh->flags, pe_rsc_block);
                 }
                 break;
         }
 
     } else if (rsc_ticket->ticket->granted == FALSE) {
         if (ban_whole_rsc) {
             resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set);
         }
 
     } else if (rsc_ticket->ticket->standby) {
         if (ban_whole_rsc) {
             resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set);
         }
     }
 }
 
 /*
  * Report the flags of an action.
  *
  * The node argument is not consulted here; the action's own flags are
  * returned unchanged.
  */
 enum pe_action_flags
 native_action_flags(action_t * action, node_t * node)
 {
     enum pe_action_flags current_flags = action->flags;
 
     return current_flags;
 }
 
 /*
  * Propagate the effects of an ordering "first then then" onto the flags of
  * the two actions.
  *
  * first   the action ordered first
  * then    the action ordered after it
  * node    not used by this implementation
  * flags   flags of 'first' as seen by the caller (see the note at the top of
  *         the body)
  * filter  which flag bits the caller allows this ordering to act on
  * type    bitmask of pe_order_* kinds describing the ordering
  *
  * Each ordering kind present in 'type' may clear the optional, runnable,
  * migrate_runnable or pseudo bits on 'first' and/or 'then'.
  *
  * Returns a bitmask made of pe_graph_updated_first and pe_graph_updated_then
  * telling which action's flags differ from their value on entry, or
  * pe_graph_none if neither changed.
  */
 enum pe_graph_flags
 native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags,
                       enum pe_action_flags filter, enum pe_ordering type)
 {
     /* flags == get_action_flags(first, then_node) called from update_action() */
     enum pe_graph_flags changed = pe_graph_none;
     /* Snapshots used at the end to detect which action was modified */
     enum pe_action_flags then_flags = then->flags;
     enum pe_action_flags first_flags = first->flags;
 
     /* The format must consume exactly the five arguments supplied */
     crm_trace(   "Testing %s on %s (0x%.6x) with %s 0x%.6x",
                  first->uuid, first->node ? first->node->details->uname : "[none]",
                  first->flags, then->uuid, then->flags);
 
     if (type & pe_order_asymmetrical) {
         resource_t *then_rsc = then->rsc;
         enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0;
 
         if (!then_rsc) {
             /* ignore */
         } else if ((then_rsc_role == RSC_ROLE_STOPPED) && safe_str_eq(then->task, RSC_STOP)) {
             /* ignore... if 'then' is supposed to be stopped after 'first', but
              * then is already stopped, there is nothing to be done when non-symmetrical.  */
         } else if ((then_rsc_role >= RSC_ROLE_STARTED) && safe_str_eq(then->task, RSC_START)) {
             /* ignore... if 'then' is supposed to be started after 'first', but
              * then is already started, there is nothing to be done when non-symmetrical.  */
         } else if (!(first->flags & pe_action_runnable)) {
             /* prevent 'then' action from happening if 'first' is not runnable and
              * 'then' has not yet occurred. */
             pe_clear_action_bit(then, pe_action_runnable);
             pe_clear_action_bit(then, pe_action_optional);
             pe_rsc_trace(then->rsc, "Unset optional and runnable on %s", then->uuid);
         } else {
             /* ignore... then is allowed to start/stop if it wants to. */
         }
     }
 
     if (type & pe_order_implies_first) {
         if ((filter & pe_action_optional) && (flags & pe_action_optional) == 0) {
             pe_rsc_trace(first->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid);
 
             pe_clear_action_bit(first, pe_action_optional);
         }
 
         if (is_set(flags, pe_action_migrate_runnable) &&
             is_set(then->flags, pe_action_migrate_runnable) == FALSE &&
             is_set(then->flags, pe_action_optional) == FALSE) {
 
             pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s",
                          first->uuid, then->uuid);
             pe_clear_action_bit(first, pe_action_migrate_runnable);
         }
     }
 
     if (type & pe_order_implies_first_master) {
         if ((filter & pe_action_optional) &&
             ((then->flags & pe_action_optional) == FALSE) &&
             then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) {
             pe_clear_action_bit(first, pe_action_optional);
 
             if (is_set(first->flags, pe_action_migrate_runnable) &&
                 is_set(then->flags, pe_action_migrate_runnable) == FALSE) {
 
                 pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid);
                 pe_clear_action_bit(first, pe_action_migrate_runnable);
             }
             pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid);
         }
     }
 
     if ((type & pe_order_implies_first_migratable)
         && is_set(filter, pe_action_optional)) {
 
         if (((then->flags & pe_action_migrate_runnable) == FALSE) ||
             ((then->flags & pe_action_runnable) == FALSE)) {
 
             pe_rsc_trace(then->rsc, "Unset runnable on %s because %s is neither runnable or migratable", first->uuid, then->uuid);
             pe_clear_action_bit(first, pe_action_runnable);
         }
 
         if ((then->flags & pe_action_optional) == 0) {
 
             pe_rsc_trace(then->rsc, "Unset optional on %s because %s is not optional", first->uuid, then->uuid);
             pe_clear_action_bit(first, pe_action_optional);
         }
     }
 
     if ((type & pe_order_pseudo_left)
         && is_set(filter, pe_action_optional)) {
 
         if ((first->flags & pe_action_runnable) == FALSE) {
             pe_clear_action_bit(then, pe_action_migrate_runnable);
             pe_clear_action_bit(then, pe_action_pseudo);
             pe_rsc_trace(then->rsc, "Unset pseudo on %s because %s is not runnable", then->uuid, first->uuid);
         }
 
     }
 
     if (is_set(type, pe_order_runnable_left)
         && is_set(filter, pe_action_runnable)
         && is_set(then->flags, pe_action_runnable)
         && is_set(flags, pe_action_runnable) == FALSE) {
         pe_rsc_trace(then->rsc, "Unset runnable on %s because of %s", then->uuid, first->uuid);
         pe_clear_action_bit(then, pe_action_runnable);
         pe_clear_action_bit(then, pe_action_migrate_runnable);
     }
 
     if (is_set(type, pe_order_implies_then)
         && is_set(filter, pe_action_optional)
         && is_set(then->flags, pe_action_optional)
         && is_set(flags, pe_action_optional) == FALSE) {
 
         /* in this case, treat migrate_runnable as if first is optional */
         if (is_set(first->flags, pe_action_migrate_runnable) == FALSE) {
            pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", then->uuid, first->uuid);
            pe_clear_action_bit(then, pe_action_optional);
         }
     }
 
     if (is_set(type, pe_order_restart)) {
         const char *reason = NULL;
 
         CRM_ASSERT(first->rsc && first->rsc->variant == pe_native);
         CRM_ASSERT(then->rsc && then->rsc->variant == pe_native);
 
         if ((filter & pe_action_runnable)
             && (then->flags & pe_action_runnable) == 0
             && (then->rsc->flags & pe_rsc_managed)) {
             reason = "shutdown";
         }
 
         /* When both conditions hold, "recover" overrides "shutdown" */
         if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) {
             reason = "recover";
         }
 
         if (reason && is_set(first->flags, pe_action_optional)) {
             if (is_set(first->flags, pe_action_runnable)
                 || is_not_set(then->flags, pe_action_optional)) {
                 pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid);
                 pe_clear_action_bit(first, pe_action_optional);
             }
         }
 
         if (reason && is_not_set(first->flags, pe_action_optional)
             && is_not_set(first->flags, pe_action_runnable)) {
             pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid);
             pe_clear_action_bit(then, pe_action_runnable);
         }
 
         if (reason &&
             is_not_set(first->flags, pe_action_optional) &&
             is_set(first->flags, pe_action_migrate_runnable)  &&
             is_not_set(then->flags, pe_action_migrate_runnable)) {
 
             pe_clear_action_bit(first, pe_action_migrate_runnable);
         }
 
     }
 
     /* Compare against the entry snapshots to report what changed */
     if (then_flags != then->flags) {
         changed |= pe_graph_updated_then;
         pe_rsc_trace(then->rsc,
                      "Then: Flags for %s on %s are now  0x%.6x (was 0x%.6x) because of %s 0x%.6x",
                      then->uuid, then->node ? then->node->details->uname : "[none]", then->flags,
                      then_flags, first->uuid, first->flags);
 
         if(then->rsc && then->rsc->parent) {
             /* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */
             update_action(then);
         }
     }
 
     if (first_flags != first->flags) {
         changed |= pe_graph_updated_first;
         pe_rsc_trace(first->rsc,
                      "First: Flags for %s on %s are now  0x%.6x (was 0x%.6x) because of %s 0x%.6x",
                      first->uuid, first->node ? first->node->details->uname : "[none]",
                      first->flags, first_flags, then->uuid, then->flags);
     }
 
     return changed;
 }
 
 /*
  * Merge the node scores of a location constraint into rsc->allowed_nodes.
  *
  * The constraint is skipped when it is restricted to a role other than
  * rsc->next_role, when is_active() rejects it, or when it lists no nodes.
  * For each listed node, the score is merged into the matching entry of
  * rsc->allowed_nodes, or a copy of the node is inserted if there is no such
  * entry yet.  A node's rsc_discover_mode is raised to the constraint's
  * discover_mode when the latter is greater.
  */
 void
 native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint)
 {
     GListPtr item = NULL;
     GHashTableIter iter;
     node_t *node = NULL;
 
     if (constraint == NULL) {
         pe_err("Constraint is NULL");
         return;
     }
 
     if (rsc == NULL) {
         pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id);
         return;
     }
 
     pe_rsc_trace(rsc, "Applying %s (%s) to %s", constraint->id,
                  role2text(constraint->role_filter), rsc->id);
 
     /* take "lifetime" into account */
     if (constraint->role_filter > RSC_ROLE_UNKNOWN && constraint->role_filter != rsc->next_role) {
         pe_rsc_debug(rsc, "Constraint (%s) is not active (role : %s vs. %s)",
                      constraint->id, role2text(constraint->role_filter), role2text(rsc->next_role));
         return;
     }
 
     if (is_active(constraint) == FALSE) {
         pe_rsc_trace(rsc, "Constraint (%s) is not active", constraint->id);
         return;
     }
 
     if (constraint->node_list_rh == NULL) {
         pe_rsc_trace(rsc, "RHS of constraint %s is NULL", constraint->id);
         return;
     }
 
     item = constraint->node_list_rh;
     while (item != NULL) {
         node_t *wanted = (node_t *) item->data;
         node_t *allowed = (node_t *) pe_hash_table_lookup(rsc->allowed_nodes, wanted->details->id);
 
         if (allowed == NULL) {
             /* First mention of this node: start from the constraint's copy */
             allowed = node_copy(wanted);
 
             g_hash_table_insert(rsc->allowed_nodes, (gpointer) allowed->details->id, allowed);
 
         } else {
             pe_rsc_trace(rsc, "%s + %s: %d + %d",
                          wanted->details->uname,
                          allowed->details->uname, wanted->weight, allowed->weight);
             allowed->weight = merge_weights(allowed->weight, wanted->weight);
         }
 
         if (allowed->rsc_discover_mode < constraint->discover_mode) {
             if (constraint->discover_mode == discover_exclusive) {
                 rsc->exclusive_discover = TRUE;
             }
             /* exclusive > never > always... always is default */
             allowed->rsc_discover_mode = constraint->discover_mode;
         }
 
         item = item->next;
     }
 
     /* Trace the resulting weight of every allowed node */
     g_hash_table_iter_init(&iter, rsc->allowed_nodes);
     while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
         pe_rsc_trace(rsc, "%s + %s : %d", rsc->id, node->details->uname, node->weight);
     }
 }
 
 /*
  * Hand every action of rsc to graph_element_from_action(), then call the
  * expand method of each of its children.
  */
 void
 native_expand(resource_t * rsc, pe_working_set_t * data_set)
 {
     GListPtr action_iter = NULL;
     GListPtr child_iter = NULL;
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "Processing actions from %s", rsc->id);
 
     action_iter = rsc->actions;
     while (action_iter != NULL) {
         action_t *action = (action_t *) action_iter->data;
 
         crm_trace("processing action %d for rsc=%s", action->id, rsc->id);
         graph_element_from_action(action, data_set);
         action_iter = action_iter->next;
     }
 
     child_iter = rsc->children;
     while (child_iter != NULL) {
         resource_t *child_rsc = (resource_t *) child_iter->data;
 
         child_rsc->cmds->expand(child_rsc, data_set);
         child_iter = child_iter->next;
     }
 }
 
 /*
  * Report one planned change.
  *
  * Expects a variable named 'terminal' to be in scope at the point of use:
  * when it is true the message is printed to stdout with a " * " prefix and a
  * trailing newline, otherwise it is passed to crm_notice().
  * 'fmt' must be a string literal because it is concatenated at compile time.
  */
 #define log_change(fmt, args...)  do {          \
         if(terminal) {                          \
             printf(" * "fmt"\n", ##args);       \
         } else {                                \
             crm_notice(fmt, ##args);            \
         }                                       \
     } while(0)
 
 /*
  * Sanity check used after logging a change that implies a stop.
  *
  * Expects variables named 'current', 'stop' and 'rsc' to be in scope at the
  * point of use.  Nothing is checked when 'current' is set and marked unclean.
  * Otherwise an error is logged and CRM_ASSERT() is invoked if no stop action
  * was found, or if the stop action still has pe_action_optional set.
  * 'lineno' is only used in the logged message.
  */
 #define STOP_SANITY_ASSERT(lineno) do {                                 \
         if(current && current->details->unclean) {                      \
             /* It will be a pseudo op */                                \
         } else if(stop == NULL) {                                       \
             crm_err("%s:%d: No stop action exists for %s", __FUNCTION__, lineno, rsc->id); \
             CRM_ASSERT(stop != NULL);                                   \
         } else if(is_set(stop->flags, pe_action_optional)) { \
             crm_err("%s:%d: Action %s is still optional", __FUNCTION__, lineno, stop->uuid); \
             CRM_ASSERT(is_not_set(stop->flags, pe_action_optional));    \
         }                                                               \
     } while(0)
 
 /*
  * Report, one line per change, what is planned for a resource.
  *
  * rsc       resource to describe; for a resource with children each child is
  *           described instead
  * data_set  passed through to the recursive calls only
  * terminal  selects the output of log_change(): stdout when TRUE,
  *           crm_notice() otherwise
  *
  * The messages produced are Leave, Migrate, Reload, Stop, Move, Recover,
  * Restart, Demote, Start and Promote, chosen from the resource's current and
  * next roles, its current and allocated nodes, and the flags of its start,
  * stop, promote, demote and migrate actions.  "Leave" lines go through
  * pe_rsc_info() rather than log_change().
  *
  * Side effect: rsc->role is changed from RSC_ROLE_STOPPED to
  * RSC_ROLE_STARTED when the resource is listed as running somewhere.
  */
 void
 LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal)
 {
     node_t *next = NULL;
     node_t *current = NULL;
 
     action_t *stop = NULL;
     action_t *start = NULL;
     action_t *demote = NULL;
     action_t *promote = NULL;
 
     char *key = NULL;
     gboolean moving = FALSE;
     GListPtr possible_matches = NULL;
 
     if (rsc->children) {
         GListPtr gIter = NULL;
 
         for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
             resource_t *child_rsc = (resource_t *) gIter->data;
 
             LogActions(child_rsc, data_set, terminal);
         }
         return;
     }
 
     /* 'next' is where the resource was allocated; 'current' is where it runs now */
     next = rsc->allocated_to;
     if (rsc->running_on) {
         if (g_list_length(rsc->running_on) > 1 && rsc->partial_migration_source) {
             current = rsc->partial_migration_source;
         } else {
             current = rsc->running_on->data;
         }
 
         if (rsc->role == RSC_ROLE_STOPPED) {
             /*
              * This can occur when resources are being recovered
              * We fiddle with the current role in native_create_actions()
              */
             rsc->role = RSC_ROLE_STARTED;
         }
     }
 
     if (current == NULL && is_set(rsc->flags, pe_rsc_orphan)) {
         /* Don't log stopped orphans */
         return;
     }
 
     if (is_not_set(rsc->flags, pe_rsc_managed)
         || (current == NULL && next == NULL)) {
         pe_rsc_info(rsc, "Leave   %s\t(%s%s)",
                     rsc->id, role2text(rsc->role), is_not_set(rsc->flags,
                                                               pe_rsc_managed) ? " unmanaged" : "");
         return;
     }
 
     if (current != NULL && next != NULL && safe_str_neq(current->details->id, next->details->id)) {
         moving = TRUE;
     }
 
     /* Look up the first matching start, stop, promote and demote actions */
     key = start_key(rsc);
     possible_matches = find_actions(rsc->actions, key, next);
     free(key);
     if (possible_matches) {
         start = possible_matches->data;
         g_list_free(possible_matches);
     }
 
     key = stop_key(rsc);
     /* Without a runnable start, search for the stop with a NULL node instead of 'current' */
     if(start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) {
         possible_matches = find_actions(rsc->actions, key, NULL);
     } else {
         possible_matches = find_actions(rsc->actions, key, current);
     }
     free(key);
     if (possible_matches) {
         stop = possible_matches->data;
         g_list_free(possible_matches);
     }
 
     key = promote_key(rsc);
     possible_matches = find_actions(rsc->actions, key, next);
     free(key);
     if (possible_matches) {
         promote = possible_matches->data;
         g_list_free(possible_matches);
     }
 
     key = demote_key(rsc);
     possible_matches = find_actions(rsc->actions, key, next);
     free(key);
     if (possible_matches) {
         demote = possible_matches->data;
         g_list_free(possible_matches);
     }
 
     /* Role is not changing: the resource stays, migrates, reloads, moves or restarts */
     if (rsc->role == rsc->next_role) {
         action_t *migrate_to = NULL;
         key = generate_op_key(rsc->id, RSC_MIGRATED, 0);
         possible_matches = find_actions(rsc->actions, key, next);
         free(key);
 
         if (possible_matches) {
             migrate_to = possible_matches->data;
         }
 
         CRM_CHECK(next != NULL,);
         if (next == NULL) {
         } else if (migrate_to && is_set(migrate_to->flags, pe_action_runnable) && current) {
             log_change("Migrate %s\t(%s %s -> %s)",
                        rsc->id, role2text(rsc->role), current->details->uname,
                        next->details->uname);
 
         } else if (is_set(rsc->flags, pe_rsc_reload)) {
             log_change("Reload  %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname);
 
         } else if (start == NULL || is_set(start->flags, pe_action_optional)) {
             pe_rsc_info(rsc, "Leave   %s\t(%s %s)", rsc->id, role2text(rsc->role),
                         next->details->uname);
 
         } else if (start && is_set(start->flags, pe_action_runnable) == FALSE) {
             log_change("Stop    %s\t(%s %s%s)", rsc->id, role2text(rsc->role), current?current->details->uname:"N/A",
                        stop && is_not_set(stop->flags, pe_action_runnable) ? " - blocked" : "");
             STOP_SANITY_ASSERT(__LINE__);
 
         } else if (moving && current) {
             log_change("%s %s\t(%s %s -> %s)",
                        is_set(rsc->flags, pe_rsc_failed) ? "Recover" : "Move   ",
                        rsc->id, role2text(rsc->role),
                        current->details->uname, next->details->uname);
 
         } else if (is_set(rsc->flags, pe_rsc_failed)) {
             log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname);
             STOP_SANITY_ASSERT(__LINE__);
 
         } else {
             log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname);
             /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */
         }
 
         /* The migrate lookup result is released here, after migrate_to was last used */
         g_list_free(possible_matches);
         return;
     }
 
     /* Role is being lowered from above slave: report the demotion */
     if (rsc->role > RSC_ROLE_SLAVE && rsc->role > rsc->next_role) {
         CRM_CHECK(current != NULL,);
         if (current != NULL) {
             gboolean allowed = FALSE;
 
             if (demote != NULL && (demote->flags & pe_action_runnable)) {
                 allowed = TRUE;
             }
 
             log_change("Demote  %s\t(%s -> %s %s%s)",
                        rsc->id,
                        role2text(rsc->role),
                        role2text(rsc->next_role),
                        current->details->uname, allowed ? "" : " - blocked");
 
             if (stop != NULL && is_not_set(stop->flags, pe_action_optional)
                 && rsc->next_role > RSC_ROLE_STOPPED && moving == FALSE) {
                 if (is_set(rsc->flags, pe_rsc_failed)) {
                     log_change("Recover %s\t(%s %s)",
                                rsc->id, role2text(rsc->role), next->details->uname);
                     STOP_SANITY_ASSERT(__LINE__);
 
                 } else if (is_set(rsc->flags, pe_rsc_reload)) {
                     log_change("Reload  %s\t(%s %s)", rsc->id, role2text(rsc->role),
                                next->details->uname);
 
                 } else {
                     log_change("Restart %s\t(%s %s)",
                                rsc->id, role2text(rsc->next_role), next->details->uname);
                     STOP_SANITY_ASSERT(__LINE__);
                 }
             }
         }
 
     } else if (rsc->next_role == RSC_ROLE_STOPPED) {
         GListPtr gIter = NULL;
 
         CRM_CHECK(current != NULL,);
 
         /* One "Stop" line per node the resource is running on */
         key = stop_key(rsc);
         for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
             node_t *node = (node_t *) gIter->data;
             action_t *stop_op = NULL;
             gboolean allowed = FALSE;
 
             possible_matches = find_actions(rsc->actions, key, node);
             if (possible_matches) {
                 stop_op = possible_matches->data;
                 g_list_free(possible_matches);
             }
 
             if (stop_op && (stop_op->flags & pe_action_runnable)) {
                 /* NOTE: the macro checks the function-level 'stop', not 'stop_op' */
                 STOP_SANITY_ASSERT(__LINE__);
                 allowed = TRUE;
             }
 
             log_change("Stop    %s\t(%s%s)", rsc->id, node->details->uname,
                        allowed ? "" : " - blocked");
         }
 
         free(key);
     }
 
     /* 'moving' is only TRUE when both current and next are non-NULL */
     if (moving) {
         log_change("Move    %s\t(%s %s -> %s)",
                    rsc->id, role2text(rsc->next_role), current->details->uname,
                    next->details->uname);
         STOP_SANITY_ASSERT(__LINE__);
     }
 
     if (rsc->role == RSC_ROLE_STOPPED) {
         gboolean allowed = FALSE;
 
         if (start && (start->flags & pe_action_runnable)) {
             allowed = TRUE;
         }
 
         CRM_CHECK(next != NULL,);
         if (next != NULL) {
             log_change("Start   %s\t(%s%s)", rsc->id, next->details->uname,
                        allowed ? "" : " - blocked");
         }
         /* A blocked start ends the report: no promotion is logged after it */
         if (allowed == FALSE) {
             return;
         }
     }
 
     /* Role is being raised above slave: report the promotion */
     if (rsc->next_role > RSC_ROLE_SLAVE && rsc->role < rsc->next_role) {
         gboolean allowed = FALSE;
 
         CRM_LOG_ASSERT(next);
         if (stop != NULL && is_not_set(stop->flags, pe_action_optional)
             && rsc->role > RSC_ROLE_STOPPED) {
             if (is_set(rsc->flags, pe_rsc_failed)) {
                 log_change("Recover %s\t(%s %s)",
                            rsc->id, role2text(rsc->role), next?next->details->uname:NULL);
                 STOP_SANITY_ASSERT(__LINE__);
 
             } else if (is_set(rsc->flags, pe_rsc_reload)) {
                 log_change("Reload  %s\t(%s %s)", rsc->id, role2text(rsc->role),
                            next?next->details->uname:NULL);
                 STOP_SANITY_ASSERT(__LINE__);
 
             } else {
                 log_change("Restart %s\t(%s %s)",
                            rsc->id, role2text(rsc->role), next?next->details->uname:NULL);
                 STOP_SANITY_ASSERT(__LINE__);
             }
         }
 
         if (promote && (promote->flags & pe_action_runnable)) {
             allowed = TRUE;
         }
 
         log_change("Promote %s\t(%s -> %s %s%s)",
                    rsc->id,
                    role2text(rsc->role),
                    role2text(rsc->next_role),
                    next?next->details->uname:NULL,
                    allowed ? "" : " - blocked");
     }
 }
 
 /*
  * Create a stop action for rsc on every node it is currently running on.
  *
  * rsc       resource to stop (must not be NULL)
  * next      node the resource is going to; only used in a trace message and
  *           may be NULL
  * optional  whether the stop actions are created as optional; forced to
  *           FALSE once a node other than the partial migration target is
  *           encountered while a partial migration target is set
  * data_set  working set; when pe_flag_remove_after_stop is set, DeleteRsc()
  *           is also called for each node
  *
  * The node that is the partial migration target is skipped.  For an
  * unmanaged resource the runnable flag of each stop action is cleared.
  *
  * Always returns TRUE.
  */
 gboolean
 StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "%s", rsc->id);
 
     for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
         node_t *current = (node_t *) gIter->data;
         action_t *stop;
 
         if (rsc->partial_migration_target) {
             if (rsc->partial_migration_target->details == current->details) {
                 /* 'next' may be NULL; guard it like the sibling functions do */
                 pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname,
                              next ? next->details->uname : "N/A", rsc->id);
                 continue;
             } else {
                 pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id);
                 optional = FALSE;
             }
         }
 
         pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname);
         stop = stop_action(rsc, current, optional);
 
         if (is_not_set(rsc->flags, pe_rsc_managed)) {
             update_action_flags(stop, pe_action_runnable | pe_action_clear);
         }
 
         if (is_set(data_set->flags, pe_flag_remove_after_stop)) {
             DeleteRsc(rsc, current, optional, data_set);
         }
     }
 
     return TRUE;
 }
 
 /*
  * Create the start action for rsc on 'next'.
  *
  * The action is requested as optional; its optional flag is then cleared
  * when the caller asked for a mandatory start (optional == FALSE) and the
  * action is runnable.
  *
  * Always returns TRUE.
  */
 gboolean
 StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
 {
     action_t *start_op = NULL;
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "%s on %s %d", rsc->id, next ? next->details->uname : "N/A", optional);
 
     start_op = start_action(rsc, next, TRUE);
 
     if (optional == FALSE) {
         if (is_set(start_op->flags, pe_action_runnable)) {
             update_action_flags(start_op, pe_action_optional | pe_action_clear);
         }
     }
 
     return TRUE;
 }
 
 /*
  * Create the promote action for rsc on 'next', or cancel promotion.
  *
  * If every start action of rsc found for 'next' is runnable, the promote
  * action is created.  Otherwise the runnable flag is cleared on every
  * existing promote action of rsc for 'next'.
  *
  * Returns FALSE (through CRM_CHECK) when 'next' is NULL, TRUE otherwise.
  */
 gboolean
 PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
 {
     char *op_key = NULL;
     GListPtr item = NULL;
     GListPtr matches = NULL;
     gboolean can_start = TRUE;
 
     CRM_ASSERT(rsc);
     CRM_CHECK(next != NULL, return FALSE);
     pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname);
 
     op_key = start_key(rsc);
     matches = find_actions_exact(rsc->actions, op_key, next);
     free(op_key);
 
     /* A single non-runnable start is enough to block the promotion */
     for (item = matches; item != NULL && can_start; item = item->next) {
         action_t *start = (action_t *) item->data;
 
         if (is_set(start->flags, pe_action_runnable) == FALSE) {
             can_start = FALSE;
         }
     }
     g_list_free(matches);
 
     if (can_start == FALSE) {
         pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id);
 
         op_key = promote_key(rsc);
         matches = find_actions_exact(rsc->actions, op_key, next);
         free(op_key);
 
         item = matches;
         while (item != NULL) {
             action_t *promote = (action_t *) item->data;
 
             update_action_flags(promote, pe_action_runnable | pe_action_clear);
             item = item->next;
         }
 
         g_list_free(matches);
         return TRUE;
     }
 
     promote_action(rsc, next, optional);
     return TRUE;
 }
 
 /*
  * Create a demote action for rsc on every node it is currently running on.
  *
  * 'next' only appears in the trace message.  Always returns TRUE.
  */
 gboolean
 DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
 {
     GListPtr item = NULL;
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "%s", rsc->id);
 
     /* Disabled check kept from the original: CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */
     item = rsc->running_on;
     while (item != NULL) {
         node_t *active_node = (node_t *) item->data;
 
         pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A");
         demote_action(rsc, active_node, optional);
         item = item->next;
     }
 
     return TRUE;
 }
 
 /*
  * Handler for a role transition that should never be requested: logs an
  * error naming the resource and target node, trips CRM_CHECK and returns
  * FALSE.
  */
 gboolean
 RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
 {
     const char *target = "N/A";
 
     CRM_ASSERT(rsc);
     if (next != NULL) {
         target = next->details->uname;
     }
 
     crm_err("%s on %s", rsc->id, target);
     CRM_CHECK(FALSE, return FALSE);
     return FALSE;
 }
 
 /*
  * Handler for a role transition that requires no action: traces the
  * resource id and returns FALSE.
  */
 gboolean
 NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
 {
     const gboolean result = FALSE;
 
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "%s", rsc->id);
 
     return result;
 }
 
 /*
  * Schedule removal of rsc from node.
  *
  * Nothing is scheduled (and FALSE is returned) when:
  *  - node is NULL,
  *  - the resource is flagged pe_rsc_failed, or
  *  - the node is unclean or offline.
  *
  * Otherwise a delete action is created and ordered after the resource's
  * stop and before its start, and TRUE is returned.
  *
  * The NULL-node test must come first: the other early-exit branches
  * dereference node for their trace messages.
  */
 gboolean
 DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set)
 {
     if (node == NULL) {
         pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id);
         return FALSE;
 
     } else if (is_set(rsc->flags, pe_rsc_failed)) {
         pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname);
         return FALSE;
 
     } else if (node->details->unclean || node->details->online == FALSE) {
         pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id,
                      node->details->uname);
         return FALSE;
     }
 
     crm_notice("Removing %s from %s", rsc->id, node->details->uname);
 
     delete_action(rsc, node, optional);
 
     /* stop -> delete */
     new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE,
                   optional ? pe_order_implies_then : pe_order_optional, data_set);
 
     /* delete -> start */
     new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START,
                   optional ? pe_order_implies_then : pe_order_optional, data_set);
 
     return TRUE;
 }
 
 #include <../lib/pengine/unpack.h>
 #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE;
 /*
  * Increment, in place, the decimal clone-instance number at the end of a
  * resource id of the form "<name>:<number>" (e.g. "rsc:9" -> "rsc:10").
  *
  * The function takes ownership of last_rsc_id: when an extra digit is
  * needed the buffer is replaced by a newly allocated one and the old one
  * is freed, so callers must always continue with the returned pointer.
  *
  * Returns the (possibly reallocated) id, or NULL if last_rsc_id is NULL or
  * an unexpected character is found while scanning backwards.  In the
  * latter case the buffer is freed here, because the caller can no longer
  * tell whether its original pointer is still valid.
  */
 static char *
 increment_clone(char *last_rsc_id)
 {
     int lpc = 0;
     int len = 0;
     char *tmp = NULL;
     gboolean complete = FALSE;
 
     CRM_CHECK(last_rsc_id != NULL, return NULL);
     len = strlen(last_rsc_id);
 
     /* Walk backwards from the last character, carrying as required */
     lpc = len - 1;
     while (complete == FALSE && lpc > 0) {
         switch (last_rsc_id[lpc]) {
             case 0:
                 lpc--;
                 break;
             case '0':
                 set_char('1');
                 break;
             case '1':
                 set_char('2');
                 break;
             case '2':
                 set_char('3');
                 break;
             case '3':
                 set_char('4');
                 break;
             case '4':
                 set_char('5');
                 break;
             case '5':
                 set_char('6');
                 break;
             case '6':
                 set_char('7');
                 break;
             case '7':
                 set_char('8');
                 break;
             case '8':
                 set_char('9');
                 break;
             case '9':
                 /* Roll over and carry into the next digit to the left */
                 last_rsc_id[lpc] = '0';
                 lpc--;
                 break;
             case ':':
                 /* Every digit rolled over: grow by one character, so that
                  * e.g. "rsc:00" becomes "rsc:100"
                  */
                 tmp = last_rsc_id;
                 last_rsc_id = calloc(1, len + 2);
                 memcpy(last_rsc_id, tmp, len);
                 last_rsc_id[++lpc] = '1';
                 last_rsc_id[len] = '0';
                 last_rsc_id[len + 1] = 0;
                 complete = TRUE;
                 free(tmp);
                 break;
             default:
                 crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc);
                 /* We own the buffer; do not leak it on the error path */
                 free(last_rsc_id);
                 return NULL;
         }
     }
     return last_rsc_id;
 }
 
 /*
  * For an anonymous clone instance that lives inside a group, find out
  * whether the state of any sibling instance is already known on node.
  *
  * Starting from instance zero of rsc's id (clone_zero), each successive
  * instance id is looked up and its known_on table consulted.  The first
  * instance is searched for among the top-level parent's children, later
  * ones in data_set->resources.
  *
  * Returns the matching known_on entry, or NULL if none was found or the
  * top-level parent is flagged pe_rsc_unique.
  *
  * This is clumsy but works; an alternative would be to update known_on for
  * every peer during process_rsc_state().  It is also known to be slow
  * (ptest -x with 100 nodes, 100 clones and clone-max=10):
  *   No probes                      O(25s)
  *   Detection without clone loop   O(3m)
  *   Detection with clone loop      O(8m)
  */
 static node_t *
 probe_grouped_clone(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
 {
     resource_t *top = uber_parent(rsc);
     resource_t *peer = NULL;
     node_t *known = NULL;
     char *clone_id = NULL;
 
     if (is_set(top->flags, pe_rsc_unique)) {
         /* Only anonymous clones share state between instances */
         return NULL;
     }
 
     clone_id = clone_zero(rsc->id);
     for (peer = pe_find_resource(top->children, clone_id);
          peer != NULL;
          peer = pe_find_resource(data_set->resources, clone_id)) {
 
         known = pe_hash_table_lookup(peer->known_on, node->details->id);
         if (known != NULL) {
             /* we already know the status of the resource on this node */
             pe_rsc_trace(rsc, "Skipping active clone: %s", rsc->id);
             break;
         }
 
         /* Try the next instance number */
         clone_id = increment_clone(clone_id);
     }
 
     free(clone_id);
     return known;
 }
 
 /*
  * Create a probe (an RSC_STATUS operation keyed with 0 as the last
  * generate_op_key() argument) for rsc on node, unless one is not needed.
  *
  * rsc      - resource to probe; if it has children, each child's
  *            create_probe method is invoked instead
  * node     - node to probe on (must not be NULL)
  * complete - action that the new probe is ordered before, except for fence
  *            devices when unfencing is enabled
  * force    - when TRUE, bypasses the pe_flag_startup_probes check, the
  *            container-remote-node check and the "state already known" check
  *
  * Returns TRUE if a probe was created (for a resource with children: if
  * any child created one), FALSE otherwise.
  */
 gboolean
 native_create_probe(resource_t * rsc, node_t * node, action_t * complete,
                     gboolean force, pe_working_set_t * data_set)
 {
     enum pe_ordering flags = pe_order_optional;
     char *key = NULL;
     action_t *probe = NULL;
     node_t *running = NULL;
     node_t *allowed = NULL;
     resource_t *top = uber_parent(rsc);
 
     /* String forms of the expected probe results, computed once and cached */
     static const char *rc_master = NULL;
     static const char *rc_inactive = NULL;
 
     if (rc_inactive == NULL) {
         rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING);
         rc_master = crm_itoa(PCMK_OCF_RUNNING_MASTER);
     }
 
     CRM_CHECK(node != NULL, return FALSE);
     if (force == FALSE && is_not_set(data_set->flags, pe_flag_startup_probes)) {
         pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id);
         return FALSE;
     } else if (force == FALSE && is_container_remote_node(node)) {
         pe_rsc_trace(rsc, "Skipping active resource detection for %s on container %s",
                      rsc->id, node->details->id);
         return FALSE;
     }
 
     /* Some kinds of resource are never probed on remote nodes */
     if (is_remote_node(node)) {
         const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
 
         if (safe_str_eq(class, "stonith")) {
             pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes do not run stonith agents.", rsc->id, node->details->id);
             return FALSE;
         } else if (rsc_contains_remote_node(data_set, rsc)) {
             pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run resources that contain connection resources.", rsc->id, node->details->id);
             return FALSE;
         } else if (rsc->is_remote_node) {
             pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run connection resources", rsc->id, node->details->id);
             return FALSE;
         }
     }
 
     /* Collective resources delegate to each of their children */
     if (rsc->children) {
         GListPtr gIter = NULL;
         gboolean any_created = FALSE;
 
         for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
             resource_t *child_rsc = (resource_t *) gIter->data;
 
             /* The call is on the left of || so it is made for every child */
             any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set)
                 || any_created;
         }
 
         return any_created;
 
     } else if (rsc->container) {
         pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id);
         return FALSE;
     }
 
     if (is_set(rsc->flags, pe_rsc_orphan)) {
         pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id);
         return FALSE;
     }
 
     /* Is the state of this resource already known on the node? */
     running = g_hash_table_lookup(rsc->known_on, node->details->id);
     if (running == NULL && is_set(rsc->flags, pe_rsc_unique) == FALSE) {
         /* Anonymous clones */
         if (rsc->parent == top) {
             running = g_hash_table_lookup(rsc->parent->known_on, node->details->id);
 
         } else {
             /* Grouped anonymous clones need extra special handling */
             running = probe_grouped_clone(rsc, node, data_set);
         }
     }
 
     if (force == FALSE && running != NULL) {
         /* we already know the status of the resource on this node */
         pe_rsc_trace(rsc, "Skipping active: %s on %s", rsc->id, node->details->uname);
         return FALSE;
     }
 
     /* Honour the resource discovery settings for this node */
     allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
     if (rsc->exclusive_discover || top->exclusive_discover) {
         if (allowed == NULL) {
             /* exclusive discover is enabled and this node is not in the allowed list. */    
             return FALSE;
         } else if (allowed->rsc_discover_mode != discover_exclusive) {
             /* exclusive discover is enabled and this node is not marked
              * as a node this resource should be discovered on */ 
             return FALSE;
         }
     }
     if (allowed && allowed->rsc_discover_mode == discover_never) {
         /* this resource is marked as not needing to be discovered on this node */
         return FALSE;
     }
 
 
     /* All checks passed: create the probe action itself */
     key = generate_op_key(rsc->id, RSC_STATUS, 0);
     probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set);
     update_action_flags(probe, pe_action_optional | pe_action_clear);
 
     /* If enabled, require unfencing before probing any fence devices
      * but ensure it happens after any resources that require
      * unfencing have been probed.
      *
      * Doing it the other way (requiring unfencing after probing
      * resources that need it) would result in the node being
      * unfenced, and all its resources being stopped, whenever a new
      * resource is added.  Which would be highly suboptimal.
      *
      * So essentially, at the point the fencing device(s) have been
      * probed, we know the state of all resources that require
      * unfencing and that unfencing occurred.
      */
     if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) {
         trigger_unfencing(NULL, node, "node discovery", probe, data_set);
         probe->priority = INFINITY; /* Ensure this runs if unfencing succeeds */
 
     } else if(is_set(rsc->flags, pe_rsc_needs_unfencing)) {
         action_t *unfence = pe_fence_op(node, "on", TRUE, data_set);
 
         order_actions(probe, unfence, pe_order_optional);
     }
 
     /*
      * We need to know if it's running_on (not just known_on) this node
      * to correctly determine the target rc.
      */
     running = pe_find_node_id(rsc->running_on, node->details->id);
     if (running == NULL) {
         add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive);
 
     } else if (rsc->role == RSC_ROLE_MASTER) {
         add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master);
     }
 
     crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role),
               is_set(probe->flags, pe_action_runnable), rsc->running_on);
 
     /* Choose which resource's start the probe is ordered before: the
      * resource itself, or its top-level parent when that is a clone or
      * higher variant and this is not the fence-device/unfencing case
      */
     if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) {
         top = rsc;
 
     } else if (top->variant < pe_clone) {
         top = rsc;
 
     } else {
         crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id);
     }
 
     if(is_not_set(probe->flags, pe_action_runnable) && rsc->running_on == NULL) {
         /* Prevent the start from occurring if rsc isn't active, but
          * don't cause it to stop if it was active already
          */
         flags |= pe_order_runnable_left;
     }
 
     /* probe -> start (of rsc or of its top-level parent, see above) */
     custom_action_order(rsc, NULL, probe,
                         top, generate_op_key(top->id, RSC_START, 0), NULL,
                         flags, data_set);
 
     /* probe -> stop, unless the node is shutting down */
     if (node && node->details->shutdown == FALSE) {
         custom_action_order(rsc, NULL, probe,
                             rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL,
                             pe_order_optional, data_set);
     }
 
     if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) {
         /* Normally rsc.start depends on probe complete which depends
          * on rsc.probe. But this can't be the case in this scenario as
          * it would create graph loops.
          *
          * So instead we explicitly order 'rsc.probe then rsc.start'
          */
 
     } else {
         order_actions(probe, complete, pe_order_implies_then);
     }
     return TRUE;
 }
 
 /*
  * Order the actions of rsc relative to fencing.
  *
  * For every action of the resource:
  *  - actions that need nothing are left alone;
  *  - actions that need stonith are ordered after the STONITH_DONE pseudo op;
  *  - otherwise, a start is ordered after the ALL_STOPPED pseudo op when a
  *    fencing target exists and the resource's state on that target is not
  *    in rsc->known_on.
  *
  * stonith_op may be NULL, in which case there is no fencing target.
  */
 static void
 native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set)
 {
     node_t *target = NULL;
     GListPtr item = NULL;
     action_t *all_stopped = NULL;
     action_t *stonith_done = NULL;
 
     if (stonith_op != NULL) {
         target = stonith_op->node;
     }
     all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
     stonith_done = get_pseudo_op(STONITH_DONE, data_set);
 
     for (item = rsc->actions; item != NULL; item = item->next) {
         action_t *action = (action_t *) item->data;
 
         if (action->needs == rsc_req_nothing) {
             continue;
         }
 
         if (action->needs == rsc_req_stonith) {
             order_actions(stonith_done, action, pe_order_optional);
             continue;
         }
 
         if (target == NULL) {
             continue;
         }
 
         if (safe_str_eq(action->task, RSC_START)
             && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) {
             /* We do not know whether the resource is active on the node
              * that is about to be shot.
              *
              * Regardless of action->needs, the only safe option is to
              * wait until the node has been shot before doing anything
              * with the resource.  This is analogous to waiting for all
              * the probes for rscX to complete before starting rscX.
              *
              * The most likely explanation is that the DC died and took
              * its status with it.
              */
             pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,
                          target->details->uname);
             order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left);
         }
     }
 }
 
 /*
  * Collect the actions in search_list matching key that run either on
  * fence_target itself or on a container node hosted by a resource running
  * on fence_target.  Actions on such container nodes are also implied by
  * fencing the host cluster node.
  *
  * Returns a newly built list that the caller must release with
  * g_list_free().
  */
 static GListPtr
 find_fence_target_node_actions(GListPtr search_list, const char *key, node_t *fence_target, pe_working_set_t *data_set)
 {
     GListPtr rsc_iter = NULL;
     GListPtr found = find_actions(search_list, key, fence_target);
 
     for (rsc_iter = fence_target->details->running_rsc; rsc_iter != NULL; rsc_iter = rsc_iter->next) {
         resource_t *hosted = (resource_t *) rsc_iter->data;
         node_t *guest = NULL;
         GListPtr guest_actions = NULL;
         GListPtr item = NULL;
 
         /* Only remote-node resources living in a container are of interest */
         if (hosted->is_remote_node == FALSE || hosted->container == NULL) {
             continue;
         }
 
         /* The container node carries the same name as its resource */
         guest = pe_find_node(data_set->nodes, hosted->id);
         if (guest == NULL) {
             continue;
         }
 
         guest_actions = find_actions(search_list, key, guest);
         for (item = guest_actions; item != NULL; item = item->next) {
             found = g_list_prepend(found, (action_t *) item->data);
         }
         g_list_free(guest_actions);
     }
 
     return found;
 }
 
 /*
  * Make the stop and demote actions of rsc that target the node being
  * fenced (or a container node hosted on it) implied by the fencing
  * operation.
  *
  * Each affected action is passed to update_action_flags() with
  * pe_action_pseudo and pe_action_runnable (stops additionally with
  * pe_action_implied_by_stonith) and, unless the resource's start needs no
  * more than quorum, is ordered after stonith_op.  Resources flagged
  * pe_rsc_notify also get a secondary set of stop notifications.
  *
  * stonith_op must not be NULL; it is dereferenced unconditionally.
  */
 static void
 native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set)
 {
     char *key = NULL;
     GListPtr gIter = NULL;
     GListPtr action_list = NULL;
 
     action_t *start = NULL;
     resource_t *top = uber_parent(rsc);
 
     /* Remember the first start action (if any); its 'needs' value decides
      * below whether ordering after the fencing op is added
      */
     key = start_key(rsc);
     action_list = find_actions(rsc->actions, key, NULL);
     if(action_list) {
         start = action_list->data;
     }
 
     g_list_free(action_list);
     free(key);
 
     key = stop_key(rsc);
     action_list = find_fence_target_node_actions(rsc->actions, key, stonith_op->node, data_set);
     free(key);
 
     /* add the stonith OP as a stop pre-req and the mark the stop
      * as a pseudo op - since its now redundant
      */
 
     for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         /* Skip stops of a failed resource whose node is online and clean */
         if (action->node->details->online
             && action->node->details->unclean == FALSE && is_set(rsc->flags, pe_rsc_failed)) {
             continue;
         }
 
         if (is_set(rsc->flags, pe_rsc_failed)) {
             crm_notice("Stop of failed resource %s is"
                        " implicit after %s is fenced", rsc->id, action->node->details->uname);
         } else {
             crm_info("%s is implicit after %s is fenced",
                      action->uuid, action->node->details->uname);
         }
 
         /* the stop would never complete and is
          * now implied by the stonith operation
          */
         update_action_flags(action, pe_action_pseudo);
         update_action_flags(action, pe_action_runnable);
         update_action_flags(action, pe_action_implied_by_stonith);
 
         if(start == NULL || start->needs > rsc_req_quorum) {
             enum pe_ordering flags = pe_order_optional;
             /* parent_stop may be NULL if the top-level parent has no stop action */
             action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL);
 
             if(stonith_op->node->details->remote_rsc) {
                 flags |= pe_order_preserve;
             }
             order_actions(stonith_op, action, flags);
             order_actions(stonith_op, parent_stop, flags);
         }
 
         if (is_set(rsc->flags, pe_rsc_notify)) {
             /* Create a second notification that will be delivered
              *   immediately after the node is fenced
              *
              * Basic problem:
              * - C is a clone active on the node to be shot and stopping on another
              * - R is a resource that depends on C
              *
              * + C.stop depends on R.stop
              * + C.stopped depends on STONITH
              * + C.notify depends on C.stopped
              * + C.healthy depends on C.notify
              * + R.stop depends on C.healthy
              *
              * The extra notification here changes
              *  + C.healthy depends on C.notify
              * into:
              *  + C.healthy depends on C.notify'
              *  + C.notify' depends on STONITH'
              * thus breaking the loop
              */
             notify_data_t *n_data =
                 create_notification_boundaries(rsc, RSC_STOP, NULL, stonith_op, data_set);
             crm_info("Creating secondary notification for %s", action->uuid);
 
             collect_notification_data(rsc, TRUE, FALSE, n_data);
             g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), strdup(rsc->id));
             g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"),
                                 strdup(action->node->details->uname));
             create_notifications(uber_parent(rsc), n_data, data_set);
             free_notification_data(n_data);
         }
 
 /* From Bug #1601, successful fencing must be an input to a failed resources stop action.
 
    However given group(rA, rB) running on nodeX and B.stop has failed,
    A := stop healthy resource (rA.stop)
    B := stop failed resource (pseudo operation B.stop)
    C := stonith nodeX
    A requires B, B requires C, C requires A
    This loop would prevent the cluster from making progress.
 
    This block creates the "C requires A" dependency and therefore must (at least
    for now) be disabled.
 
    Instead, run the block above and treat all resources on nodeX as B would be
    (marked as a pseudo op depending on the STONITH).
 
    TODO: Break the "A requires B" dependency in update_action() and re-enable this block
 
    } else if(is_stonith == FALSE) {
    crm_info("Moving healthy resource %s"
    " off %s before fencing",
    rsc->id, node->details->uname);
 
    * stop healthy resources before the
    * stonith op
    *
    custom_action_order(
    rsc, stop_key(rsc), NULL,
    NULL,strdup(CRM_OP_FENCE),stonith_op,
    pe_order_optional, data_set);
 */
     }
 
     g_list_free(action_list);
 
     /* Second pass: the same treatment for demote actions */
     key = demote_key(rsc);
     action_list = find_fence_target_node_actions(rsc->actions, key, stonith_op->node, data_set);
     free(key);
 
     for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         /* Unlike the stop loop above, a failed resource is always handled
          * here, even when its node is online and clean
          */
         if (action->node->details->online == FALSE || action->node->details->unclean == TRUE
             || is_set(rsc->flags, pe_rsc_failed)) {
             if (is_set(rsc->flags, pe_rsc_failed)) {
                 pe_rsc_info(rsc, "Demote of failed resource %s is"
                             " implict after %s is fenced", rsc->id, action->node->details->uname);
             } else {
                 pe_rsc_info(rsc, "%s is implicit after %s is fenced",
                             action->uuid, action->node->details->uname);
             }
             /* the demote would never complete and is
              * now implied by the stonith operation
              */
             crm_trace("here - 1");
             update_action_flags(action, pe_action_pseudo);
             update_action_flags(action, pe_action_runnable);
 
             if (start == NULL || start->needs > rsc_req_quorum) {
                 order_actions(stonith_op, action, pe_order_preserve|pe_order_optional);
             }
         }
     }
 
     g_list_free(action_list);
 }
 
 /*
  * Add fencing-related ordering for rsc.
  *
  * Resources with children simply recurse into each child.  For a childless
  * resource that is flagged pe_rsc_managed, start constraints are always
  * created and stop constraints are created only when stonith_op is
  * non-NULL.  Resources not flagged pe_rsc_managed are skipped.
  */
 void
 rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set)
 {
     GListPtr child = rsc->children;
 
     if (child != NULL) {
         while (child != NULL) {
             rsc_stonith_ordering((resource_t *) child->data, stonith_op, data_set);
             child = child->next;
         }
         return;
     }
 
     if (is_not_set(rsc->flags, pe_rsc_managed)) {
         pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id);
         return;
     }
 
     /* Starts must wait for fencing where required */
     native_start_constraints(rsc, stonith_op, data_set);
 
     /* Stops on the target are implied by the fencing operation */
     if (stonith_op != NULL) {
         native_stop_constraints(rsc, stonith_op, data_set);
     }
 }
 
 /*
  * Activity states for a stack.  The non-zero values are distinct bits
  * (1, 2, 4).
  * NOTE(review): no user of this enum is visible in this part of the file;
  * whether the values are ever OR-ed together should be confirmed.
  */
 enum stack_activity {
     stack_stable = 0,
     stack_starting = 1,
     stack_stopping = 2,
     stack_middle = 4,
 };
 
 /*
  * Return the first action of rsc whose key is built from the resource id,
  * the given action name and 0 as the last generate_op_key() argument,
  * optionally restricted to the node 'current' (passed to find_actions()).
  *
  * rsc        - resource whose action list is searched
  * action     - task name, e.g. RSC_STOP
  * only_valid - when TRUE, pseudo actions and actions that are not runnable
  *              are rejected and NULL is returned instead
  * current    - node filter handed to find_actions(); may be NULL
  *
  * Returns the action (still owned by the resource) or NULL.
  */
 static action_t *
 get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current)
 {
     action_t *a = NULL;
     GListPtr action_list = NULL;
     char *key = generate_op_key(rsc->id, action, 0);
 
     action_list = find_actions(rsc->actions, key, current);
 
     if (action_list == NULL || action_list->data == NULL) {
         crm_trace("%s: no %s action", rsc->id, action);
         /* The list may be non-NULL with an empty first element; do not leak it */
         g_list_free(action_list);
         free(key);
         return NULL;
     }
 
     a = action_list->data;
     g_list_free(action_list);
 
     if (only_valid && is_set(a->flags, pe_action_pseudo)) {
         crm_trace("%s: pseudo", key);
         a = NULL;
 
     } else if (only_valid && is_not_set(a->flags, pe_action_runnable)) {
         crm_trace("%s: not runnable", key);
         a = NULL;
     }
 
     free(key);
     return a;
 }
 
 /*
  * Rewrite an optional stop/start pair of rsc as a single "reload".
  *
  * Nothing happens unless the resource is flagged pe_rsc_try_reload and
  * both the stop and the start are optional.  Otherwise:
  *  - a non-optional promote (if any) is passed to update_action_flags()
  *    with pe_action_pseudo;
  *  - if a non-optional demote exists, it is the action rewritten and the
  *    stop is passed to update_action_flags() with pe_action_pseudo;
  *    otherwise the start is rewritten;
  *  - the rewritten action gets task "reload", a matching uuid and has
  *    update_action_flags() called with pe_action_optional | pe_action_clear.
  *
  * stop and start may be NULL (the lookup in the caller can fail for
  * either); in that case there is nothing to rewrite and the function
  * returns without touching anything.
  */
 static void
 ReloadRsc(resource_t * rsc, action_t * stop, action_t * start, pe_working_set_t * data_set)
 {
     action_t *action = NULL;
     action_t *rewrite = NULL;
 
     if (is_not_set(rsc->flags, pe_rsc_try_reload)) {
         return;
 
     } else if (stop == NULL || start == NULL) {
         /* Both are dereferenced below; without them a reload is impossible */
         pe_rsc_trace(rsc, "%s: no usable stop/start pair", rsc->id);
         return;
 
     } else if (is_not_set(stop->flags, pe_action_optional)) {
         pe_rsc_trace(rsc, "%s: stop action", rsc->id);
         return;
 
     } else if (is_not_set(start->flags, pe_action_optional)) {
         pe_rsc_trace(rsc, "%s: start action", rsc->id);
         return;
     }
 
     pe_rsc_trace(rsc, "%s on %s", rsc->id, stop->node->details->uname);
 
     action = get_first_named_action(rsc, RSC_PROMOTE, TRUE, NULL);
     if (action && is_set(action->flags, pe_action_optional) == FALSE) {
         update_action_flags(action, pe_action_pseudo);
     }
 
     action = get_first_named_action(rsc, RSC_DEMOTE, TRUE, NULL);
     if (action && is_set(action->flags, pe_action_optional) == FALSE) {
         rewrite = action;
         update_action_flags(stop, pe_action_pseudo);
 
     } else {
         rewrite = start;
     }
 
     pe_rsc_info(rsc, "Rewriting %s of %s on %s as a reload",
                 rewrite->task, rsc->id, stop->node->details->uname);
     set_bit(rsc->flags, pe_rsc_reload);
     update_action_flags(rewrite, pe_action_optional | pe_action_clear);
 
     /* Replace the task name and regenerate the key to match it */
     free(rewrite->uuid);
     free(rewrite->task);
     rewrite->task = strdup("reload");
     rewrite->uuid = generate_op_key(rsc->id, rewrite->task, 0);
 }
 
 /*
  * Consider rewriting the stop/start of rsc (or of each of its children)
  * as a reload.
  *
  * Each resource is processed at most once: the pe_rsc_munging flag is set
  * on first visit and later visits return immediately.  A childless
  * resource is only considered when its variant is not above pe_native, it
  * is managed, not failed, not start-pending and its next role is at least
  * RSC_ROLE_STARTED.
  */
 void
 rsc_reload(resource_t * rsc, pe_working_set_t * data_set)
 {
     GListPtr child = NULL;
     node_t *active_on = NULL;
     action_t *stop_op = NULL;
     action_t *start_op = NULL;
 
     if (is_set(rsc->flags, pe_rsc_munging)) {
         return;
     }
     set_bit(rsc->flags, pe_rsc_munging);
 
     if (rsc->children) {
         child = rsc->children;
         while (child != NULL) {
             rsc_reload((resource_t *) child->data, data_set);
             child = child->next;
         }
         return;
     }
 
     if (rsc->variant > pe_native) {
         return;
     }
 
     pe_rsc_trace(rsc, "Processing %s", rsc->id);
 
     /* The stop is looked up on the first node the resource is running on */
     if (rsc->running_on) {
         active_on = rsc->running_on->data;
     }
     stop_op = get_first_named_action(rsc, RSC_STOP, TRUE, active_on);
     start_op = get_first_named_action(rsc, RSC_START, TRUE, NULL);
 
     if (is_not_set(rsc->flags, pe_rsc_managed)
         || is_set(rsc->flags, pe_rsc_failed)
         || is_set(rsc->flags, pe_rsc_start_pending)
         || rsc->next_role < RSC_ROLE_STARTED) {
         pe_rsc_trace(rsc, "%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags);
         return;
     }
 
     if (stop_op == NULL) {
         return;
     }
 
     if (is_set(stop_op->flags, pe_action_optional) && is_set(rsc->flags, pe_rsc_try_reload)) {
         ReloadRsc(rsc, stop_op, start_op, data_set);
     }
 }
 
 /*
  * Add resource-derived meta attributes to xml:
  *  - the clone incarnation and remote-node values from rsc->meta, if set;
  *  - the container id of rsc or of any ancestor that has a container;
  *  - the isolation wrapper and isolation instance taken from the nearest
  *    resource (rsc itself or an ancestor) that defines an isolation
  *    wrapper.
  */
 void
 native_append_meta(resource_t * rsc, xmlNode * xml)
 {
     const char *copied_meta[] = { XML_RSC_ATTR_INCARNATION, XML_RSC_ATTR_REMOTE_NODE };
     unsigned int idx = 0;
     char *value = NULL;
     char *name = NULL;
     char *iso = NULL;
     resource_t *ancestor = NULL;
     resource_t *iso_rsc = NULL;
     resource_t *instance_rsc = NULL;
 
     /* Plain copies from the resource's own meta table */
     for (idx = 0; idx < sizeof(copied_meta) / sizeof(copied_meta[0]); idx++) {
         value = g_hash_table_lookup(rsc->meta, copied_meta[idx]);
         if (value) {
             name = crm_meta_name(copied_meta[idx]);
             crm_xml_add(xml, name, value);
             free(name);
         }
     }
 
     for (ancestor = rsc; ancestor != NULL; ancestor = ancestor->parent) {
         if (ancestor->container) {
             crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, ancestor->container->id);
         }
     }
 
     /* Walk up to the first resource with an isolation wrapper, remembering
      * the last resource visited before it (rsc itself if the wrapper is on
      * rsc)
      */
     instance_rsc = rsc;
     for (iso_rsc = rsc; iso_rsc != NULL; iso_rsc = iso_rsc->parent) {
         if (iso_rsc->isolation_wrapper != NULL) {
             break;
         }
         instance_rsc = iso_rsc;
     }
     if (iso_rsc == NULL) {
         /* No isolation anywhere in the hierarchy */
         return;
     }
 
     /* name of wrapper script this resource is routed through. */
     name = crm_meta_name(XML_RSC_ATTR_ISOLATION_WRAPPER);
     crm_xml_add(xml, name, iso_rsc->isolation_wrapper);
     free(name);
 
     /* instance name for isolated environment */
     name = crm_meta_name(XML_RSC_ATTR_ISOLATION_INSTANCE);
     if (iso_rsc->variant >= pe_clone) {
         /* Isolation is set at the clone/master level, so the instance is
          * the clone child (instance_rsc) combined with its incarnation.
          *
          * Cloned group:      clone (iso_rsc) > group (instance_rsc) > rsc
          *   - every member of the group shares one isolated environment.
          * Cloned primitive:  clone (iso_rsc) > rsc (instance_rsc == rsc)
          *   - every primitive instance gets its own environment.
          */
         value = g_hash_table_lookup(instance_rsc->meta, XML_RSC_ATTR_INCARNATION);
         CRM_ASSERT(value != NULL);
 
         iso = crm_concat(crm_element_value(instance_rsc->xml, XML_ATTR_ID), value, '_');
         crm_xml_add(xml, name, iso);
         free(iso);
 
     } else {
         /* Isolation is set on a group or primitive (iso_rsc), which is
          * itself the isolated environment, whether or not it is inside a
          * clone.  When it carries an incarnation, that is appended to its
          * XML id to form the instance name.
          */
         value = g_hash_table_lookup(iso_rsc->meta, XML_RSC_ATTR_INCARNATION);
         crm_xml_add(xml, name, iso_rsc->id);
         if (value) {
             iso = crm_concat(crm_element_value(iso_rsc->xml, XML_ATTR_ID), value, '_');
             crm_xml_add(xml, name, iso);
             free(iso);
         }
     }
     free(name);
 }
diff --git a/pengine/regression.sh b/pengine/regression.sh
index c7f990c8aa..8d7e3e8d6d 100755
--- a/pengine/regression.sh
+++ b/pengine/regression.sh
@@ -1,839 +1,839 @@
 #!/bin/bash
 
  # Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  #
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public
  # License as published by the Free Software Foundation; either
  # version 2 of the License, or (at your option) any later version.
  #
  # This software is distributed in the hope that it will be useful,
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  # General Public License for more details.
  #
  # You should have received a copy of the GNU General Public
  # License along with this library; if not, write to the Free Software
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  #
 
 core=`dirname $0`
 . $core/regression.core.sh || exit 1
 
 create_mode="true"
 info Generating test outputs for these tests...
 # do_test file description
 
 info Done.
 echo ""
 
 info Performing the following tests from $io_dir
 create_mode="false"
 
 echo ""
 
 do_test simple1 "Offline     "
 do_test simple2 "Start       "
 do_test simple3 "Start 2     "
 do_test simple4 "Start Failed"
 do_test simple6 "Stop Start  "
 do_test simple7 "Shutdown    "
 #do_test simple8 "Stonith	"
 #do_test simple9 "Lower version"
 #do_test simple10 "Higher version"
 do_test simple11 "Priority (ne)"
 do_test simple12 "Priority (eq)"
 do_test simple8 "Stickiness"
 
 echo ""
 do_test group1 "Group		"
 do_test group2 "Group + Native	"
 do_test group3 "Group + Group	"
 do_test group4 "Group + Native (nothing)"
 do_test group5 "Group + Native (move)   "
 do_test group6 "Group + Group (move)    "
 do_test group7 "Group colocation"
 do_test group13 "Group colocation (cant run)"
 do_test group8 "Group anti-colocation"
 do_test group9 "Group recovery"
 do_test group10 "Group partial recovery"
 do_test group11 "Group target_role"
 do_test group14 "Group stop (graph terminated)"
 do_test group15 "-ve group colocation"
 do_test bug-1573 "Partial stop of a group with two children"
 do_test bug-1718 "Mandatory group ordering - Stop group_FUN"
 do_test bug-lf-2613 "Move group on failure"
 do_test bug-lf-2619 "Move group on clone failure"
 do_test group-fail "Ensure stop order is preserved for partially active groups"
 do_test group-unmanaged "No need to restart r115 because r114 is unmanaged"
 do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails"
 do_test group-dependents "Account for the location preferences of things colocated with a group"
 
 echo ""
 do_test rsc_dep1 "Must not     "
 do_test rsc_dep3 "Must         "
 do_test rsc_dep5 "Must not 3   "
 do_test rsc_dep7 "Must 3       "
 do_test rsc_dep10 "Must (but cant)"
 do_test rsc_dep2  "Must (running) "
 do_test rsc_dep8  "Must (running : alt) "
 do_test rsc_dep4  "Must (running + move)"
 do_test asymmetric "Asymmetric - require explicit location constraints"
 
 echo ""
 do_test orphan-0 "Orphan ignore"
 do_test orphan-1 "Orphan stop"
 do_test orphan-2 "Orphan stop, remove failcount"
 
 echo ""
 do_test params-0 "Params: No change"
 do_test params-1 "Params: Changed"
 do_test params-2 "Params: Resource definition"
 do_test params-4 "Params: Reload"
 do_test params-5 "Params: Restart based on probe digest"
 do_test novell-251689 "Resource definition change + target_role=stopped"
 do_test bug-lf-2106 "Restart all anonymous clone instances after config change"
 do_test params-6 "Params: Detect reload in previously migrated resource"
 do_test nvpair-id-ref "Support id-ref in nvpair with optional name"
 do_test not-reschedule-unneeded-monitor "Do not reschedule unneeded monitors while resource definitions have changed"
 
 echo ""
 do_test target-0 "Target Role : baseline"
 do_test target-1 "Target Role : master"
 do_test target-2 "Target Role : invalid"
 
 echo ""
 do_test base-score "Set a node's default score for all nodes"
 
 echo ""
 do_test date-1 "Dates" -t "2005-020"
 do_test date-2 "Date Spec - Pass" -t "2005-020T12:30"
 do_test date-3 "Date Spec - Fail" -t "2005-020T11:30"
 do_test origin "Timing of recurring operations" -t "2014-05-07 00:28:00" 
 do_test probe-0 "Probe (anon clone)"
 do_test probe-1 "Pending Probe"
 do_test probe-2 "Correctly re-probe cloned groups"
 do_test probe-3 "Probe (pending node)"
 do_test probe-4 "Probe (pending node + stopped resource)"
 do_test standby "Standby"
 do_test comments "Comments"
 
 echo ""
 do_test one-or-more-0 "Everything starts"
 do_test one-or-more-1 "Nothing starts because of A"
 do_test one-or-more-2 "D can start because of C"
 do_test one-or-more-3 "D cannot start because of B and C"
 do_test one-or-more-4 "D cannot start because of target-role"
 do_test one-or-more-5 "Start A and F even though C and D are stopped"
 do_test one-or-more-6 "Leave A running even though B is stopped"
 do_test one-or-more-7 "Leave A running even though C is stopped"
 do_test bug-5140-require-all-false "Allow basegrp:0 to stop"
 do_test clone-require-all-1 "clone B starts node 3 and 4"
 do_test clone-require-all-2 "clone B remains stopped everywhere"
 do_test clone-require-all-3 "clone B stops everywhere because A stops everywhere"
 do_test clone-require-all-4 "clone B remains on node 3 and 4 with only one instance of A remaining."
 do_test clone-require-all-5 "clone B starts on node 1 3 and 4"
 do_test clone-require-all-6 "clone B remains active after shutting down instances of A"
 do_test clone-require-all-7 "clone A and B both start at the same time. all instances of A start before B."
 do_test clone-require-all-no-interleave-1 "C starts everywhere after A and B"
 do_test clone-require-all-no-interleave-2 "C starts on nodes 1, 2, and 4 with only one active instance of B"
 do_test clone-require-all-no-interleave-3 "C remains active when instance of B is stopped on one node and started on another."
 do_test one-or-more-unrunnnable-instances "Avoid dependencies on instances that wont ever be started"
 
 echo ""
 do_test order1 "Order start 1     "
 do_test order2 "Order start 2     "
 do_test order3 "Order stop	  "
 do_test order4 "Order (multiple)  "
 do_test order5 "Order (move)  "
 do_test order6 "Order (move w/ restart)  "
-do_test order7 "Order (manditory)  "
+do_test order7 "Order (mandatory)  "
 do_test order-optional "Order (score=0)  "
 do_test order-required "Order (score=INFINITY)  "
 do_test bug-lf-2171 "Prevent group start when clone is stopped"
 do_test order-clone "Clone ordering should be able to prevent startup of dependent clones"
 do_test order-sets "Ordering for resource sets"
 do_test order-serialize "Serialize resources without inhibiting migration"
 do_test order-serialize-set "Serialize a set of resources without inhibiting migration"
 do_test clone-order-primitive "Order clone start after a primitive"
 do_test clone-order-16instances "Verify ordering of 16 cloned resources"
 do_test order-optional-keyword "Order (optional keyword)"
 do_test order-mandatory "Order (mandatory keyword)"
 do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones"
 do_test ordered-set-basic-startup "Constraint set with default order settings."
 do_test ordered-set-natural "Allow natural set ordering"
 do_test order-wrong-kind "Order (error)"
 
 echo ""
 do_test coloc-loop "Colocation - loop"
 do_test coloc-many-one "Colocation - many-to-one"
 do_test coloc-list "Colocation - many-to-one with list"
 do_test coloc-group "Colocation - groups"
 do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation"
 do_test coloc-attr "Colocation based on node attributes"
 do_test coloc-negative-group "Negative colocation with a group"
 do_test coloc-intra-set "Intra-set colocation"
 do_test bug-lf-2435 "Colocation sets with a negative score"
 do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependent must stop"
 do_test coloc_fp_logic "Verify floating point calculations in colocation are working"
 do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc."
 do_test colo_slave_w_native  "cl#5070 - Verify promotion order is affected when colocating slave to native rsc."
 do_test anti-colocation-order "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node"
 do_test enforce-colo1 "Always enforce B with A INFINITY."
 do_test complex_enforce_colo "Always enforce B with A INFINITY. (make sure heat-engine stops)"
 
 echo ""
 do_test rsc-sets-seq-true "Resource Sets - sequential=false"
 do_test rsc-sets-seq-false "Resource Sets - sequential=true"
 do_test rsc-sets-clone "Resource Sets - Clone"
 do_test rsc-sets-master "Resource Sets - Master"
 do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)"
 
 #echo ""
 #do_test agent1 "version: lt (empty)"
 #do_test agent2 "version: eq	"
 #do_test agent3 "version: gt	"
 
 echo ""
 do_test attrs1 "string: eq (and)     "
 do_test attrs2 "string: lt / gt (and)"
 do_test attrs3 "string: ne (or)      "
 do_test attrs4 "string: exists       "
 do_test attrs5 "string: not_exists   "
 do_test attrs6 "is_dc: true          "
 do_test attrs7 "is_dc: false         "
 do_test attrs8 "score_attribute      "
 do_test per-node-attrs "Per node resource parameters"
 
 echo ""
 do_test mon-rsc-1 "Schedule Monitor - start"
 do_test mon-rsc-2 "Schedule Monitor - move "
 do_test mon-rsc-3 "Schedule Monitor - pending start     "
 do_test mon-rsc-4 "Schedule Monitor - move/pending start"
 
 echo ""
 do_test rec-rsc-0 "Resource Recover - no start     "
 do_test rec-rsc-1 "Resource Recover - start        "
 do_test rec-rsc-2 "Resource Recover - monitor      "
 do_test rec-rsc-3 "Resource Recover - stop - ignore"
 do_test rec-rsc-4 "Resource Recover - stop - block "
 do_test rec-rsc-5 "Resource Recover - stop - fence "
 do_test rec-rsc-6 "Resource Recover - multiple - restart"
 do_test rec-rsc-7 "Resource Recover - multiple - stop   "
 do_test rec-rsc-8 "Resource Recover - multiple - block  "
 do_test rec-rsc-9 "Resource Recover - group/group"
 do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor"
 do_test stop-failure-no-quorum "Stop failure without quorum"
 do_test stop-failure-no-fencing "Stop failure without fencing available"
 do_test stop-failure-with-fencing "Stop failure with fencing available"
 do_test multiple-active-block-group "Support of multiple-active=block for resource groups"
 
 echo ""
 do_test quorum-1 "No quorum - ignore"
 do_test quorum-2 "No quorum - freeze"
 do_test quorum-3 "No quorum - stop  "
 do_test quorum-4 "No quorum - start anyway"
 do_test quorum-5 "No quorum - start anyway (group)"
 do_test quorum-6 "No quorum - start anyway (clone)"
 do_test bug-cl-5212 "No promotion with no-quorum-policy=freeze"
 
 echo ""
 do_test rec-node-1 "Node Recover - Startup   - no fence"
 do_test rec-node-2 "Node Recover - Startup   - fence   "
 do_test rec-node-3 "Node Recover - HA down   - no fence"
 do_test rec-node-4 "Node Recover - HA down   - fence   "
 do_test rec-node-5 "Node Recover - CRM down  - no fence"
 do_test rec-node-6 "Node Recover - CRM down  - fence   "
 do_test rec-node-7 "Node Recover - no quorum - ignore  "
 do_test rec-node-8 "Node Recover - no quorum - freeze  "
 do_test rec-node-9 "Node Recover - no quorum - stop    "
 do_test rec-node-10 "Node Recover - no quorum - stop w/fence"
 do_test rec-node-11 "Node Recover - CRM down w/ group - fence   "
 do_test rec-node-12 "Node Recover - nothing active - fence   "
 do_test rec-node-13 "Node Recover - failed resource + shutdown - fence   "
 do_test rec-node-15 "Node Recover - unknown lrm section"
 do_test rec-node-14 "Serialize all stonith's"
 
 echo ""
 do_test multi1 "Multiple Active (stop/start)"
 
 echo ""
 do_test migrate-begin     "Normal migration"
 do_test migrate-success   "Completed migration"
 do_test migrate-partial-1 "Completed migration, missing stop on source"
 do_test migrate-partial-2 "Successful migrate_to only"
 do_test migrate-partial-3 "Successful migrate_to only, target down"
 do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from"
 do_test bug-5186-partial-migrate "Handle partial migration when src node loses membership"
 
 do_test migrate-fail-2 "Failed migrate_from"
 do_test migrate-fail-3 "Failed migrate_from + stop on source"
 do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target"
 do_test migrate-fail-5 "Failed migrate_from + stop on source and target"
 
 do_test migrate-fail-6 "Failed migrate_to"
 do_test migrate-fail-7 "Failed migrate_to + stop on source"
 do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target"
 do_test migrate-fail-9 "Failed migrate_to + stop on source and target"
 
 do_test migrate-stop "Migration in a stopping stack"
 do_test migrate-start "Migration in a starting stack"
 do_test migrate-stop_start "Migration in a restarting stack"
 do_test migrate-stop-complex "Migration in a complex stopping stack"
 do_test migrate-start-complex "Migration in a complex starting stack"
 do_test migrate-stop-start-complex "Migration in a complex moving stack"
 do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown"
 
 do_test migrate-1 "Migrate (migrate)"
 do_test migrate-2 "Migrate (stable)"
 do_test migrate-3 "Migrate (failed migrate_to)"
 do_test migrate-4 "Migrate (failed migrate_from)"
 do_test novell-252693 "Migration in a stopping stack"
 do_test novell-252693-2 "Migration in a starting stack"
 do_test novell-252693-3 "Non-Migration in a starting and stopping stack"
 do_test bug-1820 "Migration in a group"
 do_test bug-1820-1 "Non-migration in a group"
 do_test migrate-5 "Primitive migration with a clone"
 do_test migrate-fencing "Migration after Fencing"
 do_test migrate-both-vms "Migrate two VMs that have no colocation"
 
 do_test 1-a-then-bm-move-b "Advanced migrate logic. A then B. migrate B."
 do_test 2-am-then-b-move-a "Advanced migrate logic, A then B, migrate A without stopping B"
 do_test 3-am-then-bm-both-migrate "Advanced migrate logic. A then B. migrate both"
 do_test 4-am-then-bm-b-not-migratable "Advanced migrate logic, A then B, B not migratable"
 do_test 5-am-then-bm-a-not-migratable "Advanced migrate logic. A then B. move both, a not migratable"
 do_test 6-migrate-group "Advanced migrate logic, migrate a group"
 do_test 7-migrate-group-one-unmigratable "Advanced migrate logic, migrate group mixed with allow-migrate true/false"
 do_test 8-am-then-bm-a-migrating-b-stopping "Advanced migrate logic, A then B, A migrating, B stopping"
 do_test 9-am-then-bm-b-migrating-a-stopping "Advanced migrate logic, A then B, B migrate, A stopping"
 do_test 10-a-then-bm-b-move-a-clone "Advanced migrate logic, A clone then B, migrate B while stopping A"
 do_test 11-a-then-bm-b-move-a-clone-starting "Advanced migrate logic, A clone then B, B moving while A is start/stopping"
 
 do_test a-promote-then-b-migrate "A promote then B start. migrate B"
 do_test a-demote-then-b-migrate "A demote then B stop. migrate B"
 
 #echo ""
 #do_test complex1 "Complex	"
 
 do_test bug-lf-2422 "Dependency on partially active group - stop ocfs:*"
 
 echo ""
 do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node"
 do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones"
 do_test clone-anon-failcount "Merge failcounts for anonymous clones"
 do_test inc0 "Incarnation start"
 do_test inc1 "Incarnation start order"
 do_test inc2 "Incarnation silent restart, stop, move"
 do_test inc3 "Inter-incarnation ordering, silent restart, stop, move"
 do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)"
 do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)"
 do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)"
 do_test inc7 "Clone colocation"
 do_test inc8 "Clone anti-colocation"
 do_test inc9 "Non-unique clone"
 do_test inc10 "Non-unique clone (stop)"
 do_test inc11 "Primitive colocation with clones"
 do_test inc12 "Clone shutdown"
 do_test cloned-group "Make sure only the correct number of cloned groups are started"
 do_test cloned-group-stop "Ensure stopping qpidd also stops glance and cinder"
 do_test clone-no-shuffle "Don't prioritize allocation of instances that must be moved"
 do_test clone-max-zero "Orphan processing with clone-max=0"
 do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node"
 do_test bug-lf-2160 "Don't shuffle clones due to colocation"
 do_test bug-lf-2213 "clone-node-max enforcement for cloned groups"
 do_test bug-lf-2153 "Clone ordering constraints"
 do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable"
 do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone"
 do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)"
 do_test clone-colocate-instance-2 "Colocation with a specific clone instance"
 do_test clone-order-instance "Ordering with specific clone instances"
 do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation"
 do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups"
 do_test bug-lf-2544 "Balanced clone placement"
 do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0"
 do_test bug-lf-2574 "Avoid clone shuffle"
 do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start"
 do_test bug-cl-5168 "Don't shuffle clones"
 do_test bug-cl-5170 "Prevent clone from starting with on-fail=block"
 do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block"
 do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)"
 do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)"
 do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)"
 
 echo ""
 do_test cloned_start_one  "order first clone then clone... first clone_min=2"
 do_test cloned_start_two  "order first clone then clone... first clone_min=2"
 do_test cloned_stop_one   "order first clone then clone... first clone_min=2"
 do_test cloned_stop_two   "order first clone then clone... first clone_min=2"
 do_test clone_min_interleave_start_one "order first clone then clone... first clone_min=2 and then has interleave=true"
 do_test clone_min_interleave_start_two "order first clone then clone... first clone_min=2 and then has interleave=true"
 do_test clone_min_interleave_stop_one  "order first clone then clone... first clone_min=2 and then has interleave=true"
 do_test clone_min_interleave_stop_two  "order first clone then clone... first clone_min=2 and then has interleave=true"
 do_test clone_min_start_one "order first clone then primitive... first clone_min=2"
 do_test clone_min_start_two "order first clone then primitive... first clone_min=2"
 do_test clone_min_stop_all  "order first clone then primitive... first clone_min=2"
 do_test clone_min_stop_one  "order first clone then primitive... first clone_min=2"
 do_test clone_min_stop_two  "order first clone then primitive... first clone_min=2"
 
 echo ""
 do_test unfence-startup "Clean unfencing"
 do_test unfence-definition "Unfencing when the agent changes"
 do_test unfence-parameters "Unfencing when the agent parameters changes"
 
 echo ""
 do_test master-0 "Stopped -> Slave"
 do_test master-1 "Stopped -> Promote"
 do_test master-2 "Stopped -> Promote : notify"
 do_test master-3 "Stopped -> Promote : master location"
 do_test master-4 "Started -> Promote : master location"
 do_test master-5 "Promoted -> Promoted"
 do_test master-6 "Promoted -> Promoted (2)"
 do_test master-7 "Promoted -> Fenced"
 do_test master-8 "Promoted -> Fenced -> Moved"
 do_test master-9 "Stopped + Promotable + No quorum"
 do_test master-10 "Stopped -> Promotable : notify with monitor"
 do_test master-11 "Stopped -> Promote : colocation"
 do_test novell-239082 "Demote/Promote ordering"
 do_test novell-239087 "Stable master placement"
 do_test master-12 "Promotion based solely on rsc_location constraints"
 do_test master-13 "Include preferences of colocated resources when placing master"
 do_test master-demote "Ordering when actions depends on demoting a slave resource"
 do_test master-ordering "Prevent resources from starting that need a master"
 do_test bug-1765 "Master-Master Colocation (dont stop the slaves)"
 do_test master-group "Promotion of cloned groups"
 do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily"
 do_test master-failed-demote "Don't retry failed demote actions"
 do_test master-failed-demote-2 "Don't retry failed demote actions (notify=false)"
 do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does"
 do_test master-reattach "Re-attach to a running master"
 do_test master-allow-start "Don't include master score if it would prevent allocation"
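-do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints"
+do_test master-colocation "Allow master instances placement to be influenced by colocation constraints"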
 do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly"
 do_test master-role "Prevent target-role from promoting more than master-max instances"
 do_test bug-lf-2358 "Master-Master anti-colocation"
 do_test master-promotion-constraint "Mandatory master colocation constraints"
 do_test unmanaged-master "Ensure role is preserved for unmanaged resources"
 do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters"
 do_test master-demote-2 "Demote does not clear past failure"
 do_test master-move "Move master based on failure of colocated group"
 do_test master-probed-score "Observe the promotion score of probed resources"
 do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint"
 do_test colocation_constraint_stops_slave  "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint"
 do_test order_constraint_stops_master      "cl#5054 - Ensure master is demoted when stopped by order constraint"
 do_test order_constraint_stops_slave       "cl#5054 - Ensure slave is not demoted when stopped by order constraint"
 do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion."
 do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive"
 do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score"
 do_test master-demote-block "Block promotion if demote fails with on-fail=block"
 do_test master-dependent-ban "Don't stop instances from being active because a dependent is banned from that host"
 do_test master-stop "Stop instances due to location constraint with role=Started"
 do_test master-partially-demoted-group "Allow partially demoted group to finish demoting"
 do_test bug-cl-5213 "Ensure role colocation with -INFINITY is enforced"
 do_test bug-cl-5219 "Allow unrelated resources with a common colocation target to remain promoted"
 do_test master-asymmetrical-order "Fix the behaviors of multi-state resources with asymmetrical ordering"
 do_test master-notify "Master promotion with notifies"
 
 echo ""
 do_test history-1 "Correctly parse stateful-1 resource state"
 
 echo ""
 do_test managed-0 "Managed (reference)"
 do_test managed-1 "Not managed - down "
 do_test managed-2 "Not managed - up   "
 do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource"
 do_test bug-5028-detach "Ensure detach still works"
 do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack"
 do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged "
 do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged "
 do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged "
 do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged "
 do_test unmanaged-block-restart "Block restart of resources if any dependent resource in a group is unmanaged"
 
 echo ""
 do_test interleave-0 "Interleave (reference)"
 do_test interleave-1 "coloc - not interleaved"
 do_test interleave-2 "coloc - interleaved   "
 do_test interleave-3 "coloc - interleaved (2)"
 do_test interleave-pseudo-stop "Interleaved clone during stonith"
 do_test interleave-stop "Interleaved clone during stop"
 do_test interleave-restart "Interleaved clone during dependency restart"
 
 echo ""
 do_test notify-0 "Notify reference"
 do_test notify-1 "Notify simple"
 do_test notify-2 "Notify simple, confirm"
 do_test notify-3 "Notify move, confirm"
 do_test novell-239079 "Notification priority"
 #do_test notify-2 "Notify - 764"
 
 echo ""
 do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition"
 do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1"
 do_test 696 "OSDL #696 - CRM starts stonith RA without monitor"
 do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop"
 do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3"
 do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1"
 do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id"
 do_test 829 "OSDL #829"
 do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted"
 do_test 994-2 "OSDL #994 - with a dependent resource"
 do_test 1360 "OSDL #1360 - Clone stickiness"
 do_test 1484 "OSDL #1484 - on_fail=stop"
 do_test 1494 "OSDL #1494 - Clone stability"
 do_test unrunnable-1 "Unrunnable"
 do_test stonith-0 "Stonith loop - 1"
 do_test stonith-1 "Stonith loop - 2"
 do_test stonith-2 "Stonith loop - 3"
 do_test stonith-3 "Stonith startup"
 do_test stonith-4 "Stonith node state"
 do_test bug-1572-1 "Recovery of groups depending on master/slave"
 do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted"
 do_test bug-1685 "Depends-on-master ordering"
 do_test bug-1822 "Don't promote partially active groups"
 do_test bug-pm-11 "New resource added to a m/s group"
 do_test bug-pm-12 "Recover only the failed portion of a cloned group"
 do_test bug-n-387749 "Don't shuffle clone instances"
 do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped"
 do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node"
 do_test bug-lf-1920 "Correctly handle probes that find active resources"
 do_test bnc-515172 "Location constraint with multiple expressions"
 do_test colocate-primitive-with-clone "Optional colocation with a clone"
 do_test use-after-free-merge "Use-after-free in native_merge_weights"
 do_test bug-lf-2551 "STONITH ordering for stop"
 do_test bug-lf-2606 "Stonith implies demote"
 do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults"
 do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering"
 do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false"
 do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false"
 do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false"
 do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts."
 do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false"
 do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false."
 do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false."
 do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false."
 do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false."
 do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false"
 do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true"
 do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources."
 do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases"
 do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload"
 do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change."
 do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart"
 do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed."
 do_test failcount "Ensure failcounts are correctly expired"
 do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present"
 do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart"
-do_test monitor-onfail-stop    "bug-5058 - Monitor failure wiht on-fail set to stop"
+do_test monitor-onfail-stop    "bug-5058 - Monitor failure with on-fail set to stop"
 do_test bug-5059 "No need to restart p_stateful1:*"
 do_test bug-5069-op-enabled  "Test on-fail=ignore with failure when monitor is enabled."
-do_test bug-5069-op-disabled "Test on-fail-ignore with failure when monitor is disabled."
+do_test bug-5069-op-disabled "Test on-fail=ignore with failure when monitor is disabled."
 do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections"
 do_test expire-non-blocked-failure "Ignore failure-timeout only if the failed operation has on-fail=block"
 
 do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc."
 do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith."
 do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group"
 do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group"
 do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)."
 do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)"
 do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group."
 do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs."
 do_test probe-timeout "cl#5099 - Default probe timeout"
 
 do_test concurrent-fencing "Allow performing fencing operations in parallel"
 
 echo ""
 do_test systemhealth1  "System Health ()               #1"
 do_test systemhealth2  "System Health ()               #2"
 do_test systemhealth3  "System Health ()               #3"
 do_test systemhealthn1 "System Health (None)           #1"
 do_test systemhealthn2 "System Health (None)           #2"
 do_test systemhealthn3 "System Health (None)           #3"
 do_test systemhealthm1 "System Health (Migrate On Red) #1"
 do_test systemhealthm2 "System Health (Migrate On Red) #2"
 do_test systemhealthm3 "System Health (Migrate On Red) #3"
 do_test systemhealtho1 "System Health (Only Green)     #1"
 do_test systemhealtho2 "System Health (Only Green)     #2"
 do_test systemhealtho3 "System Health (Only Green)     #3"
 do_test systemhealthp1 "System Health (Progessive)     #1"
 do_test systemhealthp2 "System Health (Progessive)     #2"
 do_test systemhealthp3 "System Health (Progessive)     #3"
 
 echo ""
 do_test utilization "Placement Strategy - utilization"
 do_test minimal     "Placement Strategy - minimal"
 do_test balanced    "Placement Strategy - balanced"
 
 echo ""
 do_test placement-stickiness "Optimized Placement Strategy - stickiness"
 do_test placement-priority   "Optimized Placement Strategy - priority"
 do_test placement-location   "Optimized Placement Strategy - location"
 do_test placement-capacity   "Optimized Placement Strategy - capacity"
 
 echo ""
 do_test utilization-order1 "Utilization Order - Simple"
 do_test utilization-order2 "Utilization Order - Complex"
 do_test utilization-order3 "Utilization Order - Migrate"
 do_test utilization-order4 "Utilization Order - Live Mirgration (bnc#695440)"
 do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3"
 do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)"
 do_test load-stopped-loop-2 "cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering"
 
 echo ""
 do_test colocated-utilization-primitive-1 "Colocated Utilization - Primitive"
 do_test colocated-utilization-primitive-2 "Colocated Utilization - Choose the most capable node"
 do_test colocated-utilization-group "Colocated Utilization - Group"
 do_test colocated-utilization-clone "Colocated Utilization - Clone"
 
 do_test utilization-check-allowed-nodes "Only check the capacities of the nodes that can run the resource"
 
 echo ""
 do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources"
 do_test node-maintenance-1 "cl#5128 - Node maintenance"
 do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)"
 
 do_test rsc-maintenance "Per-resource maintenance"
 
 echo ""
 do_test not-installed-agent "The resource agent is missing"
 do_test not-installed-tools "Something the resource agent needs is missing"
 
 echo ""
 do_test stopped-monitor-00	"Stopped Monitor - initial start"
 do_test stopped-monitor-01	"Stopped Monitor - failed started"
 do_test stopped-monitor-02	"Stopped Monitor - started multi-up"
 do_test stopped-monitor-03	"Stopped Monitor - stop started"
 do_test stopped-monitor-04	"Stopped Monitor - failed stop"
 do_test stopped-monitor-05	"Stopped Monitor - start unmanaged"
 do_test stopped-monitor-06	"Stopped Monitor - unmanaged multi-up"
 do_test stopped-monitor-07	"Stopped Monitor - start unmanaged multi-up"
 do_test stopped-monitor-08	"Stopped Monitor - migrate"
 do_test stopped-monitor-09	"Stopped Monitor - unmanage started"
 do_test stopped-monitor-10	"Stopped Monitor - unmanaged started multi-up"
 do_test stopped-monitor-11	"Stopped Monitor - stop unmanaged started"
 do_test stopped-monitor-12	"Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")"
 do_test stopped-monitor-20	"Stopped Monitor - initial stop"
 do_test stopped-monitor-21	"Stopped Monitor - stopped single-up"
 do_test stopped-monitor-22	"Stopped Monitor - stopped multi-up"
 do_test stopped-monitor-23	"Stopped Monitor - start stopped"
 do_test stopped-monitor-24	"Stopped Monitor - unmanage stopped"
 do_test stopped-monitor-25	"Stopped Monitor - unmanaged stopped multi-up"
 do_test stopped-monitor-26	"Stopped Monitor - start unmanaged stopped"
 do_test stopped-monitor-27	"Stopped Monitor - unmanaged stopped multi-up (target-role="Started")"
 do_test stopped-monitor-30	"Stopped Monitor - new node started"
 do_test stopped-monitor-31	"Stopped Monitor - new node stopped"
 
 echo""
 do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)"
 do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)"
 do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)"
 do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)"
 do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)"
 do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)"
 do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)"
 do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)"
 do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)"
 do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)"
 do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)"
 do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)"
 
 do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)"
 do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)"
 do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)"
 do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)"
 do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)"
 do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)"
 do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)"
 do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)"
 do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)"
 do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)"
 do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)"
 do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)"
 
 echo""
 do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)"
 do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)"
 do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)"
 do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)"
 do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)"
 do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)"
 do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)"
 do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)"
 do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)"
 do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)"
 do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)"
 do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)"
 
 do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)"
 do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)"
 do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)"
 do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)"
 do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)"
 do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)"
 do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)"
 do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)"
 do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)"
 do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)"
 do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)"
 do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)"
 
 echo""
 do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)"
 do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)"
 do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)"
 do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)"
 do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)"
 do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)"
 do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)"
 do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)"
 do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)"
 do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)"
 do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)"
 do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)"
 
 do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)"
 do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)"
 do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)"
 do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)"
 do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)"
 do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)"
 do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)"
 do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)"
 do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)"
 do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)"
 do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)"
 do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)"
 
 echo""
 do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)"
 do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)"
 do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)"
 do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)"
 do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)"
 do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)"
 do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)"
 do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)"
 do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)"
 do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)"
 do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)"
 do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)"
 
 do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)"
 do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)"
 do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)"
 do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)"
 do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)"
 do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)"
 do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)"
 do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)"
 do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)"
 do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)"
 do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)"
 do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)"
 
 echo ""
 do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)"
 do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)"
 do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)"
 do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)"
 do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)"
 do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)"
 do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)"
 
 do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)"
 do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)"
 do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)"
 do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)"
 do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)"
 do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)"
 do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)"
 
 do_test cluster-specific-params "Cluster-specific instance attributes based on rules"
 do_test site-specific-params "Site-specific instance attributes based on rules"
 
 echo ""
 do_test template-1 "Template - 1"
 do_test template-2 "Template - 2"
 do_test template-3 "Template - 3 (merge operations)"
 
 do_test template-coloc-1 "Template - Colocation 1"
 do_test template-coloc-2 "Template - Colocation 2"
 do_test template-coloc-3 "Template - Colocation 3"
 do_test template-order-1 "Template - Order 1"
 do_test template-order-2 "Template - Order 2"
 do_test template-order-3 "Template - Order 3"
 do_test template-ticket  "Template - Ticket"
 
 do_test template-rsc-sets-1  "Template - Resource Sets 1"
 do_test template-rsc-sets-2  "Template - Resource Sets 2"
 do_test template-rsc-sets-3  "Template - Resource Sets 3"
 do_test template-rsc-sets-4  "Template - Resource Sets 4"
 
 do_test template-clone-primitive "Cloned primitive from template"
 do_test template-clone-group     "Cloned group from template"
 
 do_test location-sets-templates "Resource sets and templates - Location"
 
 do_test tags-coloc-order-1 "Tags - Colocation and Order (Simple)"
 do_test tags-coloc-order-2 "Tags - Colocation and Order (Resource Sets with Templates)"
 do_test tags-location      "Tags - Location"
 do_test tags-ticket        "Tags - Ticket"
 
 echo ""
 do_test container-1 "Container - initial"
 do_test container-2 "Container - monitor failed"
 do_test container-3 "Container - stop failed"
 do_test container-4 "Container - reached migration-threshold"
 do_test container-group-1 "Container in group - initial"
 do_test container-group-2 "Container in group - monitor failed"
 do_test container-group-3 "Container in group - stop failed"
 do_test container-group-4 "Container in group - reached migration-threshold"
 do_test container-is-remote-node "Place resource within container when container is remote-node"
 do_test bug-rh-1097457 "Kill user defined container/contents ordering"
 do_test bug-cl-5247 "Graph loop when recovering m/s resource in a container"
 
 echo ""
 do_test whitebox-fail1 "Fail whitebox container rsc."
 do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection."
 do_test whitebox-fail3 "Failed containers should not run nested on remote nodes."
 do_test whitebox-start "Start whitebox container with resources assigned to it"
 do_test whitebox-stop "Stop whitebox container with resources assigned to it"
 do_test whitebox-move "Move whitebox container with resources assigned to it"
 do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource"
 do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established"
 do_test whitebox-orphaned    "Properly shutdown orphaned whitebox container"
 do_test whitebox-orphan-ms   "Properly tear down orphan ms resources on remote-nodes"
 do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start."
 do_test whitebox-migrate1 "Migrate both container and connection resource"
 do_test whitebox-imply-stop-on-fence "imply stop action on container node rsc when host node is fenced"
 do_test whitebox-nested-group "Verify guest remote-node works nested in a group"
 
 echo ""
 do_test remote-startup-probes  "Baremetal remote-node startup probes"
 do_test remote-startup         "Startup a newly discovered remote-nodes with no status."
 do_test remote-fence-unclean   "Fence unclean baremetal remote-node"
 do_test remote-fence-unclean2  "Fence baremetal remote-node after cluster node fails and connection can not be recovered"
 do_test remote-move            "Move remote-node connection resource"
 do_test remote-disable         "Disable a baremetal remote-node"
 do_test remote-orphaned        "Properly shutdown orphaned connection resource"
 do_test remote-orphaned2       "verify we can handle orphaned remote connections with active resources on the remote"
 do_test remote-recover         "Recover connection resource after cluster-node fails."
 do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section"
 do_test remote-partial-migrate  "Make sure partial migrations are handled before ops on the remote node."
 do_test remote-partial-migrate2 "Make sure partial migration target is prefered for remote connection."
 do_test remote-recover-fail     "Make sure start failure causes fencing if rsc are active on remote."
 do_test remote-start-fail       "Make sure a start failure does not result in fencing if no active resources are on remote."
 do_test remote-unclean2         "Make monitor failure always results in fencing, even if no rsc are active on remote."
 do_test remote-fence-before-reconnect "Fence before clearing recurring monitor failure"
 
 echo ""
 do_test resource-discovery      "Exercises resource-discovery location constraint option."
 do_test rsc-discovery-per-node  "Disable resource discovery per node"
 
 echo ""
 do_test isolation-start-all   "Start docker isolated resources."
 do_test isolation-restart-all "Restart docker isolated resources."
 do_test isolation-clone       "Cloned isolated primitive."
 
 echo ""
 test_results
diff --git a/tools/cib_shadow.c b/tools/cib_shadow.c
index 845486c145..2fe66f1302 100644
--- a/tools/cib_shadow.c
+++ b/tools/cib_shadow.c
@@ -1,477 +1,477 @@
 
 /*
  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <crm_internal.h>
 
 #include <stdio.h>
 #include <unistd.h>
 
 #include <sys/param.h>
 #include <crm/crm.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 
 #include <stdlib.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
 
 #include <crm/common/ipc.h>
 
 #include <crm/cib.h>
 
 /* Not referenced anywhere else in this file. */
 int exit_code = pcmk_ok;
 /* Not referenced anywhere else in this file. */
 GMainLoop *mainloop = NULL;
 
 /* Not referenced anywhere else in this file. */
 const char *host = NULL;
 /* Declared here but neither defined nor called in this file;
  * main() produces its usage output through crm_help() instead. */
 void usage(const char *cmd, int exit_status);
 
 /* Option bits handed to every CIB query/replace call made by main().
  * --verbose ORs in cib_verbose and --force ORs in cib_quorum_override. */
 int command_options = cib_sync_call;
 /* Not referenced anywhere else in this file. */
 const char *cib_action = NULL;
 
 /* Connection object for the live cluster CIB; main() creates it with
  * cib_new_no_shadow() for the commands that need the running cluster. */
 cib_t *real_cib = NULL;
 
 /* Set to 1 by --force; bypasses the safety checks in main(). */
 static int force_flag = 0;
 /* Set to 1 by --batch; when set, shadow_setup() does not exec a new shell. */
 static int batch_flag = 0;
 
 /*
  * Build the PS1 prompt string used inside a crm_shadow shell.
  *
  * name is embedded in the prompt, truncated to at most 40 characters by the
  * "%.40s" conversion.  Both callers in this file release the result with
  * free().
  */
 static char *
 get_shadow_prompt(const char *name)
 {
     char *prompt_text = crm_strdup_printf("shadow[%.40s] # ", name);
 
     return prompt_text;
 }
 
 /*
  * Activate the shadow instance called name for the user.
  *
  * If PS1 already equals the shadow prompt for name, nothing more is done.
  * Otherwise, unless --batch was given or SHELL is unset, PS1 and CIB_shadow
  * are exported and the user's shell is exec'd in place of this process.
  * In every remaining case (including a failed exec) the user is told which
  * variable assignment to paste into their shell; do_switch only selects the
  * wording of that message.
  */
 static void
 shadow_setup(char *name, gboolean do_switch)
 {
     const char *current_ps1 = getenv("PS1");
     const char *user_shell = getenv("SHELL");
     char *shadow_ps1 = get_shadow_prompt(name);
 
     printf("Setting up shadow instance\n");
 
     /* Already inside a shell for this shadow instance: nothing to do */
     if (safe_str_eq(shadow_ps1, current_ps1)) {
         free(shadow_ps1);
         return;
     }
 
     if (batch_flag == FALSE && user_shell != NULL) {
         setenv("PS1", shadow_ps1, 1);
         setenv("CIB_shadow", name, 1);
         printf("Type Ctrl-D to exit the crm_shadow shell\n");
 
         if (strstr(user_shell, "bash") != NULL) {
             /* Skip the startup files so they cannot replace our PS1 */
             execl(user_shell, user_shell, "--norc", "--noprofile", NULL);
         } else {
             execl(user_shell, user_shell, NULL);
         }
         /* Only reached when execl() failed; fall through to the hint below */
 
     } else if (do_switch) {
         printf("To switch to the named shadow instance, paste the following into your shell:\n");
 
     } else {
         printf
             ("A new shadow instance was created.  To begin using it paste the following into your shell:\n");
     }
 
     printf("  CIB_shadow=%s ; export CIB_shadow\n", name);
     free(shadow_ps1);
 }
 
 /*
  * Tell the user how to leave the shadow instance called name.
  *
  * When the current PS1 contains the shadow prompt for name, the user is
  * asked to exit that shell; otherwise they are reminded to unset CIB_shadow
  * themselves.
  */
 static void
 shadow_teardown(char *name)
 {
     const char *current_ps1 = getenv("PS1");
     char *shadow_ps1 = get_shadow_prompt(name);
     int in_shadow_shell = 0;
 
     if (current_ps1 != NULL) {
         in_shadow_shell = (strstr(current_ps1, shadow_ps1) != NULL);
     }
 
     if (!in_shadow_shell) {
         printf
             ("Please remember to unset the CIB_shadow variable by pasting the following into your shell:\n");
         printf("  unset CIB_shadow\n");
 
     } else {
         printf("Now type Ctrl-D to exit the crm_shadow shell\n");
     }
 
     free(shadow_ps1);
 }
 
 /* *INDENT-OFF* */
 /*
  * Command-line option table handed to crm_set_options() in main().
  *
  * Each row reads as: long name, argument requirement, flag pointer, the
  * character that main()'s option switch receives, help text, and (for some
  * rows) a pcmk_option_* display flag.
  * NOTE(review): the field meanings are inferred from the getopt-style
  * initializers; confirm against the struct crm_option declaration.
  *
  * Rows named "-spacer-" carry only text (section headings and examples) and
  * presumably exist purely for the help output -- TODO confirm.
  *
  * The table is terminated by an all-zero row.
  */
 static struct crm_option long_options[] = {
     /* Top-level Options */
     {"help",    0, 0, '?', "\t\tThis text"},
     {"version", 0, 0, '$', "\t\tVersion information"  },
     {"verbose", 0, 0, 'V', "\t\tIncrease debug output"},
 
     /* Queries: main() takes the instance name from $CIB_shadow for these */
     {"-spacer-",	1, 0, '-', "\nQueries:"},
     {"which",   no_argument,       NULL, 'w', "\t\tIndicate the active shadow copy"},
     {"display", no_argument,       NULL, 'p', "\t\tDisplay the contents of the active shadow copy"},
     {"edit",    no_argument,       NULL, 'E', "\t\tEdit the contents of the active shadow copy with your favorite $EDITOR"},
     {"diff",    no_argument,       NULL, 'd', "\t\tDisplay the changes in the active shadow copy\n"},
     {"file",    no_argument,       NULL, 'F', "\t\tDisplay the location of the active shadow copy file\n"},
 
     /* Commands: each takes the shadow instance name as its argument */
     {"-spacer-",	1, 0, '-', "\nCommands:"},
     {"create",		required_argument, NULL, 'c', "\tCreate the named shadow copy of the active cluster configuration"},
     {"create-empty",	required_argument, NULL, 'e', "Create the named shadow copy with an empty cluster configuration. Optional: --validate-with"},
     {"commit",  required_argument, NULL, 'C', "\tUpload the contents of the named shadow copy to the cluster"},
     {"delete",  required_argument, NULL, 'D', "\tDelete the contents of the named shadow copy"},
     {"reset",   required_argument, NULL, 'r', "\tRecreate the named shadow copy from the active cluster configuration"},
     {"switch",  required_argument, NULL, 's', "\t(Advanced) Switch to the named shadow copy"},
 
     /* Modifiers that adjust how the selected query/command behaves */
     {"-spacer-",	1, 0, '-', "\nAdditional Options:"},
     {"force",	no_argument, NULL, 'f', "\t\t(Advanced) Force the action to be performed"},
     {"batch",   no_argument, NULL, 'b', "\t\t(Advanced) Don't spawn a new shell" },
     {"all",     no_argument, NULL, 'a', "\t\t(Advanced) Upload the entire CIB, including status, with --commit" },
     {"validate-with",     required_argument, NULL, 'v', "(Advanced) Create an older configuration version" },
 
     /* Usage examples shown in the help text */
     {"-spacer-",	1, 0, '-', "\nExamples:", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', "Create a blank shadow configuration:", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_shadow --create-empty myShadow", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Create a shadow configuration from the running cluster:", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_shadow --create myShadow", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Display the current shadow configuration:", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_shadow --display", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Discard the current shadow configuration (named myShadow):", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_shadow --delete myShadow", pcmk_option_example},
     {"-spacer-",	1, 0, '-', "Upload the current shadow configuration (named myShadow) to the running cluster:", pcmk_option_paragraph},
     {"-spacer-",	1, 0, '-', " crm_shadow --commit myShadow", pcmk_option_example},
 
     {0, 0, 0, 0}
 };
 /* *INDENT-ON* */
 
 /*
  * Entry point for crm_shadow.
  *
  * Parses the command line into one command character plus modifier flags,
  * resolves the shadow instance name (from the option argument, or from the
  * CIB_shadow environment variable for queries), applies the --force safety
  * checks, and then carries out the selected query or command on the shadow
  * file.
  *
  * Apart from the crm_help()/crm_exit() calls made while parsing and a
  * successful exec of a shell or editor, every path leaves through the "done"
  * label, which frees the shadow name and file path and hands rc to
  * crm_exit().
  */
 int
 main(int argc, char **argv)
 {
     int rc = 0;
     int flag;
     int argerr = 0;
     static int command = '?';   /* stays '?' when no query/command option is given */
     const char *validation = NULL;
     char *shadow = NULL;        /* heap copy of the shadow instance name */
     char *shadow_file = NULL;   /* path returned by get_shadow_file() */
     gboolean full_upload = FALSE;
     gboolean dangerous_cmd = FALSE;
     struct stat buf;
     int option_index = 0;
 
     crm_log_cli_init("crm_shadow");
     crm_set_options(NULL, "(query|command) [modifiers]", long_options,
                     "Perform configuration changes in a sandbox before updating the live cluster."
                     "\n\nSets up an environment in which configuration tools (cibadmin, crm_resource, etc) work"
                     " offline instead of against a live cluster, allowing changes to be previewed and tested"
                     " for side-effects.\n");
 
     if (argc < 2) {
         crm_help('?', EX_USAGE);
     }
 
     /* Option parsing: the last query/command option seen wins */
     while (1) {
         flag = crm_get_option(argc, argv, &option_index);
         if (flag == -1 || flag == 0)
             break;
 
         switch (flag) {
             case 'a':
                 full_upload = TRUE;
                 break;
             /* Queries act on the active instance, named by $CIB_shadow */
             case 'd':
             case 'E':
             case 'p':
             case 'w':
             case 'F':
                 command = flag;
                 free(shadow);
                 shadow = NULL;
                 {
                     const char *env = getenv("CIB_shadow");
                     if(env) {
                         shadow = strdup(env);
                     } else {
                         fprintf(stderr, "No active shadow configuration defined\n");
                         crm_exit(ENOENT);
                     }
                 }
                 break;
             case 'v':
                 validation = optarg;
                 break;
             /* Commands that take the instance name as their argument */
             case 'e':
             case 'c':
             case 's':
             case 'r':
                 command = flag;
                 free(shadow);
                 shadow = strdup(optarg);
                 break;
             /* Commit and delete additionally require --force (checked below) */
             case 'C':
             case 'D':
                 command = flag;
                 dangerous_cmd = TRUE;
                 free(shadow);
                 shadow = strdup(optarg);
                 break;
             case 'V':
                 command_options = command_options | cib_verbose;
                 crm_bump_log_level(argc, argv);
                 break;
             case '$':
             case '?':
                 crm_help(flag, EX_OK);
                 break;
             case 'f':
                 command_options |= cib_quorum_override;
                 force_flag = 1;
                 break;
             case 'b':
                 batch_flag = 1;
                 break;
             default:
                 printf("Argument code 0%o (%c)" " is not (?yet?) supported\n", flag, flag);
                 ++argerr;
                 break;
         }
     }
 
     /* Any leftover non-option argument is a usage error */
     if (optind < argc) {
         printf("non-option ARGV-elements: ");
         while (optind < argc)
             printf("%s ", argv[optind++]);
         printf("\n");
         crm_help('?', EX_USAGE);
     }
 
     if (optind > argc) {
         ++argerr;
     }
 
     if (argerr) {
         crm_help('?', EX_USAGE);
     }
 
     if (command == 'w') {
         /* which shadow instance is active? */
         const char *local = getenv("CIB_shadow");
 
         /* NOTE(review): the option loop already calls crm_exit(ENOENT) for 'w'
          * when CIB_shadow is unset, so this branch looks unreachable provided
          * crm_exit() does not return -- confirm. */
         if (local == NULL) {
             fprintf(stderr, "No shadow instance provided\n");
             rc = -ENXIO;
             goto done;
         }
         fprintf(stdout, "%s\n", local);
         rc = 0;
         goto done;
     }
 
     /* shadow is only assigned by a query/command option, so it is still NULL
      * when the user passed modifiers alone */
     if (shadow == NULL) {
         fprintf(stderr, "No shadow instance provided\n");
         fflush(stderr);
         rc = -EINVAL;
         goto done;
 
     } else if (command != 's' && command != 'c') {
         /* Refuse to act on an instance other than the active one unless forced */
         const char *local = getenv("CIB_shadow");
 
         if (local != NULL && safe_str_neq(local, shadow) && force_flag == FALSE) {
             fprintf(stderr,
                     "The supplied shadow instance (%s) is not the same as the active one (%s).\n"
                     "  To prevent accidental destruction of the cluster,"
                     " the --force flag is required in order to proceed.\n", shadow, local);
             fflush(stderr);
             /* NOTE(review): rc is a positive EX_USAGE here but a negative errno
              * on most other failure paths -- confirm crm_exit() copes with both. */
             rc = EX_USAGE;
             goto done;
         }
     }
 
     if (dangerous_cmd && force_flag == FALSE) {
         fprintf(stderr, "The supplied command is considered dangerous."
                 "  To prevent accidental destruction of the cluster,"
                 " the --force flag is required in order to proceed.\n");
         fflush(stderr);
         rc = EX_USAGE;
         goto done;
     }
 
     /* presumably maps the instance name to its on-disk file path -- the
      * helper is defined outside this file */
     shadow_file = get_shadow_file(shadow);
     if (command == 'D') {
         /* delete the file */
         rc = stat(shadow_file, &buf);
         if (rc == 0) {
             rc = unlink(shadow_file);
             if (rc != 0) {
                 fprintf(stderr, "Could not remove shadow instance '%s': %s\n", shadow,
                         strerror(errno));
                 goto done;
             }
         }
 
         /* NOTE(review): when stat() failed above, rc still holds its non-zero
          * return value and reaches crm_exit() without any message. */
         shadow_teardown(shadow);
         goto done;
 
     } else if (command == 'F') {
         printf("%s\n", shadow_file);
         rc = 0;
         goto done;
     }
 
     /* diff, reset, create and commit need a connection to the live CIB */
     if (command == 'd' || command == 'r' || command == 'c' || command == 'C') {
         real_cib = cib_new_no_shadow();
         rc = real_cib->cmds->signon(real_cib, crm_system_name, cib_command);
         if (rc != pcmk_ok) {
             fprintf(stderr, "Signon to CIB failed: %s\n", pcmk_strerror(rc));
             goto done;
         }
     }
 
     rc = stat(shadow_file, &buf);
 
     /* Creating over an existing file needs --force; every other remaining
      * command needs the file to exist already */
     if (command == 'e' || command == 'c') {
         if (rc == 0 && force_flag == FALSE) {
             fprintf(stderr, "A shadow instance '%s' already exists.\n"
                     "  To prevent accidental destruction of the cluster,"
                     " the --force flag is required in order to proceed.\n", shadow);
             rc = -ENOTUNIQ;
             goto done;
         }
 
     } else if (rc != 0) {
         fprintf(stderr, "Could not access shadow instance '%s': %s\n", shadow, strerror(errno));
         rc = -ENXIO;
         goto done;
     }
 
     rc = pcmk_ok;
     if (command == 'c' || command == 'e' || command == 'r') {
         xmlNode *output = NULL;
 
         /* create a shadow instance based on the current cluster config */
         if (command == 'c' || command == 'r') {
             rc = real_cib->cmds->query(real_cib, NULL, &output, command_options);
             if (rc != pcmk_ok) {
                 fprintf(stderr, "Could not connect to the CIB: %s\n", pcmk_strerror(rc));
                 goto done;
             }
 
         } else {
             /* --create-empty: start from a blank CIB, optionally overriding
              * its validation attribute with the --validate-with value */
             output = createEmptyCib(0);
             if(validation) {
                 crm_xml_add(output, XML_ATTR_VALIDATION, validation);
             }
             printf("Created new %s configuration\n",
                    crm_element_value(output, XML_ATTR_VALIDATION));
         }
 
         rc = write_xml_file(output, shadow_file, FALSE);
         free_xml(output);
 
         if (rc < 0) {
             fprintf(stderr, "Could not %s the shadow instance '%s': %s\n",
                     command == 'r' ? "reset" : "create",
                     shadow, strerror(errno));
             goto done;
         }
         /* May replace this process with a shell (see shadow_setup) */
         shadow_setup(shadow, FALSE);
         rc = pcmk_ok;
 
     } else if (command == 'E') {
         const char *err = NULL;
         char *editor = getenv("EDITOR");
 
         if (editor == NULL) {
             fprintf(stderr, "No value for $EDITOR defined\n");
             rc = -EINVAL;
             goto done;
         }
 
         /* NOTE(review): the second argument becomes the editor's argv[0], so
          * the editor sees "--" as its own program name rather than as an
          * end-of-options marker -- confirm this is intended. */
         execlp(editor, "--", shadow_file, NULL);
         /* Only reached when execlp() failed */
         err = strerror(errno);
         fprintf(stderr, "Could not invoke $EDITOR (%s %s): %s\n", editor, shadow_file, err);
         rc = -EINVAL;
         goto done;
 
     } else if (command == 's') {
         shadow_setup(shadow, TRUE);
         rc = 0;
         goto done;
 
-    } else if (command == 'P') {
+    } else if (command == 'p') {
         /* display the current contents */
         char *output_s = NULL;
         /* NOTE(review): the parse result is used without a NULL check */
         xmlNode *output = filename2xml(shadow_file);
 
         output_s = dump_xml_formatted(output);
         printf("%s", output_s);
 
         free(output_s);
         free_xml(output);
 
     } else if (command == 'd') {
         /* diff against cluster */
         xmlNode *diff = NULL;
         xmlNode *old_config = NULL;
         xmlNode *new_config = filename2xml(shadow_file);
 
         rc = real_cib->cmds->query(real_cib, NULL, &old_config, command_options);
 
         if (rc != pcmk_ok) {
             fprintf(stderr, "Could not query the CIB: %s\n", pcmk_strerror(rc));
             goto done;
         }
 
         xml_track_changes(new_config, NULL, new_config, FALSE);
         xml_calculate_changes(old_config, new_config);
 
         diff = xml_create_patchset(0, old_config, new_config, NULL, FALSE);
 
         xml_log_changes(LOG_INFO, __FUNCTION__, new_config);
         xml_accept_changes(new_config);
 
         /* Exit status 1 reports that the shadow copy differs from the cluster.
          * diff, old_config and new_config are not freed on any path here; the
          * process exits immediately afterwards. */
         if (diff != NULL) {
             xml_log_patchset(0, "  ", diff);
             rc = 1;
             goto done;
         }
         rc = 0;
         goto done;
 
     } else if (command == 'C') {
         /* commit to the cluster */
         xmlNode *input = filename2xml(shadow_file);
 
         /* --all replaces the whole CIB; otherwise only the configuration
          * section of the shadow copy is uploaded */
         if (full_upload) {
             rc = real_cib->cmds->replace(real_cib, NULL, input, command_options);
         } else {
             xmlNode *config = first_named_child(input, XML_CIB_TAG_CONFIGURATION);
 
             rc = real_cib->cmds->replace(real_cib, XML_CIB_TAG_CONFIGURATION, config,
                                          command_options);
         }
 
         if (rc != pcmk_ok) {
             fprintf(stderr, "Could not commit shadow instance '%s' to the CIB: %s\n",
                     shadow, pcmk_strerror(rc));
             /* input is not freed on this path; the process exits right after */
             goto done;
         }
         shadow_teardown(shadow);
         free_xml(input);
     }
   done:
     /* Common exit: release the name and path, then exit with rc */
     free(shadow_file);
     free(shadow);
     return crm_exit(rc);
 }
diff --git a/version.m4 b/version.m4
index 70596af445..d5aaeb4715 100644
--- a/version.m4
+++ b/version.m4
@@ -1 +1,2 @@
 m4_define([VERSION_NUMBER], [1.1.14])
+m4_define([PCMK_URL], [http://clusterlabs.org])
diff --git a/xml/Makefile.am b/xml/Makefile.am
index de522c719b..e05166a546 100644
--- a/xml/Makefile.am
+++ b/xml/Makefile.am
@@ -1,128 +1,171 @@
 #
 # Copyright (C) 2004 Andrew Beekhof
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 # 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 # 
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 #
 MAINTAINERCLEANFILES    = Makefile.in
 
 dtddir			= $(CRM_DTD_DIRECTORY)
 dtd_SCRIPTS		= crm.dtd crm-transitional.dtd
 
 xsltdir			= $(dtddir)
 xslt_SCRIPTS		= upgrade06.xsl upgrade-*.xsl
 
 RNGdir			= $(dtddir)
 
 # See Readme.md for details on updating schema files
 
 # Sorted list of available numeric RNG versions,
 # extracted from filenames like NAME-MAJOR[.MINOR][.MINOR-MINOR].rng
 RNG_numeric_versions    = $(shell ls -1 *.rng \
 			  | sed -E -n -e 's/^.*-([0-9.]+).rng$$/\1/p' \
 			  | sort -u -t. -k 1,1n -k 2,2n -k 3,3n)
 
 # The highest numeric version
 RNG_max			?= $(lastword $(RNG_numeric_versions))
 
-# The previous numeric version before $(RNG_max)
-RNG_last		?= $(shell ls -1 *.rng \
-			   | sed -n -e 's/^.*-\([0-9.]\+\).rng$$/\1/p' \
-			   | sort -u -t. -k 1,1nr -k 2,2nr -k 3,3nr \
-			   | head -n 2 | tail -n 1)
-
 # A sorted list of all RNG versions (numeric and "next")
 RNG_versions		= next $(RNG_numeric_versions)
+RNG_version_pairs	= $(join \
+			    ${RNG_numeric_versions},$(addprefix \
+			      -,$(wordlist \
+			        2,$(words ${RNG_numeric_versions}),${RNG_numeric_versions} \
+			      ) next \
+			    ) \
+			  )
+RNG_version_pairs_cnt	= $(words ${RNG_version_pairs})
+RNG_version_pairs_last  = $(wordlist \
+			    $(words \
+			      $(wordlist \
+			        2,${RNG_version_pairs_cnt},${RNG_version_pairs} \
+			      ) \
+			    ),${RNG_version_pairs_cnt},${RNG_version_pairs} \
+			  )
 
 RNG_generated		= pacemaker.rng $(foreach base,$(RNG_versions),pacemaker-$(base).rng) versions.rng
 
 RNG_cfg_base	 	= options nodes resources constraints fencing acls tags
 RNG_base	 	= cib $(RNG_cfg_base) status score rule nvset
 RNG_files	 	= $(foreach base,$(RNG_base),$(wildcard $(base)*.rng))
 
 # List of non-Pacemaker RNGs
 RNG_extra		= crm_mon.rng
 
 RNG_SCRIPTS		= $(RNG_files) $(RNG_generated) $(RNG_extra)
 
 EXTRA_DIST		= best-match.sh
 
-best_match		= $(shell $(top_srcdir)/xml/best-match.sh $(1) $(2))
-
 versions:
 	echo "Max: $(RNG_max)"
 	echo "Available: $(RNG_versions)"
 
 versions.rng: Makefile.am
 	echo "  RNG      $@"
 	echo "<?xml version='1.0' encoding='UTF-8'?>" > $@
 	echo "<grammar xmlns='http://relaxng.org/ns/structure/1.0' datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'>" >> $@
 	echo "  <start>" >> $@
 	echo "   <interleave>" >> $@
 	echo "    <optional>" >> $@
 	echo "      <attribute name='validate-with'>" >> $@
 	echo "        <choice>" >> $@
 	echo "          <value>none</value>" >> $@
 	echo "          <value>pacemaker-0.6</value>" >> $@
 	echo "          <value>transitional-0.6</value>" >> $@
 	echo "          <value>pacemaker-0.7</value>" >> $@
 	echo "          <value>pacemaker-1.1</value>" >> $@
 	for rng in $(RNG_versions); do echo "          <value>pacemaker-$$rng</value>" >> $@; done
 	echo "        </choice>" >> $@
 	echo "      </attribute>" >> $@
 	echo "    </optional>" >> $@
 	echo "    <attribute name='admin_epoch'><data type='nonNegativeInteger'/></attribute>" >> $@
 	echo "    <attribute name='epoch'><data type='nonNegativeInteger'/></attribute>" >> $@
 	echo "    <attribute name='num_updates'><data type='nonNegativeInteger'/></attribute>" >> $@
 	echo "   </interleave>" >> $@
 	echo "  </start>" >> $@
 	echo "</grammar>" >> $@
 
 pacemaker.rng: pacemaker-$(RNG_max).rng
 	echo "  RNG      $@"
 	cp $(top_builddir)/xml/$< $@
 
 pacemaker-%.rng: $(RNG_files) best-match.sh Makefile.am 
 	echo "  RNG      $@"
 	echo "<?xml version='1.0' encoding='UTF-8'?>" > $@
 	echo "<grammar xmlns='http://relaxng.org/ns/structure/1.0' datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'>" >> $@
 	echo "  <start>" >> $@
 	echo "    <element name='cib'>" >> $@
-	$(top_srcdir)/xml/best-match.sh cib $(*) $(@) "      "
+	./best-match.sh cib $(*) $(@) "      "
 	echo "      <element name='configuration'>" >> $@
 	echo "        <interleave>" >> $@
-	for rng in $(RNG_cfg_base); do $(top_srcdir)/xml/best-match.sh $$rng $(*) $(@) "          "; done
+	for rng in $(RNG_cfg_base); do ./best-match.sh $$rng $(*) $(@) "          " || :; done
 	echo "        </interleave>" >> $@
 	echo "      </element>" >> $@
 	echo "      <element name='status'>" >> $@
-	$(top_srcdir)/xml/best-match.sh status $(*) $(@) "        "
+	./best-match.sh status $(*) $(@) "        "
 	echo "      </element>" >> $@
 	echo "    </element>" >> $@
 	echo "  </start>" >> $@
 	echo "</grammar>" >> $@
 
-files_next = $(shell echo $(wildcard *-next.rng) | sed 's/-next.rng//g')
-files_max = $(shell echo $(wildcard *-$(RNG_max).rng) | sed 's/-[0-9][0-9.]*.rng//g')
-
-diff:
-	echo "#  Comparing changes in: $(RNG_max)"
-	-for rng in $(files_max); do echo "### $${rng}"; diff -u `$(top_srcdir)/xml/best-match.sh $${rng} $(RNG_last)` $${rng}-$(RNG_max).rng; done
-	echo -e "\n\n\n#  Comparing changes since: $(RNG_max)"
-	-for rng in $(files_next); do echo "### $${rng}"; diff -u `$(top_srcdir)/xml/best-match.sh $${rng} $(RNG_max)` $${rng}-next.rng; done
+# version_diff(PAIRS): for each OLD-NEW pair, diff every *-NEW.rng against its best-match
+# predecessor for OLD (diff exits with 2 if none is found) and label hunks via context-of.xsl;
+# relies on GNU sed (\+, \|); instead of '=', one can use: hline=`echo "$${p}" | grep -Fn "$${hunk}" | cut -d: -f1`;
+# XXX: use line information from hunk to avoid "not detected" for ambiguity
+version_diff = \
+	@for p in $(1); do \
+	  set `echo "$${p}" | tr '-' ' '`; \
+	  echo "\#\#\# *-$$2.rng vs. predecessor"; \
+	  for v in *-$$2.rng; do \
+	    echo "\#\#\#\# $${v} vs. predecessor"; b=`echo "$${v}" | cut -d- -f1`; \
+	    old=`./best-match.sh $${b} $$1`; \
+	    p=`diff -u "$${old}" "$${v}" 2>/dev/null`; \
+	    case $$? in \
+	    1) echo "$${p}" | sed -n -e '/^@@ /!d;=;p' \
+	       -e ':l;n;/^\([- ]\|+.*<[^ />]\+\([^/>]\+="ID\|>$$\)\)/bl;s/^[+ ]\(.*\)/\1/p' \
+	       | while read -r hline; do \
+	           read -r h && read -r i || break; \
+	           iline=`grep -Fn "$${i}" "$${v}" | cut -d: -f1`; \
+	           ctxt="(not detected)"; \
+	           if test -n "$${iline}" && test `echo "$${iline}" | wc -l` -eq 1; then \
+	             ctxt=`{ sed -n -e "1,$$(($${iline}-1))p" "$${v}"; \
+	                     echo "<inject id=\"GOAL\"/>$${i}"; \
+	                     sed -n -e "$$(($${iline}+1)),$$ p" "$${v}"; \
+	                   } | $(XSLTPROC) --param skip 1 context-of.xsl -`; \
+	           fi; \
+	           echo "$${p}" | sed -n -e "$$(($${hline}-2)),$${hline}!d" \
+	             -e '/^\(+++\|---\)/p'; \
+	           echo "$${h} context: $${ctxt}"; \
+	           echo "$${p}" | sed -n -e "1,$${hline}d" \
+	             -e '/^\(---\|@@ \)/be;p;d;:e;n;be'; \
+	           done; \
+	       ;; \
+	    2) echo "\#\#\#\#\# $${v} has no predecessor";; \
+	    esac; \
+	  done; \
+	done
+
+diff: best-match.sh
+	@echo "#  Comparing changes in + since $(RNG_max)"
+	$(call version_diff,${RNG_version_pairs_last})
+
+fulldiff: best-match.sh
+	@echo "#  Comparing all changes across all the subsequent increments"
+	$(call version_diff,${RNG_version_pairs})
 
 sync:
 	git rm -f $(wildcard *-next.rng)
 	make pacemaker-next.rng
 
 CLEANFILES = $(RNG_generated)
diff --git a/xml/Readme.md b/xml/Readme.md
index 30e7934f08..b78e13d26b 100644
--- a/xml/Readme.md
+++ b/xml/Readme.md
@@ -1,51 +1,53 @@
 # Updating schema files #
 
 ## Experimental features ##
 
 Experimental features go into `${base}-next.rng`
 
 Create from the most recent `${base}-${X}.${Y}.rng` if it does not already exist
 
 ## Stable features ##
 
 The current stable version is determined at runtime when
 __xml_build_schema_list() interrogates the CRM_DTD_DIRECTORY.
 
 It will have the form `pacemaker-${X}.${Y}` and the highest
 `${X}.${Y}` wins.
 
 ### Simple Additions
 
 When the new syntax is a simple addition to the previous one, create a
 new entry with `${Y} = ${Yold} + 1`
 
 ### Feature Removal or otherwise Incompatible Changes
 
 When the new syntax is not a simple addition to the previous one,
 create a new entry with `${X} = ${Xold} + 1` and `${Y} = 0`.
 
 An XSLT file is also required that converts an old syntax to the new
 one and must be named `upgrade-${Xold}.${Yold}.xsl`.
 
 See `xml/upgrade06.xsl` for an example.
 
 ### General Proceedure
 
 1. Copy the most recent version of `${base}-*.rng` to `${base}-${X}.${Y}.rng` 
 1. Commit the copy, eg. `"Clone the latest ${base} schema in preparation for changes"`.  
    This way the actual change will be obvious in the commit history.
 1. Modify `${base}-${X}.${Y}.rng` as required
 1. Add an XSLT file if required and update `xslt_SCRIPTS` in `xml/Makefile.am` 
 1. Commit
 
 ## Admin Tasks
 New features will not be available until the admin
 
 1. Updates all the nodes
 1. Runs the equivalent of `cibadmin --upgrade`
 
 ## Random Notes
 
 From the source directory, run `make -C xml diff` to see the changes
 in the current schema (compared to the previous ones) and also the
 pending changes in `pacemaker-next`.
+Alternatively, if the intention is to grok the overall historical schema
+evolution, use `make -C xml fulldiff`.
diff --git a/xml/best-match.sh b/xml/best-match.sh
index 6d9ffb7219..2ba062532e 100755
--- a/xml/best-match.sh
+++ b/xml/best-match.sh
@@ -1,46 +1,58 @@
 #!/bin/sh
 
 base=$1; shift
 target=$1; shift
 destination=$1; shift
 prefix=$1; shift
 
 best="0.0"
-candidates=$(ls -1 ${base}-*.rng 2>/dev/null)
-for rng in $candidates; do
-    case $rng in
-	${base}-${target}.rng)
-	    best=${target}
-	    break
-	    ;;
-	*next*)
-	    : skipping $rng
-	    ;;
-	*) 
-	    v=$(echo $rng | sed -e "s/${base}-//" -e 's/.rng//')
-	    : comparing $v with $target
+candidates=$(ls -1 "${base}.rng" "${base}"-*.rng 2>/dev/null)
+for rng in ${candidates}; do
+    case ${rng} in
+        ${base}-${target}.rng)
+            best=${target}
+            break
+            ;;
+        *next*)
+            : skipping ${rng}
+            ;;
+        *)
+            if [ "${rng}" = "${base}.rng" ]; then
+                # special case for nvset.rng, no -0.1 around anyway
+                v=0.1
+            else
+                v=$(echo ${rng} | sed -e "s/${base}-//" -e 's/.rng//')
+            fi
+            : comparing ${v} with ${target}
 
-		echo | awk -v n1="$v" -v n2="${best}"  '{if (n1>n2) printf ("true"); else printf ("false");}' |  grep -q "true"
-	    if [ $? -eq 0 ]; then
-		: $v beats the previous ${best} for $target
-		if [ ${target} = next ]; then
-		    best=$v
-		else
-			echo | awk -v n1="$v" -v n2="${target}"  '{if (n1<n2) printf ("true"); else printf ("false");}' |  grep -q "true"
-		    if [ $? -eq 0 ]; then
-			: $v is still less than $target, using
-			best=$v
-		    fi
-		fi
-	    fi
-	    ;;
+            # Compare dotted versions field by field as integers: a plain awk
+            # "n1 > n2" reads 2.10 as the number 2.1 and ranks it below 2.9.
+            # The program exits 0 when a is strictly newer than b, 1 otherwise.
+            newer='BEGIN { n = split(a, x, "."); m = split(b, y, "."); if (m > n) n = m; for (i = 1; i <= n; i++) if (x[i] + 0 != y[i] + 0) exit !(x[i] + 0 > y[i] + 0); exit 1 }'
+            if awk -v a="${v}" -v b="${best}" "${newer}"; then
+                : ${v} beats the previous ${best} for ${target}
+                if [ "${target}" = "next" ]; then
+                    best=${v}
+                elif awk -v a="${target}" -v b="${v}" "${newer}"; then
+                    : ${v} is still less than ${target}, using
+                    best=${v}
+                fi
+            fi
+            ;;
     esac
 done
 
-if [ "x${best}" != "x0.0" ]; then
-    if [ "x$destination" = x ]; then
-	echo ${base}-${best}.rng
+[ "${best}" != "0.0" ]; ec=$?
+if [ ${ec} -eq 0 ]; then
+    if [ "${best}" = "0.1" ]; then
+        found=${base}.rng
     else
-	echo "$prefix<externalRef href=\"${base}-${best}.rng\"/>" >> ${destination}
+        found=${base}-${best}.rng
+    fi
+    if [ "x${destination}" = "x" ]; then
+        echo "${found}"
+    else
+        echo "${prefix}<externalRef href=\"${found}\"/>" >> "${destination}"
     fi
 fi
+ret () { return $1; }; ret ${ec}
diff --git a/xml/context-of.xsl b/xml/context-of.xsl
new file mode 100644
index 0000000000..96b8c17332
--- /dev/null
+++ b/xml/context-of.xsl
@@ -0,0 +1,83 @@
+<xsl:stylesheet version="1.0"
+                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <!-- Prints a path-like description of where a marked element sits in the input document. -->
+<xsl:output method="text"/> <!-- result is plain text -->
+<xsl:param name="goal-name" select="'id'"/> <!-- name of the marker attribute to look for -->
+<xsl:param name="goal-value" select="'GOAL'"/> <!-- value the marker attribute must carry -->
+<xsl:param name="style" select="'rng'"/> <!-- path flavour: 'rng' or 'xml' -->
+<xsl:param name="skip" select="0"/> <!-- how many of the steps nearest the marked element to leave out -->
+
+<xsl:template match="/"> <!-- entry point: find the marker attribute and dispatch on $style -->
+    <xsl:choose>
+        <xsl:when test="not(.//@*[
+                            name() = $goal-name
+                            and
+                            . = $goal-value
+                        ])">
+            <xsl:message terminate="yes">NOTFOUND</xsl:message> <!-- no attribute matches the marker: abort -->
+        </xsl:when>
+        <xsl:when test="$style = 'xml'">
+            <xsl:call-template name="xpath-xml-elem">
+                <xsl:with-param name="terminal-elem"
+                                select=".//@*[
+                                            name() = $goal-name
+                                            and
+                                            . = $goal-value
+                                        ]/.."/> <!-- the element owning the marker attribute -->
+            </xsl:call-template>
+        </xsl:when>
+        <xsl:when test="$style = 'rng'">
+            <xsl:call-template name="xpath-rng-elem">
+                <xsl:with-param name="terminal-elem"
+                                select=".//@*[
+                                            name() = $goal-name
+                                            and
+                                            . = $goal-value
+                                        ]/.."/> <!-- the element owning the marker attribute -->
+            </xsl:call-template>
+        </xsl:when>
+        <xsl:otherwise>
+            <xsl:message terminate="yes">BADSTYLE</xsl:message> <!-- $style is neither 'xml' nor 'rng': abort -->
+        </xsl:otherwise>
+    </xsl:choose>
+</xsl:template>
+
+<xsl:template name="xpath-xml-elem"> <!-- writes "/name" for each element on the ancestor-or-self axis of terminal-elem -->
+    <xsl:param name="terminal-elem"/>
+    <xsl:variable name="TotalCount"
+                  select="count($terminal-elem/ancestor-or-self::*)"/> <!-- number of elements on the path -->
+    <xsl:for-each select="$terminal-elem/ancestor-or-self::*">
+        <xsl:if test="$TotalCount - position() &gt;= $skip"> <!-- drop the last $skip steps -->
+            <xsl:value-of select="concat('/', name())"/>
+        </xsl:if>
+    </xsl:for-each>
+    <xsl:value-of select="'&#xa;'"/> <!-- finish the path with a newline -->
+</xsl:template>
+
+<xsl:template name="xpath-rng-elem"> <!-- like xpath-xml-elem, but names steps after RELAX NG constructs; other elements print nothing -->
+    <xsl:param name="terminal-elem"/>
+    <xsl:variable name="TotalCount"
+                  select="count($terminal-elem/ancestor-or-self::*)"/> <!-- number of elements on the path -->
+    <xsl:for-each select="$terminal-elem/ancestor-or-self::*">
+        <xsl:if test="$TotalCount - position() &gt;= $skip"> <!-- drop the last $skip steps -->
+            <xsl:choose>
+                <xsl:when test="name() = 'attribute'">
+                    <xsl:value-of select="concat('/@', @name)"/> <!-- attribute pattern: /@NAME -->
+                </xsl:when>
+                <xsl:when test="name() = 'define'">
+                    <xsl:value-of select="concat('/&lt;', @name, '&gt;')"/> <!-- named definition: /<NAME> -->
+                </xsl:when>
+                <xsl:when test="name() = 'element'">
+                    <xsl:value-of select="concat('/', @name)"/> <!-- element pattern: /NAME -->
+                </xsl:when>
+                <xsl:when test="name() = 'grammar'">
+                    <xsl:if test="$TotalCount &lt; 3"> <!-- only mention the grammar when the path has fewer than 3 elements -->
+                        <xsl:value-of select="concat('&lt;', name(), '&gt;')"/>
+                    </xsl:if>
+                </xsl:when>
+            </xsl:choose>
+        </xsl:if>
+    </xsl:for-each>
+    <xsl:value-of select="'&#xa;'"/> <!-- finish the path with a newline -->
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/xml/regression.sh b/xml/regression.sh
index 8f073053d3..cc435ef703 100755
--- a/xml/regression.sh
+++ b/xml/regression.sh
@@ -1,288 +1,288 @@
 #!/bin/bash
 
  # Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
  # 
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public
  # License as published by the Free Software Foundation; either
  # version 2 of the License, or (at your option) any later version.
  # 
  # This software is distributed in the hope that it will be useful,
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  # General Public License for more details.
  # 
  # You should have received a copy of the GNU General Public
  # License along with this library; if not, write to the Free Software
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  #
 
 . regression.core.sh
 create_mode="true"
 echo Generating test outputs for these tests...
 # do_test
 echo Done.
 echo ""
 
 echo Performing the following tests...
 create_mode="false"
 
 echo ""
 do_test simple1 "Offline     "
 do_test simple2 "Start       "
 do_test simple3 "Start 2     "
 do_test simple4 "Start Failed"
 do_test simple6 "Stop Start  "
 do_test simple7 "Shutdown    "
 #do_test simple8 "Stonith	"
 #do_test simple9 "Lower version"
 #do_test simple10 "Higher version"
 do_test simple11 "Priority (ne)"
 do_test simple12 "Priority (eq)"
 do_test simple8 "Stickiness"
 
 echo ""
 do_test params-0 "Params: No change"
 do_test params-1 "Params: Changed"
 do_test params-2 "Params: Resource definition"
 do_test params-4 "Params: Reload"
 do_test novell-251689 "Resource definition change + target_role=stopped"
 
 echo ""
 do_test orphan-0 "Orphan ignore"
 do_test orphan-1 "Orphan stop"
 
 echo ""
 do_test target-0 "Target Role : baseline"
 do_test target-1 "Target Role : test"
 
 echo ""
 do_test date-1 "Dates" -d "2005-020"
 do_test date-2 "Date Spec - Pass" -d "2005-020T12:30"
 do_test date-3 "Date Spec - Fail" -d "2005-020T11:30"
 do_test probe-0 "Probe (anon clone)"
 do_test probe-1 "Pending Probe"
 do_test standby "Standby"
 do_test comments "Comments"
 
 echo ""
 do_test rsc_dep1 "Must not     "
 do_test rsc_dep3 "Must         "
 do_test rsc_dep5 "Must not 3   "
 do_test rsc_dep7 "Must 3       "
 do_test rsc_dep10 "Must (but cant)"
 do_test rsc_dep2  "Must (running) "
 do_test rsc_dep8  "Must (running : alt) "
 do_test rsc_dep4  "Must (running + move)"
 do_test asymmetric "Asymmetric - require explicit location constraints"
 
 echo ""
 do_test order1 "Order start 1     "
 do_test order2 "Order start 2     "
 do_test order3 "Order stop	  "
 do_test order4 "Order (multiple)  "
 do_test order5 "Order (move)  "
 do_test order6 "Order (move w/ restart)  "
-do_test order7 "Order (manditory)  "
+do_test order7 "Order (mandatory)  "
 do_test order-optional "Order (score=0)  "
 do_test order-required "Order (score=INFINITY)  "
 
 echo ""
 do_test coloc-loop "Colocation - loop"
 do_test coloc-many-one "Colocation - many-to-one"
 do_test coloc-list "Colocation - many-to-one with list"
 do_test coloc-group "Colocation - groups"
 do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation"
 
 #echo ""
 #do_test agent1 "version: lt (empty)"
 #do_test agent2 "version: eq	"
 #do_test agent3 "version: gt	"
 
 echo ""
 do_test attrs1 "string: eq (and)     "
 do_test attrs2 "string: lt / gt (and)"
 do_test attrs3 "string: ne (or)      "
 do_test attrs4 "string: exists       "
 do_test attrs5 "string: not_exists   "
 do_test attrs6 "is_dc: true          "
 do_test attrs7 "is_dc: false         "
 do_test attrs8 "score_attribute      "
 
 echo ""
 do_test mon-rsc-1 "Schedule Monitor - start"
 do_test mon-rsc-2 "Schedule Monitor - move "
 do_test mon-rsc-3 "Schedule Monitor - pending start     "
 do_test mon-rsc-4 "Schedule Monitor - move/pending start"
 
 echo ""
 do_test rec-rsc-0 "Resource Recover - no start     "
 do_test rec-rsc-1 "Resource Recover - start        "
 do_test rec-rsc-2 "Resource Recover - monitor      "
 do_test rec-rsc-3 "Resource Recover - stop - ignore"
 do_test rec-rsc-4 "Resource Recover - stop - block "
 do_test rec-rsc-5 "Resource Recover - stop - fence "
 do_test rec-rsc-6 "Resource Recover - multiple - restart"
 do_test rec-rsc-7 "Resource Recover - multiple - stop   "
 do_test rec-rsc-8 "Resource Recover - multiple - block  "
 do_test rec-rsc-9 "Resource Recover - group/group"
 
 echo ""
 do_test quorum-1 "No quorum - ignore"
 do_test quorum-2 "No quorum - freeze"
 do_test quorum-3 "No quorum - stop  "
 do_test quorum-4 "No quorum - start anyway"
 do_test quorum-5 "No quorum - start anyway (group)"
 do_test quorum-6 "No quorum - start anyway (clone)"
 
 echo ""
 do_test rec-node-1 "Node Recover - Startup   - no fence"
 do_test rec-node-2 "Node Recover - Startup   - fence   "
 do_test rec-node-3 "Node Recover - HA down   - no fence"
 do_test rec-node-4 "Node Recover - HA down   - fence   "
 do_test rec-node-5 "Node Recover - CRM down  - no fence"
 do_test rec-node-6 "Node Recover - CRM down  - fence   "
 do_test rec-node-7 "Node Recover - no quorum - ignore  "
 do_test rec-node-8 "Node Recover - no quorum - freeze  "
 do_test rec-node-9 "Node Recover - no quorum - stop    "
 do_test rec-node-10 "Node Recover - no quorum - stop w/fence"
 do_test rec-node-11 "Node Recover - CRM down w/ group - fence   "
 do_test rec-node-12 "Node Recover - nothing active - fence   "
 do_test rec-node-13 "Node Recover - failed resource + shutdown - fence   "
 do_test rec-node-15 "Node Recover - unknown lrm section"
 do_test rec-node-14 "Serialize all stonith's"
 
 echo ""
 do_test multi1 "Multiple Active (stop/start)"
 
 echo ""
 do_test migrate-1 "Migrate (migrate)"
 do_test migrate-2 "Migrate (stable)"
 do_test migrate-3 "Migrate (failed migrate_to)"
 do_test migrate-4 "Migrate (failed migrate_from)"
 do_test novell-252693 "Migration in a stopping stack"
 do_test novell-252693-2 "Migration in a starting stack"
 do_test novell-252693-3 "Non-Migration in a starting and stopping stack"
 do_test bug-1820 "Migration in a group"
 do_test bug-1820-1 "Non-migration in a group"
 
 #echo ""
 #do_test complex1 "Complex	"
 
 echo ""
 do_test group1 "Group		"
 do_test group2 "Group + Native	"
 do_test group3 "Group + Group	"
 do_test group4 "Group + Native (nothing)"
 do_test group5 "Group + Native (move)   "
 do_test group6 "Group + Group (move)    "
 do_test group7 "Group colocation"
 do_test group13 "Group colocation (cant run)"
 do_test group8 "Group anti-colocation"
 do_test group9 "Group recovery"
 do_test group10 "Group partial recovery"
 do_test group11 "Group target_role"
 do_test group14 "Group stop (graph terminated)"
 do_test group15 "-ve group colocation"
 do_test bug-1573 "Partial stop of a group with two children"
 do_test bug-1718 "Mandatory group ordering - Stop group_FUN"
 
 echo ""
 do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node"
 do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones"
 do_test inc0 "Incarnation start" 
 do_test inc1 "Incarnation start order" 
 do_test inc2 "Incarnation silent restart, stop, move"
 do_test inc3 "Inter-incarnation ordering, silent restart, stop, move"
 do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)"
 do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)"
 do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)"
 do_test inc7 "Clone colocation"
 do_test inc8 "Clone anti-colocation"
 do_test inc9 "Non-unique clone"
 do_test inc10 "Non-unique clone (stop)"
 do_test inc11 "Primitive colocation with clones" 
 do_test inc12 "Clone shutdown" 
 do_test cloned-group "Make sure only the correct number of cloned groups are started"
 do_test clone-no-shuffle "Don't prioritize allocation of instances that must be moved"
 
 echo ""
 do_test master-0 "Stopped -> Slave"
 do_test master-1 "Stopped -> Promote"
 do_test master-2 "Stopped -> Promote : notify"
 do_test master-3 "Stopped -> Promote : master location"
 do_test master-4 "Started -> Promote : master location"
 do_test master-5 "Promoted -> Promoted"
 do_test master-6 "Promoted -> Promoted (2)"
 do_test master-7 "Promoted -> Fenced"
 do_test master-8 "Promoted -> Fenced -> Moved"
 do_test master-9 "Stopped + Promotable + No quorum"
 do_test master-10 "Stopped -> Promotable : notify with monitor"
 do_test master-11 "Stopped -> Promote : colocation"
 do_test novell-239082 "Demote/Promote ordering"
 do_test novell-239087 "Stable master placement"
 do_test master-12 "Promotion based solely on rsc_location constraints"
 do_test master-13 "Include preferences of colocated resources when placing master"
 do_test master-demote "Ordering when actions depends on demoting a slave resource" 
 do_test master-ordering "Prevent resources from starting that need a master"
 do_test bug-1765 "Master-Master Colocation (dont stop the slaves)"
 do_test master-group "Promotion of cloned groups"
 do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily"
 do_test master-failed-demote "Don't retry failed demote actions"
 do_test master-failed-demote-2 "Don't retry failed demote actions (notify=false)"
 do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does"
 
 echo ""
 do_test managed-0 "Managed (reference)"
 do_test managed-1 "Not managed - down "
 do_test managed-2 "Not managed - up   "
 
 echo ""
 do_test interleave-0 "Interleave (reference)"
 do_test interleave-1 "coloc - not interleaved"
 do_test interleave-2 "coloc - interleaved   "
 do_test interleave-3 "coloc - interleaved (2)"
 do_test interleave-pseudo-stop "Interleaved clone during stonith"
 do_test interleave-stop "Interleaved clone during stop"
 do_test interleave-restart "Interleaved clone during dependency restart"
 
 echo ""
 do_test notify-0 "Notify reference"
 do_test notify-1 "Notify simple"
 do_test notify-2 "Notify simple, confirm"
 do_test notify-3 "Notify move, confirm"
 do_test novell-239079 "Notification priority"
 #do_test notify-2 "Notify - 764"
 
 echo ""
 do_test 594 "OSDL #594"
 do_test 662 "OSDL #662"
 do_test 696 "OSDL #696"
 do_test 726 "OSDL #726"
 do_test 735 "OSDL #735"
 do_test 764 "OSDL #764"
 do_test 797 "OSDL #797"
 do_test 829 "OSDL #829"
 do_test 994 "OSDL #994"
 do_test 994-2 "OSDL #994 - with a dependent resource"
 do_test 1360 "OSDL #1360 - Clone stickiness"
 do_test 1484 "OSDL #1484 - on_fail=stop"
 do_test 1494 "OSDL #1494 - Clone stability"
 do_test unrunnable-1 "Unrunnable"
 do_test stonith-0 "Stonith loop - 1"
 do_test stonith-1 "Stonith loop - 2"
 do_test stonith-2 "Stonith loop - 3"
 do_test bug-1572-1 "Recovery of groups depending on master/slave"
 do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted"
 do_test bug-1685 "Depends-on-master ordering"
 do_test bug-1822 "Don't promote partially active groups"
 do_test bug-pm-11 "New resource added to a m/s group"
 do_test bug-pm-12 "Recover only the failed portion of a cloned group"
 do_test bug-n-387749 "Don't shuffle clone instances"
 do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped"
 do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node"
 do_test bug-lf-1920 "Correctly handle probes that find active resources"
 
 echo ""
 
 test_results