No OneTemporary
Actions

Size

318 KB

Referenced Files

None

Subscribers

None

View Options

This file is larger than 256 KB, so syntax highlighting was skipped.

	diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
	index e5d5f69b08..4b53aaa972 100644
	--- a/daemons/controld/controld_schedulerd.c
	+++ b/daemons/controld/controld_schedulerd.c
	@@ -1,384 +1,385 @@
	/*
	* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <unistd.h> /* pid_t, sleep, ssize_t */

	#include <crm/cib.h>
	#include <crm/cluster.h>
	#include <crm/common/xml.h>
	#include <crm/crm.h>
	#include <crm/msg_xml.h>

	#include <pacemaker-controld.h>
	#include <controld_fsa.h>
	#include <controld_messages.h> /* register_fsa_error_adv */

	static mainloop_io_t *pe_subsystem = NULL;

	/*!
	 * \internal
	 * \brief Drop the scheduler IPC connection, if one is registered
	 *
	 * Hands the mainloop IPC client to mainloop_del_ipc_client() and clears the
	 * cached handle. Does nothing when no connection is recorded.
	 */
	void
	pe_subsystem_free(void)
	{
	    if (pe_subsystem == NULL) {
	        return;
	    }

	    mainloop_del_ipc_client(pe_subsystem);
	    pe_subsystem = NULL;
	}

	/*!
	 * \internal
	 * \brief Save CIB query result to file, raising FSA error
	 *
	 * \param[in] msg        Ignored
	 * \param[in] call_id    Call ID of CIB query (not used here)
	 * \param[in] rc         Return code of CIB query
	 * \param[in] output     Result of CIB query
	 * \param[in] user_data  Unique identifier used in the file name. It is not
	 *                       freed in this function; presumably the callback
	 *                       registration owns it -- TODO confirm.
	 *
	 * \note This is intended to be called after a scheduler connection fails.
	 * \note The XML arguments are pointers, matching the CIB callback shape used
	 *       by do_pe_invoke_callback().
	 */
	static void
	save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
	                  void *user_data)
	{
	    char *id = user_data;

	    /* The FSA error is raised first, regardless of the checks below */
	    register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
	    CRM_CHECK(id != NULL, return);

	    if (rc == pcmk_ok) {
	        char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);

	        if (write_xml_file(output, filename, TRUE) < 0) {
	            crm_err("Could not save Cluster Information Base to %s after scheduler crash",
	                    filename);
	        } else {
	            crm_notice("Saved Cluster Information Base to %s after scheduler crash",
	                       filename);
	        }
	        free(filename);
	    }
	}

	/*!
	 * \internal
	 * \brief React to the scheduler IPC connection going away
	 *
	 * If the scheduler is still required, the loss is treated as a failure: a
	 * local CIB query is issued and save_cib_contents() is registered as its
	 * callback. Otherwise the disconnect is only logged. In both cases the
	 * connected flag and cached handle are cleared and the FSA is triggered.
	 *
	 * \param[in] user_data  Ignored
	 */
	static void
	pe_ipc_destroy(gpointer user_data)
	{
	    if (!is_set(fsa_input_register, R_PE_REQUIRED)) {
	        crm_info("Connection to the scheduler released");

	    } else {
	        int rc = pcmk_ok;
	        char *uuid_str = crm_generate_uuid();

	        crm_crit("Connection to the scheduler failed "
	                 CRM_XS " uuid=%s", uuid_str);

	        /* The scheduler died. Grab the current CIB so there is a chance of
	         * working out what killed it. The I_ERROR is raised later, from
	         * save_cib_contents(), once the query callback runs.
	         */
	        rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
	        fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);
	    }

	    clear_bit(fsa_input_register, R_PE_CONNECTED);
	    pe_subsystem = NULL;
	    mainloop_set_trigger(fsa_source);
	}

	/*!
	 * \internal
	 * \brief Handle a message received on the scheduler connection
	 *
	 * The text is parsed into XML, routed as an IPC message when parsing
	 * yielded a document, and the parsed XML is then released.
	 *
	 * \param[in] buffer    XML message text
	 * \param[in] length    Ignored
	 * \param[in] userdata  Ignored
	 *
	 * \return 0
	 */
	static int
	pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
	{
	    xmlNode *parsed = string2xml(buffer);

	    if (parsed != NULL) {
	        route_message(C_IPC_MESSAGE, parsed);
	    }

	    free_xml(parsed);
	    return 0;
	}

	/*!
	 * \internal
	 * \brief Make new connection to PE
	 *
	 * Registers a mainloop IPC client for CRM_SYSTEM_PENGINE with
	 * pe_ipc_dispatch() and pe_ipc_destroy() as callbacks, and stores the
	 * resulting handle in pe_subsystem.
	 *
	 * \return TRUE on success, FALSE otherwise
	 */
	static bool
	pe_subsystem_new(void)
	{
	    static struct ipc_client_callbacks pe_callbacks = {
	        .dispatch = pe_ipc_dispatch,
	        .destroy = pe_ipc_destroy
	    };

	    pe_subsystem = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE,
	                                           G_PRIORITY_DEFAULT,
	                                           5 * 1024 * 1024 /* 5MB */,
	                                           NULL, &pe_callbacks);
	    return (pe_subsystem != NULL);
	}

	/*!
	 * \internal
	 * \brief Send an XML message over the scheduler connection
	 *
	 * \param[in] cmd  XML message to send
	 *
	 * \return pcmk_ok when crm_ipc_send() reports a positive value, -ENODATA
	 *         when it reports 0, its negative result otherwise, or -ENOTCONN
	 *         when there is no scheduler connection
	 */
	static int
	pe_subsystem_send(xmlNode *cmd)
	{
	    int rc = 0;

	    if (pe_subsystem == NULL) {
	        return -ENOTCONN;
	    }

	    rc = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem), cmd,
	                      0, 0, NULL);
	    if (rc > 0) {
	        return pcmk_ok;
	    }
	    return (rc == 0)? -ENODATA : rc;
	}

	/* Forward declaration; must match the definition further down this file */
	static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
	                                  xmlNode *output, void *user_data);

	/* A_PE_START, A_PE_STOP, O_PE_RESTART */
	/*!
	 * \brief FSA action handler that stops and/or starts the scheduler connection
	 *
	 * A stop request is processed first, so an action carrying both bits
	 * disconnects and then reconnects. A start request is ignored when already
	 * connected or when the controller is stopping.
	 */
	void
	do_pe_control(long long action,
	              enum crmd_fsa_cause cause,
	              enum crmd_fsa_state cur_state,
	              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
	{
	    if (action & A_PE_STOP) {
	        clear_bit(fsa_input_register, R_PE_REQUIRED);
	        pe_subsystem_free();
	        clear_bit(fsa_input_register, R_PE_CONNECTED);
	    }

	    /* Nothing more to do unless a start was requested and we are not
	     * already connected */
	    if (!(action & A_PE_START)
	        || is_set(fsa_input_register, R_PE_CONNECTED)) {
	        return;
	    }

	    if (cur_state == S_STOPPING) {
	        crm_info("Ignoring request to connect to scheduler while shutting down");
	        return;
	    }

	    set_bit(fsa_input_register, R_PE_REQUIRED);
	    if (!pe_subsystem_new()) {
	        crm_warn("Could not connect to scheduler");
	        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
	        return;
	    }
	    set_bit(fsa_input_register, R_PE_CONNECTED);
	}

	int fsa_pe_query = 0;
	char *fsa_pe_ref = NULL;

	/* A_PE_INVOKE */
	/*!
	 * \brief FSA action handler that starts a scheduler run
	 *
	 * After checking preconditions (we are DC, the scheduler is connected, the
	 * FSA is in S_POLICY_ENGINE, and a CIB is available), a local CIB query is
	 * issued and do_pe_invoke_callback() is registered to receive the result.
	 */
	void
	do_pe_invoke(long long action,
	             enum crmd_fsa_cause cause,
	             enum crmd_fsa_state cur_state,
	             enum crmd_fsa_input current_input, fsa_data_t * msg_data)
	{
	    if (!AM_I_DC) {
	        crm_err("Not invoking scheduler because not DC: %s",
	                fsa_action2string(action));
	        return;
	    }

	    if (!is_set(fsa_input_register, R_PE_CONNECTED)) {
	        if (!is_set(fsa_input_register, R_SHUTDOWN)) {
	            crm_info("Waiting for the scheduler to connect");
	            crmd_fsa_stall(FALSE);
	            register_fsa_action(A_PE_START);

	        } else {
	            crm_err("Cannot shut down gracefully without the scheduler");
	            register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
	        }
	        return;
	    }

	    if (cur_state != S_POLICY_ENGINE) {
	        crm_notice("Not invoking scheduler because in state %s",
	                   fsa_state2string(cur_state));
	        return;
	    }

	    if (!is_set(fsa_input_register, R_HAVE_CIB)) {
	        crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");

	        /* start the join from scratch */
	        register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
	        return;
	    }

	    fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);

	    crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
	              fsa_state2string(fsa_state));

	    /* Forget the reference of any calculation that is still queued */
	    free(fsa_pe_ref);
	    fsa_pe_ref = NULL;

	    fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
	}

	/*!
	 * \internal
	 * \brief Force a cluster option to a given value in a CIB copy
	 *
	 * Every nvpair named \p attr_name found under the crm_config property sets
	 * has its value overwritten. If none exists, the configuration, crm_config
	 * and property-set elements are created as needed and a new nvpair is added.
	 *
	 * \param[in,out] xml         CIB XML to modify
	 * \param[in]     attr_name   Name of the option
	 * \param[in]     attr_value  Value to force
	 */
	static void
	force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
	{
	    int max = 0;
	    int lpc = 0;
	    char *xpath_string = NULL;
	    xmlXPathObjectPtr xpathObj = NULL;

	    xpath_string = crm_strdup_printf("%.128s//%s//nvpair[@name='%.128s']",
	                                     get_object_path(XML_CIB_TAG_CRMCONFIG),
	                                     XML_CIB_TAG_PROPSET, attr_name);
	    xpathObj = xpath_search(xml, xpath_string);
	    max = numXpathResults(xpathObj);
	    free(xpath_string);

	    /* Overwrite every existing definition of the option */
	    for (lpc = 0; lpc < max; lpc++) {
	        xmlNode *match = getXpathResult(xpathObj, lpc);
	        crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
	        crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
	    }

	    /* No existing definition: build the path down to a new nvpair */
	    if(max == 0) {
	        xmlNode *configuration = NULL;
	        xmlNode *crm_config = NULL;
	        xmlNode *cluster_property_set = NULL;

	        crm_trace("Creating %s-%s for %s=%s",
	                  CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);

	        configuration = find_entity(xml, XML_CIB_TAG_CONFIGURATION, NULL);
	        if (configuration == NULL) {
	            configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
	        }

	        crm_config = find_entity(configuration, XML_CIB_TAG_CRMCONFIG, NULL);
	        if (crm_config == NULL) {
	            crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
	        }

	        cluster_property_set = find_entity(crm_config, XML_CIB_TAG_PROPSET, NULL);
	        if (cluster_property_set == NULL) {
	            cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
	            crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
	        }

	        /* From here on, xml refers to the newly created nvpair */
	        xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);

	        crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
	        crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
	        crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
	    }
	    freeXpathObject(xpathObj);
	}

	/*!
	* \internal
	* \brief CIB query callback that hands the CIB to the scheduler
	*
	* Registered by do_pe_invoke(). After re-checking that a scheduler run is
	* still wanted, the query result is annotated (DC UUID, quorum, watchdog)
	* and sent to the scheduler as a CRM_OP_PECALC request.
	*
	* \param[in] msg Not used here
	* \param[in] call_id Call ID of the CIB query
	* \param[in] rc Return code of the CIB query
	* \param[in] output Result of the CIB query (modified in place)
	* \param[in] user_data Not used here
	*/
	static void
	do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
	{
	xmlNode *cmd = NULL;
	pid_t watchdog = pcmk_locate_sbd();

	if (rc != pcmk_ok) {
	crm_err("Could not retrieve the Cluster Information Base: %s "
	CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
	register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
	return;

	/* Only the most recently issued query (fsa_pe_query) is acted upon */
	} else if (call_id != fsa_pe_query) {
	crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
	return;

	/* NOTE(review): "\|\|" looks like an extraction artifact for "||" */
	} else if (AM_I_DC == FALSE \|\| is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
	crm_debug("No need to invoke the scheduler anymore");
	return;

	} else if (fsa_state != S_POLICY_ENGINE) {
	crm_debug("Discarding scheduler request in state: %s",
	fsa_state2string(fsa_state));
	return;

	/* this callback counts as 1 */
	} else if (num_cib_op_callbacks() > 1) {
	crm_debug("Re-asking for the CIB: %d other peer updates still pending",
	(num_cib_op_callbacks() - 1));
	/* Blocks for one second before scheduling another A_PE_INVOKE */
	sleep(1);
	register_fsa_action(A_PE_INVOKE);
	return;

	/* NOTE(review): same condition as the check two branches above, which
	* already returned, so this branch appears unreachable */
	} else if (fsa_state != S_POLICY_ENGINE) {
	crm_err("Invoking scheduler in state: %s", fsa_state2string(fsa_state));
	return;
	}

	CRM_LOG_ASSERT(output != NULL);

	/* The "-"/"+" lines below are the change carried by this diff */
	- // Refresh the remote node cache when the scheduler is invoked
	- crm_remote_peer_cache_refresh(output);
	+ /* Refresh the remote node cache and the known node cache when the
	+ * scheduler is invoked */
	+ crm_peer_caches_refresh(output);

	crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
	crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);

	/* A non-zero result from pcmk_locate_sbd() is reported as "true" */
	force_local_option(output, XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false");

	if (ever_had_quorum && crm_have_quorum == FALSE) {
	crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
	}

	cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL);

	/* Remember the request's reference in fsa_pe_ref */
	free(fsa_pe_ref);
	fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE);

	rc = pe_subsystem_send(cmd);
	if (rc < 0) {
	crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
	pcmk_strerror(rc), rc);
	register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
	}
	/* Logged on both the success and the failure path */
	crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d",
	fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum);
	free_xml(cmd);
	}
	diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
	index 3f538b9bc2..5606ed654a 100644
	--- a/daemons/controld/controld_te_utils.c
	+++ b/daemons/controld/controld_te_utils.c
	@@ -1,718 +1,718 @@
	/*
	* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <crm/crm.h>

	#include <crm/msg_xml.h>

	#include <crm/common/xml.h>
	#include <controld_transition.h>
	#include <controld_fsa.h>
	#include <controld_lrm.h>
	#include <controld_messages.h>
	#include <controld_throttle.h>
	#include <crm/fencing/internal.h>

	crm_trigger_t *stonith_reconnect = NULL;
	static crm_trigger_t *stonith_history_sync_trigger = NULL;
	static mainloop_timer_t *stonith_history_sync_timer = NULL;

	/*
	* stonith cleanup list
	*
	* If the DC is shot, proper notifications might not go out.
	* The stonith cleanup list allows the cluster to (re-)send
	* notifications once a new DC is elected.
	*/

	static GListPtr stonith_cleanup_list = NULL;

	/*!
	 * \internal
	 * \brief Append a copy of a node name to the stonith cleanup list
	 *
	 * \param[in] target  Name of node to add
	 */
	void
	add_stonith_cleanup(const char *target)
	{
	    char *name_copy = strdup(target);

	    stonith_cleanup_list = g_list_append(stonith_cleanup_list, name_copy);
	}

	/*!
	 * \internal
	 * \brief Remove every entry matching a node name from the stonith cleanup list
	 *
	 * \param[in] target  Name of node to remove
	 */
	void
	remove_stonith_cleanup(const char *target)
	{
	    GListPtr entry = NULL;
	    GListPtr following = NULL;

	    /* The successor is captured before the current link can be deleted */
	    for (entry = stonith_cleanup_list; entry != NULL; entry = following) {
	        char *entry_name = entry->data;

	        following = entry->next;
	        if (!safe_str_eq(target, entry_name)) {
	            continue;
	        }
	        crm_trace("Removing %s from the cleanup list", entry_name);
	        stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, entry);
	        free(entry_name);
	    }
	}

	/*!
	 * \internal
	 * \brief Discard all entries of the stonith cleanup list
	 *
	 * Each stored name is logged and freed, then the list itself is freed and
	 * reset to NULL. An empty list is left untouched.
	 */
	void
	purge_stonith_cleanup()
	{
	    GListPtr entry = stonith_cleanup_list;

	    if (entry == NULL) {
	        return;
	    }

	    while (entry != NULL) {
	        char *name = entry->data;

	        crm_info("Purging %s from stonith cleanup list", name);
	        free(name);
	        entry = entry->next;
	    }

	    g_list_free(stonith_cleanup_list);
	    stonith_cleanup_list = NULL;
	}

	/*!
	 * \internal
	 * \brief Send a stonith update for each cleanup-list entry, then empty the list
	 */
	void
	execute_stonith_cleanup()
	{
	    GListPtr entry = stonith_cleanup_list;

	    while (entry != NULL) {
	        char *name = entry->data;
	        crm_node_t *node = crm_get_peer(0, name);
	        const char *node_uuid = crm_peer_uuid(node);

	        crm_notice("Marking %s, target of a previous stonith action, as clean", name);
	        send_stonith_update(NULL, name, node_uuid);
	        free(name);

	        entry = entry->next;
	    }

	    g_list_free(stonith_cleanup_list);
	    stonith_cleanup_list = NULL;
	}

	/* end stonith cleanup list functions */

	/*!
	 * \internal
	 * \brief Fail all unconfirmed fencing actions in a transition graph
	 *
	 * Walks every unconfirmed synapse and marks each unconfirmed cluster action
	 * whose task is CRM_OP_FENCE as failed. If any action was failed, the
	 * transition is aborted for stonith failure.
	 *
	 * \param[in,out] graph  Transition graph to scan (may be NULL)
	 *
	 * \return TRUE if at least one fencing action was failed, FALSE otherwise
	 */
	static gboolean
	fail_incompletable_stonith(crm_graph_t * graph)
	{
	    GListPtr lpc = NULL;
	    const char *task = NULL;
	    xmlNode *last_action = NULL;

	    if (graph == NULL) {
	        return FALSE;
	    }

	    for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
	        GListPtr lpc2 = NULL;
	        synapse_t *synapse = (synapse_t *) lpc->data;

	        if (synapse->confirmed) {
	            continue;
	        }

	        for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
	            crm_action_t *action = (crm_action_t *) lpc2->data;

	            if (action->type != action_type_crm || action->confirmed) {
	                continue;
	            }

	            task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
	            if (task && safe_str_eq(task, CRM_OP_FENCE)) {
	                action->failed = TRUE;
	                last_action = action->xml;
	                update_graph(graph, action);
	                crm_notice("Failing action %d (%s): fencer terminated",
	                           action->id, ID(action->xml));
	            }
	        }
	    }

	    /* Only the XML of the last failed action is reported as the reason */
	    if (last_action != NULL) {
	        crm_warn("Fencer failure resulted in unrunnable actions");
	        abort_for_stonith_failure(tg_restart, NULL, last_action);
	        return TRUE;
	    }

	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Notification handler for loss of the fencer connection
	 *
	 * Requests a reconnect when fencing is still required, marks the API object
	 * as disconnected, and, on the DC, fails pending fencing actions and
	 * re-triggers graph processing.
	 */
	static void
	tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
	{
	    if (!is_set(fsa_input_register, R_ST_REQUIRED)) {
	        crm_info("Fencing daemon disconnected");

	    } else {
	        crm_crit("Fencing daemon connection failed");
	        mainloop_set_trigger(stonith_reconnect);
	    }

	    /* cbchan will be garbage at this point, arrange for it to be reset */
	    if (stonith_api != NULL) {
	        stonith_api->state = stonith_disconnected;
	    }

	    if (!AM_I_DC) {
	        return;
	    }
	    fail_incompletable_stonith(transition_graph);
	    trigger_graph();
	}

	char *te_client_id = NULL;

	#ifdef HAVE_SYS_REBOOT_H
	# include <unistd.h>
	# include <sys/reboot.h>
	#endif

	/*!
	* \internal
	* \brief Notification handler for fencing results
	*
	* Handles unfencing results, the case where this node itself was reported
	* fenced, fail-count bookkeeping on non-DC nodes, and CIB/peer-state updates
	* after a successful fencing of another node.
	*
	* \param[in] st Not used here
	* \param[in] st_event Fencing event details
	*/
	static void
	tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
	{
	/* Built once, on first notification, from the system name and our PID */
	if(te_client_id == NULL) {
	te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
	(unsigned long) getpid());
	}

	if (st_event == NULL) {
	crm_err("Notify data not found");
	return;
	}

	crmd_alert_fencing_op(st_event);

	/* Unfencing ("on") results are only logged */
	if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
	crm_notice("%s was successfully unfenced by %s (at the request of %s)",
	st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
	/* TODO: Hook up st_event->device */
	return;

	} else if (safe_str_eq("on", st_event->action)) {
	crm_err("Unfencing of %s by %s failed: %s (%d)",
	st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
	pcmk_strerror(st_event->result), st_event->result);
	return;

	/* A successful fencing whose target is this node */
	} else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
	crm_crit("We were allegedly just fenced by %s for %s!",
	st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */

	qb_log_fini(); /* Try to get the above log message to disk - somehow */

	/* Get out ASAP and do not come back up.
	*
	* Triggering a reboot is also not the worst idea either since
	* the rest of the cluster thinks we're safely down
	*/

	#ifdef RB_HALT_SYSTEM
	reboot(RB_HALT_SYSTEM);
	#endif

	/*
	* If reboot() fails or is not supported, coming back up will
	* probably lead to a situation where the other nodes set our
	* status to 'lost' because of the fencing callback and will
	* discard subsequent election votes with:
	*
	* Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
	*
	* So just stay dead, something is seriously messed up anyway.
	*
	*/
	exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
	/* Not reached: exit() does not return */
	return;
	}

	/* Update the count of stonith failures for this target, in case we become
	* DC later. The current DC has already updated its fail count in
	* tengine_stonith_callback().
	*/
	if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
	if (st_event->result == pcmk_ok) {
	st_fail_count_reset(st_event->target);
	} else {
	st_fail_count_increment(st_event->target);
	}
	}

	crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
	CRM_XS " initiator=%s ref=%s",
	st_event->target, st_event->result == pcmk_ok ? "" : " not",
	st_event->action,
	st_event->executioner ? st_event->executioner : "<anyone>",
	(st_event->client_origin? st_event->client_origin : "<unknown>"),
	pcmk_strerror(st_event->result),
	st_event->origin, st_event->id);

	if (st_event->result == pcmk_ok) {
	/* The "-"/"+" pair below is the change carried by this diff: the peer
	* lookup switches to crm_find_known_peer_full() */
	- crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
	+ crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
	const char *uuid = NULL;
	gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

	/* Nothing can be updated for a target the lookup does not return */
	if (peer == NULL) {
	return;
	}

	uuid = crm_peer_uuid(peer);

	crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
	if(AM_I_DC) {
	/* The DC always sends updates */
	send_stonith_update(NULL, st_event->target, uuid);

	/* @TODO Ideally, at this point, we'd check whether the fenced node
	* hosted any guest nodes, and call remote_node_down() for them.
	* Unfortunately, the controller doesn't have a simple, reliable way
	* to map hosts to guests. It might be possible to track this in the
	* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
	* on the PE creating fence pseudo-events for the guests.
	*/

	if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {

	/* Abort the current transition graph if it wasn't us
	* that invoked stonith to fence someone
	*/
	crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
	abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
	}

	/* Assume it was our leader if we don't currently have one */
	/* NOTE(review): "\|\|" below looks like an extraction artifact for "||" */
	} else if (((fsa_our_dc == NULL) \|\| safe_str_eq(fsa_our_dc, st_event->target))
	&& !is_set(peer->flags, crm_remote_node)) {

	crm_notice("Target %s our leader %s (recorded: %s)",
	fsa_our_dc ? "was" : "may have been", st_event->target,
	fsa_our_dc ? fsa_our_dc : "<unset>");

	/* Given the CIB resyncing that occurs around elections,
	* have one node update the CIB now and, if the new DC is different,
	* have them do so too after the election
	*/
	if (we_are_executioner) {
	send_stonith_update(NULL, st_event->target, uuid);
	}
	add_stonith_cleanup(st_event->target);
	}

	/* If the target is a remote node, and we host its connection,
	* immediately fail all monitors so it can be recovered quickly.
	* The connection won't necessarily drop when a remote node is fenced,
	* so the failure might not otherwise be detected until the next poke.
	*/
	if (is_set(peer->flags, crm_remote_node)) {
	remote_ra_fail(st_event->target);
	}

	crmd_peer_down(peer, TRUE);
	}
	}

	/*!
	 * \internal
	 * \brief Trigger callback that requests a fence-history synchronization
	 *
	 * When the fencer connection is usable, a synchronous, broadcast history
	 * request is issued and the returned history is freed immediately.
	 *
	 * \param[in] user_data  Ignored
	 *
	 * \return TRUE if the request was issued, FALSE if the fencer is disconnected
	 */
	static gboolean
	do_stonith_history_sync(gpointer user_data)
	{
	    if (stonith_api && (stonith_api->state != stonith_disconnected)) {
	        stonith_history_t *history = NULL;

	        stonith_api->cmds->history(stonith_api,
	                                   st_opt_sync_call | st_opt_broadcast,
	                                   NULL, &history, 5);
	        stonith_history_free(history);
	        return TRUE;
	    } else {
	        crm_info("Skip triggering stonith history-sync as stonith is disconnected");
	        return FALSE;
	    }
	}

	/*!
	* \internal
	* \brief Timer callback that fires the history-sync trigger
	*
	* \param[in] user_data Ignored
	*
	* \return FALSE always
	*/
	static gboolean
	stonith_history_sync_set_trigger(gpointer user_data)
	{
	mainloop_set_trigger(stonith_history_sync_trigger);
	return FALSE;
	}

	/*!
	 * \brief Schedule a cluster-wide fence-history synchronization
	 *
	 * The sync is deferred by a 5000 ms timer, giving more nodes a chance to
	 * show up and avoiding unnecessary stonith-history-sync traffic. The
	 * trigger and timer are created on first use and kept afterwards, which
	 * is fine because do_stonith_history_sync() checks the stonith connection
	 * before doing anything.
	 */
	void
	te_trigger_stonith_history_sync(void)
	{
	    if (!stonith_history_sync_trigger) {
	        stonith_history_sync_trigger =
	            mainloop_add_trigger(G_PRIORITY_LOW,
	                                 do_stonith_history_sync, NULL);
	    }

	    if (!stonith_history_sync_timer) {
	        stonith_history_sync_timer =
	            mainloop_timer_add("history_sync", 5000,
	                               FALSE, stonith_history_sync_set_trigger,
	                               NULL);
	    }

	    crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
	    mainloop_timer_start(stonith_history_sync_timer);
	}

	/*!
	 * \brief Connect to the fencer and register notification handlers
	 *
	 * Up to 30 connection attempts are made, each preceded by a one-second
	 * sleep. When \p user_data is non-NULL, the first failure ends the call
	 * (after requesting a reconnect if fencing is still required) instead of
	 * retrying in place.
	 *
	 * \param[in] user_data  Non-NULL selects the give-up-after-one-failure mode
	 *
	 * \return TRUE always
	 */
	gboolean
	te_connect_stonith(gpointer user_data)
	{
	    int attempt = 0;
	    int rc = pcmk_ok;

	    if (stonith_api == NULL) {
	        stonith_api = stonith_api_new();
	    }

	    if (stonith_api->state != stonith_disconnected) {
	        crm_trace("Still connected");
	        return TRUE;
	    }

	    for (attempt = 0; attempt < 30; attempt++) {
	        crm_debug("Attempting connection to fencing daemon...");

	        sleep(1);
	        rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
	        if (rc == pcmk_ok) {
	            break;
	        }

	        if (user_data == NULL) {
	            /* This sleep plus the one at the top of the loop gives 2s */
	            crm_err("Sign-in failed: pausing and trying again in 2s...");
	            sleep(1);
	            continue;
	        }

	        if (!is_set(fsa_input_register, R_ST_REQUIRED)) {
	            crm_info("Sign-in failed, but no longer required");
	        } else {
	            crm_err("Sign-in failed: triggered a retry");
	            mainloop_set_trigger(stonith_reconnect);
	        }
	        return TRUE;
	    }

	    CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */

	    stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
	                                             tengine_stonith_connection_destroy);
	    stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
	                                             tengine_stonith_notify);

	    crm_trace("Connected");
	    return TRUE;
	}

	/*!
	 * \brief Stop an action timer if it is running
	 *
	 * \param[in,out] timer  Timer to stop (may be NULL)
	 *
	 * \return TRUE if a running timer was stopped, FALSE if \p timer is NULL or
	 *         was already stopped
	 */
	gboolean
	stop_te_timer(crm_action_timer_t * timer)
	{
	    if (timer == NULL) {
	        return FALSE;
	    }

	    if (timer->source_id == 0) {
	        crm_trace("Action timer was already stopped");
	        return FALSE;
	    }

	    crm_trace("Stopping action timer");
	    g_source_remove(timer->source_id);
	    timer->source_id = 0;
	    return TRUE;
	}

	/*!
	 * \brief Trigger callback that advances the current transition graph
	 *
	 * Nothing is done without a graph or in FSA states where the graph must not
	 * run. An incomplete graph is run with a throttled batch limit; if it is
	 * still active or pending the function returns, otherwise the graph is
	 * marked complete and the controller is notified.
	 *
	 * \param[in] user_data  Ignored
	 *
	 * \return TRUE always
	 */
	gboolean
	te_graph_trigger(gpointer user_data)
	{
	    if (transition_graph == NULL) {
	        crm_debug("Nothing to do");
	        return TRUE;
	    }

	    crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));

	    switch (fsa_state) {
	        case S_STARTING:
	        case S_PENDING:
	        case S_NOT_DC:
	        case S_HALT:
	        case S_ILLEGAL:
	        case S_STOPPING:
	        case S_TERMINATE:
	            return TRUE;
	        default:
	            break;
	    }

	    if (!transition_graph->complete) {
	        int configured_limit = transition_graph->batch_limit;
	        enum transition_status graph_rc = -1;

	        transition_graph->batch_limit = throttle_get_total_job_limit(configured_limit);
	        graph_rc = run_graph(transition_graph);
	        transition_graph->batch_limit = configured_limit; /* Restore the configured value */

	        /* significant overhead... */
	        /* print_graph(LOG_TRACE, transition_graph); */

	        switch (graph_rc) {
	            case transition_active:
	                crm_trace("Transition not yet complete");
	                return TRUE;

	            case transition_pending:
	                crm_trace("Transition not yet complete - no actions fired");
	                return TRUE;

	            case transition_complete:
	                break;

	            default:
	                crm_warn("Transition failed: %s", transition_status(graph_rc));
	                print_graph(LOG_NOTICE, transition_graph);
	                break;
	        }
	    }

	    crm_debug("Transition %d is now complete", transition_graph->id);
	    transition_graph->complete = TRUE;
	    notify_crmd(transition_graph);

	    return TRUE;
	}

	/*!
	* \brief Request that the transition graph be processed
	*
	* \param[in] fn Name of the requesting function (logged at trace level)
	* \param[in] line Source line of the request (logged at trace level)
	*/
	void
	trigger_graph_processing(const char *fn, int line)
	{
	crm_trace("%s:%d - Triggered graph processing", fn, line);
	mainloop_set_trigger(transition_trigger);
	}

	/* State of the single delayed-abort timer armed by abort_after_delay() */
	static struct abort_timer_s {
	/* Set FALSE when a delay is armed, TRUE by abort_transition_graph() */
	bool aborted;
	/* Source ID from g_timeout_add(); 0 when no timer is pending */
	guint id;
	/* Arguments to pass to abort_transition() when the timer pops */
	int priority;
	enum transition_action action;
	/* Stored by pointer, not copied */
	const char *text;
	} abort_timer = { 0, };

	/*!
	 * \internal
	 * \brief Timer callback for abort_after_delay()
	 *
	 * Aborts the transition with the stored arguments, unless this node is not
	 * DC or an abort already happened since the timer was armed.
	 *
	 * \param[in] data  Ignored
	 *
	 * \return FALSE always
	 */
	static gboolean
	abort_timer_popped(gpointer data)
	{
	    bool abort_needed = AM_I_DC && (abort_timer.aborted == FALSE);

	    if (abort_needed) {
	        abort_transition(abort_timer.priority, abort_timer.action,
	                         abort_timer.text, NULL);
	    }

	    abort_timer.id = 0;
	    return FALSE; // do not immediately reschedule timer
	}

	/*!
	 * \internal
	 * \brief Abort transition after delay, if not already aborted in that time
	 *
	 * Any timer that is already pending is removed and replaced.
	 *
	 * \param[in] abort_priority  Priority to abort with
	 * \param[in] abort_action    Action to abort with
	 * \param[in] abort_text      Must be literal string (the pointer is stored,
	 *                            not copied)
	 * \param[in] delay_ms        Delay before aborting, in milliseconds
	 */
	void
	abort_after_delay(int abort_priority, enum transition_action abort_action,
	                  const char *abort_text, guint delay_ms)
	{
	    if (abort_timer.id != 0) {
	        // Timer already in progress, stop and reschedule
	        g_source_remove(abort_timer.id);
	    }

	    abort_timer.text = abort_text;
	    abort_timer.action = abort_action;
	    abort_timer.priority = abort_priority;
	    abort_timer.aborted = FALSE;
	    abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL);
	}

	/*!
	 * \brief Abort the current transition graph and log what caused the abort
	 *
	 * The abort is suppressed in FSA states where no transition can be running.
	 * Otherwise any queued scheduler reference is discarded, the abort priority
	 * of an incomplete graph is updated, and a log message is built from
	 * \p reason (when given, its enclosing diff and change elements are looked
	 * up). Finally either a new scheduler run is requested (complete graph) or
	 * graph processing is triggered (incomplete graph).
	 *
	 * \param[in] abort_priority  Priority passed to update_abort_priority()
	 * \param[in] abort_action    Action passed to update_abort_priority()
	 * \param[in] abort_text      Human-readable reason, used in log messages
	 * \param[in] reason          XML that caused the abort, or NULL
	 * \param[in] fn              Name of the calling function (for logs)
	 * \param[in] line            Source line of the call (for logs)
	 */
	void
	abort_transition_graph(int abort_priority, enum transition_action abort_action,
	                       const char *abort_text, xmlNode *reason, const char *fn, int line)
	{
	    int add[] = { 0, 0, 0 };
	    int del[] = { 0, 0, 0 };
	    int level = LOG_INFO;
	    xmlNode *diff = NULL;
	    xmlNode *change = NULL;

	    CRM_CHECK(transition_graph != NULL, return);

	    switch (fsa_state) {
	        case S_STARTING:
	        case S_PENDING:
	        case S_NOT_DC:
	        case S_HALT:
	        case S_ILLEGAL:
	        case S_STOPPING:
	        case S_TERMINATE:
	            crm_info("Abort %s suppressed: state=%s (complete=%d)",
	                     abort_text, fsa_state2string(fsa_state), transition_graph->complete);
	            return;
	        default:
	            break;
	    }

	    /* Lets a pending abort_after_delay() timer know it is no longer needed */
	    abort_timer.aborted = TRUE;

	    /* Make sure any queued calculations are discarded ASAP */
	    free(fsa_pe_ref);
	    fsa_pe_ref = NULL;

	    if (transition_graph->complete == FALSE) {
	        if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
	            level = LOG_NOTICE;
	        }
	    }

	    if(reason) {
	        xmlNode *search = NULL;

	        /* Find the diff element enclosing the reason, if any */
	        for(search = reason; search; search = search->parent) {
	            if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
	                diff = search;
	                break;
	            }
	        }

	        if(diff) {
	            xml_patch_versions(diff, add, del);
	            /* Find the change element enclosing the reason, if any */
	            for(search = reason; search; search = search->parent) {
	                if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
	                    change = search;
	                    break;
	                }
	            }
	        }
	    }

	    if(reason == NULL) {
	        do_crm_log(level, "Transition %d aborted: %s "CRM_XS" source=%s:%d complete=%s",
	                   transition_graph->id, abort_text, fn, line,
	                   (transition_graph->complete? "true" : "false"));

	    } else if(change == NULL) {
	        char *local_path = xml_get_path(reason);

	        do_crm_log(level, "Transition %d aborted by %s.%s: %s "
	                   CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
	                   transition_graph->id, TYPE(reason), ID(reason), abort_text,
	                   add[0], add[1], add[2], fn, line, local_path,
	                   (transition_graph->complete? "true" : "false"));
	        free(local_path);

	    } else {
	        const char *kind = NULL;
	        const char *op = crm_element_value(change, XML_DIFF_OP);
	        const char *path = crm_element_value(change, XML_DIFF_PATH);

	        /* The last log branch below already allows for a missing op, so
	         * never hand op to strcmp() without checking it first */
	        const bool op_create = (op != NULL) && (strcmp(op, "create") == 0);
	        const bool op_modify = (op != NULL) && (strcmp(op, "modify") == 0);
	        const bool op_delete = (op != NULL) && (strcmp(op, "delete") == 0);

	        if(change == reason) {
	            if(op_create) {
	                reason = reason->children;

	            } else if(op_modify) {
	                reason = first_named_child(reason, XML_DIFF_RESULT);
	                if(reason) {
	                    reason = reason->children;
	                }
	            }
	        }

	        kind = TYPE(reason);
	        if(op_delete) {
	            /* Guard strrchr() against a change without a path attribute */
	            const char *shortpath = (path != NULL)? strrchr(path, '/') : NULL;

	            do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
	                       CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
	                       transition_graph->id,
	                       (shortpath? (shortpath + 1) : path), abort_text,
	                       add[0], add[1], add[2], fn, line, path,
	                       (transition_graph->complete? "true" : "false"));

	        } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) {
	            do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
	                       CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
	                       transition_graph->id,
	                       crm_element_value(reason, XML_ATTR_ID), op,
	                       crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
	                       crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
	                       abort_text, add[0], add[1], add[2], fn, line, path,
	                       (transition_graph->complete? "true" : "false"));

	        } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
	            const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);

	            do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
	                       CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
	                       transition_graph->id,
	                       crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
	                       crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
	                       magic, add[0], add[1], add[2], fn, line,
	                       (transition_graph->complete? "true" : "false"));

	        } else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
	                   || safe_str_eq(XML_CIB_TAG_NODE, kind)) {
	            const char *uname = crm_peer_uname(ID(reason));

	            do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
	                       CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
	                       transition_graph->id,
	                       kind, op, (uname? uname : ID(reason)), abort_text,
	                       add[0], add[1], add[2], fn, line,
	                       (transition_graph->complete? "true" : "false"));

	        } else {
	            const char *id = ID(reason);

	            do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
	                       CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
	                       transition_graph->id,
	                       TYPE(reason), (id? id : ""), (op? op : "change"),
	                       abort_text, add[0], add[1], add[2], fn, line, path,
	                       (transition_graph->complete? "true" : "false"));
	        }
	    }

	    if (transition_graph->complete) {
	        /* With a transition timer configured, restart it; otherwise ask
	         * for a new scheduler calculation right away */
	        if (transition_timer->period_ms > 0) {
	            crm_timer_stop(transition_timer);
	            crm_timer_start(transition_timer);
	        } else {
	            register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
	        }
	        return;
	    }

	    mainloop_set_trigger(transition_trigger);
	}
	diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
	index 1fdcee7f4f..8e6f1b6341 100644
	--- a/daemons/fenced/fenced_commands.c
	+++ b/daemons/fenced/fenced_commands.c
	@@ -1,2742 +1,2739 @@
	/*
	* Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <sys/stat.h>
	#include <unistd.h>
	#include <sys/utsname.h>

	#include <stdlib.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <ctype.h>

	#include <crm/crm.h>
	#include <crm/msg_xml.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipcs.h>
	#include <crm/cluster/internal.h>
	#include <crm/common/mainloop.h>

	#include <crm/stonith-ng.h>
	#include <crm/fencing/internal.h>
	#include <crm/common/xml.h>

	#if SUPPORT_CIBSECRETS
	# include <crm/common/cib_secrets.h>
	#endif

	#include <pacemaker-fenced.h>

	GHashTable *device_list = NULL;
	GHashTable *topology = NULL;
	GList *cmd_list = NULL;

	/* Bookkeeping for one search for devices able to fence a given target */
	struct device_search_s {
	    /* target of fence action */
	    char *host;
	    /* requested fence action */
	    char *action;
	    /* timeout to use if a device is queried dynamically for possible targets */
	    int per_device_timeout;
	    /* number of registered fencing devices at time of request */
	    int replies_needed;
	    /* number of device replies received so far */
	    int replies_received;
	    /* whether the target is eligible to perform requested action (or off) */
	    bool allow_suicide;

	    /* private data to pass to search callback function */
	    void *user_data;
	    /* function to call when all replies have been received */
	    void (*callback) (GList * devices, void *user_data);
	    /* devices capable of performing requested action (or off if remapping) */
	    GListPtr capable;
	};

	/* Forward declarations for helpers defined later in this file */
	static gboolean stonith_device_dispatch(gpointer user_data);
	static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data);
	static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
	                               const char *client_id);

	/* Called with device == NULL when the queried device no longer exists */
	static void search_devices_record_result(struct device_search_s *search, const char *device,
	                                         gboolean can_fence);

	/* One fencing operation queued for, or running on, a fence device */
	typedef struct async_command_s {

	    int id;
	    int pid;
	    int fd_stdout;
	    int options;
	    int default_timeout; /* seconds */
	    int timeout; /* seconds */

	    int start_delay; /* milliseconds */
	    int delay_id;    /* GLib source ID of the start-delay timer, 0 if none */

	    char *op;
	    char *origin;
	    char *client;
	    char *client_name;
	    char *remote_op_id;

	    char *victim;
	    uint32_t victim_nodeid;
	    char *action;
	    char *device;
	    char *mode;

	    GListPtr device_list;
	    GListPtr device_next;

	    void *internal_user_data;
	    /* Completion callback; user_data is the async_command_t itself */
	    void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data);
	    guint timer_sigterm;
	    guint timer_sigkill;
	    /*! If the operation timed out, this is the last signal
	     * we sent to the process to get it to terminate */
	    int last_timeout_signo;

	    /* Device the command is currently executing on, NULL if not running */
	    stonith_device_t *active_on;
	} async_command_t;

	/* Forward declaration; defined later in this file */
	static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
	                                              xmlNode * data, int rc);

	/*!
	 * \internal
	 * \brief Check whether an action is the automatic unfencing ("on") of a device
	 *
	 * \param[in] action  Action name to check
	 * \param[in] device  Fence device (may be NULL)
	 *
	 * \return TRUE if device is non-NULL, has automatic unfencing enabled, and
	 *         action is "on"; FALSE otherwise
	 */
	static gboolean
	is_action_required(const char *action, stonith_device_t *device)
	{
	    return device && device->automatic_unfencing && safe_str_eq(action, "on");
	}

	/*!
	 * \internal
	 * \brief Get the configured maximum start delay for an action on a device
	 *
	 * \param[in] device  Fence device whose parameters are consulted
	 * \param[in] action  Action name
	 *
	 * \return STONITH_ATTR_DELAY_MAX parsed by crm_get_msec(), or 0 if the
	 *         parameter is unset or action is neither "off" nor "reboot"
	 */
	static int
	get_action_delay_max(stonith_device_t * device, const char * action)
	{
	    const char *configured = NULL;

	    /* Only "off" and "reboot" are ever delayed */
	    if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
	        return 0;
	    }

	    configured = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX);
	    if (configured == NULL) {
	        return 0;
	    }
	    return crm_get_msec(configured);
	}

	/*!
	 * \internal
	 * \brief Get the configured base start delay for an action on a device
	 *
	 * \param[in] device  Fence device whose parameters are consulted
	 * \param[in] action  Action name
	 *
	 * \return STONITH_ATTR_DELAY_BASE parsed by crm_get_msec(), or 0 if the
	 *         parameter is unset or action is neither "off" nor "reboot"
	 */
	static int
	get_action_delay_base(stonith_device_t * device, const char * action)
	{
	    const char *configured = NULL;

	    /* Only "off" and "reboot" are ever delayed */
	    if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
	        return 0;
	    }

	    configured = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE);
	    if (configured == NULL) {
	        return 0;
	    }
	    return crm_get_msec(configured);
	}

	/*!
	 * \internal
	 * \brief Pick the timeout for an action, honouring pcmk_*_timeout overrides
	 *
	 * \param[in] device           STONITH device to use
	 * \param[in] action           STONITH action name
	 * \param[in] default_timeout  Value returned when the device has no
	 *                             pcmk_*_timeout parameter for the action
	 *
	 * \return Value of pcmk_(action)_timeout if set, otherwise default_timeout
	 * \note It would be more consistent to configure reboot/off/on timeouts the
	 *       way start/stop/monitor timeouts are configured, i.e. through an
	 *       <operation> entry of the fencing resource. That is not sufficient,
	 *       though: devices can also be registered directly through the fencer's
	 *       register_device() API without going through the CIB (stonith_admin
	 *       does so for its -R option, and the executor does so to make sure a
	 *       device is registered when a command is issued). Because
	 *       pcmk_*_timeout values are device properties, the fencer can pick them
	 *       up at registration time regardless of whether the registration came
	 *       from a CIB change or an API call.
	 */
	static int
	get_action_timeout(stonith_device_t * device, const char *action, int default_timeout)
	{
	    char param_name[64] = { 0, };
	    const char *configured = NULL;

	    if (!action || !device || !device->params) {
	        return default_timeout;
	    }

	    /* A "reboot" on a device without reboot support gets remapped to "off"
	     * at execution time, so the "off" timeout is the relevant one
	     */
	    if (safe_str_eq(action, "reboot")
	        && is_not_set(device->flags, st_device_supports_reboot)) {
	        crm_trace("%s doesn't support reboot, using timeout for off instead",
	                  device->id);
	        action = "off";
	    }

	    /* An action-specific timeout in the device configuration wins */
	    snprintf(param_name, sizeof(param_name), "pcmk_%s_timeout", action);
	    configured = g_hash_table_lookup(device->params, param_name);

	    return configured ? atoi(configured) : default_timeout;
	}

	/*!
	 * \internal
	 * \brief Free an asynchronous command and everything it owns
	 *
	 * \param[in] cmd  Command to free (NULL is ignored)
	 *
	 * \note The command is also unlinked from the global cmd_list.
	 */
	static void
	free_async_command(async_command_t * cmd)
	{
	if (!cmd) {
	return;
	}

	/* Cancel a still-pending start-delay timer, which holds a pointer to cmd */
	if (cmd->delay_id) {
	g_source_remove(cmd->delay_id);
	}

	cmd_list = g_list_remove(cmd_list, cmd);

	/* Entries of cmd->device_list are released with free() */
	g_list_free_full(cmd->device_list, free);
	free(cmd->device);
	free(cmd->action);
	free(cmd->victim);
	free(cmd->remote_op_id);
	free(cmd->client);
	free(cmd->client_name);
	free(cmd->origin);
	free(cmd->mode);
	free(cmd->op);
	free(cmd);
	}

	/*!
	 * \internal
	 * \brief Build an asynchronous command from a fencing request
	 *
	 * \param[in] msg  XML request; must contain an F_STONITH_ACTION attribute
	 *                 somewhere and an F_STONITH_OPERATION attribute at top level
	 *
	 * \return Newly allocated command (already appended to cmd_list), or NULL if
	 *         the request lacks an action or operation, or memory is exhausted
	 * \note The result must eventually be freed with free_async_command().
	 */
	static async_command_t *
	create_async_command(xmlNode * msg)
	{
	    async_command_t *cmd = NULL;
	    xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
	    const char *action = crm_element_value(op, F_STONITH_ACTION);

	    CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL);

	    crm_log_xml_trace(msg, "Command");
	    cmd = calloc(1, sizeof(async_command_t));
	    if (cmd == NULL) {
	        /* Callers already handle a NULL return for malformed requests */
	        crm_err("Could not allocate memory for fencing command");
	        return NULL;
	    }

	    crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
	    crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
	    crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
	    cmd->timeout = cmd->default_timeout;

	    /* Request-level attributes are read from msg, target-level ones from the
	     * element that carries the action attribute */
	    cmd->origin = crm_element_value_copy(msg, F_ORIG);
	    cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
	    cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
	    cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
	    cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
	    cmd->action = strdup(action);
	    cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET);
	    cmd->mode = crm_element_value_copy(op, F_STONITH_MODE);
	    cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);

	    CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL);
	    /* A missing client ID is logged but does not abort the command */
	    CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient"));

	    cmd->done_cb = st_child_done;
	    cmd_list = g_list_append(cmd_list, cmd);
	    return cmd;
	}

	/*!
	 * \internal
	 * \brief Get the number of actions a device may run concurrently
	 *
	 * \param[in] device  Fence device whose parameters are consulted
	 *
	 * \return Parsed STONITH_ATTR_ACTION_LIMIT value, with 0 replaced by 1;
	 *         1 if the parameter is not set
	 */
	static int
	get_action_limit(stonith_device_t * device)
	{
	    int limit = 1;
	    const char *configured = g_hash_table_lookup(device->params,
	                                                 STONITH_ATTR_ACTION_LIMIT);

	    if (configured != NULL) {
	        limit = crm_parse_int(configured, "1");

	        /* A limit of 0 is not allowed; treat it as 1 */
	        if (limit == 0) {
	            limit = 1;
	        }
	    }
	    return limit;
	}

	/*!
	 * \internal
	 * \brief Count the commands currently marked as running on a device
	 *
	 * \param[in] device  Fence device to look for
	 *
	 * \return Number of cmd_list entries whose active_on is device (0 if device
	 *         is NULL)
	 */
	static int
	get_active_cmds(stonith_device_t * device)
	{
	    int active = 0;
	    GListPtr item = NULL;

	    CRM_CHECK(device != NULL, return 0);

	    item = cmd_list;
	    while (item != NULL) {
	        async_command_t *cmd = item->data;

	        item = item->next;
	        if (cmd->active_on == device) {
	            active++;
	        }
	    }

	    return active;
	}

	/*!
	 * \internal
	 * \brief Start the next pending operation of a device, if it may run now
	 *
	 * Nothing is started if the device already has as many active commands as its
	 * action limit allows, if the first pending command is still waiting for its
	 * start delay, or if there are no pending commands.
	 *
	 * \param[in] device  Fence device whose pending_ops queue should be serviced
	 *
	 * \return FALSE if device is NULL, TRUE otherwise
	 */
	static gboolean
	stonith_device_execute(stonith_device_t * device)
	{
	    int exec_rc = 0;
	    const char *action_str = NULL;
	    async_command_t *cmd = NULL;
	    stonith_action_t *action = NULL;
	    int active_cmds = 0;
	    int action_limit = 0;

	    CRM_CHECK(device != NULL, return FALSE);

	    /* A negative action limit means "unlimited" */
	    active_cmds = get_active_cmds(device);
	    action_limit = get_action_limit(device);
	    if (action_limit > -1 && active_cmds >= action_limit) {
	        /* active_cmds is a signed int, so it is printed with %d */
	        crm_trace("%s is over its action limit of %d (%d active action%s)",
	                  device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : "");
	        return TRUE;
	    }

	    if (device->pending_ops) {
	        GList *first = device->pending_ops;

	        cmd = first->data;
	        if (cmd && cmd->delay_id) {
	            /* Leave the command queued; start_delay_helper() re-triggers us */
	            crm_trace
	                ("Operation %s%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms",
	                 cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "",
	                 device->id, cmd->start_delay);
	            return TRUE;
	        }

	        device->pending_ops = g_list_remove_link(device->pending_ops, first);
	        g_list_free_1(first);
	    }

	    if (cmd == NULL) {
	        crm_trace("Nothing further to do for %s", device->id);
	        return TRUE;
	    }

	    /* The watchdog pseudo-agent is handled here instead of by an agent */
	    if(safe_str_eq(device->agent, STONITH_WATCHDOG_AGENT)) {
	        if(safe_str_eq(cmd->action, "reboot")) {
	            pcmk_panic(__FUNCTION__);
	            return TRUE;

	        } else if(safe_str_eq(cmd->action, "off")) {
	            pcmk_panic(__FUNCTION__);
	            return TRUE;

	        } else {
	            crm_info("Faking success for %s watchdog operation", cmd->action);
	            cmd->done_cb(0, 0, NULL, cmd);
	            return TRUE;
	        }
	    }

	#if SUPPORT_CIBSECRETS
	    if (replace_secret_params(device->id, device->params) < 0) {
	        /* replacing secrets failed! */
	        if (safe_str_eq(cmd->action,"stop")) {
	            /* don't fail on stop! */
	            crm_info("proceeding with the stop operation for %s", device->id);

	        } else {
	            crm_err("failed to get secrets for %s, "
	                    "considering resource not configured", device->id);
	            exec_rc = PCMK_OCF_NOT_CONFIGURED;
	            cmd->done_cb(0, exec_rc, NULL, cmd);
	            return TRUE;
	        }
	    }
	#endif

	    /* Remap "reboot" to "off" for agents that do not advertise reboot */
	    action_str = cmd->action;
	    if (safe_str_eq(cmd->action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) {
	        crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent);
	        action_str = "off";
	    }

	    action = stonith_action_create(device->agent,
	                                   action_str,
	                                   cmd->victim,
	                                   cmd->victim_nodeid,
	                                   cmd->timeout, device->params, device->aliases);

	    /* for async exec, exec_rc is pid if positive and error code if negative/zero */
	    exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb);

	    if (exec_rc > 0) {
	        crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds",
	                  cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "",
	                  device->id, exec_rc, cmd->timeout);
	        cmd->active_on = device;

	    } else {
	        crm_warn("Operation %s%s%s on %s failed: %s (%d)",
	                 cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "",
	                 device->id, pcmk_strerror(exec_rc), exec_rc);
	        cmd->done_cb(0, exec_rc, NULL, cmd);
	    }
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Trigger callback that runs a device's next pending operation
	 *
	 * \param[in] user_data  The stonith_device_t passed to mainloop_add_trigger()
	 *
	 * \return Result of stonith_device_execute()
	 */
	static gboolean
	stonith_device_dispatch(gpointer user_data)
	{
	return stonith_device_execute(user_data);
	}

	/*!
	 * \internal
	 * \brief Timer callback invoked once a command's start delay has elapsed
	 *
	 * Clears the command's delay_id and, if the command's device is still
	 * registered, triggers that device so the command can be executed.
	 *
	 * \param[in] data  The delayed async_command_t
	 *
	 * \return Always FALSE
	 */
	static gboolean
	start_delay_helper(gpointer data)
	{
	    async_command_t *cmd = data;

	    cmd->delay_id = 0;

	    if (cmd->device != NULL) {
	        stonith_device_t *device = g_hash_table_lookup(device_list, cmd->device);

	        if (device != NULL) {
	            mainloop_set_trigger(device->work);
	        }
	    }

	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Queue a command on a device, applying any configured start delay
	 *
	 * The command's device name and timeout are (re)set from the given device, the
	 * command is appended to the device's pending_ops, and the device is
	 * triggered. If a delay is configured for the action, a timer is armed and the
	 * command will not be executed until start_delay_helper() has run.
	 *
	 * \param[in] cmd     Command to schedule
	 * \param[in] device  Device that should execute the command
	 */
	static void
	schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
	{
	    int delay_max = 0;
	    int delay_base = 0;

	    CRM_CHECK(cmd != NULL, return);
	    CRM_CHECK(device != NULL, return);

	    /* free(NULL) is a no-op, so no guard is needed */
	    free(cmd->device);
	    cmd->device = NULL;

	    if (device->include_nodeid && cmd->victim) {
	        crm_node_t *node = crm_get_peer(0, cmd->victim);

	        /* Leave victim_nodeid untouched if no peer entry could be obtained */
	        if (node != NULL) {
	            cmd->victim_nodeid = node->id;
	        }
	    }

	    cmd->device = strdup(device->id);
	    cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);

	    if (cmd->remote_op_id) {
	        crm_debug("Scheduling %s on %s for remote peer %s with op id (%s) (timeout=%ds)",
	                  cmd->action, device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
	    } else {
	        crm_debug("Scheduling %s on %s for %s (timeout=%ds)",
	                  cmd->action, device->id, cmd->client, cmd->timeout);
	    }

	    device->pending_ops = g_list_append(device->pending_ops, cmd);
	    mainloop_set_trigger(device->work);

	    delay_max = get_action_delay_max(device, cmd->action);
	    delay_base = get_action_delay_base(device, cmd->action);

	    /* Without a maximum, the base delay is used as a fixed delay */
	    if (delay_max == 0) {
	        delay_max = delay_base;
	    }
	    if (delay_max < delay_base) {
	        crm_warn("Base-delay (%dms) is larger than max-delay (%dms) "
	                 "for %s on %s - limiting to max-delay",
	                 delay_base, delay_max, cmd->action, device->id);
	        delay_base = delay_max;
	    }
	    if (delay_max > 0) {
	        /* Random value in [delay_base, delay_max); the modulo is skipped when
	         * both are equal to avoid dividing by zero */
	        // coverity[dont_call] We're not using rand() for security
	        cmd->start_delay =
	            ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
	            + delay_base;
	        crm_notice("Delaying %s on %s for %dms (timeout=%ds, base=%dms, "
	                   "max=%dms)",
	                   cmd->action, device->id, cmd->start_delay, cmd->timeout,
	                   delay_base, delay_max);
	        cmd->delay_id =
	            g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
	    }
	}

	/*!
	 * \internal
	 * \brief Free a fence device, failing any operations still queued on it
	 *
	 * \param[in] data  The stonith_device_t to free
	 *
	 * \note Registered as the value destructor of device_list.
	 */
	static void
	free_device(gpointer data)
	{
	GListPtr gIter = NULL;
	stonith_device_t *device = data;

	g_hash_table_destroy(device->params);
	g_hash_table_destroy(device->aliases);

	/* Report every still-pending operation as failed with -ENODEV */
	for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
	async_command_t *cmd = gIter->data;

	crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action);
	cmd->done_cb(0, -ENODEV, NULL, cmd);
	}
	/* Only the list cells are freed here, not the commands they point to */
	g_list_free(device->pending_ops);

	g_list_free_full(device->targets, free);

	mainloop_destroy_trigger(device->work);

	free_xml(device->agent_metadata);
	free(device->namespace);
	free(device->on_target_actions);
	free(device->agent);
	free(device->id);
	free(device);
	}

	/* Destroy the global device table, if any, and reset it to NULL */
	void free_device_list()
	{
	    if (device_list == NULL) {
	        return;
	    }
	    g_hash_table_destroy(device_list);
	    device_list = NULL;
	}

	/* Create the global device table unless it already exists. Values are freed
	 * with free_device(); no key destructor is installed. */
	void
	init_device_list()
	{
	    if (device_list != NULL) {
	        return;
	    }
	    device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
	                                        free_device);
	}

	/*!
	 * \internal
	 * \brief Parse a host map string into an alias table
	 *
	 * The map is a list of name/value pairs. Name and value are separated by '='
	 * or ':'; pairs are separated by ';', space or tab.
	 *
	 * \param[in]     hostmap  Host map string (may be NULL)
	 * \param[in,out] targets  If not NULL, a copy of every mapped value is
	 *                         appended to this list
	 *
	 * \return Newly created case-insensitive table mapping names to values
	 *         (empty if hostmap is NULL or contains no mappings)
	 */
	static GHashTable *
	build_port_aliases(const char *hostmap, GListPtr * targets)
	{
	    char *name = NULL;
	    int last = 0, lpc = 0, max = 0, added = 0;
	    GHashTable *aliases = crm_strcase_table_new();

	    if (hostmap == NULL) {
	        return aliases;
	    }

	    max = strlen(hostmap);
	    for (; lpc <= max; lpc++) {
	        switch (hostmap[lpc]) {
	                /* Assignment chars */
	            case '=':
	            case ':':
	                if (lpc > last) {
	                    /* Text since the last delimiter is the entry's name */
	                    free(name);
	                    name = calloc(1, 1 + lpc - last);
	                    memcpy(name, hostmap + last, lpc - last);
	                }
	                last = lpc + 1;
	                break;

	                /* Delimeter chars */
	                /* case ',': Potentially used to specify multiple ports */
	            case 0:
	            case ';':
	            case ' ':
	            case '\t':
	                if (name) {
	                    char *value = NULL;

	                    value = calloc(1, 1 + lpc - last);
	                    memcpy(value, hostmap + last, lpc - last);

	                    crm_debug("Adding alias '%s'='%s'", name, value);
	                    /* The table takes ownership of name and value */
	                    g_hash_table_replace(aliases, name, value);
	                    if (targets) {
	                        *targets = g_list_append(*targets, strdup(value));
	                    }
	                    value = NULL;
	                    name = NULL;
	                    added++;

	                } else if (lpc > last) {
	                    crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
	                }

	                last = lpc + 1;
	                break;

	            default:
	                break;
	        }

	        if (hostmap[lpc] == 0) {
	            break;
	        }
	    }

	    if (added == 0) {
	        crm_info("No host mappings detected in '%s'", hostmap);
	    }

	    /* A name without a following value was never stored in the table */
	    free(name);
	    return aliases;
	}

	/*!
	 * \internal
	 * \brief Extract host names from one line of text and append them to a list
	 *
	 * Entries are separated by whitespace, ',' or ';'. Lines containing "invalid"
	 * or "variable" are skipped entirely, and the words "on" and "off" are never
	 * added.
	 *
	 * \param[in]     line    NUL-terminated line to parse
	 * \param[in]     max     Length of line in bytes
	 * \param[in,out] output  List to which newly allocated entries are appended
	 */
	static void
	parse_host_line(const char *line, int max, GListPtr * output)
	{
	    int lpc = 0;
	    int last = 0;

	    if (max <= 0) {
	        return;
	    }

	    /* Check for any complaints about additional parameters that the device doesn't understand */
	    if (strstr(line, "invalid") || strstr(line, "variable")) {
	        crm_debug("Skipping: %s", line);
	        return;
	    }

	    crm_trace("Processing %d bytes: [%s]", max, line);
	    /* Skip initial whitespace */
	    for (lpc = 0; lpc <= max && isspace((unsigned char) line[lpc]); lpc++) {
	        last = lpc + 1;
	    }

	    /* Now the actual content.
	     *
	     * Start at the first non-space character rather than at 0: rescanning the
	     * leading whitespace would hit a separator with lpc < last, which yields
	     * an entry buffer of zero (or negative) size that sscanf() then writes
	     * past.
	     */
	    for (lpc = last; lpc <= max; lpc++) {
	        gboolean a_space = isspace((unsigned char) line[lpc]);

	        if (a_space && lpc < max && isspace((unsigned char) line[lpc + 1])) {
	            /* fast-forward to the end of the spaces */

	        } else if (a_space || line[lpc] == ',' || line[lpc] == ';' || line[lpc] == 0) {
	            int rc = 1;
	            char *entry = NULL;

	            if (lpc > last) {
	                /* The scan stops at the separator at the latest, so
	                 * lpc - last characters plus the terminator always fit */
	                entry = calloc(1, 1 + lpc - last);
	                if (entry != NULL) {
	                    rc = sscanf(line + last, "%[a-zA-Z0-9_-.]", entry);
	                }
	            }

	            if (entry == NULL) {
	                /* Skip */
	            } else if (rc != 1) {
	                crm_warn("Could not parse (%d %d): %s", last, lpc, line + last);
	            } else if (safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) {
	                crm_trace("Adding '%s'", entry);
	                /* The list takes ownership of entry */
	                *output = g_list_append(*output, entry);
	                entry = NULL;
	            }

	            free(entry);
	            last = lpc + 1;
	        }
	    }
	}

	/*!
	 * \internal
	 * \brief Parse multi-line text into a list of host names
	 *
	 * The text is split at newlines and each line longer than one character is
	 * handed to parse_host_line().
	 *
	 * \param[in] hosts  Text to parse (may be NULL)
	 *
	 * \return List of newly allocated host names (NULL if none were found)
	 */
	static GListPtr
	parse_host_list(const char *hosts)
	{
	    int lpc = 0;
	    int max = 0;
	    int last = 0;
	    GListPtr output = NULL;

	    if (hosts == NULL) {
	        return output;
	    }

	    max = strlen(hosts);
	    for (lpc = 0; lpc <= max; lpc++) {
	        if (hosts[lpc] == '\n' || hosts[lpc] == 0) {
	            int len = lpc - last;

	            if(len > 1) {
	                char *line = strndup(hosts + last, len);

	                if (line != NULL) {
	                    line[len] = 0; /* Because it might be '\n' */
	                    parse_host_line(line, len, &output);
	                    free(line);
	                }
	            }

	            last = lpc + 1;
	        }
	    }

	    crm_trace("Parsed %d entries from '%s'", g_list_length(output), hosts);
	    return output;
	}

	/* Agent name -> metadata text, filled in by get_agent_metadata() */
	GHashTable *metadata_cache = NULL;

	/* Destroy the metadata cache, if any, and reset it to NULL */
	void
	free_metadata_cache()
	{
	    if (metadata_cache == NULL) {
	        return;
	    }
	    g_hash_table_destroy(metadata_cache);
	    metadata_cache = NULL;
	}

	/* Create the metadata cache on first use */
	static void
	init_metadata_cache()
	{
	    if (metadata_cache != NULL) {
	        return;
	    }
	    metadata_cache = crm_str_table_new();
	}

	/*!
	 * \internal
	 * \brief Get the metadata of a fence agent as XML, using the metadata cache
	 *
	 * \param[in] agent  Name of the fence agent
	 *
	 * \return Newly parsed XML, or NULL for the watchdog agent or if the metadata
	 *         could not be retrieved
	 */
	static xmlNode *
	get_agent_metadata(const char *agent)
	{
	    xmlNode *xml = NULL;
	    char *buffer = NULL;

	    init_metadata_cache();
	    buffer = g_hash_table_lookup(metadata_cache, agent);
	    if(safe_str_eq(agent, STONITH_WATCHDOG_AGENT)) {
	        return NULL;

	    } else if(buffer == NULL) {
	        /* Not cached yet: ask the agent through a temporary API object */
	        stonith_t *st = stonith_api_new();
	        int rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);

	        stonith_api_delete(st);
	        if (rc || !buffer) {
	            crm_err("Could not retrieve metadata for fencing agent %s", agent);
	            return NULL;
	        }
	        /* The cache takes ownership of buffer */
	        g_hash_table_replace(metadata_cache, strdup(agent), buffer);
	    }

	    xml = string2xml(buffer);

	    return xml;
	}

	/*!
	 * \internal
	 * \brief Check whether agent metadata declares a "nodeid" parameter
	 *
	 * \param[in] xml  Agent metadata (may be NULL)
	 *
	 * \return TRUE if the metadata contains a parameter named "nodeid"; FALSE if
	 *         it does not, if xml is NULL, or when running stand-alone
	 */
	static gboolean
	is_nodeid_required(xmlNode * xml)
	{
	    xmlXPathObjectPtr matches = NULL;
	    gboolean required = FALSE;

	    if (stand_alone || !xml) {
	        return FALSE;
	    }

	    matches = xpath_search(xml, "//parameter[@name='nodeid']");
	    if (numXpathResults(matches) > 0) {
	        required = TRUE;
	    }
	    freeXpathObject(matches);

	    return required;
	}

	/* Size in bytes (including the terminator) of an action list buffer */
	#define MAX_ACTION_LEN 256

	/*!
	 * \internal
	 * \brief Append an action name to a space-separated action list
	 *
	 * \param[in,out] actions  Existing list created by this function, or NULL to
	 *                         start a new one
	 * \param[in]     action   Action name to append (NULL leaves the list as is)
	 *
	 * \return The list buffer (newly allocated if actions was NULL), or NULL if
	 *         allocation failed
	 * \note The buffer holds at most MAX_ACTION_LEN - 1 characters; anything that
	 *       does not fit is silently truncated.
	 */
	static char *
	add_action(char *actions, const char *action)
	{
	    size_t used = 0;

	    if (action == NULL) {
	        return actions;
	    }

	    if (actions == NULL) {
	        actions = calloc(1, MAX_ACTION_LEN);
	        if (actions == NULL) {
	            return NULL;
	        }
	    } else {
	        used = strlen(actions);
	    }

	    /* Separate from the previous entry, if there is room for a separator */
	    if (used > 0 && used < MAX_ACTION_LEN - 1) {
	        actions[used++] = ' ';
	        actions[used] = '\0';
	    }

	    /* snprintf() truncates and always terminates within the remaining space */
	    if (used < MAX_ACTION_LEN - 1) {
	        snprintf(actions + used, MAX_ACTION_LEN - used, "%s", action);
	    }

	    return actions;
	}

	/*!
	 * \internal
	 * \brief Set device flags and properties from the actions in its metadata
	 *
	 * Records whether the agent supports "list", "status" and "reboot", whether
	 * "on" is marked for automatic unfencing, and which actions are marked
	 * on_target.
	 *
	 * \param[in,out] device  Device whose agent_metadata is examined
	 */
	static void
	read_action_metadata(stonith_device_t *device)
	{
	    xmlXPathObjectPtr xpath = NULL;
	    int max = 0;
	    int lpc = 0;

	    if (device->agent_metadata == NULL) {
	        return;
	    }

	    xpath = xpath_search(device->agent_metadata, "//action");
	    max = numXpathResults(xpath);

	    if (max <= 0) {
	        freeXpathObject(xpath);
	        return;
	    }

	    for (lpc = 0; lpc < max; lpc++) {
	        const char *on_target = NULL;
	        const char *action = NULL;
	        xmlNode *match = getXpathResult(xpath, lpc);

	        CRM_LOG_ASSERT(match != NULL);
	        if(match == NULL) { continue; };

	        on_target = crm_element_value(match, "on_target");
	        action = crm_element_value(match, "name");

	        if(safe_str_eq(action, "list")) {
	            set_bit(device->flags, st_device_supports_list);
	        } else if(safe_str_eq(action, "status")) {
	            set_bit(device->flags, st_device_supports_status);
	        } else if(safe_str_eq(action, "reboot")) {
	            set_bit(device->flags, st_device_supports_reboot);
	        } else if (safe_str_eq(action, "on")) {
	            /* "automatic" means the cluster will unfence node when it joins */
	            const char *automatic = crm_element_value(match, "automatic");

	            /* "required" is a deprecated synonym for "automatic" */
	            const char *required = crm_element_value(match, "required");

	            if (crm_is_true(automatic) || crm_is_true(required)) {
	                device->automatic_unfencing = TRUE;
	            }
	        }

	        if (action && crm_is_true(on_target)) {
	            device->on_target_actions = add_action(device->on_target_actions, action);
	        }
	    }

	    freeXpathObject(xpath);
	}

	/*!
	 * \internal
	 * \brief Set a pcmk_*_action parameter if not already set
	 *
	 * \param[in,out] params  Device parameters
	 * \param[in]     action  Name of action
	 * \param[in]     value   Value to use if action is not already set
	 */
	static void
	map_action(GHashTable *params, const char *action, const char *value)
	{
	    char *key = crm_strdup_printf("pcmk_%s_action", action);

	    if (g_hash_table_lookup(params, key)) {
	        /* An explicit pcmk_*_action setting takes precedence */
	        crm_warn("Ignoring %s='%s', see %s instead",
	                 STONITH_ATTR_ACTION_OP, value, key);
	        free(key);
	    } else {
	        crm_warn("Mapping %s='%s' to %s='%s'",
	                 STONITH_ATTR_ACTION_OP, value, key, value);
	        /* The table takes ownership of key and of the copied value */
	        g_hash_table_insert(params, key, strdup(value));
	    }
	}

	/*!
	 * \internal
	 * \brief Create device parameter table from XML
	 *
	 * \param[in] name  Device name (used for logging only)
	 * \param[in] dev   XML element holding the device's parameters
	 *
	 * \return Newly created parameter table, with any STONITH_ATTR_ACTION_OP
	 *         entry removed (and possibly mapped to pcmk_*_action entries)
	 */
	static GHashTable *
	xml2device_params(const char *name, xmlNode *dev)
	{
	    GHashTable *params = xml2list(dev);
	    const char *value;

	    /* Action should never be specified in the device configuration,
	     * but we support it for users who are familiar with other software
	     * that worked that way.
	     */
	    value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
	    if (value != NULL) {
	        crm_warn("%s has '%s' parameter, which should never be specified in configuration",
	                 name, STONITH_ATTR_ACTION_OP);

	        if (*value == '\0') {
	            crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);

	        } else if (strcmp(value, "reboot") == 0) {
	            crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
	                     STONITH_ATTR_ACTION_OP);

	        } else if (strcmp(value, "off") == 0) {
	            map_action(params, "reboot", value);

	        } else {
	            map_action(params, "off", value);
	            map_action(params, "reboot", value);
	        }

	        /* value points into the table, so remove the entry only after use */
	        g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
	    }

	    return params;
	}

	/*!
	 * \internal
	 * \brief Create a fence device object from a registration request
	 *
	 * \param[in] msg  XML containing an F_STONITH_DEVICE element
	 *
	 * \return Newly allocated device (not yet added to device_list)
	 */
	static stonith_device_t *
	build_device_from_xml(xmlNode * msg)
	{
	const char *value = NULL;
	xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
	stonith_device_t *device = NULL;

	device = calloc(1, sizeof(stonith_device_t));
	device->id = crm_element_value_copy(dev, XML_ATTR_ID);
	device->agent = crm_element_value_copy(dev, "agent");
	device->namespace = crm_element_value_copy(dev, "namespace");
	device->params = xml2device_params(device->id, dev);

	/* Static target list, if configured */
	value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST);
	if (value) {
	device->targets = parse_host_list(value);
	}

	/* Values mapped in the host map are appended to the target list as well */
	value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP);
	device->aliases = build_port_aliases(value, &(device->targets));

	device->agent_metadata = get_agent_metadata(device->agent);
	read_action_metadata(device);

	/* Only consult the metadata when no "nodeid" parameter was configured */
	value = g_hash_table_lookup(device->params, "nodeid");
	if (!value) {
	device->include_nodeid = is_nodeid_required(device->agent_metadata);
	}

	value = crm_element_value(dev, "rsc_provides");
	if (safe_str_eq(value, "unfencing")) {
	device->automatic_unfencing = TRUE;
	}

	if (is_action_required("on", device)) {
	crm_info("The fencing device '%s' requires unfencing", device->id);
	}

	if (device->on_target_actions) {
	crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node",
	device->id, device->on_target_actions);
	}

	/* Trigger used to run the device's pending operations from the main loop */
	device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
	/* TODO: Hook up priority */

	return device;
	}

	/*!
	 * \internal
	 * \brief Determine how a device's possible targets should be checked
	 *
	 * \param[in] dev  Fence device
	 *
	 * \return The configured STONITH_ATTR_HOSTCHECK value if set; otherwise
	 *         "static-list" if a host list or host map is configured,
	 *         "dynamic-list" if the agent supports list, "status" if it supports
	 *         status, and "none" as a last resort
	 */
	static const char *
	target_list_type(stonith_device_t * dev)
	{
	    const char *configured = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK);

	    if (configured != NULL) {
	        return configured;
	    }

	    if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) {
	        return "static-list";
	    }
	    if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) {
	        return "static-list";
	    }
	    if (is_set(dev->flags, st_device_supports_list)) {
	        return "dynamic-list";
	    }
	    if (is_set(dev->flags, st_device_supports_status)) {
	        return "status";
	    }
	    return "none";
	}

	/*!
	 * \internal
	 * \brief Create and schedule a command that originates from the fencer itself
	 *
	 * \param[in] origin              Name recorded as the command's origin
	 * \param[in] device              Device that should run the command
	 * \param[in] action              Action to perform
	 * \param[in] victim              Target of the action (may be NULL)
	 * \param[in] timeout             Timeout in seconds (0 means 60)
	 * \param[in] internal_user_data  Stored in the command for use by done_cb
	 * \param[in] done_cb             Called on completion; its user_data argument
	 *                                is the command, not internal_user_data
	 */
	void
	schedule_internal_command(const char *origin,
	                          stonith_device_t * device,
	                          const char *action,
	                          const char *victim,
	                          int timeout,
	                          void *internal_user_data,
	                          void (*done_cb) (GPid pid, int rc, const char *output,
	                                           gpointer user_data))
	{
	    async_command_t *cmd = NULL;

	    cmd = calloc(1, sizeof(async_command_t));

	    cmd->id = -1;
	    cmd->default_timeout = timeout ? timeout : 60;
	    cmd->timeout = cmd->default_timeout;
	    cmd->action = strdup(action);
	    cmd->victim = victim ? strdup(victim) : NULL;
	    cmd->device = strdup(device->id);
	    cmd->origin = strdup(origin);
	    cmd->client = strdup(crm_system_name);
	    cmd->client_name = strdup(crm_system_name);

	    cmd->internal_user_data = internal_user_data;
	    cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */

	    schedule_stonith_command(cmd, device);
	}

	/*!
	 * \internal
	 * \brief Check whether a string is an element of a list of strings
	 *
	 * \param[in] list  List whose data members are strings
	 * \param[in] item  String to look for
	 *
	 * \return TRUE if safe_str_eq() matches item against any element, else FALSE
	 */
	gboolean
	string_in_list(GListPtr list, const char *item)
	{
	    int lpc = 0;
	    GListPtr iter = NULL;

	    /* Walk the list once instead of re-indexing it from the head on every
	     * iteration */
	    for (iter = list; iter != NULL; iter = iter->next, lpc++) {
	        const char *value = iter->data;

	        if (safe_str_eq(item, value)) {
	            return TRUE;
	        } else {
	            crm_trace("%d: '%s' != '%s'", lpc, item, value);
	        }
	    }
	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Completion callback for a "status" query made during a device search
	 *
	 * Frees the command, re-triggers the device, and records whether the device
	 * knows the searched-for host.
	 *
	 * \param[in] pid        Ignored
	 * \param[in] rc         Agent result: 0 (active) or 2 (inactive) mean the
	 *                       host is known, 1 means it is unknown
	 * \param[in] output     Ignored
	 * \param[in] user_data  The async_command_t that ran the query
	 */
	static void
	status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
	{
	    async_command_t *cmd = user_data;
	    struct device_search_s *search = cmd->internal_user_data;
	    stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
	    gboolean can = FALSE;

	    /* search and dev were read from cmd above, so cmd can go now */
	    free_async_command(cmd);

	    if (!dev) {
	        /* The device disappeared while the query was running */
	        search_devices_record_result(search, NULL, FALSE);
	        return;
	    }

	    mainloop_set_trigger(dev->work);

	    if (rc == 1 /* unknown */ ) {
	        crm_trace("Host %s is not known by %s", search->host, dev->id);

	    } else if (rc == 0 /* active */  || rc == 2 /* inactive */ ) {
	        crm_trace("Host %s is known by %s", search->host, dev->id);
	        can = TRUE;

	    } else {
	        crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host,
	                   rc);
	    }
	    search_devices_record_result(search, dev->id, can);
	}

	/*!
	 * \internal
	 * \brief Completion callback for a "list" query made during a device search
	 *
	 * \param[in] pid        Ignored
	 * \param[in] rc         Result of the list action (0 on success)
	 * \param[in] output     Output of the list action, parsed as the target list
	 * \param[in] user_data  The async_command_t that ran the query
	 */
	static void
	dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
	{
	async_command_t *cmd = user_data;
	struct device_search_s *search = cmd->internal_user_data;
	stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
	gboolean can_fence = FALSE;

	/* search and dev were read from cmd above, so cmd can go now */
	free_async_command(cmd);

	/* Host/alias must be in the list output to be eligible to be fenced
	*
	* Will cause problems if down'd nodes aren't listed or (for virtual nodes)
	* if the guest is still listed despite being moved to another machine
	*/
	if (!dev) {
	search_devices_record_result(search, NULL, FALSE);
	return;
	}

	mainloop_set_trigger(dev->work);

	/* If we successfully got the targets earlier, don't disable. */
	if (rc != 0 && !dev->targets) {
	crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output);
	/* Fall back to status */
	g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status"));

	g_list_free_full(dev->targets, free);
	dev->targets = NULL;
	} else if (!rc) {
	/* Success: replace the cached target list and remember when it was read */
	crm_info("Refreshing port list for %s", dev->id);
	g_list_free_full(dev->targets, free);
	dev->targets = parse_host_list(output);
	dev->targets_age = time(NULL);
	}

	if (dev->targets) {
	/* Targets are matched by alias when one is configured for the host */
	const char *alias = g_hash_table_lookup(dev->aliases, search->host);

	if (!alias) {
	alias = search->host;
	}
	if (string_in_list(dev->targets, alias)) {
	can_fence = TRUE;
	}
	}
	search_devices_record_result(search, dev->id, can_fence);
	}

	/*!
	 * \internal
	 * \brief Returns true if any key in first is not in second or second has a different value for key
	 *
	 * \param[in] first   Parameter table whose keys are checked
	 * \param[in] second  Parameter table to compare against
	 *
	 * \return 1 if a difference was found, 0 otherwise
	 * \note Keys starting with "CRM_meta" and the key "crm_feature_set" are
	 *       not compared.
	 */
	static int
	device_params_diff(GHashTable *first, GHashTable *second) {
	    char *key = NULL;
	    char *value = NULL;
	    GHashTableIter gIter;

	    g_hash_table_iter_init(&gIter, first);
	    while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {

	        if(strstr(key, "CRM_meta") == key) {
	            /* Prefix match: strstr() returned the start of key */
	            continue;
	        } else if(strcmp(key, "crm_feature_set") == 0) {
	            continue;
	        } else {
	            char *other_value = g_hash_table_lookup(second, key);

	            if (!other_value || safe_str_neq(other_value, value)) {
	                crm_trace("Different value for %s: %s != %s", key, other_value, value);
	                return 1;
	            }
	        }
	    }

	    return 0;
	}

	/*!
	 * \internal
	 * \brief Checks to see if an identical device already exists in the device_list
	 *
	 * \param[in] device  Device to compare against the registered one of same ID
	 *
	 * \return The registered device if it has the same ID, agent and parameters
	 *         as device, otherwise NULL
	 */
	static stonith_device_t *
	device_has_duplicate(stonith_device_t * device)
	{
	    stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);

	    if (!dup) {
	        crm_trace("No match for %s", device->id);
	        return NULL;

	    } else if (safe_str_neq(dup->agent, device->agent)) {
	        crm_trace("Different agent: %s != %s", dup->agent, device->agent);
	        return NULL;
	    }

	    /* Use calculate_operation_digest() here? */
	    /* Compare in both directions so extra keys on either side are noticed */
	    if (device_params_diff(device->params, dup->params) ||
	        device_params_diff(dup->params, device->params)) {
	        return NULL;
	    }

	    crm_trace("Match");
	    return dup;
	}

	/*!
	 * \internal
	 * \brief Register a fence device described by XML
	 *
	 * \param[in]  msg       XML containing the device definition
	 * \param[out] desc      If not NULL, set to the ID of the registered device
	 *                       (the string is owned by the device)
	 * \param[in]  from_cib  TRUE if the registration comes from the CIB, FALSE
	 *                       if it comes from an API client
	 *
	 * \return pcmk_ok
	 */
	int
	stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
	{
	stonith_device_t *dup = NULL;
	stonith_device_t *device = build_device_from_xml(msg);

	dup = device_has_duplicate(device);
	if (dup) {
	/* An identical device is already registered: keep it, drop the new one */
	crm_debug("Device '%s' already existed in device list (%d active devices)", device->id,
	g_hash_table_size(device_list));
	free_device(device);
	device = dup;

	} else {
	stonith_device_t *old = g_hash_table_lookup(device_list, device->id);

	if (from_cib && old && old->api_registered) {
	/* If the cib is writing over an entry that is shared with a stonith client,
	* copy any pending ops that currently exist on the old entry to the new one.
	* Otherwise the pending ops will be reported as failures
	*/
	crm_info("Overwriting an existing entry for %s from the cib", device->id);
	device->pending_ops = old->pending_ops;
	device->api_registered = TRUE;
	old->pending_ops = NULL;
	if (device->pending_ops) {
	mainloop_set_trigger(device->work);
	}
	}
	/* The key is device->id itself; the table was created without a key destructor */
	g_hash_table_replace(device_list, device->id, device);

	crm_notice("Added '%s' to the device list (%d active devices)", device->id,
	g_hash_table_size(device_list));
	}
	if (desc) {
	*desc = device->id;
	}

	/* Remember which source(s) registered the device; see stonith_device_remove() */
	if (from_cib) {
	device->cib_registered = TRUE;
	} else {
	device->api_registered = TRUE;
	}

	return pcmk_ok;
	}

	/*!
	 * \internal
	 * \brief Drop one registration of a fence device
	 *
	 * The device is removed from device_list only once it is registered neither
	 * by the CIB nor by an API client.
	 *
	 * \param[in] id        ID of the device
	 * \param[in] from_cib  TRUE to drop the CIB registration, FALSE to drop the
	 *                      API registration
	 *
	 * \return pcmk_ok (also when the device is not found)
	 */
	int
	stonith_device_remove(const char *id, gboolean from_cib)
	{
	stonith_device_t *device = g_hash_table_lookup(device_list, id);

	if (!device) {
	crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list));
	return pcmk_ok;
	}

	if (from_cib) {
	device->cib_registered = FALSE;
	} else {
	device->verified = FALSE;
	device->api_registered = FALSE;
	}

	if (!device->cib_registered && !device->api_registered) {
	/* Removing the entry frees the device via the table's value destructor */
	g_hash_table_remove(device_list, id);
	crm_info("Removed '%s' from the device list (%d active devices)",
	id, g_hash_table_size(device_list));
	} else {
	crm_trace("Not removing '%s' from the device list (%d active devices) "
	"- still %s%s_registered", id, g_hash_table_size(device_list),
	device->cib_registered?"cib":"", device->api_registered?"api":"");
	}
	return pcmk_ok;
	}

	/*!
	 * \internal
	 * \brief Count the stonith levels that are registered for a node
	 *
	 * \param[in] tp  Topology table entry of the node
	 *
	 * \return How many of the entry's levels are non-NULL
	 * \note Only log messages use this function.
	 */
	static int
	count_active_levels(stonith_topology_t * tp)
	{
	    int active = 0;
	    int level = 0;

	    while (level < ST_LEVEL_MAX) {
	        if (tp->levels[level] != NULL) {
	            active++;
	        }
	        level++;
	    }
	    return active;
	}

	/*!
	 * \internal
	 * \brief Free a topology entry, including the device lists of all levels
	 *
	 * \param[in] data  The stonith_topology_t to free
	 */
	static void
	free_topology_entry(gpointer data)
	{
	    stonith_topology_t *entry = data;
	    int level = 0;

	    while (level < ST_LEVEL_MAX) {
	        if (entry->levels[level] != NULL) {
	            g_list_free_full(entry->levels[level], free);
	        }
	        level++;
	    }

	    free(entry->target);
	    free(entry->target_value);
	    free(entry->target_pattern);
	    free(entry->target_attribute);
	    free(entry);
	}

	/* Destroy the global topology table, if any, and reset it to NULL */
	void
	free_topology_list()
	{
	    if (topology == NULL) {
	        return;
	    }
	    g_hash_table_destroy(topology);
	    topology = NULL;
	}

	/* Create the global topology table unless it already exists. Values are
	 * freed with free_topology_entry(); no key destructor is installed. */
	void
	init_topology_list()
	{
	    if (topology != NULL) {
	        return;
	    }
	    topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
	                                     free_topology_entry);
	}

	/*!
	 * \internal
	 * \brief Build the key that identifies the target of a fencing level
	 *
	 * \param[in] level  Fencing level XML
	 * \param[in] mode   Kind of target as returned by stonith_level_kind(), or -1
	 *                   to determine it here
	 *
	 * \return Newly allocated string: the target name (mode 0), the target
	 *         pattern (mode 1), "attribute=value" (mode 2 with both set), or
	 *         "Unknown-<mode>-<id>" otherwise
	 */
	char *
	stonith_level_key(xmlNode *level, int mode)
	{
	    if(mode == -1) {
	        mode = stonith_level_kind(level);
	    }

	    switch(mode) {
	        case 0:
	            return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
	        case 1:
	            return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
	        case 2:
	            {
	                const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
	                const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE);

	                if(name && value) {
	                    return crm_strdup_printf("%s=%s", name, value);
	                }
	            }
	            /* fall through: attribute name or value is missing */
	        default:
	            return crm_strdup_printf("Unknown-%d-%s", mode, ID(level));
	    }
	}

	/*!
	 * \internal
	 * \brief Classify how a fencing level identifies its target
	 *
	 * \param[in] level  Fencing level XML
	 *
	 * \return 0 if the level has a target name, 1 if it has no name (the pattern
	 *         is looked up next), 2 if it has neither name nor pattern but both a
	 *         target attribute and a target value, 3 if name, pattern and at
	 *         least one of attribute/value are missing
	 * \note When stand_alone is set, attributes are not considered, so the result
	 *       is 1 even if no pattern is present.
	 */
	int stonith_level_kind(xmlNode * level)
	{
	int mode = 0;
	const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET);

	/* No explicit target name: try the pattern */
	if(target == NULL) {
	mode++;
	target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN);
	}

	/* Neither name nor pattern: try attribute/value */
	if(stand_alone == FALSE && target == NULL) {

	mode++;

	/* Bump once more if either half of the attribute/value pair is missing */
	if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) {
	mode++;

	} else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) {
	mode++;
	}
	}

	return mode;
	}

	/*!
	 * \internal
	 * \brief Split a comma-separated device list into a key/value list
	 *
	 * Each comma-delimited piece (including empty ones) is appended as a value
	 * with a NULL key, in the order it appears.
	 *
	 * \param[in] devices  Comma-separated device IDs (may be NULL)
	 *
	 * \return List built with stonith_key_value_add(), or NULL if \p devices
	 *         is NULL
	 */
	static stonith_key_value_t *
	parse_device_list(const char *devices)
	{
	    int lpc = 0;
	    int max = 0;
	    int last = 0;
	    stonith_key_value_t *output = NULL;

	    if (devices == NULL) {
	        return output;
	    }

	    max = strlen(devices);
	    for (lpc = 0; lpc <= max; lpc++) {
	        /* A comma or the terminating NUL ends the current piece */
	        if (devices[lpc] == ',' || devices[lpc] == 0) {
	            char *line = strndup(devices + last, lpc - last);

	            /* The temporary copy is released right away, so the list is
	             * presumably keeping its own copy -- confirm against
	             * stonith_key_value_add()
	             */
	            output = stonith_key_value_add(output, NULL, line);
	            free(line);

	            last = lpc + 1;
	        }
	    }

	    return output;
	}

	/*!
	 * \internal
	 * \brief Register a STONITH level for a target
	 *
	 * Given an XML request specifying the target name, level index, and device IDs
	 * for the level, this will create an entry for the target in the global topology
	 * table if one does not already exist, then append the specified device IDs to
	 * the entry's device list for the specified level.
	 *
	 * \param[in]  msg   XML request for STONITH level registration
	 * \param[out] desc  If not NULL, will be set to a newly allocated string
	 *                   representation ("TARGET[LEVEL]"); it is set even when
	 *                   the request is then rejected
	 *
	 * \return pcmk_ok on success, -EINVAL if the XML has no level tag, an
	 *         unrecognised target kind or an out-of-range level index,
	 *         -ENOMEM if a new topology entry cannot be allocated
	 */
	int
	stonith_level_register(xmlNode * msg, char **desc)
	{
	    int id = 0;
	    xmlNode *level;
	    int mode;
	    char *target;

	    stonith_topology_t *tp;
	    stonith_key_value_t *dIter = NULL;
	    stonith_key_value_t *devices = NULL;

	    /* Allow the XML here to point to the level tag directly, or wrapped in
	     * another tag. If directly, don't search by xpath, because it might give
	     * multiple hits (e.g. if the XML is the CIB).
	     */
	    if (safe_str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL)) {
	        level = msg;
	    } else {
	        level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
	    }
	    CRM_CHECK(level != NULL, return -EINVAL);

	    mode = stonith_level_kind(level);
	    target = stonith_level_key(level, mode);
	    crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);

	    if (desc) {
	        *desc = crm_strdup_printf("%s[%d]", target, id);
	    }

	    /* Sanity-check arguments */
	    if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
	        crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
	        free(target);
	        crm_log_xml_err(level, "Bad topology");
	        return -EINVAL;
	    }

	    /* Find or create topology table entry */
	    tp = g_hash_table_lookup(topology, target);
	    if (tp == NULL) {
	        tp = calloc(1, sizeof(stonith_topology_t));
	        if (tp == NULL) {
	            /* Nothing has been added to the table yet, so only the key
	             * string needs to be released
	             */
	            crm_err("Could not allocate topology entry for %s", target);
	            free(target);
	            return -ENOMEM;
	        }
	        tp->kind = mode;
	        tp->target = target;    /* the entry now owns the key string */
	        tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
	        tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
	        tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);

	        g_hash_table_replace(topology, tp->target, tp);
	        crm_trace("Added %s (%d) to the topology (%d active entries)",
	                  target, mode, g_hash_table_size(topology));
	    } else {
	        /* Existing entry already has its own key string */
	        free(target);
	    }

	    if (tp->levels[id] != NULL) {
	        crm_info("Adding to the existing %s[%d] topology entry",
	                 tp->target, id);
	    }

	    devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
	    for (dIter = devices; dIter; dIter = dIter->next) {
	        const char *device = dIter->value;

	        crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
	        tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
	    }
	    stonith_key_value_freeall(devices, 1, 1);

	    crm_info("Target %s has %d active fencing levels",
	             tp->target, count_active_levels(tp));
	    return pcmk_ok;
	}

	/*!
	 * \brief Remove a fencing level, or a whole topology entry, for a target
	 *
	 * A level index of 0 removes the target's entire topology entry; a positive
	 * index clears only that level's device list.
	 *
	 * \param[in]  msg   XML request containing one fencing level tag
	 * \param[out] desc  If not NULL, will be set to a newly allocated string
	 *                   representation ("TARGET[LEVEL]")
	 *
	 * \return pcmk_ok on success (including when nothing matched), -EINVAL if
	 *         the XML has no level tag or the level index is out of range
	 */
	int
	stonith_level_remove(xmlNode * msg, char **desc)
	{
	    int id = 0;
	    stonith_topology_t *tp;
	    char *target;

	    /* Unlike additions, removal requests should always have one level tag */
	    xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);

	    CRM_CHECK(level != NULL, return -EINVAL);

	    target = stonith_level_key(level, -1);
	    crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
	    if (desc) {
	        *desc = crm_strdup_printf("%s[%d]", target, id);
	    }

	    /* Sanity-check arguments: a negative index is as invalid as one that is
	     * too large, and must not be reported as a successful removal
	     */
	    if ((id < 0) || (id >= ST_LEVEL_MAX)) {
	        free(target);
	        return -EINVAL;
	    }

	    tp = g_hash_table_lookup(topology, target);
	    if (tp == NULL) {
	        crm_info("Topology for %s not found (%d active entries)",
	                 target, g_hash_table_size(topology));

	    } else if (id == 0 && g_hash_table_remove(topology, target)) {
	        /* tp has been freed by the table's value destructor at this point */
	        crm_info("Removed all %s related entries from the topology (%d active entries)",
	                 target, g_hash_table_size(topology));

	    } else if (id > 0 && tp->levels[id] != NULL) {
	        g_list_free_full(tp->levels[id], free);
	        tp->levels[id] = NULL;

	        crm_info("Removed level '%d' from topology for %s (%d active levels remaining)",
	                 id, target, count_active_levels(tp));
	    }

	    free(target);
	    return pcmk_ok;
	}

	/*!
	 * \internal
	 * \brief Schedule the action described by a request on the device it names
	 *
	 * \param[in] msg     XML request; the device ID is read from the element
	 *                    found via the F_STONITH_DEVICE xpath
	 * \param[in] output  Not used by this function
	 *
	 * \return -EINPROGRESS if a command was scheduled, -ENODEV if the device is
	 *         unknown or not API-registered, -EPROTO if no command could be
	 *         created from \p msg
	 */
	static int
	stonith_device_action(xmlNode * msg, char **output)
	{
	    xmlNode *dev_xml = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
	    const char *id = crm_element_value(dev_xml, F_STONITH_DEVICE);
	    stonith_device_t *target_device = NULL;
	    async_command_t *command = NULL;

	    if (id) {
	        crm_trace("Looking for '%s'", id);
	        target_device = g_hash_table_lookup(device_list, id);
	    }

	    if (target_device == NULL) {
	        crm_info("Device %s not found", id ? id : "<none>");
	        return -ENODEV;
	    }

	    if (target_device->api_registered == FALSE) {
	        return -ENODEV;
	    }

	    command = create_async_command(msg);
	    if (command == NULL) {
	        return -EPROTO;
	    }

	    schedule_stonith_command(command, target_device);
	    return -EINPROGRESS;
	}

	/*!
	 * \internal
	 * \brief Record one device's answer to a capability search
	 *
	 * Once as many replies have been received as were needed, the search's
	 * callback is invoked with the list of capable device IDs and the search
	 * object is freed; \p search must not be used after that.
	 *
	 * \param[in,out] search     Search state to update
	 * \param[in]     device     ID of the device that answered (may be NULL)
	 * \param[in]     can_fence  Whether the device can perform the action
	 */
	static void
	search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
	{
	    search->replies_received++;

	    if (can_fence && device) {
	        /* The list holds its own copies of the IDs */
	        search->capable = g_list_append(search->capable, strdup(device));
	    }

	    if (search->replies_needed == search->replies_received) {

	        crm_debug("Finished Search. %d devices can perform action (%s) on node %s",
	                  g_list_length(search->capable),
	                  search->action ? search->action : "<unknown>",
	                  search->host ? search->host : "<anyone>");

	        /* The capable list is handed to the callback; it is not freed here */
	        search->callback(search->capable, search->user_data);
	        free(search->host);
	        free(search->action);
	        free(search);
	    }
	}

	/*!
	 * \internal
	 * \brief Check whether the local host is allowed to execute a fencing action
	 *
	 * If the action is listed in the device's on-target actions, only the target
	 * itself may run it. Otherwise the local host may run it unless it is the
	 * target and self-fencing is not allowed.
	 *
	 * \param[in] device         Fence device to check
	 * \param[in] action         Fence action to check
	 * \param[in] target         Hostname of fence target
	 * \param[in] allow_suicide  Whether self-fencing is allowed for this operation
	 *
	 * \return TRUE if local host is allowed to execute action, FALSE otherwise
	 */
	static gboolean
	localhost_is_eligible(const stonith_device_t * device, const char *action,
	                      const char *target, gboolean allow_suicide)
	{
	    gboolean localhost_is_target = safe_str_eq(target, stonith_our_uname);

	    /* NOTE(review): strstr() is a substring match, so an action name that is
	     * contained in another listed name would also match -- confirm that the
	     * values stored in on_target_actions make this harmless
	     */
	    if (device && action && device->on_target_actions
	        && strstr(device->on_target_actions, action)) {
	        if (!localhost_is_target) {
	            crm_trace("%s operation with %s can only be executed for localhost not %s",
	                      action, device->id, target);
	            return FALSE;
	        }

	    } else if (localhost_is_target && !allow_suicide) {
	        crm_trace("%s operation does not support self-fencing", action);
	        return FALSE;
	    }
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Determine whether one device can fence a search's target host
	 *
	 * The answer is either recorded immediately with
	 * search_devices_record_result(), or, for the "dynamic-list" (when the cached
	 * target list is missing or older than 60 seconds) and "status" check types,
	 * left to the callback of an internal command scheduled on the device.
	 *
	 * \param[in]     dev     Device to check (NULL is recorded as "cannot fence")
	 * \param[in,out] search  Search the result belongs to
	 */
	static void
	can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search)
	{
	    gboolean can = FALSE;
	    const char *check_type = NULL;
	    const char *host = search->host;
	    const char *alias = NULL;

	    CRM_LOG_ASSERT(dev != NULL);

	    if (dev == NULL) {
	        goto search_report_results;
	    } else if (host == NULL) {
	        /* A search without a target matches every device */
	        can = TRUE;
	        goto search_report_results;
	    }

	    /* Short-circuit query if this host is not allowed to perform the action */
	    if (safe_str_eq(search->action, "reboot")) {
	        /* A "reboot" might get remapped to "off" then "on", so short-circuit
	         * only if all three are disallowed. If only one or two are disallowed,
	         * we'll report that with the results. We never allow suicide for
	         * remapped "on" operations because the host is off at that point.
	         */
	        if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide)
	            && !localhost_is_eligible(dev, "off", host, search->allow_suicide)
	            && !localhost_is_eligible(dev, "on", host, FALSE)) {
	            goto search_report_results;
	        }
	    } else if (!localhost_is_eligible(dev, search->action, host,
	                                      search->allow_suicide)) {
	        goto search_report_results;
	    }

	    /* Use the device's own name for the host, if it has one */
	    alias = g_hash_table_lookup(dev->aliases, host);
	    if (alias == NULL) {
	        alias = host;
	    }

	    check_type = target_list_type(dev);

	    if (safe_str_eq(check_type, "none")) {
	        can = TRUE;

	    } else if (safe_str_eq(check_type, "static-list")) {

	        /* Presence in the hostmap is sufficient
	         * Only use if all hosts on which the device can be active can always fence all listed hosts
	         */

	        if (string_in_list(dev->targets, host)) {
	            can = TRUE;
	        } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)
	                   && g_hash_table_lookup(dev->aliases, host)) {
	            can = TRUE;
	        }

	    } else if (safe_str_eq(check_type, "dynamic-list")) {
	        time_t now = time(NULL);

	        /* Refresh the cached target list if there is none or it is stale */
	        if (dev->targets == NULL || dev->targets_age + 60 < now) {
	            crm_trace("Running %s command to see if %s can fence %s (%s)",
	                      check_type, dev->id, search->host, search->action);

	            schedule_internal_command(__FUNCTION__, dev, "list", NULL,
	                                      search->per_device_timeout, search, dynamic_list_search_cb);

	            /* we'll respond to this search request async in the cb */
	            return;
	        }

	        if (string_in_list(dev->targets, alias)) {
	            can = TRUE;
	        }

	    } else if (safe_str_eq(check_type, "status")) {
	        crm_trace("Running %s command to see if %s can fence %s (%s)",
	                  check_type, dev->id, search->host, search->action);
	        schedule_internal_command(__FUNCTION__, dev, "status", search->host,
	                                  search->per_device_timeout, search, status_search_cb);
	        /* we'll respond to this search request async in the cb */
	        return;
	    } else {
	        crm_err("Unknown check type: %s", check_type);
	    }

	    if (safe_str_eq(host, alias)) {
	        crm_notice("%s can%s fence (%s) %s: %s", dev->id, can ? "" : " not", search->action, host, check_type);
	    } else {
	        crm_notice("%s can%s fence (%s) %s (aka. '%s'): %s", dev->id, can ? "" : " not", search->action, host, alias,
	                   check_type);
	    }

	  search_report_results:
	    search_devices_record_result(search, dev ? dev->id : NULL, can);
	}

	/*!
	 * \internal
	 * \brief Hash table iteration callback that checks one device for a search
	 *
	 * \param[in]     key        Ignored
	 * \param[in]     value      Device (stonith_device_t *) to check
	 * \param[in,out] user_data  Search state (struct device_search_s *)
	 */
	static void
	search_devices(gpointer key, gpointer value, gpointer user_data)
	{
	    can_fence_host_with_device((stonith_device_t *) value,
	                               (struct device_search_s *) user_data);
	}

	#define DEFAULT_QUERY_TIMEOUT 20
	/*!
	 * \internal
	 * \brief Find the registered devices capable of an action against a host
	 *
	 * Every registered device is checked; the callback is invoked exactly once,
	 * either immediately (no devices registered, or allocation failure) or when
	 * the last device's result has been recorded.
	 *
	 * \param[in] host       Target host name (NULL matches any device)
	 * \param[in] action     Fence action (may be NULL)
	 * \param[in] timeout    Total query timeout, divided among the devices that
	 *                       need an asynchronous query
	 * \param[in] suicide    Whether self-fencing is allowed
	 * \param[in] user_data  Passed through to \p callback
	 * \param[in] callback   Called with the list of capable device IDs
	 */
	static void
	get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
	                    void (*callback) (GList * devices, void *user_data))
	{
	    struct device_search_s *search;
	    int per_device_timeout = DEFAULT_QUERY_TIMEOUT;
	    int devices_needing_async_query = 0;
	    char *key = NULL;
	    const char *check_type = NULL;
	    GHashTableIter gIter;
	    stonith_device_t *device = NULL;

	    if (!g_hash_table_size(device_list)) {
	        callback(NULL, user_data);
	        return;
	    }

	    search = calloc(1, sizeof(struct device_search_s));
	    if (!search) {
	        callback(NULL, user_data);
	        return;
	    }

	    g_hash_table_iter_init(&gIter, device_list);
	    while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) {
	        check_type = target_list_type(device);
	        if (safe_str_eq(check_type, "status") || safe_str_eq(check_type, "dynamic-list")) {
	            devices_needing_async_query++;
	        }
	    }

	    /* If we have devices that require an async event in order to know what
	     * nodes they can fence, we have to give the events a timeout. The total
	     * query timeout is divided among those events. */
	    if (devices_needing_async_query) {
	        per_device_timeout = timeout / devices_needing_async_query;
	        if (!per_device_timeout) {
	            crm_err("STONITH timeout %ds is too low; using %ds, but consider raising to at least %ds",
	                    timeout, DEFAULT_QUERY_TIMEOUT,
	                    DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
	            per_device_timeout = DEFAULT_QUERY_TIMEOUT;
	        } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) {
	            crm_notice("STONITH timeout %ds is low for the current configuration;"
	                       " consider raising to at least %ds",
	                       timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
	        }
	    }

	    search->host = host ? strdup(host) : NULL;
	    search->action = action ? strdup(action) : NULL;
	    search->per_device_timeout = per_device_timeout;
	    /* We are guaranteed this many replies. Even if a device gets
	     * unregistered some how during the async search, we will get
	     * the correct number of replies. */
	    search->replies_needed = g_hash_table_size(device_list);
	    search->allow_suicide = suicide;
	    search->callback = callback;
	    search->user_data = user_data;
	    /* kick off the search */

	    crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s",
	              search->replies_needed,
	              search->action ? search->action : "<unknown>",
	              search->host ? search->host : "<anyone>");
	    g_hash_table_foreach(device_list, search_devices, search);
	}

	/* State handed from stonith_query() to stonith_query_capable_device_cb().
	 * The callback sends the reply, then frees every member and the struct.
	 */
	struct st_query_data {
	/* Reply created by stonith_query(); the device list is attached to it */
	xmlNode *reply;
	/* Copy of the requesting peer's name, or NULL (selects how the reply is sent) */
	char *remote_peer;
	/* Copy of the requesting client's ID, or NULL */
	char *client_id;
	/* Copy of the queried target, or NULL when the query named no target */
	char *target;
	/* Copy of the queried fence action, or NULL */
	char *action;
	/* Call options of the request (checked for st_opt_allow_suicide) */
	int call_options;
	};

	/*!
	 * \internal
	 * \brief Add action-specific attributes to query reply XML
	 *
	 * Adds, where applicable: a "required" marker, the action-specific timeout,
	 * and the maximum and base delays (each divided by 1000 before being added).
	 *
	 * \param[in,out] xml     XML to add attributes to
	 * \param[in]     action  Fence action
	 * \param[in]     device  Fence device
	 */
	static void
	add_action_specific_attributes(xmlNode * xml, const char *action,
	                               stonith_device_t *device)
	{
	    int action_specific_timeout;
	    int delay_max;
	    int delay_base;

	    CRM_CHECK(xml && action && device, return);

	    if (is_action_required(action, device)) {
	        crm_trace("Action %s is required on %s", action, device->id);
	        crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
	    }

	    action_specific_timeout = get_action_timeout(device, action, 0);
	    if (action_specific_timeout) {
	        crm_trace("Action %s has timeout %dms on %s",
	                  action, action_specific_timeout, device->id);
	        crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
	    }

	    delay_max = get_action_delay_max(device, action);
	    if (delay_max > 0) {
	        crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000);
	    }

	    delay_base = get_action_delay_base(device, action);
	    if (delay_base > 0) {
	        crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000);
	    }

	    /* Describe the configured delay exactly once, whichever combination of
	     * base and maximum is in effect
	     */
	    if ((delay_max > 0) && (delay_base == 0)) {
	        crm_trace("Action %s has maximum random delay %dms on %s",
	                  action, delay_max, device->id);
	    } else if ((delay_max == 0) && (delay_base > 0)) {
	        crm_trace("Action %s has a static delay of %dms on %s",
	                  action, delay_base, device->id);
	    } else if ((delay_max > 0) && (delay_base > 0)) {
	        crm_trace("Action %s has a minimum delay of %dms and a randomly chosen "
	                  "maximum delay of %dms on %s",
	                  action, delay_base, delay_max, device->id);
	    }
	}

	/*!
	 * \internal
	 * \brief Add "disallowed" attribute to query reply XML if appropriate
	 *
	 * The attribute is added only when localhost_is_eligible() reports that the
	 * local host may not execute the action.
	 *
	 * \param[in,out] xml            XML to add attribute to
	 * \param[in]     action         Fence action
	 * \param[in]     device         Fence device
	 * \param[in]     target         Fence target
	 * \param[in]     allow_suicide  Whether self-fencing is allowed
	 */
	static void
	add_disallowed(xmlNode * xml, const char *action, stonith_device_t *device,
	               const char *target, gboolean allow_suicide)
	{
	    if (!localhost_is_eligible(device, action, target, allow_suicide)) {
	        crm_trace("Action %s on %s is disallowed for local host",
	                  action, device->id);
	        crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE);
	    }
	}

	/*!
	 * \internal
	 * \brief Add child element with action-specific values to query reply XML
	 *
	 * The child is an F_STONITH_ACTION element whose ID is the action name.
	 *
	 * \param[in,out] xml            XML to add the child element to
	 * \param[in]     action         Fence action
	 * \param[in]     device         Fence device
	 * \param[in]     target         Fence target
	 * \param[in]     allow_suicide  Whether self-fencing is allowed
	 */
	static void
	add_action_reply(xmlNode * xml, const char *action, stonith_device_t *device,
	                 const char *target, gboolean allow_suicide)
	{
	    xmlNode *child = create_xml_node(xml, F_STONITH_ACTION);

	    crm_xml_add(child, XML_ATTR_ID, action);
	    add_action_specific_attributes(child, action, device);
	    add_disallowed(child, action, device, target, allow_suicide);
	}

	/*!
	 * \internal
	 * \brief Reply to a device query once the capable devices are known
	 *
	 * Packs one element per still-registered device into the reply, sends it,
	 * and then releases the query data and the device ID list.
	 *
	 * \param[in] devices    IDs of the capable devices (freed here)
	 * \param[in] user_data  Query state (struct st_query_data *, freed here)
	 */
	static void
	stonith_query_capable_device_cb(GList * devices, void *user_data)
	{
	    struct st_query_data *query = user_data;
	    const gboolean rebooting = safe_str_eq(query->action, "reboot");
	    const gboolean allow_suicide = is_set(query->call_options, st_opt_allow_suicide);
	    int found = 0;
	    GListPtr item = NULL;

	    /* Pack the results into XML */
	    xmlNode *results = create_xml_node(NULL, __FUNCTION__);

	    crm_xml_add(results, F_STONITH_TARGET, query->target);
	    for (item = devices; item != NULL; item = item->next) {
	        stonith_device_t *device = g_hash_table_lookup(device_list, item->data);
	        const char *action = query->action;
	        xmlNode *dev_xml = NULL;

	        if (device == NULL) {
	            /* It is possible the device got unregistered while
	             * determining who can fence the target */
	            continue;
	        }

	        found++;

	        dev_xml = create_xml_node(results, F_STONITH_DEVICE);
	        crm_xml_add(dev_xml, XML_ATTR_ID, device->id);
	        crm_xml_add(dev_xml, "namespace", device->namespace);
	        crm_xml_add(dev_xml, "agent", device->agent);
	        crm_xml_add_int(dev_xml, F_STONITH_DEVICE_VERIFIED, device->verified);

	        /* If the originating fencer wants to reboot the node, and we have a
	         * capable device that doesn't support "reboot", remap to "off" instead.
	         */
	        if (is_not_set(device->flags, st_device_supports_reboot) && rebooting) {
	            crm_trace("%s doesn't support reboot, using values for off instead",
	                      device->id);
	            action = "off";
	        }

	        /* Add action-specific values if available */
	        add_action_specific_attributes(dev_xml, action, device);
	        if (rebooting) {
	            /* A "reboot" might get remapped to "off" then "on", so after
	             * sending the "reboot"-specific values in the main element, we add
	             * sub-elements for "off" and "on" values.
	             *
	             * We short-circuited earlier if "reboot", "off" and "on" are all
	             * disallowed for the local host. However if only one or two are
	             * disallowed, we send back the results and mark which ones are
	             * disallowed. If "reboot" is disallowed, this might cause problems
	             * with older fencer versions, which won't check for it. Older
	             * versions will ignore "off" and "on", so they are not a problem.
	             */
	            add_disallowed(dev_xml, action, device, query->target, allow_suicide);
	            add_action_reply(dev_xml, "off", device, query->target, allow_suicide);
	            add_action_reply(dev_xml, "on", device, query->target, FALSE);
	        }

	        /* A query without a target wants device parameters */
	        if (query->target == NULL) {
	            xmlNode *attrs = create_xml_node(dev_xml, XML_TAG_ATTRS);

	            g_hash_table_foreach(device->params, hash2field, attrs);
	        }
	    }

	    crm_xml_add_int(results, F_STONITH_AVAILABLE_DEVICES, found);
	    if (query->target == NULL) {
	        crm_debug("%d devices installed", found);
	    } else {
	        crm_debug("Found %d matching devices for '%s'", found, query->target);
	    }

	    if (results != NULL) {
	        crm_log_xml_trace(results, "Add query results");
	        add_message_xml(query->reply, F_STONITH_CALLDATA, results);
	    }
	    stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id);

	    /* Everything the query carried, and the ID list, is owned by us */
	    free_xml(query->reply);
	    free(query->remote_peer);
	    free(query->client_id);
	    free(query->target);
	    free(query->action);
	    free(query);
	    free_xml(results);
	    g_list_free_full(devices, free);
	}

	/*!
	 * \internal
	 * \brief Handle a device query request
	 *
	 * Collects the target and action from the request, then starts a search for
	 * capable devices; stonith_query_capable_device_cb() sends the reply.
	 * Requests naming the device "manual_ack" are neither queried nor answered.
	 *
	 * \param[in] msg           XML query request
	 * \param[in] remote_peer   Name of the requesting peer, or NULL
	 * \param[in] client_id     ID of the requesting client, or NULL
	 * \param[in] call_options  Call options of the request
	 */
	static void
	stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options)
	{
	    struct st_query_data *query = NULL;
	    const char *action = NULL;
	    const char *target = NULL;
	    int timeout = 0;
	    xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_TRACE);

	    crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
	    if (dev) {
	        const char *device = crm_element_value(dev, F_STONITH_DEVICE);

	        target = crm_element_value(dev, F_STONITH_TARGET);
	        action = crm_element_value(dev, F_STONITH_ACTION);
	        if (device && safe_str_eq(device, "manual_ack")) {
	            /* No query or reply necessary */
	            return;
	        }
	    }

	    crm_log_xml_debug(msg, "Query");
	    query = calloc(1, sizeof(struct st_query_data));
	    if (query == NULL) {
	        /* Without the query state there is nothing to hand to the callback */
	        crm_err("Could not allocate memory for fencing device query");
	        return;
	    }

	    /* The callback owns and frees all of these copies */
	    query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
	    query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
	    query->client_id = client_id ? strdup(client_id) : NULL;
	    query->target = target ? strdup(target) : NULL;
	    query->action = action ? strdup(action) : NULL;
	    query->call_options = call_options;

	    get_capable_devices(target, action, timeout,
	                        is_set(call_options, st_opt_allow_suicide),
	                        query, stonith_query_capable_device_cb);
	}

	#define ST_LOG_OUTPUT_MAX 512
	/*!
	 * \internal
	 * \brief Log the result of a fence device operation
	 *
	 * \param[in] cmd     Command that was executed
	 * \param[in] rc      Result of the operation (0 is treated as success)
	 * \param[in] pid     Process ID reported for the operation
	 * \param[in] next    ID of the device to be tried next, if any (ignored
	 *                    when \p rc is 0)
	 * \param[in] output  Output of the operation, logged line by line if not NULL
	 */
	static void
	log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output)
	{
	    if (rc == 0) {
	        next = NULL;
	    }

	    if (cmd->victim != NULL) {
	        do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR,
	                   "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d (%s)%s%s",
	                   cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, rc,
	                   pcmk_strerror(rc), next ? ". Trying: " : "", next ? next : "");
	    } else {
	        do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
	                            "Operation '%s' [%d] for device '%s' returned: %d (%s)%s%s",
	                            cmd->action, pid, cmd->device, rc, pcmk_strerror(rc),
	                            next ? ". Trying: " : "", next ? next : "");
	    }

	    if (output) {
	        /* Logging the whole string confuses syslog when the string is xml */
	        char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid);

	        crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output);
	        free(prefix);
	    }
	}

	/*!
	 * \internal
	 * \brief Send the result of an asynchronous command to whoever requested it
	 *
	 * The reply is broadcast to the cluster, sent to the originating peer, or
	 * delivered to the local client, depending on the command. In stand-alone
	 * mode, fence and history notifications are sent as well.
	 *
	 * \param[in] cmd     Command that completed
	 * \param[in] output  Output of the command (not logged for "metadata")
	 * \param[in] rc      Result of the command
	 * \param[in] pid     Process ID reported for the command
	 */
	static void
	stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid)
	{
	    xmlNode *reply = NULL;
	    gboolean bcast = FALSE;

	    /* Built before output may be cleared below, so the reply always carries it */
	    reply = stonith_construct_async_reply(cmd, output, NULL, rc);

	    if (safe_str_eq(cmd->action, "metadata")) {
	        /* Too verbose to log */
	        crm_trace("Metadata query for %s", cmd->device);
	        output = NULL;

	    } else if (crm_str_eq(cmd->action, "monitor", TRUE) ||
	               crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) {
	        crm_trace("Never broadcast %s replies", cmd->action);

	    } else if (!stand_alone && safe_str_eq(cmd->origin, cmd->victim) && safe_str_neq(cmd->action, "on")) {
	        crm_trace("Broadcast %s reply for %s", cmd->action, cmd->victim);
	        crm_xml_add(reply, F_SUBTYPE, "broadcast");
	        bcast = TRUE;
	    }

	    log_operation(cmd, rc, pid, NULL, output);
	    crm_log_xml_trace(reply, "Reply");

	    if (bcast) {
	        crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
	        send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);

	    } else if (cmd->origin) {
	        crm_trace("Directed reply to %s", cmd->origin);
	        send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);

	    } else {
	        crm_trace("Directed local %ssync reply to %s",
	                  (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name);
	        do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
	    }

	    if (stand_alone) {
	        /* Do notification with a clean data object */
	        xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);

	        crm_xml_add_int(notify_data, F_STONITH_RC, rc);
	        crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
	        crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
	        crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
	        crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
	        crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
	        crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);

	        /* NOTE(review): notify_data is not freed in this function -- confirm
	         * whether do_stonith_notify() takes ownership
	         */
	        do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
	        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
	    }

	    free_xml(reply);
	}

	/*!
	 * \internal
	 * \brief Remove a command from the pending operations of its device
	 *
	 * Does nothing if the command names no device or the device is no longer
	 * registered. The command itself is not freed.
	 *
	 * \param[in] cmd  Command to cancel
	 */
	static void
	cancel_stonith_command(async_command_t * cmd)
	{
	    stonith_device_t *owner = NULL;

	    CRM_CHECK(cmd != NULL, return);

	    if (cmd->device) {
	        owner = g_hash_table_lookup(device_list, cmd->device);
	    }

	    if (owner == NULL) {
	        return;
	    }

	    crm_trace("Cancel scheduled %s on %s", cmd->action, owner->id);
	    owner->pending_ops = g_list_remove(owner->pending_ops, cmd);
	}

	/*!
	 * \internal
	 * \brief Handle completion of a fence device operation
	 *
	 * Marks the device as verified after a successful list/monitor/status,
	 * lets the device pick up further work, and then either continues the
	 * command on another device or sends the result. After a successful result,
	 * pending commands from other clients that ask for the same action on the
	 * same victim with the same device are answered with this result too.
	 *
	 * \param[in] pid        Process ID of the completed operation
	 * \param[in] rc         Result of the operation
	 * \param[in] output     Output of the operation
	 * \param[in] user_data  Command that was executed (async_command_t *); freed
	 *                       here unless it is rescheduled on another device
	 */
	static void
	st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
	{
	    stonith_device_t *device = NULL;
	    stonith_device_t *next_device = NULL;
	    async_command_t *cmd = user_data;

	    GListPtr gIter = NULL;
	    GListPtr gIterNext = NULL;

	    CRM_CHECK(cmd != NULL, return);

	    cmd->active_on = NULL;

	    /* The device is ready to do something else now */
	    device = g_hash_table_lookup(device_list, cmd->device);
	    if (device) {
	        if (rc == pcmk_ok &&
	            (safe_str_eq(cmd->action, "list") ||
	             safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) {

	            device->verified = TRUE;
	        }

	        mainloop_set_trigger(device->work);
	    }

	    crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)",
	              cmd->action, cmd->device, rc, g_list_length(cmd->device_next));

	    if (rc == 0) {
	        GListPtr iter;
	        /* see if there are any required devices left to execute for this op */
	        for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
	            next_device = g_hash_table_lookup(device_list, iter->data);

	            if (next_device != NULL && is_action_required(cmd->action, next_device)) {
	                cmd->device_next = iter->next;
	                break;
	            }
	            next_device = NULL;
	        }

	    } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
	        /* if this device didn't work out, see if there are any others we can try.
	         * if the failed device was 'required', we can't pick another device. */
	        next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
	        cmd->device_next = cmd->device_next->next;
	    }

	    /* this operation requires more fencing, hooray! */
	    if (next_device) {
	        log_operation(cmd, rc, pid, cmd->device, output);

	        schedule_stonith_command(cmd, next_device);
	        /* Prevent cmd from being freed */
	        cmd = NULL;
	        goto done;
	    }

	    stonith_send_async_reply(cmd, output, rc, pid);

	    if (rc != 0) {
	        goto done;
	    }

	    /* Check to see if any operations are scheduled to do the exact
	     * same thing that just completed. If so, rather than
	     * performing the same fencing operation twice, return the result
	     * of this operation for all pending commands it matches. */
	    for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
	        async_command_t *cmd_other = gIter->data;

	        /* Saved up front because gIter may be unlinked and freed below */
	        gIterNext = gIter->next;

	        if (cmd == cmd_other) {
	            continue;
	        }

	        /* A pending scheduled command matches the command that just finished if.
	         * 1. The client connections are different.
	         * 2. The node victim is the same.
	         * 3. The fencing action is the same.
	         * 4. The device scheduled to execute the action is the same.
	         */
	        if (safe_str_eq(cmd->client, cmd_other->client) ||
	            safe_str_neq(cmd->victim, cmd_other->victim) ||
	            safe_str_neq(cmd->action, cmd_other->action) ||
	            safe_str_neq(cmd->device, cmd_other->device)) {

	            continue;
	        }

	        /* Duplicate merging will do the right thing for either type of remapped
	         * reboot. If the executing fencer remapped an unsupported reboot to
	         * off, then cmd->action will be reboot and will be merged with any
	         * other reboot requests. If the originating fencer remapped a
	         * topology reboot to off then on, we will get here once with
	         * cmd->action "off" and once with "on", and they will be merged
	         * separately with similar requests.
	         */
	        crm_notice
	            ("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s",
	             cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name);

	        cmd_list = g_list_remove_link(cmd_list, gIter);

	        stonith_send_async_reply(cmd_other, output, rc, pid);
	        cancel_stonith_command(cmd_other);

	        free_async_command(cmd_other);
	        g_list_free_1(gIter);
	    }

	  done:
	    free_async_command(cmd);
	}

	/*!
	 * \internal
	 * \brief Order two fence devices, given by ID, from highest to lowest priority
	 *
	 * The list sorted with this function holds device ID strings (the copies
	 * made when a capability search records its results), not device objects,
	 * so each ID is resolved through the device table before comparing.
	 * IDs that are no longer registered sort after all registered devices.
	 *
	 * \param[in] a  ID of first device (const char *)
	 * \param[in] b  ID of second device (const char *)
	 *
	 * \return Negative if \p a should come first, positive if \p b should,
	 *         0 if they rank equally
	 */
	static gint
	sort_device_priority(gconstpointer a, gconstpointer b)
	{
	    const stonith_device_t *dev_a = g_hash_table_lookup(device_list, a);
	    const stonith_device_t *dev_b = g_hash_table_lookup(device_list, b);

	    if (dev_a == NULL) {
	        return (dev_b == NULL) ? 0 : 1;
	    } else if (dev_b == NULL) {
	        return -1;
	    }

	    if (dev_a->priority > dev_b->priority) {
	        return -1;
	    } else if (dev_a->priority < dev_b->priority) {
	        return 1;
	    }
	    return 0;
	}

	/*!
	 * \internal
	 * \brief Start fencing once the capable devices for a command are known
	 *
	 * The first device of the priority-sorted list is scheduled if it is still
	 * registered, and the list is handed over to the command. Otherwise the
	 * command is answered with -ENODEV and both it and the list are freed.
	 *
	 * \param[in] devices    IDs of the capable devices
	 * \param[in] user_data  Command to execute (async_command_t *)
	 */
	static void
	stonith_fence_get_devices_cb(GList * devices, void *user_data)
	{
	    async_command_t *cmd = user_data;
	    stonith_device_t *first = NULL;

	    crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim);

	    /* A GList is empty exactly when its head pointer is NULL */
	    if (devices != NULL) {
	        /* Order based on priority */
	        devices = g_list_sort(devices, sort_device_priority);
	        first = g_hash_table_lookup(device_list, devices->data);
	    }

	    if (first != NULL) {
	        /* we have a device, schedule it for fencing; list owned by cmd now */
	        cmd->device_list = devices;
	        cmd->device_next = devices->next;
	        schedule_stonith_command(cmd, first);
	        /* in progress */
	        return;
	    }

	    /* no device found! */
	    stonith_send_async_reply(cmd, NULL, -ENODEV, 0);

	    free_async_command(cmd);
	    g_list_free_full(devices, free);
	}

	/*
	 * Start a fencing operation described by an XML request.
	 *
	 * If the request names a device (F_STONITH_DEVICE), the command is scheduled
	 * on that device directly. Otherwise get_capable_devices() is asked which
	 * devices can fence the target, and stonith_fence_get_devices_cb() continues
	 * from there.
	 *
	 * Returns -EPROTO if no command could be created from msg, -ENODEV if the
	 * named device is not in device_list, and -EINPROGRESS otherwise.
	 *
	 * NOTE(review): lines starting with '-' or '+' below are diff markers from
	 * the patch this text was taken from ('-' removed, '+' added); they are left
	 * exactly as found.
	 */
	static int
	stonith_fence(xmlNode * msg)
	{
	const char *device_id = NULL;
	stonith_device_t *device = NULL;
	async_command_t *cmd = create_async_command(msg);
	xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);

	if (cmd == NULL) {
	return -EPROTO;
	}

	device_id = crm_element_value(dev, F_STONITH_DEVICE);
	if (device_id) {
	device = g_hash_table_lookup(device_list, device_id);
	if (device == NULL) {
	/* NOTE(review): cmd does not appear to be released on this path, while
	 * other failure paths in this file call free_async_command() -- confirm
	 */
	crm_err("Requested device '%s' is not available", device_id);
	return -ENODEV;
	}
	schedule_stonith_command(cmd, device);

	} else {
	const char *host = crm_element_value(dev, F_STONITH_TARGET);
	- char *nodename = NULL;

	/* With st_opt_cs_nodeid set, the target is parsed as a numeric node ID and
	 * replaced by the matching node name when one is found
	 */
	if (cmd->options & st_opt_cs_nodeid) {
	int nodeid = crm_atoi(host, NULL);
	+ crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);

	- nodename = stonith_get_peer_name(nodeid);
	- if (nodename) {
	- host = nodename;
	+ if (node) {
	+ host = node->uname;
	}
	}

	/* If we get to here, then self-fencing is implicitly allowed */
	get_capable_devices(host, cmd->action, cmd->default_timeout,
	TRUE, cmd, stonith_fence_get_devices_cb);
	-
	- free(nodename);
	}

	return -EINPROGRESS;
	}

	/*!
	 * \brief Create a reply to a fencing request
	 *
	 * The reply carries the given output and result code, plus the operation,
	 * call ID, client ID, client name, remote operation ID and call options
	 * copied from the request.
	 *
	 * \param[in] request  Request being answered (if NULL, a reply without the
	 *                     copied fields is returned)
	 * \param[in] output   Text output to include (may be NULL)
	 * \param[in] data     XML to attach as call data (may be NULL)
	 * \param[in] rc       Result code to include
	 *
	 * \return Newly created reply XML
	 */
	xmlNode *
	stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
	{
	    int lpc = 0;
	    xmlNode *reply = NULL;

	    const char *name = NULL;
	    const char *value = NULL;

	    /* Request attributes that are copied into the reply unchanged */
	    const char *names[] = {
	        F_STONITH_OPERATION,
	        F_STONITH_CALLID,
	        F_STONITH_CLIENTID,
	        F_STONITH_CLIENTNAME,
	        F_STONITH_REMOTE_OP_ID,
	        F_STONITH_CALLOPTS
	    };

	    crm_trace("Creating a basic reply");
	    reply = create_xml_node(NULL, T_STONITH_REPLY);

	    crm_xml_add(reply, "st_origin", __FUNCTION__);
	    crm_xml_add(reply, F_TYPE, T_STONITH_NG);
	    crm_xml_add(reply, "st_output", output);
	    crm_xml_add_int(reply, F_STONITH_RC, rc);

	    CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply);
	    for (lpc = 0; lpc < DIMOF(names); lpc++) {
	        name = names[lpc];
	        value = crm_element_value(request, name);
	        crm_xml_add(reply, name, value);
	    }

	    if (data != NULL) {
	        crm_trace("Attaching reply output");
	        add_message_xml(reply, F_STONITH_CALLDATA, data);
	    }
	    return reply;
	}

	/*!
	 * \internal
	 * \brief Create a reply for a completed asynchronous command
	 *
	 * Unlike stonith_construct_reply(), the identifying fields are taken from
	 * the command object rather than from a request.
	 *
	 * \param[in] cmd     Command being answered
	 * \param[in] output  Text output to include (may be NULL)
	 * \param[in] data    XML to attach as call data (may be NULL)
	 * \param[in] rc      Result code to include
	 *
	 * \return Newly created reply XML
	 */
	static xmlNode *
	stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
	{
	    xmlNode *reply = NULL;

	    crm_trace("Creating a basic reply");
	    reply = create_xml_node(NULL, T_STONITH_REPLY);

	    crm_xml_add(reply, "st_origin", __FUNCTION__);
	    crm_xml_add(reply, F_TYPE, T_STONITH_NG);

	    crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
	    crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
	    crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
	    crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client);
	    crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name);
	    crm_xml_add(reply, F_STONITH_TARGET, cmd->victim);
	    /* NOTE(review): the action field is filled from cmd->op rather than
	     * cmd->action -- confirm this is intended
	     */
	    crm_xml_add(reply, F_STONITH_ACTION, cmd->op);
	    crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
	    crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
	    crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);

	    crm_xml_add_int(reply, F_STONITH_RC, rc);

	    crm_xml_add(reply, "st_output", output);

	    if (data != NULL) {
	        crm_info("Attaching reply output");
	        add_message_xml(reply, F_STONITH_CALLDATA, data);
	    }
	    return reply;
	}

	/*!
	 * \brief Check whether a peer can be considered active for fencing purposes
	 *
	 * \param[in] peer  Peer to check (may be NULL)
	 *
	 * \return true if the peer has a node name and its process flags include
	 *         the value returned by crm_get_cluster_proc(), false otherwise
	 */
	bool fencing_peer_active(crm_node_t *peer)
	{
	    return (peer != NULL)
	           && (peer->uname != NULL)
	           && is_set(peer->processes, crm_get_cluster_proc());
	}

	/*!
	 * \internal
	 * \brief Determine if we need to use an alternate node to
	 *        fence the target. If so return that node's uname
	 *
	 * An alternate is looked for only when the target is the local node and a
	 * topology exists for it.
	 *
	 * \param[in] target  Name of the node to be fenced
	 *
	 * \retval NULL, no alternate host
	 * \retval uname, uname of alternate host to use
	 */
	static const char *
	check_alternate_host(const char *target)
	{
	    GHashTableIter peers;
	    crm_node_t *peer = NULL;

	    crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target);
	    if (!find_topology_for_host(target) || !safe_str_eq(target, stonith_our_uname)) {
	        return NULL;
	    }

	    /* Pick the first active peer other than the target */
	    g_hash_table_iter_init(&peers, crm_peer_cache);
	    while (g_hash_table_iter_next(&peers, NULL, (void **)&peer)) {
	        crm_trace("Checking for %s.%d != %s", peer->uname, peer->id, target);
	        if (fencing_peer_active(peer)
	            && safe_str_neq(peer->uname, target)) {
	            return peer->uname;
	        }
	    }

	    /* Nobody else can do it; list the known peers to aid diagnosis */
	    crm_err("No alternate host available to handle complex self fencing request");
	    g_hash_table_iter_init(&peers, crm_peer_cache);
	    while (g_hash_table_iter_next(&peers, NULL, (void **)&peer)) {
	        crm_notice("Peer[%d] %s", peer->id, peer->uname);
	    }

	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Deliver a reply either to a cluster peer or to a local client
	 *
	 * \param[in] reply         Reply XML to send
	 * \param[in] call_options  Call options of the request (checked for sync flag)
	 * \param[in] remote_peer   Peer to reply to, or NULL for a local client
	 * \param[in] client_id     ID of the local client (used when no peer is given)
	 */
	static void
	stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
	                   const char *client_id)
	{
	    if (remote_peer == NULL) {
	        /* Local delivery; the last argument is "remote_peer != NULL",
	         * which is necessarily false on this path
	         */
	        do_local_reply(reply, client_id,
	                       is_set(call_options, st_opt_sync_call),
	                       remote_peer != NULL);
	        return;
	    }
	    send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng,
	                         reply, FALSE);
	}

	/*!
	 * \internal
	 * \brief Dispatch a fencing request according to its operation
	 *
	 * Operations that answer the requester themselves (or need no answer)
	 * return directly. All others fall through to the common reply code, which
	 * sends a reply unless the result is -EINPROGRESS (a reply is then sent
	 * asynchronously once processing finishes).
	 *
	 * \param[in] client       Local IPC client that sent the request, if any
	 * \param[in] id           IPC message ID of the request
	 * \param[in] flags        IPC flags of the request
	 * \param[in] request      Request XML
	 * \param[in] remote_peer  Name of the peer that sent the request, if any
	 *
	 * \return Result of the operation (-EOPNOTSUPP for an unknown operation,
	 *         -EINPROGRESS when the reply is deferred)
	 */
	static int
	handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request,
	               const char *remote_peer)
	{
	    int call_options = 0;
	    int rc = -EOPNOTSUPP;

	    xmlNode *data = NULL;
	    xmlNode *reply = NULL;

	    char *output = NULL;
	    const char *op = crm_element_value(request, F_STONITH_OPERATION);
	    const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);

	    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);

	    if (is_set(call_options, st_opt_sync_call)) {
	        CRM_ASSERT(client == NULL || client->request_id == id);
	    }

	    if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
	        xmlNode *registration = create_xml_node(NULL, "reply");

	        CRM_ASSERT(client);
	        crm_xml_add(registration, F_STONITH_OPERATION, CRM_OP_REGISTER);
	        crm_xml_add(registration, F_STONITH_CLIENTID, client->id);
	        crm_ipcs_send(client, id, registration, flags);
	        client->request_id = 0;
	        free_xml(registration);
	        return 0;

	    } else if (crm_str_eq(op, STONITH_OP_EXEC, TRUE)) {
	        rc = stonith_device_action(request, &output);

	    } else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) {
	        const char *call_id = crm_element_value(request, F_STONITH_CALLID);
	        int op_timeout = 0;

	        crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
	        do_stonith_async_timeout_update(client_id, call_id, op_timeout);
	        return 0;

	    } else if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) {
	        if (remote_peer) {
	            /* Record it for the future notification */
	            create_remote_stonith_op(client_id, request, TRUE);
	        }
	        stonith_query(request, remote_peer, client_id, call_options);
	        return 0;

	    } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) {
	        const char *flag_name = NULL;

	        CRM_ASSERT(client);
	        flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE);
	        if (flag_name) {
	            crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id);
	            client->options |= get_stonith_flag(flag_name);
	        }

	        flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE);
	        if (flag_name) {
	            crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id);
	            /* Deactivation must clear the flag; OR-ing it in (as the
	             * activation branch does) would switch the callback on instead
	             */
	            client->options &= ~get_stonith_flag(flag_name);
	        }

	        if (flags & crm_ipc_client_response) {
	            crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
	        }
	        return 0;

	    } else if (crm_str_eq(op, STONITH_OP_RELAY, TRUE)) {
	        xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);

	        crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s",
	                   stonith_our_uname,
	                   client ? client->name : remote_peer,
	                   crm_element_value(dev, F_STONITH_ACTION),
	                   crm_element_value(dev, F_STONITH_TARGET));

	        if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
	            rc = -EINPROGRESS;
	        }

	    } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) {

	        if (remote_peer || stand_alone) {
	            rc = stonith_fence(request);

	        } else if (call_options & st_opt_manual_ack) {
	            remote_fencing_op_t *rop = NULL;
	            xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
	            const char *target = crm_element_value(dev, F_STONITH_TARGET);

	            crm_notice("Received manual confirmation that %s is fenced", target);
	            rop = initiate_remote_stonith_op(client, request, TRUE);
	            rc = stonith_manual_ack(request, rop);

	        } else {
	            const char *alternate_host = NULL;
	            xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
	            const char *target = crm_element_value(dev, F_STONITH_TARGET);
	            const char *action = crm_element_value(dev, F_STONITH_ACTION);
	            const char *device = crm_element_value(dev, F_STONITH_DEVICE);

	            if (client) {
	                int tolerance = 0;

	                crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'",
	                           client->name, client->id, action, target, device ? device : "(any)");

	                crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);

	                if (stonith_check_fence_tolerance(tolerance, target, action)) {
	                    rc = 0;
	                    goto done;
	                }

	            } else {
	                crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
	                           remote_peer, action, target, device ? device : "(any)");
	            }

	            alternate_host = check_alternate_host(target);

	            if (alternate_host && client) {
	                const char *relay_client_id = NULL;

	                crm_notice("Forwarding complex self fencing request to peer %s", alternate_host);

	                if (client->id) {
	                    relay_client_id = client->id;
	                } else {
	                    relay_client_id = crm_element_value(request, F_STONITH_CLIENTID);
	                }

	                /* Create a record of it, otherwise call_id will be 0 if we need to notify of failures */
	                create_remote_stonith_op(relay_client_id, request, FALSE);

	                crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY);
	                crm_xml_add(request, F_STONITH_CLIENTID, client->id);
	                send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
	                                     FALSE);
	                rc = -EINPROGRESS;

	            } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) {
	                rc = -EINPROGRESS;
	            }
	        }

	    } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) {
	        rc = stonith_fence_history(request, &data, remote_peer, call_options);
	        if (call_options & st_opt_discard_reply) {
	            /* we don't expect answers to the broadcast
	             * we might have sent out
	             */
	            free_xml(data);
	            return pcmk_ok;
	        }

	    } else if (crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) {
	        const char *device_id = NULL;

	        rc = stonith_device_register(request, &device_id, FALSE);
	        do_stonith_notify_device(call_options, op, rc, device_id);

	    } else if (crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) {
	        xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
	        const char *device_id = crm_element_value(dev, XML_ATTR_ID);

	        rc = stonith_device_remove(device_id, FALSE);
	        do_stonith_notify_device(call_options, op, rc, device_id);

	    } else if (crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) {
	        char *device_id = NULL;

	        rc = stonith_level_register(request, &device_id);
	        do_stonith_notify_level(call_options, op, rc, device_id);
	        free(device_id);

	    } else if (crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) {
	        char *device_id = NULL;

	        rc = stonith_level_remove(request, &device_id);
	        do_stonith_notify_level(call_options, op, rc, device_id);
	        /* Released here to mirror the level-add branch; presumably the
	         * description is heap-allocated in the same way -- free(NULL) is
	         * harmless if nothing was returned
	         */
	        free(device_id);

	    } else if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) {
	        int node_id = 0;
	        const char *name = NULL;

	        crm_element_value_int(request, XML_ATTR_ID, &node_id);
	        name = crm_element_value(request, XML_ATTR_UNAME);
	        reap_crm_member(node_id, name);

	        return pcmk_ok;

	    } else {
	        crm_err("Unknown %s from %s", op, client ? client->name : remote_peer);
	        crm_log_xml_warn(request, "UnknownOp");
	    }

	  done:

	    /* Always reply unless the request is in process still.
	     * If in progress, a reply will happen async after the request
	     * processing is finished */
	    if (rc != -EINPROGRESS) {
	        crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0,
	                  id, is_set(call_options, st_opt_sync_call), call_options,
	                  crm_element_value(request, F_STONITH_CALLOPTS));

	        if (is_set(call_options, st_opt_sync_call)) {
	            CRM_ASSERT(client == NULL || client->request_id == id);
	        }
	        reply = stonith_construct_reply(request, output, data, rc);
	        stonith_send_reply(reply, call_options, remote_peer, client_id);
	    }

	    free(output);
	    free_xml(data);
	    free_xml(reply);

	    return rc;
	}

	/*!
	 * \internal
	 * \brief Route a reply message to the handler for its operation
	 *
	 * \param[in] client       Local client the reply came from, if any
	 * \param[in] request      Reply XML
	 * \param[in] remote_peer  Peer the reply came from, if any
	 */
	static void
	handle_reply(crm_client_t * client, xmlNode * request, const char *remote_peer)
	{
	    const char *op = crm_element_value(request, F_STONITH_OPERATION);

	    if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) {
	        process_remote_stonith_query(request);
	        return;
	    }

	    /* Notifications and replies to a complex fencing op share a handler */
	    if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)
	        || crm_str_eq(op, STONITH_OP_FENCE, TRUE)) {
	        process_remote_stonith_exec(request);
	        return;
	    }

	    crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer);
	    crm_log_xml_warn(request, "UnknownOp");
	}

	/*!
	 * \brief Process an incoming fencer message (request or reply)
	 *
	 * \param[in] client       Local IPC client that sent the message, if any
	 * \param[in] id           IPC message ID
	 * \param[in] flags        IPC flags
	 * \param[in] request      Message XML
	 * \param[in] remote_peer  Name of the peer that sent the message, if any
	 */
	void
	stonith_command(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request,
	                const char *remote_peer)
	{
	    int call_options = 0;
	    int rc = 0;
	    gboolean is_reply = FALSE;

	    /* Copy op for reporting. The original might get freed by handle_reply()
	     * before we use it in crm_debug():
	     *     handle_reply()
	     *     |- process_remote_stonith_exec()
	     *     |-- remote_op_done()
	     *     |--- handle_local_reply_and_notify()
	     *     |---- crm_xml_add(...F_STONITH_OPERATION...)
	     *     |--- free_xml(op->request)
	     */
	    char *op = crm_element_value_copy(request, F_STONITH_OPERATION);

	    /* A message containing a reply element anywhere is treated as a reply */
	    if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_TRACE)) {
	        is_reply = TRUE;
	    }

	    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
	    crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "",
	              id, client ? client->name : remote_peer, call_options);

	    if (is_set(call_options, st_opt_sync_call)) {
	        CRM_ASSERT(client == NULL || client->request_id == id);
	    }

	    if (is_reply) {
	        handle_reply(client, request, remote_peer);
	    } else {
	        rc = handle_request(client, id, flags, request, remote_peer);
	    }

	    crm_debug("Processed %s%s from %s: %s (%d)", op,
	              is_reply ? " reply" : "", client ? client->name : remote_peer,
	              rc > 0 ? "" : pcmk_strerror(rc), rc);

	    free(op);
	}
	diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
	index 7e1631fa9b..767aeb337c 100644
	--- a/daemons/fenced/fenced_history.c
	+++ b/daemons/fenced/fenced_history.c
	@@ -1,469 +1,467 @@
	/*
	* Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <stdio.h>
	#include <unistd.h>
	#include <stdlib.h>

	#include <crm/crm.h>
	#include <crm/msg_xml.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipcs.h>
	#include <crm/cluster/internal.h>

	#include <crm/stonith-ng.h>
	#include <crm/fencing/internal.h>
	#include <crm/common/xml.h>

	#include <pacemaker-fenced.h>

	#define MAX_STONITH_HISTORY 500

	/*!
	 * \internal
	 * \brief Broadcast a fence-history message to all nodes, triggering either
	 *        a cleanup or a history synchronisation
	 *
	 * \param[in] history   Optional history; a copy is attached when non-NULL
	 * \param[in] callopts  Call options to send (cleanup is requested via a flag)
	 * \param[in] target    Optional fence target to limit the cleanup to
	 */
	static void
	stonith_send_broadcast_history(xmlNode *history,
	                               int callopts,
	                               const char *target)
	{
	    xmlNode *message = create_xml_node(NULL, "stonith_command");
	    xmlNode *payload = create_xml_node(NULL, __FUNCTION__);

	    /* Payload: optional target plus an optional copy of the history */
	    if (target != NULL) {
	        crm_xml_add(payload, F_STONITH_TARGET, target);
	    }
	    if (history != NULL) {
	        add_node_copy(payload, history);
	    }

	    /* Envelope */
	    crm_xml_add(message, F_TYPE, T_STONITH_NG);
	    crm_xml_add(message, F_SUBTYPE, "broadcast");
	    crm_xml_add(message, F_STONITH_OPERATION, STONITH_OP_FENCE_HISTORY);
	    crm_xml_add_int(message, F_STONITH_CALLOPTS, callopts);
	    add_message_xml(message, F_STONITH_CALLDATA, payload);

	    send_cluster_message(NULL, crm_msg_stonith_ng, message, FALSE);

	    free_xml(payload);
	    free_xml(message);
	}

	/*!
	 * \internal
	 * \brief Hash-table removal predicate selecting finished history entries
	 *
	 * \param[in] key        Hash key (ignored)
	 * \param[in] value      Fencing operation stored in the table
	 * \param[in] user_data  Optional target name to limit the removal to
	 *
	 * \return TRUE if the entry is failed or done (and matches the target when
	 *         one is given), FALSE otherwise
	 */
	static gboolean
	stonith_remove_history_entry (gpointer key,
	                              gpointer value,
	                              gpointer user_data)
	{
	    remote_fencing_op_t *op = value;
	    const char *target = (const char *) user_data;

	    if ((op->state == st_failed) || (op->state == st_done)) {
	        if ((target) && (strcmp(op->target, target) != 0)) {
	            return FALSE;
	        }
	        return TRUE;
	    }

	    return FALSE; /* don't clean pending operations */
	}

	/*!
	 * \internal
	 * \brief Send out a cleanup broadcast or do a local history-cleanup
	 *
	 * \param[in] target     Cleanup can be limited to certain fence-targets
	 * \param[in] broadcast  Send out a cleanup broadcast instead of cleaning
	 *                       up locally
	 */
	static void
	stonith_fence_history_cleanup(const char *target,
	                              gboolean broadcast)
	{
	    if (broadcast) {
	        stonith_send_broadcast_history(NULL,
	                                       st_opt_cleanup | st_opt_discard_reply,
	                                       target);
	        /* we'll do the local clean when we receive back our own broadcast */
	    } else if (stonith_remote_op_list) {
	        g_hash_table_foreach_remove(stonith_remote_op_list,
	                                    stonith_remove_history_entry,
	                                    (gpointer) target);
	        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
	    }
	}

	/* keeping the length of fence-history within bounds
	* =================================================
	*
	* If things are really running wild a lot of fencing-attempts
	* might fill up the hash-map, eventually using up a lot
	* of memory and creating huge history-sync messages.
	* Before the history being synced across nodes at least
	* the reboot of a cluster-node helped keeping the
	* history within bounds even though not in a reliable
	* manner.
	*
	* stonith_remote_op_list isn't sorted for time-stamps
	* thus it would be kind of expensive to delete e.g.
	* the oldest entry if it would grow past MAX_STONITH_HISTORY
	* entries.
	* It is more efficient to purge MAX_STONITH_HISTORY/2
	* entries whenever the list grows beyond MAX_STONITH_HISTORY.
	* (sort for age + purge the MAX_STONITH_HISTORY/2 oldest)
	* That done on a per-node-base might raise the
	* probability of large syncs to occur.
	* Things like introducing a broadcast to purge
	* MAX_STONITH_HISTORY/2 entries or not sync above a certain
	* threshold coming to mind ...
	* Simplest thing though is to purge the full history
	* throughout the cluster once MAX_STONITH_HISTORY is reached.
	* On the other hand this leads to purging the history in
	* situations where it would be handy to have it probably.
	*/


	static int
	op_time_sort(const void a_voidp, const void b_voidp)
	{
	const remote_fencing_op_t a = (const remote_fencing_op_t ) a_voidp;
	const remote_fencing_op_t b = (const remote_fencing_op_t ) b_voidp;
	gboolean a_pending = ((a)->state != st_failed) && ((a)->state != st_done);
	gboolean b_pending = ((b)->state != st_failed) && ((b)->state != st_done);

	if (a_pending && b_pending) {
	return 0;
	} else if (a_pending) {
	return -1;
	} else if (b_pending) {
	return 1;
	} else if ((b)->completed == (a)->completed) {
	return 0;
	} else if ((b)->completed > (a)->completed) {
	return 1;
	}

	return -1;
	}


	/*!
	* \internal
	* \brief Do a local history-trim to MAX_STONITH_HISTORY / 2 entries
	* once over MAX_STONITH_HISTORY
	*/
	void
	stonith_fence_history_trim(void)
	{
	guint num_ops;

	if (!stonith_remote_op_list) {
	return;
	}
	num_ops = g_hash_table_size(stonith_remote_op_list);
	if (num_ops > MAX_STONITH_HISTORY) {
	remote_fencing_op_t *ops[num_ops];
	remote_fencing_op_t *op = NULL;
	GHashTableIter iter;
	int i;

	crm_trace("Fencing History growing beyond limit of %d so purge "
	"half of failed/successful attempts", MAX_STONITH_HISTORY);

	/* write all ops into an array */
	i = 0;
	g_hash_table_iter_init(&iter, stonith_remote_op_list);
	while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
	ops[i++] = op;
	}
	/* run quicksort over the array so that we get pending ops
	* first and then sorted most recent to oldest
	*/
	qsort(ops, num_ops, sizeof(remote_fencing_op_t *), op_time_sort);
	/* purgest oldest half of the history entries */
	for (i = MAX_STONITH_HISTORY / 2; i < num_ops; i++) {
	/* keep pending ops even if they shouldn't fill more than
	* half of our buffer
	*/
	if ((ops[i]->state == st_failed) \|\| (ops[i]->state == st_done)) {
	g_hash_table_remove(stonith_remote_op_list, ops[i]->id);
	}
	}
	/* we've just purged valid data from the list so there is no need
	* to create a notification - if displayed it can stay
	*/
	}
	}

	/*!
	 * \internal
	 * \brief Convert xml fence-history to a hash-table like stonith_remote_op_list
	 *
	 * Child elements without an operation ID are skipped with a warning.
	 *
	 * \param[in] history  Fence-history in xml
	 *
	 * \return Fence-history as hash-table, keyed by operation ID
	 */
	static GHashTable *
	stonith_xml_history_to_list(xmlNode *history)
	{
	    xmlNode *xml_op = NULL;
	    GHashTable *rv = NULL;

	    init_stonith_remote_op_hash_table(&rv);

	    CRM_LOG_ASSERT(rv != NULL);

	    for (xml_op = __xml_first_child(history); xml_op != NULL;
	         xml_op = __xml_next(xml_op)) {
	        remote_fencing_op_t *op = NULL;
	        char *id = crm_element_value_copy(xml_op, F_STONITH_REMOTE_OP_ID);
	        /* Start from defined values so that an entry lacking the date or
	         * state attribute is never built from indeterminate stack contents
	         */
	        int completed = 0;
	        int state = 0;

	        if (!id) {
	            crm_warn("History to convert to hashtable has no id in entry");
	            continue;
	        }

	        crm_trace("Attaching op %s to hashtable", id);

	        op = calloc(1, sizeof(remote_fencing_op_t));
	        if (op == NULL) {
	            /* Drop this entry rather than dereference a NULL pointer */
	            crm_err("Could not allocate history entry for op %s", id);
	            free(id);
	            continue;
	        }

	        /* The table entry takes ownership of id through op->id */
	        op->id = id;
	        op->target = crm_element_value_copy(xml_op, F_STONITH_TARGET);
	        op->action = crm_element_value_copy(xml_op, F_STONITH_ACTION);
	        op->originator = crm_element_value_copy(xml_op, F_STONITH_ORIGIN);
	        op->delegate = crm_element_value_copy(xml_op, F_STONITH_DELEGATE);
	        op->client_name = crm_element_value_copy(xml_op, F_STONITH_CLIENTNAME);
	        crm_element_value_int(xml_op, F_STONITH_DATE, &completed);
	        op->completed = (time_t) completed;
	        crm_element_value_int(xml_op, F_STONITH_STATE, &state);
	        op->state = (enum op_state) state;

	        g_hash_table_replace(rv, id, op);
	        CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
	    }

	    return rv;
	}

	/*!
	 * \internal
	 * \brief Build XML for the local fence-history entries that are missing
	 *        from a history received from a remote node
	 *
	 * \param[in] remote_history  Remote fence-history as hash-table (may be
	 *                            NULL, in which case nothing is filtered out)
	 * \param[in] add_id          Whether to include each operation's ID (not
	 *                            needed when answering an API history-request)
	 * \param[in] target          Optionally limit to a certain fence-target
	 *
	 * \return The fence-history as xml, or NULL if no entry qualified
	 */
	static xmlNode *
	stonith_local_history_diff(GHashTable *remote_history,
	                           gboolean add_id,
	                           const char *target)
	{
	    xmlNode *result = NULL;
	    int attached = 0;

	    if (stonith_remote_op_list != NULL) {
	        GHashTableIter ops;
	        remote_fencing_op_t *op = NULL;

	        result = create_xml_node(NULL, F_STONITH_HISTORY_LIST);

	        g_hash_table_iter_init(&ops, stonith_remote_op_list);
	        while (g_hash_table_iter_next(&ops, NULL, (void **)&op)) {
	            xmlNode *item = NULL;

	            /* Entries the remote side already has need no re-sending */
	            if ((remote_history != NULL)
	                && (g_hash_table_lookup(remote_history, op->id) != NULL)) {
	                continue;
	            }

	            /* Honor the optional target filter */
	            if ((target != NULL) && (strcmp(op->target, target) != 0)) {
	                continue;
	            }

	            attached++;
	            crm_trace("Attaching op %s", op->id);
	            item = create_xml_node(result, STONITH_OP_EXEC);
	            if (add_id) {
	                crm_xml_add(item, F_STONITH_REMOTE_OP_ID, op->id);
	            }
	            crm_xml_add(item, F_STONITH_TARGET, op->target);
	            crm_xml_add(item, F_STONITH_ACTION, op->action);
	            crm_xml_add(item, F_STONITH_ORIGIN, op->originator);
	            crm_xml_add(item, F_STONITH_DELEGATE, op->delegate);
	            crm_xml_add(item, F_STONITH_CLIENTNAME, op->client_name);
	            crm_xml_add_int(item, F_STONITH_DATE, op->completed);
	            crm_xml_add_int(item, F_STONITH_STATE, op->state);
	        }
	    }

	    if (attached != 0) {
	        return result;
	    }

	    /* Nothing attached: hand back NULL instead of an empty list */
	    free_xml(result);
	    return NULL;
	}

	/*!
	* \internal
	* \brief Merge fence-history coming from remote into local history
	*
	* Entries whose ID is not yet in the local history are moved (not copied)
	* into it; the passed table, with whatever entries remain, is destroyed
	* before returning.
	*
	* \param[in] history Hash-table holding remote history to be merged in
	*                    (nothing is done if NULL)
	*/
	static void
	stonith_merge_in_history_list(GHashTable *history)
	{
	GHashTableIter iter;
	remote_fencing_op_t *op = NULL;
	gboolean updated = FALSE;

	if (!history) {
	return;
	}

	/* make sure the local history table exists before merging into it */
	init_stonith_remote_op_hash_table(&stonith_remote_op_list);

	g_hash_table_iter_init(&iter, history);
	while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
	remote_fencing_op_t *stored_op =
	g_hash_table_lookup(stonith_remote_op_list, op->id);

	if (stored_op) {
	continue; /* skip over existing entries - state-merging might be desirable */
	}

	updated = TRUE;
	/* take the entry out of the remote table without destroying it,
	* then hand it to the local table
	*/
	g_hash_table_iter_steal(&iter);
	g_hash_table_insert(stonith_remote_op_list, op->id, op);
	/* we could trim the history here but if we bail
	* out after trim we might miss more recent entries
	* of those that might still be in the list
	* if we don't bail out trimming once is more
	* efficient and memory overhead is minimal as
	* we are just moving pointers from one hash to
	* another
	*/
	}
	stonith_fence_history_trim();
	if (updated) {
	do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
	}
	g_hash_table_destroy(history); /* remove what is left */
	}

	/*!
	* \internal
	* \brief Handle fence-history messages (either from API or coming in as
	* broadcasts)
	*
	* Depending on the call options this cleans up the history, takes part in
	* a history synchronisation broadcast, or answers a plain history request.
	*
	* \param[in] msg Request message
	* \param[out] output In case of a plain history request, set to the
	* history xml used to craft a reply from
	* \param[in] remote_peer Name of the peer the message came from, if any
	* \param[in] options call-options from the request
	*
	* \return always success as there is actually nothing that can go really wrong
	*
	* NOTE(review): msg is passed on as a pointer and output is dereferenced and
	* assigned below; the declarators in this listing appear to have lost a '*'
	* each - confirm against the real source.
	*/
	int
	stonith_fence_history(xmlNode msg, xmlNode *output,
	const char *remote_peer, int options)
	{
	int rc = 0;
	const char *target = NULL;
	xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE);
	- char *nodename = NULL;
	xmlNode *out_history = NULL;

	if (dev) {
	target = crm_element_value(dev, F_STONITH_TARGET);
	/* with st_opt_cs_nodeid the target is a numeric node id that has to be
	* translated to a node name first (kept as-is if no node is found)
	*/
	if (target && (options & st_opt_cs_nodeid)) {
	int nodeid = crm_atoi(target, NULL);
	+ crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);

	- nodename = stonith_get_peer_name(nodeid);
	- if (nodename) {
	- target = nodename;
	+ if (node) {
	+ target = node->uname;
	}
	}
	}

	if (options & st_opt_cleanup) {
	crm_trace("Cleaning up operations on %s in %p", target,
	stonith_remote_op_list);

	/* a call-id in the message selects the broadcast variant of the cleanup */
	stonith_fence_history_cleanup(target,
	crm_element_value(msg, F_STONITH_CALLID) != NULL);
	} else if (options & st_opt_broadcast) {
	if (crm_element_value(msg, F_STONITH_CALLID)) {
	/* this is coming from the stonith-API
	*
	* craft a broadcast with node's history
	* so that every node can merge and broadcast
	* what it has on top
	*/
	out_history = stonith_local_history_diff(NULL, TRUE, NULL);
	crm_trace("Broadcasting history to peers");
	stonith_send_broadcast_history(out_history,
	st_opt_broadcast \| st_opt_discard_reply,
	NULL);
	} else if (remote_peer &&
	!safe_str_eq(remote_peer, stonith_our_uname)) {
	xmlNode *history =
	get_xpath_object("//" F_STONITH_HISTORY_LIST, msg, LOG_TRACE);
	GHashTable *received_history =
	history?stonith_xml_history_to_list(history):NULL;

	/* either a broadcast created directly upon stonith-API request
	* or a diff as response to such a thing
	*
	* in both cases it may have a history or not
	* if we have differential data
	* merge in what we've received and stop
	* otherwise broadcast what we have on top
	* marking as differential and merge in afterwards
	*/
	if (!history \|\|
	!crm_is_true(crm_element_value(history,
	F_STONITH_DIFFERENTIAL))) {
	out_history =
	stonith_local_history_diff(received_history, TRUE, NULL);
	if (out_history) {
	crm_trace("Broadcasting history-diff to peers");
	crm_xml_add(out_history, F_STONITH_DIFFERENTIAL,
	XML_BOOLEAN_TRUE);
	stonith_send_broadcast_history(out_history,
	st_opt_broadcast \| st_opt_discard_reply,
	NULL);
	} else {
	crm_trace("History-diff is empty - skip broadcast");
	}
	}
	/* the merge destroys received_history, so it must come last */
	stonith_merge_in_history_list(received_history);
	} else {
	crm_trace("Skipping history-query-broadcast (%s%s)"
	" we sent ourselves",
	remote_peer?"remote-peer=":"local-ipc",
	remote_peer?remote_peer:"");
	}
	} else {
	/* plain history request */
	crm_trace("Looking for operations on %s in %p", target,
	stonith_remote_op_list);
	*output = stonith_local_history_diff(NULL, FALSE, target);
	}
	- free(nodename);
	free_xml(out_history);
	return rc;
	}
	diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
	index f1812dac2a..3aaa3d2a73 100644
	--- a/daemons/fenced/fenced_remote.c
	+++ b/daemons/fenced/fenced_remote.c
	@@ -1,2083 +1,2059 @@
	/*
	* Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <sys/stat.h>
	#include <unistd.h>
	#include <sys/utsname.h>

	#include <stdlib.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <ctype.h>
	#include <regex.h>

	#include <crm/crm.h>
	#include <crm/msg_xml.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipcs.h>
	#include <crm/cluster/internal.h>

	#include <crm/stonith-ng.h>
	#include <crm/fencing/internal.h>
	#include <crm/common/xml.h>

	#include <crm/common/util.h>
	#include <pacemaker-fenced.h>

	#define TIMEOUT_MULTIPLY_FACTOR 1.2

	/* When one fencer queries its peers for devices able to handle a fencing
	* request, each peer will reply with a list of such devices available to it.
	* Each reply will be parsed into a st_query_result_t, with each device's
	* information kept in a device_properties_t.
	*/

	/* Properties of one fencing device as reported in a peer's query reply */
	typedef struct device_properties_s {
	/* Whether access to this device has been verified */
	gboolean verified;

	/* The remaining members are arrays with st_phase_max entries, indexed by
	* the operation's "phase"
	*/

	/* Whether this device has been executed in each phase */
	gboolean executed[st_phase_max];
	/* Whether this device is disallowed from executing in each phase */
	gboolean disallowed[st_phase_max];
	/* Action-specific timeout for each phase */
	int custom_action_timeout[st_phase_max];
	/* Action-specific maximum random delay for each phase */
	int delay_max[st_phase_max];
	/* Action-specific base delay for each phase */
	int delay_base[st_phase_max];
	} device_properties_t;

	/* One peer's reply to a device query for a fencing operation */
	typedef struct st_query_result_s {
	/* Name of peer that sent this result */
	char *host;
	/* Only try peers for non-topology based operations once */
	gboolean tried;
	/* Number of entries in the devices table */
	int ndevices;
	/* Devices available to this host that are capable of fencing the target
	* (looked up by device ID, yielding device_properties_t entries)
	*/
	GHashTable *devices;
	} st_query_result_t;

	/* Fencing operations known to this node, stored under their operation ID */
	GHashTable *stonith_remote_op_list = NULL;

	void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer);
	static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
	/* Defined outside this file */
	extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
	                                  int call_options);

	static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
	static int get_op_total_timeout(const remote_fencing_op_t *op,
	                                const st_query_result_t *chosen_peer);

	/*!
	 * \internal
	 * \brief GLib comparison callback ordering two C strings with strcmp()
	 *
	 * \param[in] a  First string
	 * \param[in] b  Second string
	 *
	 * \return Result of strcmp() on the two strings
	 */
	static gint
	sort_strings(gconstpointer a, gconstpointer b)
	{
	    const char *lhs = a;
	    const char *rhs = b;

	    return strcmp(lhs, rhs);
	}

	/*!
	 * \internal
	 * \brief Free a peer's query result, including its device table and host name
	 *
	 * \param[in] data  Query result (st_query_result_t) to free; NULL is ignored
	 */
	static void
	free_remote_query(gpointer data)
	{
	    st_query_result_t *result = data;

	    if (result == NULL) {
	        return;
	    }

	    crm_trace("Free'ing query result from %s", result->host);
	    g_hash_table_destroy(result->devices);
	    free(result->host);
	    free(result);
	}

	/*!
	 * \brief Destroy the table of fencing operations, if one exists, and reset
	 *        the global pointer
	 */
	void
	free_stonith_remote_op_list()
	{
	    if (stonith_remote_op_list == NULL) {
	        return;
	    }
	    g_hash_table_destroy(stonith_remote_op_list);
	    stonith_remote_op_list = NULL;
	}

	/* Accumulator handed to count_peer_device() while walking a device table */
	struct peer_count_data {
	/* Operation whose current phase selects which "executed" slot is checked */
	const remote_fencing_op_t *op;
	/* Whether only verified devices are counted */
	gboolean verified_only;
	/* Running number of devices counted so far */
	int count;
	};

	/*!
	 * \internal
	 * \brief Increment a counter if a device has not been executed yet
	 *
	 * When the counter is restricted to verified devices, unverified ones are
	 * not counted.
	 *
	 * \param[in]     key        Device ID (ignored)
	 * \param[in]     value      Device properties
	 * \param[in,out] user_data  Peer count data
	 */
	static void
	count_peer_device(gpointer key, gpointer value, gpointer user_data)
	{
	    device_properties_t *props = (device_properties_t*)value;
	    struct peer_count_data *data = user_data;

	    if (!props->executed[data->op->phase]
	        && (!data->verified_only || props->verified)) {
	        ++(data->count);
	    }
	}

	/*!
	 * \internal
	 * \brief Check the number of available devices in a peer's query results
	 *
	 * \param[in] op             Operation that results are for
	 * \param[in] peer           Peer to count (may be NULL, giving 0)
	 * \param[in] verified_only  Whether to count only verified devices
	 *
	 * \return Number of devices available to peer that were not already executed
	 */
	static int
	count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
	                   gboolean verified_only)
	{
	    struct peer_count_data data;

	    data.op = op;
	    data.verified_only = verified_only;
	    data.count = 0;
	    if (peer) {
	        g_hash_table_foreach(peer->devices, count_peer_device, &data);
	    }
	    return data.count;
	}

	/*!
	* \internal
	* \brief Search for a device in a query result
	*
	* \param[in] op Operation that result is for
	* \param[in] peer Query result for a peer
	* \param[in] device Device ID to search for
	*
	* \return Device properties if found, NULL otherwise
	*/
	static device_properties_t *
	find_peer_device(const remote_fencing_op_t op, const st_query_result_t peer,
	const char *device)
	{
	device_properties_t *props = g_hash_table_lookup(peer->devices, device);

	return (props && !props->executed[op->phase]
	&& !props->disallowed[op->phase])? props : NULL;
	}

	/*!
	 * \internal
	 * \brief Find a device in a peer's device list and mark it as executed
	 *
	 * \param[in]     op                     Operation that peer result is for
	 * \param[in,out] peer                   Peer with results to search
	 * \param[in]     device                 ID of device to mark as done
	 * \param[in]     verified_devices_only  Only consider verified devices
	 *
	 * \return TRUE if device was found and marked, FALSE otherwise
	 */
	static gboolean
	grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer,
	                 const char *device, gboolean verified_devices_only)
	{
	    device_properties_t *props = find_peer_device(op, peer, device);

	    if ((props == NULL) || (verified_devices_only && !props->verified)) {
	        return FALSE;
	    }

	    /* The count is logged before the device is marked, so it still
	     * includes the device being grabbed
	     */
	    crm_trace("Removing %s from %s (%d remaining)",
	              device, peer->host, count_peer_devices(op, peer, FALSE));
	    props->executed[op->phase] = TRUE;
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Cancel whichever of an operation's timers are set and zero them
	 *
	 * \param[in,out] op  Operation whose query, total and per-op timers to clear
	 */
	static void
	clear_remote_op_timers(remote_fencing_op_t * op)
	{
	    /* Timer for the device query */
	    if (op->query_timer != 0) {
	        g_source_remove(op->query_timer);
	        op->query_timer = 0;
	    }

	    /* Timer for the operation as a whole */
	    if (op->op_timer_total != 0) {
	        g_source_remove(op->op_timer_total);
	        op->op_timer_total = 0;
	    }

	    /* Timer for a single step */
	    if (op->op_timer_one != 0) {
	        g_source_remove(op->op_timer_one);
	        op->op_timer_one = 0;
	    }
	}

	/*!
	 * \internal
	 * \brief Free a fencing operation and everything it owns
	 *
	 * Cancels the operation's timers, then releases its strings, query
	 * results, request XML and device lists.
	 *
	 * \param[in] data  Operation (remote_fencing_op_t) to free
	 */
	static void
	free_remote_op(gpointer data)
	{
	    remote_fencing_op_t *doomed = data;

	    crm_trace("Free'ing op %s for %s", doomed->id, doomed->target);
	    crm_log_xml_debug(doomed->request, "Destroying");

	    clear_remote_op_timers(doomed);

	    /* Plain strings */
	    free(doomed->id);
	    free(doomed->action);
	    free(doomed->delegate);
	    free(doomed->target);
	    free(doomed->client_id);
	    free(doomed->client_name);
	    free(doomed->originator);

	    /* Containers and XML */
	    if (doomed->query_results != NULL) {
	        g_list_free_full(doomed->query_results, free_remote_query);
	    }
	    if (doomed->request != NULL) {
	        free_xml(doomed->request);
	        doomed->request = NULL;
	    }
	    if (doomed->devices_list != NULL) {
	        g_list_free_full(doomed->devices_list, free);
	        doomed->devices_list = NULL;
	    }
	    g_list_free_full(doomed->automatic_list, free);
	    /* Only the list cells of the duplicates are released, not their data */
	    g_list_free(doomed->duplicates);

	    free(doomed);
	}

	/*!
	 * \brief Create a fencing-operation hash table unless one already exists
	 *
	 * The table is created with free_remote_op() as its value destructor and
	 * no key destructor.
	 *
	 * \param[in,out] table  Where the table pointer is stored; left untouched
	 *                       when it already points to a table
	 */
	void
	init_stonith_remote_op_hash_table(GHashTable **table)
	{
	    if (*table != NULL) {
	        return;
	    }
	    *table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
	                                   free_remote_op);
	}

	/*!
	 * \internal
	 * \brief Return an operation's originally requested action (before any remap)
	 *
	 * \param[in] op  Operation to check
	 *
	 * \return "reboot" if the operation is past the requested phase, otherwise
	 *         the operation's current action
	 */
	static const char *
	op_requested_action(const remote_fencing_op_t *op)
	{
	    if (op->phase > st_phase_requested) {
	        return "reboot";
	    }
	    return op->action;
	}

	/*!
	 * \internal
	 * \brief Switch a "reboot" operation over to its "off" phase
	 *
	 * \param[in,out] op  Operation to remap
	 */
	static void
	op_phase_off(remote_fencing_op_t *op)
	{
	    crm_info("Remapping multiple-device reboot of %s (%s) to off",
	             op->target, op->id);

	    /* The action buffer is overwritten in place: "off" (like "on") is
	     * shorter than "reboot", so the existing allocation is large enough
	     */
	    op->phase = st_phase_off;
	    strcpy(op->action, "off");
	}

	/*!
	 * \internal
	 * \brief Advance a remapped reboot operation to the "on" phase
	 *
	 * Devices that are on the automatic unfencing list are dropped from the
	 * operation's device list, the automatic list itself is discarded, and the
	 * device cursor is rewound to the start of what remains.
	 *
	 * \param[in,out] op  Operation to remap
	 */
	static void
	op_phase_on(remote_fencing_op_t *op)
	{
	    GListPtr iter = NULL;

	    crm_info("Remapped off of %s complete, remapping to on for %s.%.8s",
	             op->target, op->client_name, op->id);
	    op->phase = st_phase_on;
	    strcpy(op->action, "on");

	    /* Skip devices with automatic unfencing, because the cluster will handle it
	     * when the node rejoins.
	     */
	    for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
	        GListPtr match = g_list_find_custom(op->devices_list, iter->data,
	                                            sort_strings);

	        if (match) {
	            /* devices_list owns its strings (they are released with free()
	             * elsewhere in this file), so free the string together with
	             * its link instead of leaking it.
	             */
	            free(match->data);
	            op->devices_list = g_list_delete_link(op->devices_list, match);
	        }
	    }
	    g_list_free_full(op->automatic_list, free);
	    op->automatic_list = NULL;

	    /* Rewind device list pointer */
	    op->devices = op->devices_list;
	}

	/*!
	 * \internal
	 * \brief Put a remapped reboot operation back into its requested state
	 *
	 * \param[in,out] op  Operation to reset (unchanged if it is not remapped)
	 */
	static void
	undo_op_remap(remote_fencing_op_t *op)
	{
	    if (op->phase <= 0) {
	        return;     // not remapped, nothing to undo
	    }

	    crm_info("Undoing remap of reboot of %s for %s.%.8s",
	             op->target, op->client_name, op->id);
	    strcpy(op->action, "reboot");
	    op->phase = st_phase_requested;
	}

	/*!
	 * \internal
	 * \brief Build the XML notification describing a fencing operation's result
	 *
	 * \param[in] op  Operation being reported
	 * \param[in] rc  Result code to record
	 *
	 * \return Newly created XML node (callers in this file release it with
	 *         free_xml())
	 */
	static xmlNode *
	create_op_done_notify(remote_fencing_op_t * op, int rc)
	{
	    xmlNode *xml = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);

	    /* Outcome */
	    crm_xml_add_int(xml, "state", op->state);
	    crm_xml_add_int(xml, F_STONITH_RC, rc);

	    /* What was done, to whom, and by whom */
	    crm_xml_add(xml, F_STONITH_TARGET, op->target);
	    crm_xml_add(xml, F_STONITH_ACTION, op->action);
	    crm_xml_add(xml, F_STONITH_DELEGATE, op->delegate);

	    /* Who asked for it */
	    crm_xml_add(xml, F_STONITH_REMOTE_OP_ID, op->id);
	    crm_xml_add(xml, F_STONITH_ORIGIN, op->originator);
	    crm_xml_add(xml, F_STONITH_CLIENTID, op->client_id);
	    crm_xml_add(xml, F_STONITH_CLIENTNAME, op->client_name);

	    return xml;
	}

	/*!
	 * \internal
	 * \brief Broadcast a fencing operation's result as a cluster message
	 *
	 * \param[in] op  Operation whose result should be announced
	 * \param[in] rc  Result code of the operation
	 */
	static void
	bcast_result_to_peers(remote_fencing_op_t * op, int rc)
	{
	    /* Incremented on every call and sent along as the "count" attribute */
	    static int count = 0;

	    xmlNode *message = create_xml_node(NULL, T_STONITH_REPLY);
	    xmlNode *payload = create_op_done_notify(op, rc);

	    ++count;
	    crm_trace("Broadcasting result to peers");

	    crm_xml_add(message, F_TYPE, T_STONITH_NOTIFY);
	    crm_xml_add(message, F_SUBTYPE, "broadcast");
	    crm_xml_add(message, F_STONITH_OPERATION, T_STONITH_NOTIFY);
	    crm_xml_add_int(message, "count", count);
	    add_message_xml(message, F_STONITH_CALLDATA, payload);

	    /* NULL destination node */
	    send_cluster_message(NULL, crm_msg_stonith_ng, message, FALSE);

	    free_xml(payload);
	    free_xml(message);
	}

	/*!
	 * \internal
	 * \brief Reply to the requesting client and notify local clients, once
	 *
	 * \param[in,out] op    Finished operation (marked as notified afterwards)
	 * \param[in,out] data  XML that gets the state, target and operation added
	 *                      and is used to build the client reply
	 * \param[in]     rc    Result code of the operation
	 */
	static void
	handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
	{
	    xmlNode *notification = NULL;
	    xmlNode *client_reply = NULL;

	    if (op->notify_sent == TRUE) {
	        return;     // notifications already went out
	    }

	    /* Do notification with a clean data object */
	    notification = create_op_done_notify(op, rc);

	    crm_xml_add_int(data, "state", op->state);
	    crm_xml_add(data, F_STONITH_TARGET, op->target);
	    crm_xml_add(data, F_STONITH_OPERATION, op->action);

	    client_reply = stonith_construct_reply(op->request, NULL, data, rc);
	    crm_xml_add(client_reply, F_STONITH_DELEGATE, op->delegate);

	    /* Send fencing OP reply to local client that initiated fencing */
	    do_local_reply(client_reply, op->client_id,
	                   op->call_options & st_opt_sync_call, FALSE);

	    /* Broadcast to all local clients that the fencing operation happened */
	    do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notification);
	    do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);

	    /* Remember that notifications were sent so they are not repeated */
	    op->notify_sent = TRUE;

	    free_xml(client_reply);
	    free_xml(notification);
	}

	/*!
	 * \internal
	 * \brief Finalize every operation that was merged into a finished one
	 *
	 * \param[in]     op    Operation that just finished
	 * \param[in,out] data  XML result passed on to each duplicate
	 * \param[in]     rc    Result code passed on to each duplicate
	 */
	static void
	handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
	{
	    GListPtr item = NULL;

	    for (item = op->duplicates; item != NULL; item = item->next) {
	        remote_fencing_op_t *dup_op = item->data;

	        if (dup_op->state != st_duplicate) {
	            // Possible if (for example) it timed out already
	            crm_err("Skipping duplicate notification for %s@%s - %d", dup_op->client_name,
	                    dup_op->originator, dup_op->state);
	            continue;
	        }

	        /* The duplicate inherits the state of the operation it rode on */
	        dup_op->state = op->state;
	        crm_debug("Performing duplicate notification for %s@%s.%.8s = %s",
	                  dup_op->client_name, dup_op->originator, dup_op->id,
	                  pcmk_strerror(rc));
	        remote_op_done(dup_op, data, rc, TRUE);
	    }
	}

	/*!
	 * \internal
	 * \brief Finalize a remote operation.
	 *
	 * This function has two code paths.
	 *
	 * Path 1. This node is the owner of the operation and needs
	 * to notify the cpg group via a broadcast as to the operation's
	 * results.
	 *
	 * Path 2. The cpg broadcast is received. All nodes notify their local
	 * stonith clients the operation results.
	 *
	 * So, the owner of the operation first notifies the cluster of the result,
	 * and once that cpg notify is received back it notifies all the local clients.
	 *
	 * Nodes that are passive watchers of the operation will receive the
	 * broadcast and only need to notify their local clients the operation finished.
	 *
	 * \param[in,out] op    The fencing operation to finalize
	 * \param[in,out] data  The XML reply (if present) of the last delegated
	 *                      fencing operation; a temporary node is created
	 *                      (and freed again) when this is NULL
	 * \param[in]     rc    Result code of the operation
	 * \param[in]     dup   Whether this operation is a duplicate; if so, no
	 *                      broadcast is sent and its own duplicates are not
	 *                      processed
	 */
	static void
	remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
	{
	    int level = LOG_ERR;
	    const char *subt = NULL;
	    xmlNode *local_data = NULL;

	    op->completed = time(NULL);
	    clear_remote_op_timers(op);
	    undo_op_remap(op);

	    if (op->notify_sent == TRUE) {
	        crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s",
	                op->action, op->target, op->delegate ? op->delegate : "<no-one>",
	                op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc));
	        goto remote_op_done_cleanup;
	    }

	    /* Record who executed the operation, if the reply can tell us */
	    if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) {
	        xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE);
	        if(ndata) {
	            op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE);
	        } else {
	            op->delegate = crm_element_value_copy(data, F_ORIG);
	        }
	    }

	    if (data == NULL) {
	        data = create_xml_node(NULL, "remote-op");
	        local_data = data;      // remembered so it can be freed on exit
	    }

	    /* Tell everyone the operation is done, we will continue
	     * with doing the local notifications once we receive
	     * the broadcast back. */
	    subt = crm_element_value(data, F_SUBTYPE);
	    if (dup == FALSE && safe_str_neq(subt, "broadcast")) {
	        /* Defer notification until the bcast message arrives */
	        bcast_result_to_peers(op, rc);
	        goto remote_op_done_cleanup;
	    }

	    /* Only a failure of an operation originated by this node is logged at
	     * error level.
	     */
	    if (rc == pcmk_ok || dup) {
	        level = LOG_NOTICE;
	    } else if (safe_str_neq(op->originator, stonith_our_uname)) {
	        level = LOG_NOTICE;
	    }

	    do_crm_log(level,
	               "Operation %s of %s by %s for %s@%s.%.8s: %s",
	               op->action, op->target, op->delegate ? op->delegate : "<no-one>",
	               op->client_name, op->originator, op->id, pcmk_strerror(rc));

	    handle_local_reply_and_notify(op, data, rc);

	    if (dup == FALSE) {
	        handle_duplicates(op, data, rc);
	    }

	    /* Free non-essential parts of the record
	     * Keep the record around so we can query the history
	     */
	    if (op->query_results) {
	        g_list_free_full(op->query_results, free_remote_query);
	        op->query_results = NULL;
	    }

	    if (op->request) {
	        free_xml(op->request);
	        op->request = NULL;
	    }

	remote_op_done_cleanup:
	    free_xml(local_data);
	}

	/*!
	 * \internal
	 * \brief Timer callback that treats a self-fencing operation as complete
	 *
	 * \param[in,out] userdata  Operation (remote_fencing_op_t *) to finalize
	 *
	 * \return FALSE, so the timer is not rescheduled
	 */
	static gboolean
	remote_op_watchdog_done(gpointer userdata)
	{
	    remote_fencing_op_t *op = userdata;

	    /* This timer has fired, so its source ID is no longer valid */
	    op->op_timer_one = 0;

	    /* "%.8s" truncates the ID to 8 characters like every other message in
	     * this file ("%8s" would only pad it to a minimum width of 8).
	     */
	    crm_notice("Self-fencing (%s) by %s for %s.%.8s assumed complete",
	               op->action, op->target, op->client_name, op->id);
	    op->state = st_done;
	    remote_op_done(op, NULL, pcmk_ok, FALSE);
	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Timer callback for a single peer's fencing attempt timing out
	 *
	 * \param[in,out] userdata  Operation (remote_fencing_op_t *) that timed out
	 *
	 * \return FALSE, so the timer is not rescheduled
	 */
	static gboolean
	remote_op_timeout_one(gpointer userdata)
	{
	    remote_fencing_op_t *op = userdata;

	    /* This timer has fired, so its source ID is no longer valid */
	    op->op_timer_one = 0;

	    /* Separate the extended information from CRM_XS with a space, as the
	     * other CRM_XS messages in this file do.
	     */
	    crm_notice("Peer's fencing (%s) of %s for %s timed out" CRM_XS " id=%s",
	               op->action, op->target, op->client_name, op->id);

	    /* Hand the operation back for another attempt with no peer preselected */
	    call_remote_stonith(op, NULL);
	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Timer callback for an operation's total timeout expiring
	 *
	 * \param[in,out] userdata  Operation (remote_fencing_op_t *) that timed out
	 *
	 * \return FALSE, so the timer is not rescheduled
	 */
	static gboolean
	remote_op_timeout(gpointer userdata)
	{
	    remote_fencing_op_t *op = userdata;
	    int result = -ETIME;

	    op->op_timer_total = 0;

	    if (op->state == st_done) {
	        crm_debug("Action %s (%s) for %s (%s) already completed",
	                  op->action, op->id, op->target, op->client_name);
	        return FALSE;
	    }

	    crm_debug("Action %s (%s) for %s (%s) timed out",
	              op->action, op->id, op->target, op->client_name);

	    if (op->phase == st_phase_on) {
	        /* A remapped reboot operation timed out in the "on" phase, but the
	         * "off" phase completed successfully, so quit trying any further
	         * devices, and return success.
	         */
	        result = pcmk_ok;
	    } else {
	        op->state = st_failed;
	    }

	    remote_op_done(op, NULL, result, FALSE);
	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Timer callback for the peer query phase running out of time
	 *
	 * \param[in,out] data  Operation (remote_fencing_op_t *) being queried
	 *
	 * \return FALSE, so the timer is not rescheduled
	 */
	static gboolean
	remote_op_query_timeout(gpointer data)
	{
	    remote_fencing_op_t *op = data;

	    op->query_timer = 0;

	    if (op->state == st_done) {
	        crm_debug("Operation %s for %s already completed", op->id, op->target);
	        return FALSE;
	    }

	    if (op->state == st_exec) {
	        crm_debug("Operation %s for %s already in progress", op->id, op->target);
	        return FALSE;
	    }

	    if (op->query_results != NULL) {
	        /* Some peers answered: go ahead with what we have */
	        crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state);
	        call_remote_stonith(op, NULL);
	        return FALSE;
	    }

	    /* Nobody answered: cancel the total timer and fail right away */
	    crm_debug("Query %s for %s timed out: %d", op->id, op->target, op->state);
	    if (op->op_timer_total != 0) {
	        g_source_remove(op->op_timer_total);
	        op->op_timer_total = 0;
	    }
	    remote_op_timeout(op);
	    return FALSE;
	}

	/*!
	 * \internal
	 * \brief Check whether a topology entry has no levels configured
	 *
	 * \param[in] tp  Topology entry to check (may be NULL)
	 *
	 * \return TRUE if \p tp is NULL or all of its levels are NULL, else FALSE
	 */
	static gboolean
	topology_is_empty(stonith_topology_t *tp)
	{
	    int level = 0;

	    if (tp != NULL) {
	        while (level < ST_LEVEL_MAX) {
	            if (tp->levels[level] != NULL) {
	                return FALSE;
	            }
	            level++;
	        }
	    }
	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Add a device to an operation's automatic unfencing list
	 *
	 * The device ID is copied, and nothing is added if the list already
	 * contains a match for it.
	 *
	 * \param[in,out] op      Operation to modify
	 * \param[in]     device  Device ID to add
	 */
	static void
	add_required_device(remote_fencing_op_t *op, const char *device)
	{
	    GListPtr match = g_list_find_custom(op->automatic_list, device,
	                                        sort_strings);

	    if (!match) {
	        op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
	    }
	}

	/*!
	 * \internal
	 * \brief Remove a device from the automatic unfencing list
	 *
	 * \param[in,out] op      Operation to modify
	 * \param[in]     device  Device ID to remove
	 *
	 * \note The list's own copy of the ID is freed.
	 *       NOTE(review): callers must not keep using a pointer obtained from
	 *       the list element itself after this call -- confirm against callers
	 *       outside this section.
	 */
	static void
	remove_required_device(remote_fencing_op_t *op, const char *device)
	{
	    GListPtr match = g_list_find_custom(op->automatic_list, device,
	                                        sort_strings);

	    if (match) {
	        /* Entries are strdup()'d when added, so release the string along
	         * with its link instead of leaking it.
	         */
	        free(match->data);
	        op->automatic_list = g_list_delete_link(op->automatic_list, match);
	    }
	}

	/*!
	 * \internal
	 * \brief Replace an operation's device list with a deep copy of another
	 *
	 * \param[in,out] op       Operation whose device list is replaced
	 * \param[in]     devices  List of device ID strings to copy
	 */
	static void
	set_op_device_list(remote_fencing_op_t * op, GListPtr devices)
	{
	    GListPtr src = devices;

	    /* Discard whatever list was there before */
	    if (op->devices_list != NULL) {
	        g_list_free_full(op->devices_list, free);
	        op->devices_list = NULL;
	    }

	    /* Duplicate every string so the operation owns its own copies */
	    while (src != NULL) {
	        op->devices_list = g_list_append(op->devices_list, strdup(src->data));
	        src = src->next;
	    }

	    /* Start iterating from the first device */
	    op->devices = op->devices_list;
	}

	/*!
	* \internal
	* \brief Check whether a node matches a topology target
	*
	* \param[in] tp Topology table entry to check
	* \param[in] node Name of node to check
	*
	* \return TRUE if node matches topology target
	*/
	static gboolean
	topology_matches(const stonith_topology_t tp, const char node)
	{
	regex_t r_patt;

	CRM_CHECK(node && tp && tp->target, return FALSE);
	switch(tp->kind) {
	case 2:
	/* This level targets by attribute, so tp->target is a NAME=VALUE pair
	* of a permanent attribute applied to targeted nodes. The test below
	* relies on the locally cached copy of the CIB, so if fencing needs to
	* be done before the initial CIB is received or after a malformed CIB
	* is received, then the topology will be unable to be used.
	*/
	if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
	crm_notice("Matched %s with %s by attribute", node, tp->target);
	return TRUE;
	}
	break;
	case 1:
	/* This level targets by name, so tp->target is a regular expression
	* matching names of nodes to be targeted.
	*/

	if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED\|REG_NOSUB)) {
	crm_info("Bad regex '%s' for fencing level", tp->target);
	} else {
	int status = regexec(&r_patt, node, 0, NULL, 0);

	regfree(&r_patt);
	if (status == 0) {
	crm_notice("Matched %s with %s by name", node, tp->target);
	return TRUE;
	}
	}
	break;
	case 0:
	crm_trace("Testing %s against %s", node, tp->target);
	return safe_str_eq(tp->target, node);
	}
	crm_trace("No match for %s with %s", node, tp->target);
	return FALSE;
	}

	/*!
	 * \brief Find the topology entry that applies to a host
	 *
	 * An entry stored directly under the host's name is preferred; otherwise
	 * every entry is tested with topology_matches().
	 *
	 * \param[in] host  Name of host to look up
	 *
	 * \return Matching topology entry, or NULL if there is none
	 */
	stonith_topology_t *
	find_topology_for_host(const char *host)
	{
	    GHashTableIter scan;
	    stonith_topology_t *candidate = NULL;
	    stonith_topology_t *exact = g_hash_table_lookup(topology, host);

	    if (exact != NULL) {
	        crm_trace("Found %s for %s in %d entries", exact->target, host, g_hash_table_size(topology));
	        return exact;
	    }

	    g_hash_table_iter_init(&scan, topology);
	    while (g_hash_table_iter_next(&scan, NULL, (gpointer *) & candidate)) {
	        if (!topology_matches(candidate, host)) {
	            continue;
	        }
	        crm_trace("Found %s for %s in %d entries", candidate->target, host, g_hash_table_size(topology));
	        return candidate;
	    }

	    crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Move a fencing operation on to its target's next topology level
	 *
	 * \param[in,out] op  Remote fencing operation to modify
	 *
	 * \return pcmk_ok if the device list was set to the next level, or if there
	 *         is nothing to do (no target, as for queries, or no topology for
	 *         the target); -EINVAL if no more topology levels remain
	 */
	static int
	stonith_topology_next(remote_fencing_op_t * op)
	{
	    stonith_topology_t *tp = NULL;

	    /* Queries don't have a target set */
	    if (op->target != NULL) {
	        tp = find_topology_for_host(op->target);
	    }
	    if (topology_is_empty(tp)) {
	        return pcmk_ok;
	    }

	    set_bit(op->call_options, st_opt_topology);

	    /* This is a new level, so undo any remapping left over from previous */
	    undo_op_remap(op);

	    /* Always advance at least one level, then skip unconfigured levels */
	    for (op->level++;
	         (op->level < ST_LEVEL_MAX) && (tp->levels[op->level] == NULL);
	         op->level++) {
	        /* nothing to do */
	    }

	    if (op->level >= ST_LEVEL_MAX) {
	        crm_notice("All fencing options to fence %s for %s@%s.%.8s failed",
	                   op->target, op->client_name, op->originator, op->id);
	        return -EINVAL;
	    }

	    crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s",
	              op->level, op->target, g_list_length(tp->levels[op->level]),
	              op->client_name, op->originator, op->id);
	    set_op_device_list(op, tp->levels[op->level]);

	    if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
	        /* A reboot has been requested for a topology level with multiple
	         * devices. Instead of rebooting the devices sequentially, we will
	         * turn them all off, then turn them all on again. (Think about
	         * switched power outlets for redundant power supplies.)
	         */
	        op_phase_off(op);
	    }
	    return pcmk_ok;
	}

	/*!
	 * \internal
	 * \brief Merge an operation into an equivalent one that is already in flight
	 *
	 * Every operation in the remote operation table is compared with \p op. If
	 * one is still in progress, targets the same node with the same requested
	 * action, comes from a different client, is not a self-fencing request, has
	 * a live originator and has not exceeded its total timeout, \p op is added
	 * to its list of duplicates and marked as st_duplicate.
	 *
	 * \param[in,out] op  Newly created operation to check
	 *
	 * \note An in-flight operation whose originator is no longer active is
	 *       marked as failed as a side effect.
	 */
	static void
	merge_duplicates(remote_fencing_op_t * op)
	{
	    GHashTableIter iter;
	    remote_fencing_op_t *other = NULL;

	    time_t now = time(NULL);

	    g_hash_table_iter_init(&iter, stonith_remote_op_list);
	    while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
	        crm_node_t *peer = NULL;
	        const char *other_action = op_requested_action(other);

	        if (other->state > st_exec) {
	            /* Must be in-progress */
	            continue;
	        } else if (safe_str_neq(op->target, other->target)) {
	            /* Must be for the same node */
	            continue;
	        } else if (safe_str_neq(op->action, other_action)) {
	            crm_trace("Must be for the same action: %s vs. %s",
	                      op->action, other_action);
	            continue;
	        } else if (safe_str_eq(op->client_name, other->client_name)) {
	            crm_trace("Must be for different clients: %s", op->client_name);
	            continue;
	        } else if (safe_str_eq(other->target, other->originator)) {
	            crm_trace("Can't be a suicide operation: %s", other->target);
	            continue;
	        }

	        peer = crm_get_peer(0, other->originator);
	        if(fencing_peer_active(peer) == FALSE) {
	            crm_notice("Failing stonith action %s for node %s originating from %s@%s.%.8s: Originator is dead",
	                       other->action, other->target, other->client_name, other->originator, other->id);
	            other->state = st_failed;
	            continue;

	        } else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) {
	            /* The width of time_t is platform-dependent, so convert the
	             * timestamps explicitly to match the format specifiers.
	             */
	            crm_info("Stonith action %s for node %s originating from %s@%s.%.8s is too old: %lld vs. %lld + %d",
	                     other->action, other->target, other->client_name, other->originator, other->id,
	                     (long long) now, (long long) other->created, other->total_timeout);
	            continue;
	        }

	        /* There is another in-flight request to fence the same host
	         * Piggyback on that instead. If it fails, so do we.
	         */
	        other->duplicates = g_list_append(other->duplicates, op);
	        if (other->total_timeout == 0) {
	            crm_trace("Making a best-guess as to the timeout used");
	            other->total_timeout = op->total_timeout =
	                TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
	        }
	        crm_notice
	            ("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)",
	             op->action, op->target, op->client_name, op->id, other->client_name, other->originator,
	             other->id, other->total_timeout);
	        report_timeout_period(op, other->total_timeout);
	        op->state = st_duplicate;
	    }
	}

	/*!
	 * \internal
	 * \brief Count the cached peers that fencing_peer_active() reports as active
	 *
	 * \return Number of active peers in the peer cache
	 */
	static uint32_t
	fencing_active_peers(void)
	{
	    GHashTableIter cache_iter;
	    crm_node_t *node = NULL;
	    uint32_t active = 0;

	    g_hash_table_iter_init(&cache_iter, crm_peer_cache);
	    while (g_hash_table_iter_next(&cache_iter, NULL, (void **)&node)) {
	        if (fencing_peer_active(node)) {
	            active++;
	        }
	    }
	    return active;
	}

	/*!
	 * \brief Complete an operation based on a manual fencing confirmation
	 *
	 * \param[in,out] msg  Request XML containing the confirmation
	 * \param[in,out] op   Operation to mark as done
	 *
	 * \return -EINPROGRESS (replies are sent asynchronously)
	 */
	int
	stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
	{
	    xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);

	    op->state = st_done;
	    op->completed = time(NULL);

	    /* A delegate may already have been copied from the request when the
	     * operation was created; release it so it is not leaked.
	     */
	    free(op->delegate);
	    op->delegate = strdup("a human");

	    crm_notice("Injecting manual confirmation that %s is safely off/down",
	               crm_element_value(dev, F_STONITH_TARGET));

	    remote_op_done(op, msg, pcmk_ok, FALSE);

	    /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
	    return -EINPROGRESS;
	}

	-char *
	-stonith_get_peer_name(unsigned int nodeid)
	-{
	- crm_node_t *node = crm_find_peer(nodeid, NULL);
	- char *nodename = NULL;
	-
	- if (node && node->uname) {
	- return strdup(node->uname);
	-
	- } else if ((nodename = get_node_name(nodeid))) {
	- return nodename;
	-
	- } else {
	- const char *last_known_name = g_hash_table_lookup(known_peer_names, GUINT_TO_POINTER(nodeid));
	-
	- if (last_known_name) {
	- crm_debug("Use the last known name %s for nodeid %u", last_known_name, nodeid);
	- return strdup(last_known_name);
	- }
	- }
	-
	- return NULL;
	-}
	-
	/*!
	* \internal
	* \brief Create a new remote stonith operation
	*
	* \param[in] client ID of local stonith client that initiated the operation
	* \param[in] request The request from the client that started the operation
	* \param[in] peer TRUE if this operation is owned by another stonith peer
	* (an operation owned by one peer is stored on all peers,
	* but only the owner executes it; all nodes get the results
	* once the owner finishes execution)
	*
	* \return Newly created operation, or the already recorded operation with
	* the same ID when \p peer is TRUE; NULL if a peer's request carries
	* no operation ID
	*/
	void *
	create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
	{
	remote_fencing_op_t *op = NULL;
	xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
	int call_options = 0;

	init_stonith_remote_op_hash_table(&stonith_remote_op_list);

	/* If this operation is owned by another node, check to make
	* sure we haven't already created this operation. */
	if (peer && dev) {
	const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);

	CRM_CHECK(op_id != NULL, return NULL);

	op = g_hash_table_lookup(stonith_remote_op_list, op_id);
	if (op) {
	crm_debug("%s already exists", op_id);
	return op;
	}
	}

	op = calloc(1, sizeof(remote_fencing_op_t));

	crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));

	if (peer && dev) {
	op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
	} else {
	op->id = crm_generate_uuid();
	}

	/* The table uses op->id itself as the key (it does not copy it) */
	g_hash_table_replace(stonith_remote_op_list, op->id, op);
	CRM_LOG_ASSERT(g_hash_table_lookup(stonith_remote_op_list, op->id) != NULL);
	crm_trace("Created %s", op->id);

	op->state = st_query;
	op->replies_expected = fencing_active_peers();
	op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
	op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
	op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
	op->created = time(NULL);

	if (op->originator == NULL) {
	/* Local or relayed request */
	op->originator = strdup(stonith_our_uname);
	}

	CRM_LOG_ASSERT(client != NULL);
	if (client) {
	op->client_id = strdup(client);
	}

	op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);

	op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
	op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
	crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
	op->call_options = call_options;

	crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));

	crm_trace("%s new stonith op: %s - %s of %s for %s",
	(peer
	&& dev) ? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name);

	if (op->call_options & st_opt_cs_nodeid) {
	int nodeid = crm_atoi(op->target, NULL);
	- char *nodename = stonith_get_peer_name(nodeid);
	+ crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);

	/* Ensure the conversion only happens once */
	op->call_options &= ~st_opt_cs_nodeid;

	- if (nodename) {
	+ if (node && node->uname) {
	free(op->target);
	- op->target = nodename;
	+ op->target = strdup(node->uname);

	} else {
	crm_warn("Could not expand nodeid '%s' into a host name", op->target);
	}
	}

	/* check to see if this is a duplicate operation of another in-flight operation */
	merge_duplicates(op);

	if (op->state != st_duplicate) {
	/* kick history readers */
	do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
	}

	/* safe to trim as long as that doesn't touch pending ops */
	stonith_fence_history_trim();

	return op;
	}

	/*!
	 * \brief Start a fencing operation owned by this node
	 *
	 * Creates the operation, selects the first topology level if there is one,
	 * and, unless the operation failed, was merged as a duplicate, or is a
	 * manual confirmation, sends a device query as a cluster message and arms
	 * the query timer.
	 *
	 * \param[in] client      Local client that made the request (may be NULL, in
	 *                        which case the client ID is read from \p request)
	 * \param[in] request     Request XML
	 * \param[in] manual_ack  TRUE if this is a manual confirmation
	 *
	 * \return The operation, or NULL if it has no action
	 */
	remote_fencing_op_t *
	initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
	{
	    remote_fencing_op_t *op = NULL;
	    xmlNode *query = NULL;
	    const char *client_id = NULL;
	    int query_timeout = 0;

	    client_id = (client != NULL)? client->id
	                : crm_element_value(request, F_STONITH_CLIENTID);

	    CRM_LOG_ASSERT(client_id != NULL);
	    op = create_remote_stonith_op(client_id, request, FALSE);
	    op->owner = TRUE;

	    if (manual_ack) {
	        crm_notice("Initiating manual confirmation for %s: %s",
	                   op->target, op->id);
	        return op;
	    }

	    CRM_CHECK(op->action, return NULL);

	    if (stonith_topology_next(op) != pcmk_ok) {
	        op->state = st_failed;
	    }

	    if (op->state == st_failed) {
	        crm_warn("Could not request peer fencing (%s) of %s "
	                 CRM_XS " id=%s", op->action, op->target, op->id);
	        remote_op_done(op, NULL, -EINVAL, FALSE);
	        return op;
	    }

	    if (op->state == st_duplicate) {
	        crm_info("Requesting peer fencing (%s) of %s (duplicate) "
	                 CRM_XS " id=%s", op->action, op->target, op->id);
	        return op;
	    }

	    crm_notice("Requesting peer fencing (%s) of %s "
	               CRM_XS " id=%s state=%d",
	               op->action, op->target, op->id, op->state);

	    /* Ask for device information; the query goes out with a NULL
	     * destination node.
	     */
	    query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
	                              NULL, op->call_options);

	    crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
	    crm_xml_add(query, F_STONITH_TARGET, op->target);
	    crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
	    crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
	    crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
	    crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
	    crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);

	    send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
	    free_xml(query);

	    /* The timer interval is in milliseconds, the timeout in seconds */
	    query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
	    op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);

	    return op;
	}

	/*! Bit flags restricting which peers find_best_peer() will consider */
	enum find_best_peer_options {
	    /*! Skip checking the target peer for capable fencing devices */
	    FIND_PEER_SKIP_TARGET = 0x0001,
	    /*! Only check the target peer for capable fencing devices */
	    FIND_PEER_TARGET_ONLY = 0x0002,
	    /*! Skip peers and devices that are not verified */
	    FIND_PEER_VERIFIED_ONLY = 0x0004,
	};

	/*!
	 * \internal
	 * \brief Pick the first query result that satisfies the given restrictions
	 *
	 * \param[in]     device   Device ID to look for (required when the
	 *                         operation uses a topology)
	 * \param[in,out] op       Operation whose query results are searched
	 * \param[in]     options  Group of enum find_best_peer_options flags
	 *
	 * \return Suitable peer, or NULL if there is none
	 */
	static st_query_result_t *
	find_best_peer(const char *device, remote_fencing_op_t *op, enum find_best_peer_options options)
	{
	    GListPtr iter = NULL;
	    gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;

	    /* With a topology, a specific device is needed to choose a peer */
	    if (!device && is_set(op->call_options, st_opt_topology)) {
	        return NULL;
	    }

	    for (iter = op->query_results; iter != NULL; iter = iter->next) {
	        st_query_result_t *peer = iter->data;

	        crm_trace("Testing result from %s for %s with %d devices: %d %x",
	                  peer->host, op->target, peer->ndevices, peer->tried, options);
	        if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) {
	            continue;
	        }
	        if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) {
	            continue;
	        }

	        if (is_set(op->call_options, st_opt_topology)) {

	            if (grab_peer_device(op, peer, device, verified_devices_only)) {
	                return peer;
	            }

	        } else if ((peer->tried == FALSE)
	                   && count_peer_devices(op, peer, verified_devices_only)) {

	            /* No topology: Use the current best peer */
	            crm_trace("Simple fencing");
	            return peer;
	        }
	    }

	    return NULL;
	}

	/*!
	 * \internal
	 * \brief Choose a peer to execute the next step of a fencing operation
	 *
	 * Preference order: a verified peer other than the target, then (once
	 * enough replies are in or the query timer has expired) an unverified peer
	 * other than the target, then the target itself. If nobody qualifies and a
	 * topology is in use, the next topology level is tried.
	 *
	 * \param[in,out] op  Operation to find a peer for
	 *
	 * \return Chosen peer, or NULL if none is available (yet)
	 */
	static st_query_result_t *
	stonith_choose_peer(remote_fencing_op_t * op)
	{
	    const char *device = NULL;
	    st_query_result_t *peer = NULL;
	    uint32_t active = fencing_active_peers();

	    do {
	        if (op->devices) {
	            device = op->devices->data;
	            crm_trace("Checking for someone to fence (%s) %s with %s",
	                      op->action, op->target, device);
	        } else {
	            crm_trace("Checking for someone to fence (%s) %s",
	                      op->action, op->target);
	        }

	        /* Best choice is a peer other than the target with verified access */
	        peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
	        if (peer) {
	            crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
	            return peer;
	        }

	        /* While the query timer is still running and replies are
	         * outstanding, hold off on the less preferred choices.
	         */
	        if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
	            crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
	            return NULL;
	        }

	        /* If no other peer has verified access, next best is unverified access */
	        peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
	        if (peer) {
	            crm_trace("Found best unverified peer %s", peer->host);
	            return peer;
	        }

	        /* If no other peer can do it, last option is self-fencing
	         * (which is never allowed for the "on" phase of a remapped reboot)
	         */
	        if (op->phase != st_phase_on) {
	            peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
	            if (peer) {
	                crm_trace("%s will fence itself", peer->host);
	                return peer;
	            }
	        }

	        /* Try the next fencing level if there is one (unless we're in the "on"
	         * phase of a remapped "reboot", because we ignore errors in that case)
	         */
	    } while ((op->phase != st_phase_on)
	             && is_set(op->call_options, st_opt_topology)
	             && stonith_topology_next(op) == pcmk_ok);

	    crm_notice("Couldn't find anyone to fence (%s) %s with %s",
	               op->action, op->target, (device? device : "any device"));
	    return NULL;
	}

	static int
	get_device_timeout(const remote_fencing_op_t op, const st_query_result_t peer,
	const char *device)
	{
	device_properties_t *props;

	if (!peer \|\| !device) {
	return op->base_timeout;
	}

	props = g_hash_table_lookup(peer->devices, device);
	if (!props) {
	return op->base_timeout;
	}

	return (props->custom_action_timeout[op->phase]?
	props->custom_action_timeout[op->phase] : op->base_timeout)
	+ props->delay_max[op->phase];
	}

	/* Accumulator handed to add_device_timeout() through g_hash_table_foreach() */
	struct timeout_data {
	const remote_fencing_op_t *op; /* operation whose current phase is examined */
	const st_query_result_t *peer; /* peer whose devices are being summed up */
	int total_timeout; /* running sum of the per-device timeouts */
	};

	/*!
	 * \internal
	 * \brief Add a device's timeout to a running total, if the device still counts
	 *
	 * A device is skipped when it has already been executed, or is disallowed,
	 * in the operation's current phase.
	 *
	 * \param[in]     key        GHashTable key (device ID)
	 * \param[in]     value      GHashTable value (device properties)
	 * \param[in,out] user_data  Timeout data (struct timeout_data *) to update
	 */
	static void
	add_device_timeout(gpointer key, gpointer value, gpointer user_data)
	{
	    struct timeout_data *acc = user_data;
	    device_properties_t *device_props = value;
	    const char *id = key;

	    if (device_props->executed[acc->op->phase]
	        || device_props->disallowed[acc->op->phase]) {
	        return;
	    }

	    acc->total_timeout += get_device_timeout(acc->op, acc->peer, id);
	}

	static int
	get_peer_timeout(const remote_fencing_op_t op, const st_query_result_t peer)
	{
	struct timeout_data timeout;

	timeout.op = op;
	timeout.peer = peer;
	timeout.total_timeout = 0;

	g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);

	return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
	}

	/*!
	 * \internal
	 * \brief Estimate the total time an operation may need
	 *
	 * \param[in] op           Operation to examine
	 * \param[in] chosen_peer  Peer selected to execute the operation (may be
	 *                         NULL; only used when no topology applies)
	 *
	 * \return With a topology, the sum of the timeouts of every topology device
	 *         that some queried peer reported; otherwise the chosen peer's
	 *         timeout. The operation's base timeout is returned whenever the
	 *         result would be 0 or there is nothing to base it on.
	 */
	static int
	get_op_total_timeout(const remote_fencing_op_t *op,
	                     const st_query_result_t *chosen_peer)
	{
	    int total_timeout = 0;
	    stonith_topology_t *tp = find_topology_for_host(op->target);

	    if (is_set(op->call_options, st_opt_topology) && tp) {
	        int i;
	        GListPtr device_list = NULL;
	        GListPtr iter = NULL;

	        /* Yep, this looks scary, nested loops all over the place.
	         * Here is what is going on.
	         * Loop1: Iterate through fencing levels.
	         * Loop2: If a fencing level has devices, loop through each device
	         * Loop3: For each device in a fencing level, see what peer owns it
	         *        and what that peer has reported the timeout is for the device.
	         */
	        for (i = 0; i < ST_LEVEL_MAX; i++) {
	            if (!tp->levels[i]) {
	                continue;
	            }
	            for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
	                for (iter = op->query_results; iter != NULL; iter = iter->next) {
	                    const st_query_result_t *peer = iter->data;

	                    if (find_peer_device(op, peer, device_list->data)) {
	                        total_timeout += get_device_timeout(op, peer,
	                                                            device_list->data);
	                        /* Count each device once, for the first peer that has it */
	                        break;
	                    }
	                }               /* End Loop3: match device with peer that owns device, find device's timeout period */
	            }                   /* End Loop2: iterate through devices at a specific level */
	        }                       /* End Loop1: iterate through fencing levels */

	    } else if (chosen_peer) {
	        total_timeout = get_peer_timeout(op, chosen_peer);
	    } else {
	        total_timeout = op->base_timeout;
	    }

	    return total_timeout ? total_timeout : op->base_timeout;
	}

	/*!
	 * \internal
	 * \brief Tell an asynchronous client how long its operation may take
	 *
	 * Nothing is reported for synchronous calls, for operations whose request
	 * has already been released, or when the request lacks the client node,
	 * call ID or client ID.
	 *
	 * \param[in] op          Operation to report the timeout for
	 * \param[in] op_timeout  Timeout to report
	 *
	 * \note Duplicates of \p op are only informed when the update was relayed
	 *       to another node; the earlier return paths do not reach that loop.
	 */
	static void
	report_timeout_period(remote_fencing_op_t * op, int op_timeout)
	{
	    GListPtr iter = NULL;
	    xmlNode *update = NULL;
	    const char *client_node = NULL;
	    const char *client_id = NULL;
	    const char *call_id = NULL;

	    if (op->call_options & st_opt_sync_call) {
	        /* There is no reason to report the timeout for a synchronous call. It
	         * is impossible to use the reported timeout to do anything when the client
	         * is blocking for the response. This update is only important for
	         * async calls that require a callback to report the results in. */
	        return;
	    } else if (!op->request) {
	        return;
	    }

	    crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id);
	    client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
	    call_id = crm_element_value(op->request, F_STONITH_CALLID);
	    client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
	    if (!client_node || !call_id || !client_id) {
	        return;
	    }

	    if (safe_str_eq(client_node, stonith_our_uname)) {
	        /* The client is connected to this node, send the update directly to them */
	        do_stonith_async_timeout_update(client_id, call_id, op_timeout);
	        return;
	    }

	    /* The client is connected to another node, relay this update to them */
	    update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
	    crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
	    crm_xml_add(update, F_STONITH_CLIENTID, client_id);
	    crm_xml_add(update, F_STONITH_CALLID, call_id);
	    crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);

	    send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);

	    free_xml(update);

	    /* Pass the same timeout on to every operation merged into this one */
	    for (iter = op->duplicates; iter != NULL; iter = iter->next) {
	        remote_fencing_op_t *dup = iter->data;

	        crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id);
	        report_timeout_period(iter->data, op_timeout);
	    }
	}

	/*!
	 * \internal
	 * \brief Advance an operation to the next device in its topology
	 *
	 * Depending on what remains, this either executes the next device,
	 * switches a remapped reboot from "off" to "on", or finalizes the
	 * operation.
	 *
	 * \param[in,out] op      Operation to advance
	 * \param[in]     device  ID of device just completed
	 * \param[in]     msg     XML reply that contained device result (if available)
	 * \param[in]     rc      Return code of device's execution
	 */
	static void
	advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
	                    int rc)
	{
	    /* Advance to the next device at this topology level, if any */
	    if (op->devices) {
	        op->devices = op->devices->next;
	    }

	    /* Handle automatic unfencing if an "on" action was requested */
	    if ((op->phase == st_phase_requested) && safe_str_eq(op->action, "on")) {
	        /* If the device we just executed was required, it's not anymore */
	        remove_required_device(op, device);

	        /* If there are no more devices at this topology level, run through any
	         * remaining devices with automatic unfencing
	         */
	        if (op->devices == NULL) {
	            op->devices = op->automatic_list;
	        }
	    }

	    if ((op->devices == NULL) && (op->phase == st_phase_off)) {
	        /* We're done with this level and with required devices, but we had
	         * remapped "reboot" to "off", so start over with "on". If any devices
	         * need to be turned back on, op->devices will be non-NULL after this.
	         */
	        op_phase_on(op);
	    }

	    if (op->devices) {
	        /* Necessary devices remain, so execute the next one */
	        crm_trace("Next for %s on behalf of %s@%s (rc was %d)",
	                  op->target, op->originator, op->client_name, rc);
	        call_remote_stonith(op, NULL);
	    } else {
	        /* We're done with all devices and phases, so finalize operation */
	        crm_trace("Marking complex fencing op for %s as complete", op->target);
	        op->state = st_done;
	        remote_op_done(op, msg, rc, FALSE);
	    }
	}

	/*!
	 * \brief Ask a peer to execute (the next step of) a fencing operation
	 *
	 * On first use for an operation this also arms the operation's total timer
	 * and reports the total timeout to the client. If a peer is available, a
	 * fencing request is sent to it and a per-request timer is armed. Otherwise
	 * the operation is advanced, timed out, failed, or left waiting for more
	 * query replies, depending on its phase, ownership and reply counts.
	 *
	 * \param[in,out] op    Fencing operation to execute
	 * \param[in]     peer  Peer to use, or NULL to choose one; ignored when the
	 *                      operation uses a topology and still has devices
	 */
	void
	call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
	{
	    const char *device = NULL;
	    int timeout = op->base_timeout;

	    crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state);
	    if (peer == NULL && !is_set(op->call_options, st_opt_topology)) {
	        peer = stonith_choose_peer(op);
	    }

	    if (!op->op_timer_total) {
	        int total_timeout = get_op_total_timeout(op, peer);

	        op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout;
	        op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
	        report_timeout_period(op, op->total_timeout);
	        crm_info("Total timeout set to %d for peer's fencing of %s for %s"
	                 CRM_XS "id=%s",
	                 total_timeout, op->target, op->client_name, op->id);
	    }

	    if (is_set(op->call_options, st_opt_topology) && op->devices) {
	        /* Ignore any peer preference, they might not have the device we need */
	        /* When using topology, stonith_choose_peer() removes the device from
	         * further consideration, so be sure to calculate timeout beforehand */
	        peer = stonith_choose_peer(op);

	        device = op->devices->data;
	        timeout = get_device_timeout(op, peer, device);
	    }

	    if (peer) {
	        int timeout_one = 0;
	        xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);

	        crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
	        crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
	        crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
	        crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
	        crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
	        crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
	        crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
	        crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);

	        if (device) {
	            timeout_one = TIMEOUT_MULTIPLY_FACTOR *
	                          get_device_timeout(op, peer, device);
	            crm_info("Requesting that '%s' perform op '%s %s' with '%s' for %s (%ds)", peer->host,
	                     op->target, op->action, device, op->client_name, timeout_one);
	            crm_xml_add(remote_op, F_STONITH_DEVICE, device);
	            crm_xml_add(remote_op, F_STONITH_MODE, "slave");

	        } else {
	            timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
	            /* The watchdog timeout is kept in milliseconds; convert it so the
	             * "s" suffix in the message is accurate, as the other messages
	             * in this function do.
	             */
	            crm_info("Requesting that '%s' perform op '%s %s' for %s (%ds, %lds)",
	                     peer->host, op->target, op->action, op->client_name, timeout_one,
	                     stonith_watchdog_timeout_ms / 1000);
	            crm_xml_add(remote_op, F_STONITH_MODE, "smart");

	        }

	        op->state = st_exec;
	        if (op->op_timer_one) {
	            g_source_remove(op->op_timer_one);
	        }

	        if (stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) {
	            crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
	                       stonith_watchdog_timeout_ms/1000, op->target,
	                       op->action, op->client_name, op->id, device);
	            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);

	            /* TODO check devices to verify watchdog will be in use */
	        } else if (stonith_watchdog_timeout_ms > 0
	                   && safe_str_eq(peer->host, op->target)
	                   && safe_str_neq(op->action, "on")) {
	            crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
	                       stonith_watchdog_timeout_ms/1000, op->target,
	                       op->action, op->client_name, op->id, device);
	            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);

	        } else {
	            op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
	        }

	        send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
	        peer->tried = TRUE;
	        free_xml(remote_op);
	        return;

	    } else if (op->phase == st_phase_on) {
	        /* A remapped "on" cannot be executed, but the node was already
	         * turned off successfully, so ignore the error and continue.
	         */
	        /* device is only set when topology devices remain, so never hand a
	         * NULL pointer to %s
	         */
	        crm_warn("Ignoring %s 'on' failure (no capable peers) for %s after successful 'off'",
	                 (device != NULL) ? device : "(no device)", op->target);
	        advance_op_topology(op, device, NULL, pcmk_ok);
	        return;

	    } else if (op->owner == FALSE) {
	        crm_err("Fencing (%s) of %s for %s is not ours to control",
	                op->action, op->target, op->client_name);

	    } else if (op->query_timer == 0) {
	        /* We've exhausted all available peers */
	        crm_info("No remaining peers capable of fencing (%s) %s for %s (%d)",
	                 op->action, op->target, op->client_name, op->state);
	        CRM_LOG_ASSERT(op->state < st_done);
	        remote_op_timeout(op);

	    } else if (op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
	        int rc = -EHOSTUNREACH;

	        /* if the operation never left the query state,
	         * but we have all the expected replies, then no devices
	         * are available to execute the fencing operation. */

	        if (stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) {
	            crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
	                       stonith_watchdog_timeout_ms/1000, op->target,
	                       op->action, op->client_name, op->id, device);

	            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
	            return;
	        }

	        if (op->state == st_query) {
	            crm_info("No peers (out of %d) have devices capable of fencing (%s) %s for %s (%d)",
	                     op->replies, op->action, op->target, op->client_name,
	                     op->state);

	            rc = -ENODEV;
	        } else {
	            crm_info("No peers (out of %d) are capable of fencing (%s) %s for %s (%d)",
	                     op->replies, op->action, op->target, op->client_name,
	                     op->state);
	        }

	        op->state = st_failed;
	        remote_op_done(op, NULL, rc, FALSE);

	    } else if (device) {
	        crm_info("Waiting for additional peers capable of fencing (%s) %s with %s for %s.%.8s",
	                 op->action, op->target, device, op->client_name, op->id);
	    } else {
	        crm_info("Waiting for additional peers capable of fencing (%s) %s for %s.%.8s",
	                 op->action, op->target, op->client_name, op->id);
	    }
	}

	/*!
	 * \internal
	 * \brief Order query results so peers with more devices come first
	 *
	 * \param[in] a  First query result (GList item)
	 * \param[in] b  Second query result (GList item)
	 *
	 * \return Device count of \p b minus device count of \p a, which is negative
	 *         when \p a has more devices and should therefore sort first, 0 when
	 *         the counts are equal, and positive otherwise
	 */
	static gint
	sort_peers(gconstpointer a, gconstpointer b)
	{
	    const st_query_result_t *first = a;
	    const st_query_result_t *second = b;
	    gint difference = second->ndevices - first->ndevices;

	    return difference;
	}

	/*!
	 * \internal
	 * \brief Determine if all the devices in the topology are found or not
	 *
	 * Every device at every level of the target's topology must be offered by at
	 * least one peer in the operation's query results. For "off" and "reboot"
	 * actions, results from the target node itself are not counted.
	 *
	 * \param[in] op  Fencing operation whose query results should be checked
	 *
	 * \return TRUE if every topology device was found, FALSE if any device is
	 *         missing or the target has no topology
	 */
	static gboolean
	all_topology_devices_found(remote_fencing_op_t * op)
	{
	    GListPtr device = NULL;
	    GListPtr iter = NULL;
	    device_properties_t *match = NULL;
	    stonith_topology_t *tp = NULL;
	    gboolean skip_target = FALSE;
	    int i;

	    tp = find_topology_for_host(op->target);
	    if (!tp) {
	        return FALSE;
	    }
	    if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) {
	        /* Don't count the devices on the target node if we are killing
	         * the target node. */
	        skip_target = TRUE;
	    }

	    for (i = 0; i < ST_LEVEL_MAX; i++) {
	        for (device = tp->levels[i]; device; device = device->next) {
	            match = NULL;

	            /* Stop at the first peer that offers this device */
	            for (iter = op->query_results; iter != NULL; iter = iter->next) {
	                st_query_result_t *peer = iter->data;

	                if (skip_target && safe_str_eq(peer->host, op->target)) {
	                    continue;
	                }
	                match = find_peer_device(op, peer, device->data);
	                if (match != NULL) {
	                    break;
	                }
	            }
	            if (!match) {
	                return FALSE;
	            }
	        }
	    }

	    return TRUE;
	}

	/*!
	 * \internal
	 * \brief Parse action-specific device properties from XML
	 *
	 * \param[in]     xml     XML element containing the properties
	 * \param[in]     peer    Name of peer that sent XML (for logs)
	 * \param[in]     device  Device ID (for logs, and for the required-device list)
	 * \param[in]     action  Action the properties relate to
	 * \param[in,out] op      Operation that gains \p device as a required device
	 *                        when \p action is "on" and the XML marks it required
	 * \param[in]     phase   Phase the properties relate to
	 * \param[in,out] props   Device properties to update
	 */
	static void
	parse_action_specific(xmlNode *xml, const char *peer, const char *device,
	                      const char *action, remote_fencing_op_t *op,
	                      enum st_remap_phase phase, device_properties_t *props)
	{
	    /* Each value is reset first so it stays 0 when the attribute is absent */
	    props->custom_action_timeout[phase] = 0;
	    crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
	                          &props->custom_action_timeout[phase]);
	    if (props->custom_action_timeout[phase]) {
	        crm_trace("Peer %s with device %s returned %s action timeout %d",
	                  peer, device, action, props->custom_action_timeout[phase]);
	    }

	    props->delay_max[phase] = 0;
	    crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
	    if (props->delay_max[phase]) {
	        crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
	                  peer, device, props->delay_max[phase], action);
	    }

	    props->delay_base[phase] = 0;
	    crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
	    if (props->delay_base[phase]) {
	        crm_trace("Peer %s with device %s returned base delay %d for %s",
	                  peer, device, props->delay_base[phase], action);
	    }

	    /* Handle devices with automatic unfencing */
	    if (safe_str_eq(action, "on")) {
	        int required = 0;

	        crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
	        if (required) {
	            crm_trace("Peer %s requires device %s to execute for action %s",
	                      peer, device, action);
	            add_required_device(op, device);
	        }
	    }

	    /* If a reboot is remapped to off+on, it's possible that a node is allowed
	     * to perform one action but not another.
	     */
	    if (crm_is_true(crm_element_value(xml, F_STONITH_ACTION_DISALLOWED))) {
	        props->disallowed[phase] = TRUE;
	        crm_trace("Peer %s is disallowed from executing %s for device %s",
	                  peer, action, device);
	    }
	}

	/*!
	 * \internal
	 * \brief Parse one device's properties from peer's XML query reply
	 *
	 * A newly allocated properties entry is inserted into the peer's device
	 * table under a copy of \p device, then filled in from \p xml and from its
	 * "off" and "on" child elements.
	 *
	 * \param[in]     xml     XML node containing device properties
	 * \param[in,out] op      Operation that query and reply relate to
	 * \param[in,out] result  Peer's results
	 * \param[in]     device  ID of device being parsed
	 */
	static void
	add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
	                      st_query_result_t *result, const char *device)
	{
	    xmlNode *child;
	    int verified = 0;
	    device_properties_t *props = calloc(1, sizeof(device_properties_t));

	    /* Add a new entry to this result's devices list */
	    CRM_ASSERT(props != NULL);
	    g_hash_table_insert(result->devices, strdup(device), props);

	    /* Peers with verified (monitored) access will be preferred */
	    crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
	    if (verified) {
	        crm_trace("Peer %s has confirmed a verified device %s",
	                  result->host, device);
	        props->verified = TRUE;
	    }

	    /* Parse action-specific device properties */
	    parse_action_specific(xml, result->host, device, op_requested_action(op),
	                          op, st_phase_requested, props);
	    for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
	        /* Replies for "reboot" operations will include the action-specific
	         * values for "off" and "on" in child elements, just in case the reboot
	         * winds up getting remapped.
	         */
	        if (safe_str_eq(ID(child), "off")) {
	            parse_action_specific(child, result->host, device, "off",
	                                  op, st_phase_off, props);
	        } else if (safe_str_eq(ID(child), "on")) {
	            parse_action_specific(child, result->host, device, "on",
	                                  op, st_phase_on, props);
	        }
	    }
	}

	/*!
	 * \internal
	 * \brief Parse a peer's XML query reply and add it to operation's results
	 *
	 * The new result is inserted into the operation's query results in the order
	 * defined by sort_peers(). A mismatch between \p ndevices and the number of
	 * devices actually parsed is logged but does not prevent the insertion.
	 *
	 * \param[in,out] op        Operation that query and reply relate to
	 * \param[in]     host      Name of peer that sent this reply
	 * \param[in]     ndevices  Number of devices expected in reply
	 * \param[in]     xml       XML node containing device list
	 *
	 * \return Newly allocated result structure with parsed reply, or NULL if it
	 *         could not be allocated
	 */
	static st_query_result_t *
	add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
	{
	    st_query_result_t *result = calloc(1, sizeof(st_query_result_t));
	    xmlNode *child;

	    CRM_CHECK(result != NULL, return NULL);
	    result->host = strdup(host);
	    result->devices = crm_str_table_new();

	    /* Each child element describes one capable device available to the peer */
	    for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
	        const char *device = ID(child);

	        if (device) {
	            add_device_properties(child, op, result, device);
	        }
	    }

	    result->ndevices = g_hash_table_size(result->devices);
	    CRM_CHECK(ndevices == result->ndevices,
	              crm_err("Query claimed to have %d devices but %d found",
	                      ndevices, result->ndevices));

	    op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
	    return result;
	}

	/*!
	 * \internal
	 * \brief Handle a peer's reply to our fencing query
	 *
	 * The reply is counted against the operation it names and, if it reports any
	 * devices, parsed into the operation's query results. Fencing is then
	 * started as soon as the results look sufficient, or once every expected
	 * reply has arrived.
	 *
	 * \param[in] msg  XML reply received
	 *
	 * \return pcmk_ok on success, -errno on error
	 *
	 * \note See initiate_remote_stonith_op() for how the XML query was initially
	 *       formed, and stonith_query() for how the peer formed its XML reply.
	 */
	int
	process_remote_stonith_query(xmlNode * msg)
	{
	    int device_count = 0;
	    gboolean from_target = FALSE;
	    gboolean all_replied = FALSE;
	    const char *id = NULL;
	    const char *sender = NULL;
	    remote_fencing_op_t *op = NULL;
	    st_query_result_t *result = NULL;
	    uint32_t replies_expected;
	    xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);

	    /* The reply must name the operation it belongs to ... */
	    CRM_CHECK(dev != NULL, return -EPROTO);

	    id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
	    CRM_CHECK(id != NULL, return -EPROTO);

	    /* ... and say how many devices the peer has available */
	    dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
	    CRM_CHECK(dev != NULL, return -EPROTO);
	    crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &device_count);

	    op = g_hash_table_lookup(stonith_remote_op_list, id);
	    if (op == NULL) {
	        crm_debug("Received query reply for unknown or expired operation %s",
	                  id);
	        return -EOPNOTSUPP;
	    }

	    /* Count this reply, and note whether it completes a pending query */
	    replies_expected = QB_MIN(op->replies_expected, fencing_active_peers());
	    op->replies++;
	    if ((op->replies >= replies_expected) && (op->state == st_query)) {
	        all_replied = TRUE;
	    }
	    sender = crm_element_value(msg, F_ORIG);
	    from_target = safe_str_eq(sender, op->target);

	    crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s",
	             op->replies, replies_expected, sender,
	             op->target, op->action, device_count, id);
	    if (device_count > 0) {
	        result = add_result(op, sender, device_count, dev);
	    }

	    if (is_set(op->call_options, st_opt_topology)) {
	        /* If we start the fencing before all the topology results are in,
	         * it is possible fencing levels will be skipped because of the missing
	         * query results. */
	        if ((op->state == st_query) && all_topology_devices_found(op)) {
	            /* All the query results are in for the topology, start the fencing ops. */
	            crm_trace("All topology devices found");
	            call_remote_stonith(op, result);

	        } else if (all_replied) {
	            crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
	                     replies_expected, op->replies);
	            call_remote_stonith(op, NULL);
	        }

	    } else if (op->state == st_query) {
	        int nverified = count_peer_devices(op, result, TRUE);

	        /* We have a result for a non-topology fencing op that looks promising,
	         * go ahead and start fencing before query timeout */
	        if (result && !from_target && nverified) {
	            /* we have a verified device living on a peer that is not the target */
	            crm_trace("Found %d verified devices", nverified);
	            call_remote_stonith(op, result);

	        } else if (all_replied) {
	            crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
	                     replies_expected, op->replies);
	            call_remote_stonith(op, NULL);

	        } else {
	            crm_trace("Waiting for more peer results before launching fencing operation");
	        }

	    } else if (result && (op->state == st_done)) {
	        crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
	                 result->host, result->ndevices, op->state);
	    }

	    return pcmk_ok;
	}

	/*!
	 * \internal
	 * \brief Handle a peer's reply to a fencing request
	 *
	 * Parse a fencing reply from XML, and either finalize the operation
	 * or attempt another device as appropriate.
	 *
	 * \param[in] msg  XML reply received
	 *
	 * \return pcmk_ok when a broadcast result was recorded, -EPROTO if the reply
	 *         lacks an operation ID or return code, -EOPNOTSUPP if the operation
	 *         is unknown, otherwise the return code carried in the reply
	 */
	int
	process_remote_stonith_exec(xmlNode * msg)
	{
	    int rc = 0;
	    const char *id = NULL;
	    const char *device = NULL;
	    remote_fencing_op_t *op = NULL;
	    xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);

	    CRM_CHECK(dev != NULL, return -EPROTO);

	    id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
	    CRM_CHECK(id != NULL, return -EPROTO);

	    dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
	    CRM_CHECK(dev != NULL, return -EPROTO);

	    crm_element_value_int(dev, F_STONITH_RC, &rc);

	    device = crm_element_value(dev, F_STONITH_DEVICE);

	    if (stonith_remote_op_list) {
	        op = g_hash_table_lookup(stonith_remote_op_list, id);
	    }

	    if (op == NULL && rc == pcmk_ok) {
	        /* Record successful fencing operations */
	        const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);

	        op = create_remote_stonith_op(client_id, dev, TRUE);
	    }

	    if (op == NULL) {
	        /* Could be for an event that began before we started */
	        /* TODO: Record the op for later querying */
	        crm_info("Received peer result of unknown or expired operation %s", id);
	        return -EOPNOTSUPP;
	    }

	    if (op->devices && device && safe_str_neq(op->devices->data, device)) {
	        crm_err("Received outdated reply for device %s (instead of %s) to "
	                "fence (%s) %s. Operation already timed out at peer level.",
	                device, (const char *) op->devices->data, op->action, op->target);
	        return rc;
	    }

	    if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) {
	        /* "%s@%s.%.8s" is client@originator.id; the truncated field is the
	         * operation ID, as in the other messages that use "%.8s"
	         */
	        crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)",
	                  op->action, op->target, op->client_name, op->originator, op->id,
	                  pcmk_strerror(rc), rc);
	        if (rc == pcmk_ok) {
	            op->state = st_done;
	        } else {
	            op->state = st_failed;
	        }
	        remote_op_done(op, msg, rc, FALSE);
	        return pcmk_ok;
	    } else if (safe_str_neq(op->originator, stonith_our_uname)) {
	        /* If this isn't a remote level broadcast, and we are not the
	         * originator of the operation, we should not be receiving this msg. */
	        crm_err
	            ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)",
	             stonith_our_uname, device, op->target);
	        return rc;
	    }

	    if (is_set(op->call_options, st_opt_topology)) {
	        /* Read from the message root rather than the rc element; kept in
	         * its own variable so it does not shadow the one above
	         */
	        const char *msg_device = crm_element_value(msg, F_STONITH_DEVICE);

	        crm_notice("Call to %s for '%s %s' on behalf of %s@%s: %s (%d)",
	                   msg_device, op->target, op->action, op->client_name, op->originator,
	                   pcmk_strerror(rc), rc);

	        /* We own the op, and it is complete. broadcast the result to all nodes
	         * and notify our local clients. */
	        if (op->state == st_done) {
	            remote_op_done(op, msg, rc, FALSE);
	            return rc;
	        }

	        if ((op->phase == st_phase_on) && (rc != pcmk_ok)) {
	            /* A remapped "on" failed, but the node was already turned off
	             * successfully, so ignore the error and continue.
	             */
	            crm_warn("Ignoring %s 'on' failure (exit code %d) for %s after successful 'off'",
	                     msg_device, rc, op->target);
	            rc = pcmk_ok;
	        }

	        if (rc == pcmk_ok) {
	            /* An operation completed successfully. Try another device if
	             * necessary, otherwise mark the operation as done. */
	            advance_op_topology(op, msg_device, msg, rc);
	            return rc;
	        } else {
	            /* This device failed, time to try another topology level. If no other
	             * levels are available, mark this operation as failed and report results. */
	            if (stonith_topology_next(op) != pcmk_ok) {
	                op->state = st_failed;
	                remote_op_done(op, msg, rc, FALSE);
	                return rc;
	            }
	        }
	    } else if (rc == pcmk_ok && op->devices == NULL) {
	        crm_trace("All done for %s", op->target);

	        op->state = st_done;
	        remote_op_done(op, msg, rc, FALSE);
	        return rc;
	    } else if (rc == -ETIME && op->devices == NULL) {
	        /* If the operation timed out don't bother retrying other peers. */
	        op->state = st_failed;
	        remote_op_done(op, msg, rc, FALSE);
	        return rc;
	    } else {
	        /* fall-through and attempt other fencing action using another peer */
	    }

	    /* Retry on failure */
	    crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
	              op->client_name, rc);
	    call_remote_stonith(op, NULL);
	    return rc;
	}

	/*!
	 * \brief Check whether a target was already fenced within a tolerance window
	 *
	 * Searches the known remote operations for a completed one with the same
	 * target and action whose completion time plus \p tolerance is not earlier
	 * than the current time.
	 *
	 * \param[in] tolerance  Size of the window (added to the time_t completion
	 *                       time); values <= 0 disable the check
	 * \param[in] target     Name of the node that would be fenced
	 * \param[in] action     Fencing action that would be performed
	 *
	 * \return TRUE if a matching recent operation was found, otherwise FALSE
	 */
	gboolean
	stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
	{
	    GHashTableIter iter;
	    time_t now = time(NULL);
	    remote_fencing_op_t *rop = NULL;

	    crm_trace("tolerance=%d, stonith_remote_op_list=%p", tolerance,
	              stonith_remote_op_list);

	    if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
	        action == NULL) {
	        return FALSE;
	    }

	    g_hash_table_iter_init(&iter, stonith_remote_op_list);
	    while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
	        if (strcmp(rop->target, target) != 0) {
	            continue;
	        } else if (rop->state != st_done) {
	            continue;
	        } else if (strcmp(rop->action, action) != 0) {
	            /* We don't have to worry about remapped reboots here
	             * because if state is done, any remapping has been undone
	             */
	            continue;
	        } else if ((rop->completed + tolerance) < now) {
	            continue;
	        }

	        crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
	                   target, action, tolerance, rop->delegate, rop->originator);
	        return TRUE;
	    }
	    return FALSE;
	}
	diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
	index 0e0ac96e10..e67580059e 100644
	--- a/daemons/fenced/pacemaker-fenced.c
	+++ b/daemons/fenced/pacemaker-fenced.c
	@@ -1,1503 +1,1493 @@
	/*
	* Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#include <sys/param.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <unistd.h>
	#include <sys/utsname.h>

	#include <stdlib.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <inttypes.h> /* U32T ~ PRIu32, X32T ~ PRIx32 */

	#include <crm/crm.h>
	#include <crm/msg_xml.h>
	#include <crm/common/ipc.h>
	#include <crm/common/ipcs.h>
	#include <crm/cluster/internal.h>

	#include <crm/stonith-ng.h>
	#include <crm/fencing/internal.h>
	#include <crm/common/xml.h>

	#include <crm/common/mainloop.h>

	#include <crm/cib/internal.h>
	#include <crm/pengine/status.h>
	#include <sched_allocate.h>

	#include <pacemaker-fenced.h>

	/* Name of the local node; stamped onto client requests as
	 * F_STONITH_CLIENTNODE and compared against request/operation origins
	 */
	char *stonith_our_uname = NULL;
	/* NOTE(review): presumably the local node's cluster UUID - confirm at the
	 * point where it is assigned
	 */
	char *stonith_our_uuid = NULL;
	/* Watchdog self-fencing wait in milliseconds (handed to g_timeout_add()
	 * as-is); values <= 0 are treated as "no watchdog wait"
	 */
	long stonith_watchdog_timeout_ms = 0;

	static GMainLoop *mainloop = NULL;

	gboolean stand_alone = FALSE;
	static gboolean no_cib_connect = FALSE;
	/* Once set, st_ipc_accept() refuses new client connections with -EPERM */
	static gboolean stonith_shutdown_flag = FALSE;

	static qb_ipcs_service_t *ipcs = NULL;
	static xmlNode *local_cib = NULL;

	-GHashTable *known_peer_names = NULL;
	-
	/* NOTE(review): presumably the CIB connection and the handle of the library
	 * providing it - neither is used in the visible part of the file, confirm
	 */
	static cib_t *cib_api = NULL;
	static void *cib_library = NULL;

	/* Forward declarations; stonith_shutdown() is called from
	 * stonith_peer_cs_destroy() before its definition appears
	 */
	static void stonith_shutdown(int nsig);
	static void stonith_cleanup(void);

	/*!
	 * \internal
	 * \brief Decide whether a new IPC client connection is accepted
	 *
	 * \param[in] c    New IPC connection
	 * \param[in] uid  User ID passed through to crm_client_new()
	 * \param[in] gid  Group ID passed through to crm_client_new()
	 *
	 * \return 0 if the client was registered, -EPERM if the daemon is shutting
	 *         down, or -EIO if the client could not be registered
	 */
	static int32_t
	st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
	{
	    int32_t verdict = 0;

	    if (stonith_shutdown_flag) {
	        crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
	        verdict = -EPERM;

	    } else if (crm_client_new(c, uid, gid) == NULL) {
	        verdict = -EIO;
	    }

	    return verdict;
	}

	/*!
	 * \internal
	 * \brief Trace the creation of a new IPC connection
	 *
	 * \param[in] c  Connection that was created (only its address is logged)
	 */
	static void
	st_ipc_created(qb_ipcs_connection_t *c)
	{
	    crm_trace("Connection created for %p", c);
	}

	/*!
	 * \internal
	 * \brief Handle one request arriving from a local IPC client
	 *
	 * Node-cache removal requests are tagged with the client's identity and
	 * relayed to the cluster; every other request is tagged the same way and
	 * handed to stonith_command().
	 *
	 * \param[in] qbc   Connection the request arrived on
	 * \param[in] data  Raw request data
	 * \param[in] size  Size of \p data
	 *
	 * \return Always 0 (the original author left "Exit code means?" as an open
	 *         question here)
	 */
	static int32_t
	st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
	{
	    uint32_t id = 0;
	    uint32_t flags = 0;
	    int call_options = 0;
	    xmlNode *request = NULL;
	    const char *op = NULL;
	    crm_client_t *c = crm_client_get(qbc);

	    if (c == NULL) {
	        crm_info("Invalid client: %p", qbc);
	        return 0;
	    }

	    request = crm_ipcs_recv(c, data, size, &id, &flags);
	    if (request == NULL) {
	        crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__);
	        return 0;
	    }

	    op = crm_element_value(request, F_CRM_TASK);
	    if (safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) {
	        /* Cache removals are relayed to the cluster, not processed here */
	        crm_xml_add(request, F_TYPE, T_STONITH_NG);
	        crm_xml_add(request, F_STONITH_OPERATION, op);
	        crm_xml_add(request, F_STONITH_CLIENTID, c->id);
	        crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
	        crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);

	        send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
	        free_xml(request);
	        return 0;
	    }

	    if (c->name == NULL) {
	        /* First request from this client: derive its name from the request */
	        const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);

	        c->name = crm_strdup_printf("%s.%u", (value != NULL) ? value : "unknown", c->pid);
	    }

	    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
	    crm_trace("Flags %" X32T "/%u for command %" U32T " from %s",
	              flags, call_options, id, crm_client_name(c));

	    if (is_set(call_options, st_opt_sync_call)) {
	        CRM_ASSERT(flags & crm_ipc_client_response);
	        CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
	        c->request_id = id; /* Reply only to the last one */
	    }

	    crm_xml_add(request, F_STONITH_CLIENTID, c->id);
	    crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
	    crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);

	    crm_log_xml_trace(request, "Client[inbound]");
	    stonith_command(c, id, flags, request, NULL);

	    free_xml(request);
	    return 0;
	}

	/*!
	 * \internal
	 * \brief Release the client record of a closed IPC connection
	 *
	 * \param[in] c  Connection that was closed
	 *
	 * \return Always 0, meaning "go ahead and destroy the connection" (the
	 *         original author left "Error code means?" as an open question)
	 */
	static int32_t
	st_ipc_closed(qb_ipcs_connection_t * c)
	{
	    crm_client_t *client = crm_client_get(c);

	    if (client != NULL) {
	        crm_trace("Connection %p closed", c);
	        crm_client_destroy(client);
	    }

	    /* 0 means: yes, go ahead and destroy the connection */
	    return 0;
	}

	/*!
	 * \internal
	 * \brief Handle destruction of an IPC connection
	 *
	 * Delegates to st_ipc_closed() so the client record is released even if the
	 * connection is destroyed while a record for it still exists.
	 *
	 * \param[in] c  Connection being destroyed
	 */
	static void
	st_ipc_destroy(qb_ipcs_connection_t *c)
	{
	    crm_trace("Connection %p destroyed", c);
	    st_ipc_closed(c);
	}

	/*!
	 * \internal
	 * \brief Process a message received from a cluster peer
	 *
	 * "poke" messages are dropped; anything else is passed to stonith_command()
	 * together with the name of the peer that sent it.
	 *
	 * \param[in] msg           XML message received
	 * \param[in] private_data  Ignored
	 */
	static void
	stonith_peer_callback(xmlNode * msg, void *private_data)
	{
	    const char *sender = crm_element_value(msg, F_ORIG);
	    const char *task = crm_element_value(msg, F_STONITH_OPERATION);

	    if (!crm_str_eq(task, "poke", TRUE)) {
	        crm_log_xml_trace(msg, "Peer[inbound]");
	        stonith_command(NULL, 0, 0, msg, sender);
	    }
	}

	#if SUPPORT_COROSYNC
	/*!
	 * \internal
	 * \brief Receive a CPG message and hand cluster-class XML to the peer callback
	 *
	 * \param[in] handle     CPG handle the message arrived on
	 * \param[in] groupName  Ignored
	 * \param[in] nodeid     ID of the sending node
	 * \param[in] pid        Process ID of the sender
	 * \param[in] msg        Raw message
	 * \param[in] msg_len    Ignored
	 */
	static void
	stonith_peer_ais_callback(cpg_handle_t handle,
	                          const struct cpg_name *groupName,
	                          uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
	{
	    uint32_t kind = 0;
	    xmlNode *xml = NULL;
	    const char *from = NULL;
	    char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);

	    if (data == NULL) {
	        return;
	    }

	    if (kind == crm_class_cluster) {
	        xml = string2xml(data);
	        if (xml != NULL) {
	            crm_xml_add(xml, F_ORIG, from);
	            /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
	            stonith_peer_callback(xml, NULL);

	        } else {
	            crm_err("Invalid XML: '%.120s'", data);
	            free(data);
	            return;
	        }
	    }

	    free_xml(xml);
	    free(data);
	}

	/*!
	 * \internal
	 * \brief React to losing the cluster-layer connection by shutting down
	 *
	 * \param[in] user_data  Ignored
	 */
	static void
	stonith_peer_cs_destroy(gpointer user_data)
	{
	    crm_crit("Lost connection to cluster layer, shutting down");
	    stonith_shutdown(0);
	}
	#endif

	/*!
	 * \brief Send a reply or event to a locally connected client
	 *
	 * \param[in] notify_src  XML to send
	 * \param[in] client_id   ID of the client to send to; nothing is sent if no
	 *                        such client is connected
	 * \param[in] sync_reply  If TRUE, answer the client's pending synchronous
	 *                        request (and clear it); otherwise send an event
	 * \param[in] from_peer   Whether the request was delegated by a peer (only
	 *                        affects trace output)
	 */
	void
	do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
	{
	    /* send callback to originating child */
	    crm_client_t *client_obj = NULL;
	    int local_rc = pcmk_ok;
	    int rid = 0;

	    crm_trace("Sending response");
	    client_obj = crm_client_get_by_id(client_id);

	    crm_trace("Sending callback to request originator");
	    if (client_obj == NULL) {
	        crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set.");
	        return;
	    }

	    if (sync_reply) {
	        CRM_LOG_ASSERT(client_obj->request_id);

	        /* Consume the pending request ID so it is answered only once */
	        rid = client_obj->request_id;
	        client_obj->request_id = 0;

	        crm_trace("Sending response %d to %s %s",
	                  rid, client_obj->name, from_peer ? "(originator of delegated request)" : "");

	    } else {
	        crm_trace("Sending an event to %s %s",
	                  client_obj->name, from_peer ? "(originator of delegated request)" : "");
	    }

	    local_rc = crm_ipcs_send(client_obj, rid, notify_src,
	                             sync_reply ? crm_ipc_flags_none : crm_ipc_server_event);
	    if (local_rc < pcmk_ok) {
	        crm_warn("%sSync reply to %s failed: %s",
	                 sync_reply ? "" : "A-",
	                 client_obj->name, pcmk_strerror(local_rc));
	    }
	}

	/*!
	 * \brief Map a notification type name to its callback flag
	 *
	 * \param[in] name  Notification type name
	 *
	 * \return The matching st_callback_* flag, or st_callback_unknown if the
	 *         name is not recognized
	 */
	long long
	get_stonith_flag(const char *name)
	{
	    /* Checked in order; the first matching name wins */
	    const struct {
	        const char *type;
	        long long flag;
	    } known[] = {
	        { T_STONITH_NOTIFY_FENCE, st_callback_notify_fence },
	        { STONITH_OP_DEVICE_ADD, st_callback_device_add },
	        { STONITH_OP_DEVICE_DEL, st_callback_device_del },
	        { T_STONITH_NOTIFY_HISTORY, st_callback_notify_history },
	    };
	    size_t lpc;

	    for (lpc = 0; lpc < sizeof(known) / sizeof(known[0]); lpc++) {
	        if (safe_str_eq(name, known[lpc].type)) {
	            return known[lpc].flag;
	        }
	    }
	    return st_callback_unknown;
	}

	/*!
	 * \internal
	 * \brief Send a notification to one client if it subscribed to that type
	 *
	 * Has the (key, value, user_data) shape required by g_hash_table_foreach(),
	 * which do_stonith_notify() uses to call it for every connected client.
	 *
	 * \param[in] key        Ignored
	 * \param[in] value      Client (crm_client_t *) to notify
	 * \param[in] user_data  Notification XML (xmlNode *) whose F_SUBTYPE names
	 *                       the notification type
	 */
	static void
	stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
	{
	    xmlNode *update_msg = user_data;
	    crm_client_t *client = value;
	    const char *type = NULL;

	    CRM_CHECK(client != NULL, return);
	    CRM_CHECK(update_msg != NULL, return);

	    type = crm_element_value(update_msg, F_SUBTYPE);
	    CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);

	    if (client->ipcs == NULL) {
	        crm_trace("Skipping client with NULL channel");
	        return;
	    }

	    /* Only clients that registered for this notification type receive it */
	    if (client->options & get_stonith_flag(type)) {
	        int rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error);

	        if (rc <= 0) {
	            crm_warn("%s notification of client %s.%.6s failed: %s (%d)",
	                     type, crm_client_name(client), client->id, pcmk_strerror(rc), rc);
	        } else {
	            crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client),
	                      client->id);
	        }
	    }
	}

	/*!
	 * \brief Tell a local client the timeout of one of its asynchronous calls
	 *
	 * Nothing is sent if any argument is 0/NULL or if no client with the given
	 * ID is connected.
	 *
	 * \param[in] client_id  ID of the client to notify
	 * \param[in] call_id    Call ID the timeout belongs to
	 * \param[in] timeout    Timeout value to report
	 */
	void
	do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
	{
	    crm_client_t *client = NULL;
	    xmlNode *notify_data = NULL;

	    if (!timeout || !call_id || !client_id) {
	        return;
	    }

	    client = crm_client_get_by_id(client_id);
	    if (!client) {
	        return;
	    }

	    notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
	    crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
	    crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
	    crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);

	    crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);

	    /* client is known to be non-NULL here, so send unconditionally */
	    crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event);

	    free_xml(notify_data);
	}

	/*!
	 * \brief Send a notification to every connected client subscribed to it
	 *
	 * \param[in] options  Unused by this function
	 * \param[in] type     Notification type; stored as both the subtype and the
	 *                     operation of the message
	 * \param[in] result   Result code to report
	 * \param[in] data     Optional XML to attach as call data (may be NULL)
	 */
	void
	do_stonith_notify(int options, const char *type, int result, xmlNode * data)
	{
	    /* TODO: Standardize the contents of data */
	    xmlNode *update_msg = create_xml_node(NULL, "notify");

	    /* A NULL type is logged but deliberately not treated as fatal */
	    CRM_CHECK(type != NULL,;);

	    crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
	    crm_xml_add(update_msg, F_SUBTYPE, type);
	    crm_xml_add(update_msg, F_STONITH_OPERATION, type);
	    crm_xml_add_int(update_msg, F_STONITH_RC, result);

	    if (data != NULL) {
	        add_message_xml(update_msg, F_STONITH_CALLDATA, data);
	    }

	    crm_trace("Notifying clients");
	    g_hash_table_foreach(client_connections, stonith_notify_client, update_msg);
	    free_xml(update_msg);
	    crm_trace("Notify complete");
	}

	/*!
	 * \internal
	 * \brief Notify clients about a configuration change
	 *
	 * \param[in] options  Passed through to do_stonith_notify()
	 * \param[in] op       Operation name; used as both the XML element name and
	 *                     the notification type
	 * \param[in] rc       Result code of the operation
	 * \param[in] desc     Value stored as F_STONITH_DEVICE in the notification
	 * \param[in] active   Value stored as F_STONITH_ACTIVE in the notification
	 */
	static void
	do_stonith_notify_config(int options, const char *op, int rc,
	                         const char *desc, int active)
	{
	    xmlNode *notify_data = create_xml_node(NULL, op);

	    CRM_CHECK(notify_data != NULL, return);

	    /* Describe what changed and how many entries exist afterwards */
	    crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
	    crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);

	    do_stonith_notify(options, op, rc, notify_data);

	    free_xml(notify_data);
	}

	/*!
	* \internal
	* \brief Notify clients about a device change, reporting the size of device_list
	*
	* \param[in] options Passed through to do_stonith_notify_config()
	* \param[in] op Operation being reported
	* \param[in] rc Result code of the operation
	* \param[in] desc Description of what changed
	*
	* NOTE(review): op and desc are passed on as strings; the '*' declarators
	* appear to have been lost from this copy of the signature.
	*/
	void
	do_stonith_notify_device(int options, const char op, int rc, const char desc)
	{
	do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list));
	}

	/*!
	* \internal
	* \brief Notify clients about a fencing level change, reporting the size of topology
	*
	* \param[in] options Passed through to do_stonith_notify_config()
	* \param[in] op Operation being reported
	* \param[in] rc Result code of the operation
	* \param[in] desc Description of what changed
	*
	* NOTE(review): op and desc are passed on as strings; the '*' declarators
	* appear to have been lost from this copy of the signature.
	*/
	void
	do_stonith_notify_level(int options, const char op, int rc, const char desc)
	{
	do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology));
	}

	/*!
	* \internal
	* \brief Remove one fencing level and notify clients of the result
	*
	* Builds a temporary fencing-level element from the arguments, passes it to
	* stonith_level_remove(), and reports the outcome as STONITH_OP_LEVEL_DEL.
	*
	* \param[in] node Value stored as the level's target
	* \param[in] level Value stored as the level's index
	*/
	static void
	topology_remove_helper(const char *node, int level)
	{
	int rc;
	char *desc = NULL;
	xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);

	crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
	crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
	crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);

	rc = stonith_level_remove(data, &desc);
	do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc);

	free_xml(data);
	free(desc);
	}

	/*!
	* \internal
	* \brief Remove the fence device for each stonith-class resource in a search result
	*
	* \param[in] xpathObj XPath search result whose entries are resource elements
	*/
	static void
	remove_cib_device(xmlXPathObjectPtr xpathObj)
	{
	int max = numXpathResults(xpathObj), lpc = 0;

	for (lpc = 0; lpc < max; lpc++) {
	const char *rsc_id = NULL;
	const char *standard = NULL;
	xmlNode *match = getXpathResult(xpathObj, lpc);

	CRM_LOG_ASSERT(match != NULL);
	if(match != NULL) {
	standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
	}

	/* Only stonith-class resources correspond to fence devices */
	if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
	continue;
	}

	rsc_id = crm_element_value(match, XML_ATTR_ID);

	stonith_device_remove(rsc_id, TRUE);
	}
	}

	/*!
	* \internal
	* \brief Register a fencing level, optionally removing the existing entry first
	*
	* \param[in] match Fencing-level element to register
	* \param[in] remove If true, first remove the level identified by the
	* element's key and index
	*
	* Clients are notified of the registration result as STONITH_OP_LEVEL_ADD.
	*/
	static void
	handle_topology_change(xmlNode *match, bool remove)
	{
	int rc;
	char *desc = NULL;

	CRM_CHECK(match != NULL, return);
	crm_trace("Updating %s", ID(match));

	if(remove) {
	int index = 0;
	char *key = stonith_level_key(match, -1);

	crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
	topology_remove_helper(key, index);
	/* The key returned by stonith_level_key() is released by the caller */
	free(key);
	}

	rc = stonith_level_register(match, &desc);
	do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc);

	free(desc);
	}

	/*!
	 * \internal
	 * \brief Remove the fencing levels that a search result marks as deleted
	 *
	 * Only entries carrying XML_DIFF_MARKER are treated as deletions; anything
	 * else is left for the addition stage to deal with.
	 *
	 * \param[in] xpathObj  XPath search result whose entries are fencing-level
	 *                      elements
	 */
	static void
	remove_fencing_topology(xmlXPathObjectPtr xpathObj)
	{
	    int max = numXpathResults(xpathObj), lpc = 0;

	    for (lpc = 0; lpc < max; lpc++) {
	        xmlNode *match = getXpathResult(xpathObj, lpc);

	        CRM_LOG_ASSERT(match != NULL);
	        if (match && crm_element_value(match, XML_DIFF_MARKER)) {
	            /* Deletion */
	            int index = 0;
	            char *target = stonith_level_key(match, -1);

	            crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
	            if (target == NULL) {
	                crm_err("Invalid fencing target in element %s", ID(match));

	            } else if (index <= 0) {
	                crm_err("Invalid level for %s in element %s", target, ID(match));

	            } else {
	                topology_remove_helper(target, index);
	            }

	            /* The key belongs to us (handle_topology_change() frees its
	             * key the same way); without this it leaks once per deletion
	             */
	            free(target);
	            /* } else { Deal with modifications during the 'addition' stage */
	        }
	    }
	}

	/*!
	* \internal
	* \brief Register every fencing level in a search result
	*
	* Each entry is passed to handle_topology_change() with removal enabled, so
	* an existing level with the same key and index is replaced.
	*
	* \param[in] xpathObj XPath search result whose entries are fencing-level elements
	*/
	static void
	register_fencing_topology(xmlXPathObjectPtr xpathObj)
	{
	int max = numXpathResults(xpathObj), lpc = 0;

	for (lpc = 0; lpc < max; lpc++) {
	xmlNode *match = getXpathResult(xpathObj, lpc);

	handle_topology_change(match, TRUE);
	}
	}

	/* Fencing
	<diff crm_feature_set="3.0.6">
	<diff-removed>
	<fencing-topology>
	<fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
	<fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
	<fencing-level devices="disk,network" id="f-p2.1"/>
	</fencing-topology>
	</diff-removed>
	<diff-added>
	<fencing-topology>
	<fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
	<fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
	<fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
	</fencing-topology>
	</diff-added>
	</diff>
	*/

	/*!
	* \internal
	* \brief Rebuild the whole fencing topology from the locally cached CIB
	*
	* Discards the current topology list, creates a fresh one, and registers
	* every fencing-level element found anywhere in local_cib.
	*/
	static void
	fencing_topology_init()
	{
	xmlXPathObjectPtr xpathObj = NULL;
	const char *xpath = "//" XML_TAG_FENCING_LEVEL;

	crm_trace("Full topology refresh");
	free_topology_list();
	init_topology_list();

	/* Grab everything */
	xpathObj = xpath_search(local_cib, xpath);
	register_fencing_topology(xpathObj);

	freeXpathObject(xpathObj);
	}

	/* Name to use for a resource: its clone name if it has one, otherwise its ID.
	 * The argument and the whole expansion are parenthesized so the macro is
	 * safe with any pointer expression and inside larger expressions.
	 */
	#define rsc_name(x) ((x)->clone_name ? (x)->clone_name : (x)->id)

	/*!
	* \internal
	* \brief Check whether our uname is in a resource's allowed node list
	*
	* \param[in] rsc Resource to check
	*
	* \return Pointer to node object if found, NULL otherwise
	*
	* \note NULL is also returned when rsc is NULL or stonith_our_uname is unset.
	*/
	static node_t *
	our_node_allowed_for(resource_t *rsc)
	{
	GHashTableIter iter;
	node_t *node = NULL;

	if (rsc && stonith_our_uname) {
	g_hash_table_iter_init(&iter, rsc->allowed_nodes);
	while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
	if (node && strcmp(node->details->uname, stonith_our_uname) == 0) {
	break;
	}
	/* Reset so that a non-matching last entry is not returned */
	node = NULL;
	}
	}
	return node;
	}

	/*!
	* \internal
	* \brief If a resource or any of its children are STONITH devices, update their
	* definitions given a cluster working set.
	*
	* \param[in] rsc Resource to check
	* \param[in] data_set Cluster working set with device information
	*
	* The device is registered when our node is allowed to run it; otherwise any
	* existing device registered under the resource's name is removed.
	*
	* NOTE(review): rsc and data_set are used as pointers below; the '*'
	* declarators appear to have been lost from this copy of the signature.
	*/
	static void cib_device_update(resource_t rsc, pe_working_set_t data_set)
	{
	node_t *node = NULL;
	const char *value = NULL;
	const char *rclass = NULL;
	node_t *parent = NULL;
	/* Stays TRUE on every path except successful registration */
	gboolean remove = TRUE;

	/* If this is a complex resource, check children rather than this resource itself.
	* TODO: Mark each installed device and remove if untouched when this process finishes.
	*/
	if(rsc->children) {
	GListPtr gIter = NULL;
	for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
	cib_device_update(gIter->data, data_set);
	if(pe_rsc_is_clone(rsc)) {
	crm_trace("Only processing one copy of the clone %s", rsc->id);
	break;
	}
	}
	return;
	}

	/* We only care about STONITH resources. */
	rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
	if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)) {
	return;
	}

	/* If this STONITH resource is disabled, just remove it. */
	value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
	if (safe_str_eq(value, RSC_STOPPED)) {
	crm_info("Device %s has been disabled", rsc->id);
	goto update_done;
	}

	/* Check whether our node is allowed for this resource (and its parent if in a group) */
	node = our_node_allowed_for(rsc);
	if (rsc->parent && (rsc->parent->variant == pe_group)) {
	parent = our_node_allowed_for(rsc->parent);
	}

	if(node == NULL) {
	/* Our node is disallowed, so remove the device */
	GHashTableIter iter;

	crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
	/* Trace the nodes that are allowed, purely for diagnostics */
	g_hash_table_iter_init(&iter, rsc->allowed_nodes);
	while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
	crm_trace("Available: %s = %d", node->details->uname, node->weight);
	}

	goto update_done;

	} else if(node->weight < 0 \|\| (parent && parent->weight < 0)) {
	/* Our node (or its group) is disallowed by score, so remove the device */
	char *score = score2char((node->weight < 0) ? node->weight : parent->weight);

	crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
	free(score);

	goto update_done;

	} else {
	/* Our node is allowed, so update the device information */
	xmlNode *data;
	GHashTableIter gIter;
	stonith_key_value_t *params = NULL;

	const char *name = NULL;
	const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
	const char *rsc_provides = NULL;

	crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
	get_rsc_attributes(rsc->parameters, rsc, node, data_set);
	get_meta_attributes(rsc->meta, rsc, node, data_set);

	rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES);

	/* Copy the resource parameters into a key/value list for registration;
	* 'value' is reused here as the iteration variable
	*/
	g_hash_table_iter_init(&gIter, rsc->parameters);
	while (g_hash_table_iter_next(&gIter, (gpointer ) & name, (gpointer ) & value)) {
	if (!name \|\| !value) {
	continue;
	}
	params = stonith_key_value_add(params, name, value);
	crm_trace(" %s=%s", name, value);
	}

	remove = FALSE;
	data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
	agent, params, rsc_provides);
	stonith_device_register(data, NULL, TRUE);

	stonith_key_value_freeall(params, 1, 1);
	free_xml(data);
	}

	update_done:

	/* Only remove a device that is actually registered */
	if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) {
	stonith_device_remove(rsc_name(rsc), TRUE);
	}
	}

	extern xmlNode do_calculations(pe_working_set_t data_set, xmlNode * xml_input, crm_time_t * now);

	/*!
	* \internal
	* \brief Update all STONITH device definitions based on current CIB
	*
	* Builds a temporary working set on top of local_cib, runs the status and
	* calculation passes on it, and then calls cib_device_update() for each
	* resource in the result.
	*/
	static void
	cib_devices_update(void)
	{
	GListPtr gIter = NULL;
	pe_working_set_t data_set;

	crm_info("Updating devices to version %s.%s.%s",
	crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
	crm_element_value(local_cib, XML_ATTR_GENERATION),
	crm_element_value(local_cib, XML_ATTR_NUMUPDATES));

	set_working_set_defaults(&data_set);
	data_set.input = local_cib;
	data_set.now = crm_time_new(NULL);
	data_set.flags \|= pe_flag_quick_location;
	data_set.localhost = stonith_our_uname;

	cluster_status(&data_set);
	do_calculations(&data_set, NULL, NULL);

	for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) {
	cib_device_update(gIter->data, &data_set);
	}
	/* Detach local_cib before cleanup: the working set only borrowed it */
	data_set.input = NULL; /* Wasn't a copy */
	cleanup_alloc_calculations(&data_set);
	}

	/*!
	 * \internal
	 * \brief Update fence devices in response to a format-2 CIB patchset
	 *
	 * Walks the changes in the patchset. A deleted primitive is removed from the
	 * device list directly; a deleted attribute set, or any other change under
	 * the resources or constraints sections, triggers a full device refresh.
	 *
	 * \param[in] event  Not used by this function
	 * \param[in] msg    Notification containing the patchset as
	 *                   F_CIB_UPDATE_RESULT
	 */
	static void
	update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
	{
	    xmlNode *change = NULL;
	    char *reason = NULL;
	    bool needs_update = FALSE;
	    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

	    for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
	        const char *op = crm_element_value(change, XML_DIFF_OP);
	        const char *xpath = crm_element_value(change, XML_DIFF_PATH);
	        const char *shortpath = NULL;

	        if (op == NULL || xpath == NULL || strcmp(op, "move") == 0) {
	            /* Nothing to match against (strstr() must not be given NULL),
	             * or the change is a pure reordering
	             */
	            continue;

	        } else if (safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) {
	            const char *rsc_id = NULL;
	            char *search = NULL;
	            char *mutable = NULL;

	            if (strstr(xpath, XML_TAG_ATTR_SETS)) {
	                /* Record why we refresh, so the log below never gets NULL */
	                reason = crm_strdup_printf("%s %s", op, XML_TAG_ATTR_SETS);
	                needs_update = TRUE;
	                break;
	            }

	            /* Work on a copy so the ID can be terminated in place */
	            mutable = strdup(xpath);
	            CRM_ASSERT(mutable != NULL);

	            rsc_id = strstr(mutable, "primitive[@id=\'");
	            if (rsc_id != NULL) {
	                rsc_id += strlen("primitive[@id=\'");
	                search = strchr(rsc_id, '\'');
	            }
	            if (search != NULL) {
	                *search = 0;
	                stonith_device_remove(rsc_id, TRUE);
	            } else {
	                crm_warn("Ignoring malformed CIB update (resource deletion)");
	            }
	            free(mutable);

	        } else if (strstr(xpath, XML_CIB_TAG_RESOURCES)) {
	            shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
	            reason = crm_strdup_printf("%s %s", op, shortpath+1);
	            needs_update = TRUE;
	            break;

	        } else if (strstr(xpath, XML_CIB_TAG_CONSTRAINTS)) {
	            shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
	            reason = crm_strdup_printf("%s %s", op, shortpath+1);
	            needs_update = TRUE;
	            break;
	        }
	    }

	    if (needs_update) {
	        crm_info("Updating device list from the cib: %s", reason);
	        cib_devices_update();
	    } else {
	        crm_trace("No updates for device list found in cib");
	    }
	    free(reason);
	}


	/*!
	* \internal
	* \brief Update fence devices in response to a format-1 CIB diff
	*
	* Any location constraint in the update, or any added stonith-class
	* resource, triggers a full device refresh. Resources in the removed
	* section are passed to remove_cib_device().
	*
	* \param[in] event Not used by this function
	* \param[in] msg Notification containing the diff under F_CIB_UPDATE_RESULT
	*
	* NOTE(review): event and msg are used as pointers; the '*' declarators
	* appear to have been lost from this copy of the signature.
	*/
	static void
	update_cib_stonith_devices_v1(const char event, xmlNode msg)
	{
	const char *reason = "none";
	gboolean needs_update = FALSE;
	xmlXPathObjectPtr xpath_obj = NULL;

	/* process new constraints */
	xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
	if (numXpathResults(xpath_obj) > 0) {
	int max = numXpathResults(xpath_obj), lpc = 0;

	/* Safest and simplest to always recompute */
	needs_update = TRUE;
	reason = "new location constraint";

	/* The loop only logs each constraint at trace level */
	for (lpc = 0; lpc < max; lpc++) {
	xmlNode *match = getXpathResult(xpath_obj, lpc);

	crm_log_xml_trace(match, "new constraint");
	}
	}
	freeXpathObject(xpath_obj);

	/* process deletions */
	xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
	if (numXpathResults(xpath_obj) > 0) {
	remove_cib_device(xpath_obj);
	}
	freeXpathObject(xpath_obj);

	/* process additions */
	xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
	if (numXpathResults(xpath_obj) > 0) {
	int max = numXpathResults(xpath_obj), lpc = 0;

	for (lpc = 0; lpc < max; lpc++) {
	const char *rsc_id = NULL;
	const char *standard = NULL;
	xmlNode *match = getXpathResult(xpath_obj, lpc);

	rsc_id = crm_element_value(match, XML_ATTR_ID);
	standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);

	if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
	continue;
	}

	crm_trace("Fencing resource %s was added or modified", rsc_id);
	reason = "new resource";
	needs_update = TRUE;
	}
	}
	freeXpathObject(xpath_obj);

	if(needs_update) {
	crm_info("Updating device list from the cib: %s", reason);
	cib_devices_update();
	}
	}

	/*!
	* \internal
	* \brief Dispatch a CIB update to the device handler for its patch format
	*
	* \param[in] event Passed through to the format-specific handler
	* \param[in] msg Notification containing the patchset as F_CIB_UPDATE_RESULT
	*
	* NOTE(review): event and msg are used as pointers; the '*' declarators
	* appear to have been lost from this copy of the signature.
	*/
	static void
	update_cib_stonith_devices(const char event, xmlNode msg)
	{
	/* Format 1 is assumed when the patchset has no "format" attribute */
	int format = 1;
	xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

	CRM_ASSERT(patchset);
	crm_element_value_int(patchset, "format", &format);
	switch(format) {
	case 1:
	update_cib_stonith_devices_v1(event, msg);
	break;
	case 2:
	update_cib_stonith_devices_v2(event, msg);
	break;
	default:
	crm_warn("Unknown patch format: %d", format);
	}
	}

	/* Needs to hold node name + attribute name + attribute value + 75 */
	#define XPATH_MAX 512

	/*!
	* \internal
	* \brief Check whether a node has a specific attribute name/value
	*
	* \param[in] node Name of node to check
	* \param[in] name Name of an attribute to look for
	* \param[in] value The value the named attribute needs to be set to in order to be considered a match
	*
	* \return TRUE if the locally cached CIB has the specified node attribute
	*/
	gboolean
	node_has_attr(const char node, const char name, const char *value)
	{
	char xpath[XPATH_MAX];
	xmlNode *match;
	int n;

	CRM_CHECK(local_cib != NULL, return FALSE);

	/* Search for the node's attributes in the CIB. While the schema allows
	* multiple sets of instance attributes, and allows instance attributes to
	* use id-ref to reference values elsewhere, that is intended for resources,
	* so we ignore that here.
	*/
	n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES
	"/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS
	"/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']",
	node, name, value);
	match = get_xpath_object(xpath, local_cib, LOG_TRACE);

	CRM_CHECK(n < XPATH_MAX, return FALSE);
	return (match != NULL);
	}

	/*!
	* \internal
	* \brief Update the fencing topology in response to a CIB change notification
	*
	* For format-1 diffs, removed levels are deleted and added levels are
	* registered. For format-2 patchsets, each change is examined by path:
	* individual level creations and modifications are applied directly, while
	* deletions and changes to the topology as a whole rebuild it from scratch.
	*
	* \param[in] event Not used by this function
	* \param[in] msg Notification containing the patchset as F_CIB_UPDATE_RESULT
	*
	* NOTE(review): event and msg are used as pointers; the '*' declarators
	* appear to have been lost from this copy of the signature.
	*/
	static void
	update_fencing_topology(const char event, xmlNode msg)
	{
	/* Format 1 is assumed when the patchset has no "format" attribute */
	int format = 1;
	const char *xpath;
	xmlXPathObjectPtr xpathObj = NULL;
	xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

	CRM_ASSERT(patchset);
	crm_element_value_int(patchset, "format", &format);

	if(format == 1) {
	/* Process deletions (only) */
	xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
	xpathObj = xpath_search(msg, xpath);

	remove_fencing_topology(xpathObj);
	freeXpathObject(xpathObj);

	/* Process additions and changes */
	xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
	xpathObj = xpath_search(msg, xpath);

	register_fencing_topology(xpathObj);
	freeXpathObject(xpathObj);

	} else if(format == 2) {
	xmlNode *change = NULL;
	/* Version triples filled in by xml_patch_versions(); add[] is only used for logging here */
	int add[] = { 0, 0, 0 };
	int del[] = { 0, 0, 0 };

	xml_patch_versions(patchset, add, del);

	for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
	const char *op = crm_element_value(change, XML_DIFF_OP);
	/* This shadows the function-level xpath for the rest of the loop body.
	* NOTE(review): it is passed to strstr() without a NULL check
	*/
	const char *xpath = crm_element_value(change, XML_DIFF_PATH);

	if(op == NULL) {
	continue;

	} else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) {
	/* Change to a specific entry */

	crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath);
	if(strcmp(op, "move") == 0) {
	continue;

	} else if(strcmp(op, "create") == 0) {
	handle_topology_change(change->children, FALSE);

	} else if(strcmp(op, "modify") == 0) {
	xmlNode *match = first_named_child(change, XML_DIFF_RESULT);

	if(match) {
	handle_topology_change(match->children, TRUE);
	}

	} else if(strcmp(op, "delete") == 0) {
	/* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
	crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
	op, add[0], add[1], add[2], xpath);
	fencing_topology_init();
	return;
	}

	} else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
	/* Change to the topology in general */
	crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
	op, add[0], add[1], add[2], xpath);
	fencing_topology_init();
	return;

	} else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) {
	/* Changes to the whole config section, possibly including the topology as a child */
	if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
	crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
	op, add[0], add[1], add[2], xpath);

	} else if(strcmp(op, "delete") == 0 \|\| strcmp(op, "create") == 0) {
	crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
	op, add[0], add[1], add[2], xpath);
	fencing_topology_init();
	return;
	}

	} else {
	crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
	op, add[0], add[1], add[2], xpath);
	}
	}

	} else {
	crm_warn("Unknown patch format: %d", format);
	}
	}
	/* Set once init_cib_cache_cb() has stored the first full CIB copy; until
	* then update_cib_cache_cb() ignores notifications
	*/
	static bool have_cib_devices = FALSE;

	/*!
	* \internal
	* \brief Handle a CIB change notification
	*
	* Applies the patchset to the local CIB copy (re-querying the full CIB if
	* that fails), refreshes the watchdog timeout from the configuration, and
	* then updates the fencing topology and device list, either incrementally
	* or in full.
	*
	* \param[in] event Name of the event, used in log messages and passed to the update handlers
	* \param[in] msg Notification message
	*
	* NOTE(review): event and msg are used as pointers; the '*' declarators
	* appear to have been lost from this copy of the signature.
	*/
	static void
	update_cib_cache_cb(const char event, xmlNode msg)
	{
	int rc = pcmk_ok;
	xmlNode *stonith_enabled_xml = NULL;
	xmlNode *stonith_watchdog_xml = NULL;
	const char *stonith_enabled_s = NULL;
	/* Remembers across calls whether the lists are in sync; FALSE forces a full refresh below */
	static gboolean stonith_enabled_saved = TRUE;

	if(!have_cib_devices) {
	crm_trace("Skipping updates until we get a full dump");
	return;

	} else if(msg == NULL) {
	crm_trace("Missing %s update", event);
	return;
	}

	/* Maintain a local copy of the CIB so that we have full access
	* to device definitions, location constraints, and node attributes
	*/
	if (local_cib != NULL) {
	/* This rc shadows the function-level rc within this block */
	int rc = pcmk_ok;
	xmlNode *patchset = NULL;

	crm_element_value_int(msg, F_CIB_RC, &rc);
	if (rc != pcmk_ok) {
	return;
	}

	patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
	xml_log_patchset(LOG_TRACE, "Config update", patchset);
	rc = xml_apply_patchset(local_cib, patchset, TRUE);
	switch (rc) {
	case pcmk_ok:
	case -pcmk_err_old_data:
	break;
	case -pcmk_err_diff_resync:
	case -pcmk_err_diff_failed:
	crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
	free_xml(local_cib);
	local_cib = NULL;
	break;
	default:
	crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
	free_xml(local_cib);
	local_cib = NULL;
	}
	}

	/* The copy was discarded above (or never existed), so fetch a fresh one */
	if (local_cib == NULL) {
	crm_trace("Re-requesting the full cib");
	rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local \| cib_sync_call);
	if(rc != pcmk_ok) {
	crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
	return;
	}
	CRM_ASSERT(local_cib != NULL);
	stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
	}

	+ crm_peer_caches_refresh(local_cib);
	+
	stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE);
	if (stonith_enabled_xml) {
	stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
	}

	/* An unset stonith-enabled option is treated the same as true */
	if (stonith_enabled_s == NULL \|\| crm_is_true(stonith_enabled_s)) {
	long timeout_ms = 0;
	const char *value = NULL;

	stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
	if (stonith_watchdog_xml) {
	value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
	}

	if(value) {
	timeout_ms = crm_get_msec(value);
	}
	/* A negative configured value selects the automatically computed timeout */
	if (timeout_ms < 0) {
	timeout_ms = crm_auto_watchdog_timeout();
	}

	if(timeout_ms != stonith_watchdog_timeout_ms) {
	crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
	stonith_watchdog_timeout_ms = timeout_ms;
	}

	} else {
	stonith_watchdog_timeout_ms = 0;
	}

	if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
	crm_trace("Ignoring cib updates while stonith is disabled");
	stonith_enabled_saved = FALSE;
	return;

	} else if (stonith_enabled_saved == FALSE) {
	crm_info("Updating stonith device and topology lists now that stonith is enabled");
	stonith_enabled_saved = TRUE;
	fencing_topology_init();
	cib_devices_update();

	} else {
	update_fencing_topology(event, msg);
	update_cib_stonith_devices(event, msg);
	}
	}

	/*!
	* \internal
	* \brief Callback for the initial CIB query: store a copy and build all lists
	*
	* \param[in] msg Not used by this function
	* \param[in] call_id Not used by this function
	* \param[in] rc Not used by this function
	* \param[in] output Query result; a copy becomes local_cib
	* \param[in] user_data Not used by this function
	*
	* NOTE(review): rc is not examined before output is copied.
	*/
	static void
	init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
	{
	crm_info("Updating device list from the cib: init");
	/* From now on update_cib_cache_cb() will process notifications */
	have_cib_devices = TRUE;
	local_cib = copy_xml(output);

	+ crm_peer_caches_refresh(local_cib);
	+
	fencing_topology_init();
	cib_devices_update();
	}

	/*!
	* \internal
	* \brief Begin shutting down the daemon
	*
	* Sets the shutdown flag. If the main loop is running, asks it to quit;
	* otherwise cleans up and exits immediately.
	*
	* \param[in] nsig Not used by this function
	*/
	static void
	stonith_shutdown(int nsig)
	{
	stonith_shutdown_flag = TRUE;
	crm_info("Terminating with %d clients",
	crm_hash_table_size(client_connections));
	if (mainloop != NULL && g_main_is_running(mainloop)) {
	g_main_loop_quit(mainloop);
	} else {
	stonith_cleanup();
	crm_exit(CRM_EX_OK);
	}
	}

	/*!
	* \internal
	* \brief Callback for loss of the CIB connection
	*
	* If a shutdown is already in progress this only logs. Otherwise it signs
	* off from the CIB and starts a shutdown.
	*
	* \param[in] user_data Not used by this function
	*/
	static void
	cib_connection_destroy(gpointer user_data)
	{
	if (stonith_shutdown_flag) {
	crm_info("Connection to the CIB manager closed");
	return;
	} else {
	crm_crit("Lost connection to the CIB manager, shutting down");
	}
	if (cib_api) {
	cib_api->cmds->signoff(cib_api);
	}
	stonith_shutdown(0);
	}

	/*!
	* \internal
	* \brief Release the daemon's connections, caches and lists
	*
	* Signs off from the CIB, destroys the IPC server, and frees the peer and
	* client caches, the remote operation, topology and device lists, the
	* metadata cache, our node name and the local CIB copy.
	*/
	static void
	stonith_cleanup(void)
	{
	if (cib_api) {
	cib_api->cmds->signoff(cib_api);
	}

	if (ipcs) {
	qb_ipcs_destroy(ipcs);
	}

	- if (known_peer_names != NULL) {
	- g_hash_table_destroy(known_peer_names);
	- known_peer_names = NULL;
	- }
	-
	crm_peer_destroy();
	crm_client_cleanup();
	free_stonith_remote_op_list();
	free_topology_list();
	free_device_list();
	free_metadata_cache();

	free(stonith_our_uname);
	stonith_our_uname = NULL;

	free_xml(local_cib);
	local_cib = NULL;
	}

	/* INDENT-OFF */
	/* Command-line options accepted by main(); the last field of each entry is
	* the flag value that main() switches on, and the all-zero entry ends the table
	*/
	static struct crm_option long_options[] = {
	{"stand-alone", 0, 0, 's'},
	{"stand-alone-w-cpg", 0, 0, 'c'},
	{"logfile", 1, 0, 'l'},
	{"verbose", 0, 0, 'V'},
	{"version", 0, 0, '$'},
	{"help", 0, 0, '?'},

	{0, 0, 0, 0}
	};
	/* INDENT-ON */

	/*!
	* \internal
	* \brief Connect to the CIB manager and subscribe to configuration changes
	*
	* Looks up cib_new() in the CIB library at run time, signs on (retrying
	* while the result is -ENOTCONN), registers update_cib_cache_cb() for diff
	* notifications, and issues the initial query whose result is handled by
	* init_cib_cache_cb().
	*/
	static void
	setup_cib(void)
	{
	int rc, retries = 0;
	/* NOTE(review): this is called through (*cib_new_fn)() below, so it looks
	* like a function pointer whose '*' declarators were lost from this copy
	*/
	static cib_t (cib_new_fn) (void) = NULL;

	if (cib_new_fn == NULL) {
	cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE);
	}

	if (cib_new_fn != NULL) {
	cib_api = (*cib_new_fn) ();
	}

	if (cib_api == NULL) {
	crm_err("No connection to the CIB manager");
	return;
	}

	/* At most five attempts; the first is immediate and each later one is
	* preceded by a sleep one second longer than the last (1s up to 4s)
	*/
	do {
	sleep(retries);
	rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
	} while (rc == -ENOTCONN && ++retries < 5);

	if (rc != pcmk_ok) {
	crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);

	} else if (pcmk_ok !=
	cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) {
	crm_err("Could not set CIB notification callback");

	} else {
	/* The query's return value is used as the call ID for the callback.
	* NOTE(review): it is not checked for failure before being registered
	*/
	rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
	cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
	init_cib_cache_cb);
	cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
	crm_info("Watching for stonith topology changes");
	}
	}

	/* IPC server event handlers; handed to stonith_ipc_server_init() in main() */
	struct qb_ipcs_service_handlers ipc_callbacks = {
	.connection_accept = st_ipc_accept,
	.connection_created = st_ipc_created,
	.msg_process = st_ipc_dispatch,
	.connection_closed = st_ipc_closed,
	.connection_destroyed = st_ipc_destroy
	};

	/*!
	* \internal
	* \brief Callback for peer status changes
	*
	* \param[in] type What changed
	* \param[in] node What peer had the change
	* \param[in] data Previous value of what changed
	*
	* For any change other than a process change, on a node not flagged as a
	* remote node, a "poke" message is broadcast to the cluster.
	*/
	static void
	st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
	{
	if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) {
	- xmlNode *query = NULL;
	-
	- if (node->id && node->uname) {
	- g_hash_table_insert(known_peer_names, GUINT_TO_POINTER(node->id), strdup(node->uname));
	- }
	-
	/*
	* This is a hack until we can send to a nodeid and/or we fix node name lookups
	* These messages are ignored in stonith_peer_callback()
	*/
	- query = create_xml_node(NULL, "stonith_command");
	+ xmlNode *query = create_xml_node(NULL, "stonith_command");

	crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
	crm_xml_add(query, F_TYPE, T_STONITH_NG);
	crm_xml_add(query, F_STONITH_OPERATION, "poke");

	crm_debug("Broadcasting our uname because of node %u", node->id);
	send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);

	free_xml(query);
	}
	}

	/*!
	* \internal
	* \brief Entry point of the fencing daemon
	*
	* Parses the command line, prints resource-agent metadata and exits if the
	* single argument "metadata" was given, and otherwise connects to the
	* cluster (and the CIB unless disabled), initializes the device and
	* topology lists, starts the IPC server and runs the main loop.
	*
	* \param[in] argc Number of command-line arguments
	* \param[in] argv Command-line arguments
	*
	* \return Exit status
	*/
	int
	main(int argc, char **argv)
	{
	int flag;
	int lpc = 0;
	int argerr = 0;
	int option_index = 0;
	/* NOTE(review): not initialized here; only some members are assigned
	* below, and only when SUPPORT_COROSYNC is set - confirm that
	* crm_cluster_connect() does not read the others
	*/
	crm_cluster_t cluster;
	/* Actions for which per-action override parameters are advertised in the metadata */
	const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" };

	crm_log_preinit(NULL, argc, argv);
	/* NOTE(review): this description talks about summarizing cluster state,
	* which does not look like a description of this daemon - confirm
	*/
	crm_set_options(NULL, "mode [options]", long_options,
	"Provides a summary of cluster's current state."
	"\n\nOutputs varying levels of detail in a number of different formats.\n");

	while (1) {
	flag = crm_get_option(argc, argv, &option_index);
	if (flag == -1) {
	break;
	}

	switch (flag) {
	case 'V':
	crm_bump_log_level(argc, argv);
	break;
	case 'l':
	crm_add_logfile(optarg);
	break;
	case 's':
	stand_alone = TRUE;
	break;
	case 'c':
	stand_alone = FALSE;
	no_cib_connect = TRUE;
	break;
	case '$':
	case '?':
	crm_help(flag, CRM_EX_OK);
	break;
	default:
	++argerr;
	break;
	}
	}

	/* "metadata" as the only remaining argument: describe the parameters
	* common to all stonith-class resources, then exit
	*/
	if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
	printf("<?xml version=\"1.0\"?><!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n");
	printf("<resource-agent name=\"pacemaker-fenced\">\n");
	printf(" <version>1.0</version>\n");
	printf(" <longdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources"
	" and used by Pacemaker's fence daemon, formerly known as stonithd</longdesc>\n");
	printf(" <shortdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources</shortdesc>\n");
	printf(" <parameters>\n");

	#if 0
	// priority is not implemented yet
	printf(" <parameter name=\"priority\" unique=\"0\">\n");
	printf(" <shortdesc lang=\"en\">Devices that are not in a topology "
	"are tried in order of highest to lowest integer priority</shortdesc>\n");
	printf(" <content type=\"integer\" default=\"0\"/>\n");
	printf(" </parameter>\n");
	#endif

	printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTARG);
	printf
	(" <shortdesc lang=\"en\">Advanced use only: An alternate parameter to supply instead of 'port'</shortdesc>\n");
	printf
	(" <longdesc lang=\"en\">Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
	"Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
	"A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
	" </longdesc>\n");
	printf(" <content type=\"string\" default=\"port\"/>\n");
	printf(" </parameter>\n");

	printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTMAP);
	printf
	(" <shortdesc lang=\"en\">A mapping of host names to ports numbers for devices that do not support host names.</shortdesc>\n");
	printf
	(" <longdesc lang=\"en\">Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2</longdesc>\n");
	printf(" <content type=\"string\" default=\"\"/>\n");
	printf(" </parameter>\n");

	printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTLIST);
	printf
	(" <shortdesc lang=\"en\">A list of machines controlled by this device (Optional unless %s=static-list).</shortdesc>\n",
	STONITH_ATTR_HOSTCHECK);
	printf(" <content type=\"string\" default=\"\"/>\n");
	printf(" </parameter>\n");

	printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTCHECK);
	printf
	(" <shortdesc lang=\"en\">How to determine which machines are controlled by the device.</shortdesc>\n");
	printf(" <longdesc lang=\"en\">Allowed values: dynamic-list "
	"(query the device via the 'list' command), static-list "
	"(check the " STONITH_ATTR_HOSTLIST " attribute), status "
	"(query the device via the 'status' command), none (assume "
	"every device can fence every machine)</longdesc>\n");
	printf(" <content type=\"string\" default=\"dynamic-list\"/>\n");
	printf(" </parameter>\n");

	printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_DELAY_MAX);
	printf
	(" <shortdesc lang=\"en\">Enable a random delay for stonith actions and specify the maximum of random delay.</shortdesc>\n");
	printf
	(" <longdesc lang=\"en\">This prevents double fencing when using slow devices such as sbd.\n"
	"Use this to enable a random delay for stonith actions.\n"
	"The overall delay is derived from this random delay value adding a static delay so that the sum is kept below the maximum delay.</longdesc>\n");
	printf(" <content type=\"time\" default=\"0s\"/>\n");
	printf(" </parameter>\n");

	printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_DELAY_BASE);
	printf
	(" <shortdesc lang=\"en\">Enable a base delay for stonith actions and specify base delay value.</shortdesc>\n");
	printf
	(" <longdesc lang=\"en\">This prevents double fencing when different delays are configured on the nodes.\n"
	"Use this to enable a static delay for stonith actions.\n"
	"The overall delay is derived from a random delay value adding this static delay so that the sum is kept below the maximum delay.</longdesc>\n");
	printf(" <content type=\"time\" default=\"0s\"/>\n");
	printf(" </parameter>\n");

	printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_ACTION_LIMIT);
	printf
	(" <shortdesc lang=\"en\">The maximum number of actions can be performed in parallel on this device</shortdesc>\n");
	printf
	(" <longdesc lang=\"en\">Cluster property concurrent-fencing=true needs to be configured first.\n"
	"Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.</longdesc>\n");
	printf(" <content type=\"integer\" default=\"1\"/>\n");
	printf(" </parameter>\n");


	/* Emit the action, timeout and retries override parameters for each action */
	for (lpc = 0; lpc < DIMOF(actions); lpc++) {
	printf(" <parameter name=\"pcmk_%s_action\" unique=\"0\">\n", actions[lpc]);
	printf
	(" <shortdesc lang=\"en\">Advanced use only: An alternate command to run instead of '%s'</shortdesc>\n",
	actions[lpc]);
	printf
	(" <longdesc lang=\"en\">Some devices do not support the standard commands or may provide additional ones.\n"
	"Use this to specify an alternate, device-specific, command that implements the '%s' action.</longdesc>\n",
	actions[lpc]);
	printf(" <content type=\"string\" default=\"%s\"/>\n", actions[lpc]);
	printf(" </parameter>\n");

	printf(" <parameter name=\"pcmk_%s_timeout\" unique=\"0\">\n", actions[lpc]);
	printf
	(" <shortdesc lang=\"en\">Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout</shortdesc>\n",
	actions[lpc]);
	printf
	(" <longdesc lang=\"en\">Some devices need much more/less time to complete than normal.\n"
	"Use this to specify an alternate, device-specific, timeout for '%s' actions.</longdesc>\n",
	actions[lpc]);
	printf(" <content type=\"time\" default=\"60s\"/>\n");
	printf(" </parameter>\n");

	printf(" <parameter name=\"pcmk_%s_retries\" unique=\"0\">\n", actions[lpc]);
	printf
	(" <shortdesc lang=\"en\">Advanced use only: The maximum number of times to retry the '%s' command within the timeout period</shortdesc>\n",
	actions[lpc]);
	printf(" <longdesc lang=\"en\">Some devices do not support multiple connections."
	" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
	" Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
	"</longdesc>\n", actions[lpc]);
	printf(" <content type=\"integer\" default=\"2\"/>\n");
	printf(" </parameter>\n");
	}

	printf(" </parameters>\n");
	printf("</resource-agent>\n");
	return CRM_EX_OK;
	}

	/* Any other leftover argument is a usage error */
	if (optind != argc) {
	++argerr;
	}

	if (argerr) {
	crm_help('?', CRM_EX_USAGE);
	}

	crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
	mainloop_add_signal(SIGTERM, stonith_shutdown);

	crm_peer_init();
	- known_peer_names = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);

	if (stand_alone == FALSE) {

	if (is_corosync_cluster()) {
	#if SUPPORT_COROSYNC
	cluster.destroy = stonith_peer_cs_destroy;
	cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback;
	cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
	#endif
	}

	crm_set_status_callback(&st_peer_update_callback);

	if (crm_cluster_connect(&cluster) == FALSE) {
	crm_crit("Cannot sign in to the cluster... terminating");
	crm_exit(CRM_EX_FATAL);
	}
	stonith_our_uname = cluster.uname;
	stonith_our_uuid = cluster.uuid;

	if (no_cib_connect == FALSE) {
	setup_cib();
	}

	} else {
	stonith_our_uname = strdup("localhost");
	}

	init_device_list();
	init_topology_list();

	/* With a watchdog timeout in effect, register an internal "watchdog"
	* device whose host list contains only this node
	*/
	if(stonith_watchdog_timeout_ms > 0) {
	xmlNode *xml;
	stonith_key_value_t *params = NULL;

	params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);

	xml = create_device_registration_xml("watchdog", st_namespace_internal,
	STONITH_WATCHDOG_AGENT, params,
	NULL);
	stonith_device_register(xml, NULL, FALSE);

	stonith_key_value_freeall(params, 1, 1);
	free_xml(xml);
	}

	stonith_ipc_server_init(&ipcs, &ipc_callbacks);

	/* Create the mainloop and run it... */
	mainloop = g_main_loop_new(NULL, FALSE);
	crm_info("Starting %s mainloop", crm_system_name);
	g_main_loop_run(mainloop);

	stonith_cleanup();
	return crm_exit(CRM_EX_OK);
	}
	diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
	index 7a51e95f65..3194e358cf 100644
	--- a/daemons/fenced/pacemaker-fenced.h
	+++ b/daemons/fenced/pacemaker-fenced.h
	@@ -1,268 +1,264 @@
	/*
	* Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU General Public License version 2
	* or later (GPLv2+) WITHOUT ANY WARRANTY.
	*/

	#include <crm/common/mainloop.h>

	/*!
	 * \internal
	 * \brief Check whether a target was fenced in the last few seconds
	 *
	 * \param[in] tolerance  Number of seconds to look back in time
	 * \param[in] target     The node to search for
	 * \param[in] action     The action we want to match
	 *
	 * \retval FALSE  No match
	 * \retval TRUE   A fencing operation took place in the last \p tolerance
	 *                seconds
	 */
	gboolean stonith_check_fence_tolerance(int tolerance, const char *target,
	                                       const char *action);

	/* Bit flags, one per st_device_supports_* capability */
	enum st_device_flags {
	    st_device_supports_list   = (1 << 0),
	    st_device_supports_status = (1 << 1),
	    st_device_supports_reboot = (1 << 2),
	};

	typedef struct stonith_device_s {
	char *id;
	char *agent;
	char *namespace;

	/! list of actions that must execute on the target node. Used for unfencing /
	char *on_target_actions;
	GListPtr targets;
	time_t targets_age;
	gboolean has_attr_map;
	/* should nodeid parameter for victim be included in agent arguments */
	gboolean include_nodeid;
	/* whether the cluster should automatically unfence nodes with the device */
	gboolean automatic_unfencing;
	guint priority;

	enum st_device_flags flags;

	GHashTable *params;
	GHashTable *aliases;
	GList *pending_ops;
	crm_trigger_t *work;
	xmlNode *agent_metadata;

	/*! A verified device is one that has contacted the
	* agent successfully to perform a monitor operation */
	gboolean verified;

	gboolean cib_registered;
	gboolean api_registered;
	} stonith_device_t;

	/* These values are used to index certain arrays by "phase". Usually an
	* operation has only one "phase", so phase is always zero. However, some
	* reboots are remapped to "off" then "on", in which case "reboot" will be
	* phase 0, "off" will be phase 1 and "on" will be phase 2.
	*/
	enum st_remap_phase {
	    st_phase_requested, /* 0: the operation as requested */
	    st_phase_off,       /* 1: "off" step of a remapped reboot */
	    st_phase_on,        /* 2: "on" step of a remapped reboot */
	    st_phase_max        /* 3: count of the phases above */
	};

	typedef struct remote_fencing_op_s {
	/* The unique id associated with this operation */
	char *id;
	/! The node this operation will fence /
	char *target;
	/! The fencing action to perform on the target. (reboot, on, off) /
	char *action;

	/! When was the fencing action recorded (seconds since epoch) /
	time_t created;

	/! Marks if the final notifications have been sent to local stonith clients. /
	gboolean notify_sent;
	/! The number of query replies received /
	guint replies;
	/! The number of query replies expected /
	guint replies_expected;
	/! Does this node own control of this operation /
	gboolean owner;
	/! After query is complete, This the high level timer that expires the entire operation /
	guint op_timer_total;
	/*! This timer expires the current fencing request. Many fencing
	* requests may exist in a single operation */
	guint op_timer_one;
	/*! This timer expires the query request sent out to determine
	* what nodes are contain what devices, and who those devices can fence */
	guint query_timer;
	/*! This is the default timeout to use for each fencing device if no
	* custom timeout is received in the query. */
	gint base_timeout;
	/*! This is the calculated total timeout an operation can take before
	* expiring. This is calculated by adding together all the timeout
	* values associated with the devices this fencing operation may call */
	gint total_timeout;

	/*! Delegate is the node being asked to perform a fencing action
	* on behalf of the node that owns the remote operation. Some operations
	* will involve multiple delegates. This value represents the final delegate
	* that is used. */
	char *delegate;
	/! The point at which the remote operation completed /
	time_t completed;
	/! The stonith_call_options associated with this remote operation /
	long long call_options;

	/*! The current state of the remote operation. This indicates
	* what stage the op is in, query, exec, done, duplicate, failed. */
	enum op_state state;
	/! The node that owns the remote operation /
	char *originator;
	/! The local client id that initiated the fencing request /
	char *client_id;
	/! The client's call_id that initiated the fencing request /
	int client_callid;
	/! The name of client that initiated the fencing request /
	char *client_name;
	/! List of the received query results for all the nodes in the cpg group /
	GListPtr query_results;
	/! The original request that initiated the remote stonith operation /
	xmlNode *request;

	/! The current topology level being executed /
	guint level;
	/! The current operation phase being executed /
	enum st_remap_phase phase;

	/! Devices with automatic unfencing (always run if "on" requested, never if remapped) /
	GListPtr automatic_list;
	/! List of all devices at the currently executing topology level /
	GListPtr devices_list;
	/! Current entry in the topology device list /
	GListPtr devices;

	/*! List of duplicate operations attached to this operation. Once this operation
	* completes, the duplicate operations will be closed out as well. */
	GListPtr duplicates;

	} remote_fencing_op_t;

	/* Bit flags; note that bits 1 and 3 are not assigned */
	enum st_callback_flags {
	    st_callback_unknown        = 0,
	    st_callback_notify_fence   = (1 << 0),
	    st_callback_device_add     = (1 << 2),
	    st_callback_device_del     = (1 << 4),
	    st_callback_notify_history = (1 << 5)
	};

	/*
	* Complex fencing requirements are specified via fencing topologies.
	* A topology consists of levels; each level is a list of fencing devices.
	* Topologies are stored in a hash table by node name. When a node needs to be
	* fenced, if it has an entry in the topology table, the levels are tried
	* sequentially, and the devices in each level are tried sequentially.
	* Fencing is considered successful as soon as any level succeeds;
	* a level is considered successful if all its devices succeed.
	* Essentially, all devices at a given level are "and-ed" and the
	* levels are "or-ed".
	*
	* This structure is used for the topology table entries.
	* Topology levels start from 1, so levels[0] is unused and always NULL.
	*/
	typedef struct stonith_topology_s {
	int kind;

	/! Node name regex or attribute name=value for which topology applies /
	char *target;
	char *target_value;
	char *target_pattern;
	char *target_attribute;

	/! Names of fencing devices at each topology level /
	GListPtr levels[ST_LEVEL_MAX];

	} stonith_topology_t;

	/* Set-up and tear-down of the daemon's tables */
	void init_device_list(void);
	void free_device_list(void);
	void init_topology_list(void);
	void free_topology_list(void);
	void free_stonith_remote_op_list(void);
	void init_stonith_remote_op_hash_table(GHashTable **table);
	void free_metadata_cache(void);

	long long get_stonith_flag(const char *name);

	void stonith_command(crm_client_t * client, uint32_t id, uint32_t flags,
	                     xmlNode * op_request, const char *remote_peer);

	int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib);

	int stonith_device_remove(const char *id, gboolean from_cib);

	char *stonith_level_key(xmlNode * msg, int mode);
	int stonith_level_kind(xmlNode * msg);
	int stonith_level_register(xmlNode * msg, char **desc);

	int stonith_level_remove(xmlNode * msg, char **desc);

	stonith_topology_t *find_topology_for_host(const char *host);

	void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
	                    gboolean from_peer);

	xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data,
	                                 int rc);

	void
	do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);

	void do_stonith_notify(int options, const char *type, int result, xmlNode * data);
	void do_stonith_notify_device(int options, const char *op, int rc, const char *desc);
	void do_stonith_notify_level(int options, const char *op, int rc, const char *desc);

	remote_fencing_op_t *initiate_remote_stonith_op(crm_client_t * client, xmlNode * request,
	                                                gboolean manual_ack);

	int process_remote_stonith_exec(xmlNode * msg);

	int process_remote_stonith_query(xmlNode * msg);

	void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);

	int stonith_fence_history(xmlNode *msg, xmlNode **output,
	                          const char *remote_peer, int options);

	void stonith_fence_history_trim(void);

	bool fencing_peer_active(crm_node_t *peer);

	int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);

	gboolean string_in_list(GListPtr list, const char *item);

	gboolean node_has_attr(const char *node, const char *name, const char *value);

	/* done_cb is a pointer to the function to call with the command's result */
	void
	schedule_internal_command(const char *origin,
	                          stonith_device_t * device,
	                          const char *action,
	                          const char *victim,
	                          int timeout,
	                          void *internal_user_data,
	                          void (*done_cb) (GPid pid, int rc, const char *output,
	                                           gpointer user_data));

	-char *stonith_get_peer_name(unsigned int nodeid);
	-
	extern char *stonith_our_uname;
	extern gboolean stand_alone;
	extern GHashTable *device_list;
	extern GHashTable *topology;
	extern long stonith_watchdog_timeout_ms;

	-extern GHashTable *known_peer_names;
	-
	extern GHashTable *stonith_remote_op_list;
	diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
	index 369f22700c..12bf41ab02 100644
	--- a/include/crm/cluster/internal.h
	+++ b/include/crm/cluster/internal.h
	@@ -1,332 +1,335 @@
	/*
	* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#ifndef CRM_CLUSTER_INTERNAL__H
	# define CRM_CLUSTER_INTERNAL__H

	# include <crm/cluster.h>

	typedef struct crm_ais_host_s AIS_Host;
	typedef struct crm_ais_msg_s AIS_Message;

	/* Identifies one endpoint of a message; used for both the "host" and
	 * "sender" members of struct crm_ais_msg_s. Declared packed, so member
	 * order and types determine the exact byte layout.
	 */
	struct crm_ais_host_s {
	uint32_t id;
	uint32_t pid;
	gboolean local;
	enum crm_ais_msg_types type;
	uint32_t size;
	char uname[MAX_NAME];

	} __attribute__ ((packed));

	/* Message header followed by a variable-length data area. Declared packed;
	 * the leading header member is additionally aligned to 8 bytes.
	 */
	struct crm_ais_msg_s {
	cs_ipc_header_response_t header __attribute__ ((aligned(8)));
	uint32_t id;
	/* Selects which of compressed_size/size ais_data_len() reports */
	gboolean is_compressed;

	AIS_Host host;
	AIS_Host sender;

	uint32_t size;
	uint32_t compressed_size;
	/* 584 bytes */
	/* Zero-length trailing array: presumably the payload starts here, with its
	 * length given by ais_data_len() -- TODO confirm against the senders */
	char data[0];

	} __attribute__ ((packed));

	/* INDENT-OFF */
	enum crm_proc_flag {
	    /* Note that "none" is bit 0 (value 1), not zero */
	    crm_proc_none       = 0x00000001,

	    // Daemons (ascending bit order)
	    crm_proc_execd      = 0x00000010,
	    crm_proc_based      = 0x00000100,
	    crm_proc_controld   = 0x00000200,
	    crm_proc_attrd      = 0x00001000,
	    crm_proc_schedulerd = 0x00010000,
	    crm_proc_fenced     = 0x00100000,

	    // Cluster layers
	    crm_proc_cpg        = 0x04000000,
	};
	/* INDENT-ON */

	/*!
	 * \internal
	 * \brief Return the process bit corresponding to the current cluster stack
	 *
	 * \return crm_proc_cpg when get_cluster_type() reports a corosync cluster,
	 *         otherwise crm_proc_none (which is a nonzero flag value)
	 */
	static inline uint32_t
	crm_get_cluster_proc(void)
	{
	    switch (get_cluster_type()) {
	        case pcmk_cluster_corosync:
	            return crm_proc_cpg;

	        default:
	            break;
	    }
	    return crm_proc_none;
	}

	/* Map a single process flag to its display name; any value that is not
	 * exactly one of the listed flags yields "unknown".
	 */
	static inline const char *
	peer2text(enum crm_proc_flag proc)
	{
	    switch (proc) {
	        case crm_proc_none:       return "none";
	        case crm_proc_based:      return "pacemaker-based";
	        case crm_proc_controld:   return "pacemaker-controld";
	        case crm_proc_schedulerd: return "pacemaker-schedulerd";
	        case crm_proc_execd:      return "pacemaker-execd";
	        case crm_proc_attrd:      return "pacemaker-attrd";
	        case crm_proc_fenced:     return "pacemaker-fenced";
	        case crm_proc_cpg:        return "corosync-cpg";
	    }
	    return "unknown";
	}

	/* Describe a message endpoint: "local" when the local flag is set, the
	 * uname when size is nonzero, and "<all>" otherwise.
	 */
	static inline const char *
	ais_dest(const AIS_Host *host)
	{
	    if (host->local) {
	        return "local";
	    }
	    return (host->size > 0) ? host->uname : "<all>";
	}

	/* Length of a message's data: compressed_size if is_compressed is set,
	 * otherwise size. The argument is parenthesized so that any pointer
	 * expression (e.g. &obj) can be passed; note it is evaluated twice.
	 */
	# define ais_data_len(msg) ((msg)->is_compressed ? (msg)->compressed_size : (msg)->size)

	/*
	typedef enum {
	CS_OK = 1,
	CS_ERR_LIBRARY = 2,
	CS_ERR_VERSION = 3,
	CS_ERR_INIT = 4,
	CS_ERR_TIMEOUT = 5,
	CS_ERR_TRY_AGAIN = 6,
	CS_ERR_INVALID_PARAM = 7,
	CS_ERR_NO_MEMORY = 8,
	CS_ERR_BAD_HANDLE = 9,
	CS_ERR_BUSY = 10,
	CS_ERR_ACCESS = 11,
	CS_ERR_NOT_EXIST = 12,
	CS_ERR_NAME_TOO_LONG = 13,
	CS_ERR_EXIST = 14,
	CS_ERR_NO_SPACE = 15,
	CS_ERR_INTERRUPT = 16,
	CS_ERR_NAME_NOT_FOUND = 17,
	CS_ERR_NO_RESOURCES = 18,
	CS_ERR_NOT_SUPPORTED = 19,
	CS_ERR_BAD_OPERATION = 20,
	CS_ERR_FAILED_OPERATION = 21,
	CS_ERR_MESSAGE_ERROR = 22,
	CS_ERR_QUEUE_FULL = 23,
	CS_ERR_QUEUE_NOT_AVAILABLE = 24,
	CS_ERR_BAD_FLAGS = 25,
	CS_ERR_TOO_BIG = 26,
	CS_ERR_NO_SECTIONS = 27,
	CS_ERR_CONTEXT_NOT_FOUND = 28,
	CS_ERR_TOO_MANY_GROUPS = 30,
	CS_ERR_SECURITY = 100
	} cs_error_t;
	*/
	/* Map a corosync cs_error_t value to a short description.
	 *
	 * Returns "unknown" for any unlisted value, and for every value when built
	 * without SUPPORT_COROSYNC.
	 */
	static inline const char *
	ais_error2text(int error)
	{
	    const char *text = "unknown";

	# if SUPPORT_COROSYNC
	    switch (error) {
	        case CS_OK:                      text = "OK"; break;
	        case CS_ERR_LIBRARY:             text = "Library error"; break;
	        case CS_ERR_VERSION:             text = "Version error"; break;
	        case CS_ERR_INIT:                text = "Initialization error"; break;
	        case CS_ERR_TIMEOUT:             text = "Timeout"; break;
	        case CS_ERR_TRY_AGAIN:           text = "Try again"; break;
	        case CS_ERR_INVALID_PARAM:       text = "Invalid parameter"; break;
	        case CS_ERR_NO_MEMORY:           text = "No memory"; break;
	        case CS_ERR_BAD_HANDLE:          text = "Bad handle"; break;
	        case CS_ERR_BUSY:                text = "Busy"; break;
	        case CS_ERR_ACCESS:              text = "Access error"; break;
	        case CS_ERR_NOT_EXIST:           text = "Doesn't exist"; break;
	        case CS_ERR_NAME_TOO_LONG:       text = "Name too long"; break;
	        case CS_ERR_EXIST:               text = "Exists"; break;
	        case CS_ERR_NO_SPACE:            text = "No space"; break;
	        case CS_ERR_INTERRUPT:           text = "Interrupt"; break;
	        case CS_ERR_NAME_NOT_FOUND:      text = "Name not found"; break;
	        case CS_ERR_NO_RESOURCES:        text = "No resources"; break;
	        case CS_ERR_NOT_SUPPORTED:       text = "Not supported"; break;
	        case CS_ERR_BAD_OPERATION:       text = "Bad operation"; break;
	        case CS_ERR_FAILED_OPERATION:    text = "Failed operation"; break;
	        case CS_ERR_MESSAGE_ERROR:       text = "Message error"; break;
	        case CS_ERR_QUEUE_FULL:          text = "Queue full"; break;
	        case CS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break;
	        case CS_ERR_BAD_FLAGS:           text = "Bad flags"; break;
	        case CS_ERR_TOO_BIG:             text = "Too big"; break;
	        case CS_ERR_NO_SECTIONS:         text = "No sections"; break;
	        /* The remaining codes are in the cs_error_t list quoted in the
	         * comment preceding this function but previously had no case here */
	        case CS_ERR_CONTEXT_NOT_FOUND:   text = "Context not found"; break;
	        case CS_ERR_TOO_MANY_GROUPS:     text = "Too many groups"; break;
	        case CS_ERR_SECURITY:            text = "Security error"; break;
	        default:
	            break;
	    }
	# endif
	    return text;
	}

	/* Map a message type to its short text name; crm_msg_none and any
	 * unlisted value both yield "unknown".
	 */
	static inline const char *
	msg_type2text(enum crm_ais_msg_types type)
	{
	    switch (type) {
	        case crm_msg_none:       return "unknown";
	        case crm_msg_ais:        return "ais";
	        case crm_msg_cib:        return "cib";
	        case crm_msg_crmd:       return "crmd";
	        case crm_msg_pe:         return "pengine";
	        case crm_msg_te:         return "tengine";
	        case crm_msg_lrmd:       return "lrmd";
	        case crm_msg_attrd:      return "attrd";
	        case crm_msg_stonithd:   return "stonithd";
	        case crm_msg_stonith_ng: return "stonith-ng";
	    }
	    return "unknown";
	}

	gboolean check_message_sanity(const AIS_Message * msg, const char *data);

	/* Declarations available only when built with corosync support */
	# if SUPPORT_COROSYNC

	gboolean send_cpg_iov(struct iovec * iov);

	char *get_corosync_uuid(crm_node_t *peer);
	char *corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid);
	char *corosync_cluster_name(void);
	int corosync_cmap_has_config(const char *prefix);

	gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent);

	gboolean send_cluster_message_cs(xmlNode * msg, gboolean local,
	                                 crm_node_t * node, enum crm_ais_msg_types dest);

	enum cluster_type_e find_corosync_variant(void);

	void terminate_cs_connection(crm_cluster_t * cluster);
	gboolean init_cs_connection(crm_cluster_t * cluster);
	gboolean init_cs_connection_once(crm_cluster_t * cluster);
	# endif

	crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer,
	                                 uint32_t flag, const char *status);
	crm_node_t *crm_update_peer_state(const char *source, crm_node_t * node,
	                                  const char *state, int membership);

	void crm_update_peer_uname(crm_node_t *node, const char *uname);
	void crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected);
	void crm_reap_unseen_nodes(uint64_t ring_id);

	gboolean cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
	                                void (*destroy) (gpointer));

	gboolean node_name_is_valid(const char *key, const char *name);

	crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags);
	crm_node_t * crm_find_peer(unsigned int id, const char *uname);

	+void crm_peer_caches_refresh(xmlNode *cib);
	+crm_node_t *crm_find_known_peer_full(unsigned int id, const char *uname, int flags);
	+
	#endif
	diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
	index a487e762a8..c364f39cda 100644
	--- a/lib/cluster/membership.c
	+++ b/lib/cluster/membership.c
	@@ -1,1003 +1,1180 @@
	/*
	* Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
	*
	* This source code is licensed under the GNU Lesser General Public License
	* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
	*/

	#include <crm_internal.h>

	#ifndef _GNU_SOURCE
	# define _GNU_SOURCE
	#endif

	#include <sys/param.h>
	#include <sys/types.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <string.h>
	#include <glib.h>
	#include <crm/common/ipc.h>
	#include <crm/cluster/internal.h>
	#include <crm/msg_xml.h>
	#include <crm/stonith-ng.h>

	#define s_if_plural(i) (((i) == 1)? "" : "s")

	/* The peer cache remembers cluster nodes that have been seen.
	* This is managed mostly automatically by libcluster, based on
	* cluster membership events.
	*
	* Because cluster nodes can have conflicting names or UUIDs,
	* the hash table key is a uniquely generated ID.
	*/
	GHashTable *crm_peer_cache = NULL;

	/*
	* The remote peer cache tracks pacemaker_remote nodes. While the
	* value has the same type as the peer cache's, it is tracked separately for
	* three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
	* so the name (which is also the UUID) is used as the hash table key; there
	* is no equivalent of membership events, so management is not automatic; and
	* most users of the peer cache need to exclude pacemaker_remote nodes.
	*
	* That said, using a single cache would be more logical and less error-prone,
	* so it would be a good idea to merge them one day.
	*
	* libcluster provides two avenues for populating the cache:
	* crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it,
	* while crm_remote_peer_cache_refresh() populates it via the CIB.
	*/
	GHashTable *crm_remote_peer_cache = NULL;

	+GHashTable *crm_known_peer_cache = NULL;
	+
	unsigned long long crm_peer_seq = 0;
	gboolean crm_have_quorum = FALSE;
	static gboolean crm_autoreap = TRUE;

	/* Number of entries in the remote peer cache, or 0 if it was never created */
	int
	crm_remote_peer_cache_size(void)
	{
	    return (crm_remote_peer_cache != NULL)
	           ? g_hash_table_size(crm_remote_peer_cache) : 0;
	}

	/*!
	 * \brief Get a remote node peer cache entry, creating it if necessary
	 *
	 * \param[in] node_name  Name of remote node
	 *
	 * \return Cache entry for node on success, NULL (and set errno) otherwise
	 *
	 * \note errno is set to EINVAL if \p node_name is NULL and to ENOMEM if
	 *       memory could not be allocated.
	 * \note When creating a new entry, this will leave the node state undetermined,
	 *       so the caller should also call crm_update_peer_state() if the state is
	 *       known.
	 */
	crm_node_t *
	crm_remote_peer_get(const char *node_name)
	{
	    crm_node_t *node;

	    if (node_name == NULL) {
	        errno = EINVAL; /* errno values are positive */
	        return NULL;
	    }

	    /* Return existing cache entry if one exists */
	    node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
	    if (node) {
	        return node;
	    }

	    /* Allocate a new entry */
	    node = calloc(1, sizeof(crm_node_t));
	    if (node == NULL) {
	        errno = ENOMEM;
	        return NULL;
	    }

	    /* Populate the essential information */
	    node->flags = crm_remote_node;
	    node->uuid = strdup(node_name);
	    if (node->uuid == NULL) {
	        free(node);
	        errno = ENOMEM;
	        return NULL;
	    }

	    /* Add the new entry to the cache; the uuid string doubles as the key */
	    g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
	    crm_trace("added %s to remote cache", node_name);

	    /* Update the entry's uname, ensuring peer status callbacks are called */
	    crm_update_peer_uname(node, node_name);
	    return node;
	}

	void
	crm_remote_peer_cache_remove(const char *node_name)
	{
	if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
	crm_trace("removed %s from remote peer cache", node_name);
	}
	}

	/*!
	 * \internal
	 * \brief Return node status based on a CIB status entry
	 *
	 * \param[in] node_state  XML of node state
	 *
	 * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is present and false in
	 *         \p node_state, CRM_NODE_MEMBER otherwise
	 * \note Unlike most boolean XML attributes, this one defaults to true, for
	 *       backward compatibility with older controllers that don't set it.
	 */
	static const char *
	remote_state_from_cib(xmlNode *node_state)
	{
	    const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);

	    /* A missing attribute counts as "true" */
	    if ((in_cluster == NULL) || crm_is_true(in_cluster)) {
	        return CRM_NODE_MEMBER;
	    }
	    return CRM_NODE_LOST;
	}

	/* user data for looping through remote node xpath searches */
	struct refresh_data {
	const char field; / XML attribute to check for node name */
	gboolean has_state; /* whether to update node state based on XML */
	};

	/*!
	 * \internal
	 * \brief Process one pacemaker_remote node xpath search result
	 *
	 * \param[in] result     XML search result
	 * \param[in] user_data  what to look for in the XML (struct refresh_data *)
	 */
	static void
	remote_cache_refresh_helper(xmlNode *result, void *user_data)
	{
	    struct refresh_data *data = user_data;
	    const char *remote = crm_element_value(result, data->field);
	    const char *state = NULL;
	    crm_node_t *node;

	    CRM_CHECK(remote != NULL, return);

	    /* Determine node's state, if the result has it */
	    if (data->has_state) {
	        state = remote_state_from_cib(result);
	    }

	    /* Check whether cache already has entry for node */
	    node = g_hash_table_lookup(crm_remote_peer_cache, remote);

	    if (node == NULL) {
	        /* Node is not in cache, so add a new entry for it */
	        node = crm_remote_peer_get(remote);
	        CRM_ASSERT(node);
	        if (state) {
	            crm_update_peer_state(__FUNCTION__, node, state, 0);
	        }

	    } else if (is_set(node->flags, crm_node_dirty)) {
	        /* Node is in cache and hasn't been updated already, so mark it clean */
	        clear_bit(node->flags, crm_node_dirty);
	        if (state) {
	            crm_update_peer_state(__FUNCTION__, node, state, 0);
	        }
	    }
	}

	/* g_hash_table_foreach() callback: set crm_node_dirty on the entry */
	static void
	mark_dirty(gpointer key, gpointer value, gpointer user_data)
	{
	    crm_node_t *entry = value;

	    set_bit(entry->flags, crm_node_dirty);
	}

	/* Hash table predicate: whether the entry still has crm_node_dirty set */
	static gboolean
	is_dirty(gpointer key, gpointer value, gpointer user_data)
	{
	    const crm_node_t *entry = value;

	    return is_set(entry->flags, crm_node_dirty);
	}

	/* search string to find CIB resources entries for guest nodes */
	#define XPATH_GUEST_NODE_CONFIG \
	"//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
	"//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
	"[@name='" XML_RSC_ATTR_REMOTE_NODE "']"

	/* search string to find CIB resources entries for remote nodes */
	#define XPATH_REMOTE_NODE_CONFIG \
	"//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
	"[@type='remote'][@provider='pacemaker']"

	/* search string to find CIB node status entries for pacemaker_remote nodes */
	#define XPATH_REMOTE_NODE_STATUS \
	"//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
	"[@" XML_NODE_IS_REMOTE "='true']"

	/*!
	 * \brief Repopulate the remote peer cache based on CIB XML
	 *
	 * \param[in] cib  CIB XML to parse
	 */
	void
	crm_remote_peer_cache_refresh(xmlNode *cib)
	{
	    /* Searches to run, in this order:
	     *
	     * 1. Guest nodes and remote nodes in the status section (has state).
	     * 2. Guest nodes in the configuration section.
	     * 3. Remote nodes in the configuration section.
	     *
	     * The configuration searches are needed because a node may have just
	     * been added and not have a status entry yet. In that case, the cached
	     * node state will be left NULL, so that the peer status callback isn't
	     * called until we're sure the node started successfully.
	     */
	    static const struct {
	        const char *xpath;
	        const char *field;
	        gboolean has_state;
	    } searches[] = {
	        { XPATH_REMOTE_NODE_STATUS, "id",    TRUE  },
	        { XPATH_GUEST_NODE_CONFIG,  "value", FALSE },
	        { XPATH_REMOTE_NODE_CONFIG, "id",    FALSE },
	    };
	    struct refresh_data data;
	    size_t lpc;

	    crm_peer_init();

	    /* First, we mark all existing cache entries as dirty,
	     * so that later we can remove any that weren't in the CIB.
	     * We don't empty the cache, because we need to detect changes in state.
	     */
	    g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);

	    for (lpc = 0; lpc < sizeof(searches) / sizeof(searches[0]); lpc++) {
	        data.field = searches[lpc].field;
	        data.has_state = searches[lpc].has_state;
	        crm_foreach_xpath_result(cib, searches[lpc].xpath,
	                                 remote_cache_refresh_helper, &data);
	    }

	    /* Remove all old cache entries that weren't seen in the CIB */
	    g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
	}

	/* Whether a node counts as an active cluster peer.
	 *
	 * NULL and remote nodes are never active; otherwise the answer comes from
	 * the corosync check when running on corosync. Any other cluster type is
	 * logged as an error and treated as inactive.
	 */
	gboolean
	crm_is_peer_active(const crm_node_t * node)
	{
	    /* remote nodes are never considered active members. This
	     * guarantees they will never be considered for DC membership. */
	    if ((node == NULL) || is_set(node->flags, crm_remote_node)) {
	        return FALSE;
	    }
	#if SUPPORT_COROSYNC
	    if (is_corosync_cluster()) {
	        return crm_is_corosync_peer_active(node);
	    }
	#endif
	    crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
	    return FALSE;
	}

	/* g_hash_table_foreach_remove() predicate: TRUE (remove) when the entry
	 * matches the search criteria in user_data and is not an active peer.
	 *
	 * A nonzero search id must equal the entry's id; with a zero search id the
	 * unames are compared instead.
	 */
	static gboolean
	crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
	{
	    crm_node_t *node = value;
	    crm_node_t *search = user_data;

	    if (search == NULL) {
	        return FALSE;
	    }
	    if (search->id && node->id != search->id) {
	        return FALSE;
	    }
	    if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
	        return FALSE;
	    }
	    if (crm_is_peer_active(value)) {
	        return FALSE;
	    }

	    crm_info("Removing node with name %s and id %u from membership cache",
	             (node->uname? node->uname : "unknown"), node->id);
	    return TRUE;
	}

	/*!
	 * \brief Remove all peer cache entries matching a node ID and/or uname
	 *
	 * \param[in] id    ID of node to remove (or 0 to ignore)
	 * \param[in] name  Uname of node to remove (or NULL to ignore)
	 *
	 * \return Number of cache entries removed
	 */
	guint
	reap_crm_member(uint32_t id, const char *name)
	{
	    int matches = 0;
	    crm_node_t search;
	    const char *name_label = "";
	    const char *name_text = "";

	    if (crm_peer_cache == NULL) {
	        crm_trace("Membership cache not initialized, ignoring purge request");
	        return 0;
	    }

	    /* Only the id and uname members are consulted by the predicate */
	    search.id = id;
	    search.uname = name ? strdup(name) : NULL;
	    matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);

	    if (search.uname) {
	        name_label = " and/or uname=";
	        name_text = search.uname;
	    }

	    if(matches) {
	        crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
	                   matches, s_if_plural(matches), search.id,
	                   name_label, name_text);

	    } else {
	        crm_info("No peers with id=%u%s%s to purge from the membership cache",
	                 search.id, name_label, name_text);
	    }

	    free(search.uname);
	    return matches;
	}

	/* g_hash_table_foreach() callback: increment the guint counter that
	 * user_data points to when the entry is an active peer.
	 */
	static void
	crm_count_peer(gpointer key, gpointer value, gpointer user_data)
	{
	    guint *count = user_data;
	    crm_node_t *node = value;

	    if (crm_is_peer_active(node)) {
	        /* Update the caller's counter, not the local pointer */
	        *count = *count + 1;
	    }
	}

	guint
	crm_active_peers(void)
	{
	guint count = 0;

	if (crm_peer_cache) {
	g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
	}
	return count;
	}

	/* Value destructor for the peer hash tables: free a crm_node_t and the
	 * strings it holds.
	 */
	static void
	destroy_crm_node(gpointer data)
	{
	    crm_node_t *peer = data;

	    /* Log before anything is freed, since the message uses the uname */
	    crm_trace("Destroying entry for node %u: %s", peer->id, peer->uname);

	    free(peer->expected);
	    free(peer->uuid);
	    free(peer->state);
	    free(peer->uname);
	    free(peer);
	}

	/* Create each peer cache hash table that does not exist yet.
	 *
	 * Every table is created only while its pointer is NULL, so calling this
	 * more than once does not replace an existing table. All tables use
	 * case-insensitive string hashing/equality and destroy_crm_node() as the
	 * value destructor.
	 */
	void
	crm_peer_init(void)
	{
	if (crm_peer_cache == NULL) {
	crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
	}

	if (crm_remote_peer_cache == NULL) {
	/* No key destructor: this table's key is not freed separately from its value */
	crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
	}
	+
	+ if (crm_known_peer_cache == NULL) {
	+ crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
	+ }
	}

	/* Destroy every peer cache hash table that exists and reset its pointer to
	 * NULL, logging each table's entry count at trace level beforehand.
	 */
	void
	crm_peer_destroy(void)
	{
	if (crm_peer_cache != NULL) {
	crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
	g_hash_table_destroy(crm_peer_cache);
	crm_peer_cache = NULL;
	}

	if (crm_remote_peer_cache != NULL) {
	crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
	g_hash_table_destroy(crm_remote_peer_cache);
	crm_remote_peer_cache = NULL;
	}
	+
	+ if (crm_known_peer_cache != NULL) {
	+ crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache));
	+ g_hash_table_destroy(crm_known_peer_cache);
	+ crm_known_peer_cache = NULL;
	+ }
	+
	}

	/* Client callback registered via crm_set_status_callback(); NULL until set */
	void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;

	/*!
	 * \brief Set a client function that will be called after peer status changes
	 *
	 * \param[in] dispatch  Pointer to function to use as callback
	 *
	 * \note Previously, client callbacks were responsible for peer cache
	 *       management. This is no longer the case, and client callbacks should do
	 *       only client-specific handling. Callbacks MUST NOT add or remove entries
	 *       in the peer caches.
	 */
	void
	crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
	{
	    crm_status_callback = dispatch;
	}

	/*!
	* \brief Tell the library whether to automatically reap lost nodes
	*
	* If TRUE (the default), calling crm_update_peer_proc() will also update the
	* peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and crm_update_peer_state()
	* will reap peers whose state changes to anything other than CRM_NODE_MEMBER.
	* Callers should leave this enabled unless they plan to manage the cache
	* separately on their own.
	*
	* \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable
	*/
	void
	crm_set_autoreap(gboolean autoreap)
	{
	/* Only records the setting in the file-scope flag; nothing is reaped here */
	crm_autoreap = autoreap;
	}

	static void crm_dump_peer_hash(int level, const char *caller)
	{
	GHashTableIter iter;
	const char *id = NULL;
	crm_node_t *node = NULL;

	g_hash_table_iter_init(&iter, crm_peer_cache);
	while (g_hash_table_iter_next(&iter, (gpointer ) &id, (gpointer ) &node)) {
	do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
	}
	}

	/* Hash table predicate: TRUE when the entry's value is the very pointer
	 * passed as user_data (identity comparison, not content comparison).
	 */
	static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
	{
	    return (value == user_data) ? TRUE : FALSE;
	}

	/* Look up an existing peer cache entry.
	 *
	 * With CRM_GET_PEER_REMOTE set and a non-NULL uname, the remote peer cache
	 * is checked first; if nothing was found and CRM_GET_PEER_CLUSTER is set,
	 * the search falls back to crm_find_peer(). At least one of id and uname
	 * must be supplied. Returns the entry found, or NULL.
	 */
	crm_node_t *
	crm_find_peer_full(unsigned int id, const char *uname, int flags)
	{
	    crm_node_t *node = NULL;

	    CRM_ASSERT(id > 0 || uname != NULL);

	    crm_peer_init();

	    if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
	        node = g_hash_table_lookup(crm_remote_peer_cache, uname);
	    }

	    if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
	        node = crm_find_peer(id, uname);
	    }
	    return node;
	}

	/* Get a peer cache entry.
	 *
	 * With CRM_GET_PEER_REMOTE set and a non-NULL uname, the remote peer cache
	 * is checked first; if nothing was found and CRM_GET_PEER_CLUSTER is set,
	 * the request is passed on to crm_get_peer(). At least one of id and uname
	 * must be supplied.
	 */
	crm_node_t *
	crm_get_peer_full(unsigned int id, const char *uname, int flags)
	{
	    crm_node_t *node = NULL;

	    CRM_ASSERT(id > 0 || uname != NULL);

	    crm_peer_init();

	    /* Skip the remote cache when there is no name to look up (callers may
	     * pass only an id), matching crm_find_peer_full(); the table is keyed
	     * by name, so a NULL key must not reach its string hash function */
	    if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
	        node = g_hash_table_lookup(crm_remote_peer_cache, uname);
	    }

	    if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
	        node = crm_get_peer(id, uname);
	    }
	    return node;
	}

	/*!
	 * \brief Search the cluster peer cache by node ID and/or node name
	 *
	 * The cache is searched once by name (case-insensitively) and once by ID;
	 * the two results are then reconciled.
	 *
	 * \param[in] id     Cluster node ID to search for (0 if unknown)
	 * \param[in] uname  Node name to search for (NULL if unknown)
	 *
	 * \return Matching cache entry, or NULL if there is none or if the name
	 *         match belongs to a node with a different nonzero ID
	 *
	 * \note When the ID match and the name match are different entries that can
	 *       be merged, the name-matched entry is removed from the cache, so
	 *       this function must not be called within a peer cache iteration.
	 */
	crm_node_t *
	crm_find_peer(unsigned int id, const char *uname)
	{
	    GHashTableIter iter;
	    crm_node_t *node = NULL;
	    crm_node_t *by_id = NULL;
	    crm_node_t *by_name = NULL;

	    CRM_ASSERT(id > 0 || uname != NULL);

	    crm_peer_init();

	    /* First entry whose name matches, ignoring case */
	    if (uname != NULL) {
	        g_hash_table_iter_init(&iter, crm_peer_cache);
	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	            if(node->uname && strcasecmp(node->uname, uname) == 0) {
	                crm_trace("Name match: %s = %p", node->uname, node);
	                by_name = node;
	                break;
	            }
	        }
	    }

	    /* First entry whose ID matches */
	    if (id > 0) {
	        g_hash_table_iter_init(&iter, crm_peer_cache);
	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	            if(node->id == id) {
	                crm_trace("ID match: %u = %p", node->id, node);
	                by_id = node;
	                break;
	            }
	        }
	    }

	    node = by_id; /* Good default */
	    if(by_id == by_name) {
	        /* Nothing to do if they match (both NULL counts) */
	        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

	    } else if(by_id == NULL && by_name) {
	        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

	        if(id && by_name->id) {
	            /* The name is already taken by an entry with another ID */
	            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
	            crm_crit("Node %u and %u share the same name '%s'",
	                     id, by_name->id, uname);
	            node = NULL; /* Create a new one */

	        } else {
	            node = by_name;
	        }

	    } else if(by_name == NULL && by_id) {
	        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

	        if(uname && by_id->uname) {
	            /* The ID is already taken by an entry with another name;
	             * the ID match is still returned
	             */
	            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
	            crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
	                     uname, by_id->uname, id, uname);
	        }

	    /* From here on, by_id and by_name are both set and differ */
	    } else if(uname && by_id->uname) {
	        if(safe_str_eq(uname, by_id->uname)) {
	            crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
	            g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);

	        } else {
	            crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
	            crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
	            crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
	        }

	    } else if(id && by_name->id) {
	        crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);

	    } else {
	        /* Simple merge */

	        /* Only corosync based clusters use nodeid's
	         *
	         * The functions that call crm_update_peer_state() only know nodeid
	         * so 'by_id' is authorative when merging
	         *
	         * Same for crm_update_peer_proc()
	         */
	        crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);

	        crm_info("Merging %p into %p", by_name, by_id);
	        g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
	    }

	    return node;
	}

	#if SUPPORT_COROSYNC
	/*!
	 * \internal
	 * \brief Remove inactive cached peers that share a name with a given node
	 *
	 * \param[in] node  Node whose name should be unique in the peer cache
	 *
	 * \return Number of cache entries removed
	 *
	 * \note Nothing is removed if \p node lacks an ID or name, or if
	 *       corosync_cmap_has_config("nodelist") returns nonzero.
	 */
	static guint
	crm_remove_conflicting_peer(crm_node_t *node)
	{
	    guint matches = 0;
	    GHashTableIter iter;
	    crm_node_t *existing_node = NULL;

	    if (node->id == 0 || node->uname == NULL) {
	        return 0;
	    }

	    if (corosync_cmap_has_config("nodelist") != 0) {
	        return 0;
	    }

	    g_hash_table_iter_init(&iter, crm_peer_cache);
	    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
	        if (existing_node->id > 0
	            && existing_node->id != node->id
	            && existing_node->uname != NULL
	            && strcasecmp(existing_node->uname, node->uname) == 0) {

	            /* Never drop a peer that is still active */
	            if (crm_is_peer_active(existing_node)) {
	                continue;
	            }

	            crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
	                     existing_node->id, existing_node->uname, node->id);

	            /* Removing through the iterator keeps the iteration valid */
	            g_hash_table_iter_remove(&iter);
	            matches++;
	        }
	    }

	    return matches;
	}
	#endif

	/*!
	 * \brief Get a cluster peer cache entry, creating it if necessary
	 *
	 * \param[in] id     Cluster node ID (0 if unknown)
	 * \param[in] uname  Node name (NULL if unknown)
	 *
	 * \return Existing or newly created cache entry (never NULL)
	 *
	 * \note At least one of \p id and \p uname must be supplied. Any ID or name
	 *       the entry is still missing is filled in from the arguments.
	 */
	/* coverity[-alloc] Memory is referenced in one or both hashtables */
	crm_node_t *
	crm_get_peer(unsigned int id, const char *uname)
	{
	    crm_node_t *node = NULL;
	    char *uname_lookup = NULL;

	    CRM_ASSERT(id > 0 || uname != NULL);

	    crm_peer_init();

	    node = crm_find_peer(id, uname);

	    /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
	     * we need to do a lookup of the node name using the id in the cluster membership. */
	    if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
	        uname_lookup = get_node_name(id);
	    }

	    if (uname_lookup) {
	        uname = uname_lookup;
	        crm_trace("Inferred a name of '%s' for node %u", uname, id);

	        /* try to turn up the node one more time now that we know the uname. */
	        if (node == NULL) {
	            node = crm_find_peer(id, uname);
	        }
	    }


	    if (node == NULL) {
	        /* The cache is keyed by a generated unique ID, not by name or ID */
	        char *uniqueid = crm_generate_uuid();

	        node = calloc(1, sizeof(crm_node_t));
	        CRM_ASSERT(node);

	        crm_info("Created entry %s/%p for node %s/%u (%d total)",
	                 uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
	        g_hash_table_replace(crm_peer_cache, uniqueid, node);
	    }

	    if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
	        crm_info("Node %u is now known as %s", id, uname);
	    }

	    /* Only fill in what the entry does not already know */
	    if(id > 0 && node->id == 0) {
	        node->id = id;
	    }

	    if (uname && (node->uname == NULL)) {
	        crm_update_peer_uname(node, uname);
	    }

	    if(node->uuid == NULL) {
	        const char *uuid = crm_peer_uuid(node);

	        if (uuid) {
	            crm_info("Node %u has uuid %s", id, uuid);

	        } else {
	            crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
	        }
	    }

	    /* uname may alias uname_lookup, so free it only after the last use */
	    free(uname_lookup);

	    return node;
	}

	/*!
	* \internal
	* \brief Update a node's uname
	*
	* \param[in] node Node object to update
	* \param[in] uname New name to set
	*
	* \note This function should not be called within a peer cache iteration,
	* because in some cases it can remove conflicting cache entries,
	* which would invalidate the iterator.
	*/
	void
	crm_update_peer_uname(crm_node_t *node, const char *uname)
	{
	    CRM_CHECK(uname != NULL,
	              crm_err("Bug: can't update node name without name"); return);
	    CRM_CHECK(node != NULL,
	              crm_err("Bug: can't update node name to %s without node", uname);
	              return);

	    if (safe_str_eq(uname, node->uname)) {
	        crm_debug("Node uname '%s' did not change", uname);
	        return;
	    }

	    /* Warn once, at the first ASCII capital letter found in the new name */
	    for (const char *c = uname; *c; ++c) {
	        if ((*c >= 'A') && (*c <= 'Z')) {
	            crm_warn("Node names with capitals are discouraged, consider changing '%s'",
	                     uname);
	            break;
	        }
	    }

	    free(node->uname);
	    node->uname = strdup(uname);
	    CRM_ASSERT(node->uname != NULL);

	    if (crm_status_callback) {
	        crm_status_callback(crm_status_uname, node, NULL);
	    }

	#if SUPPORT_COROSYNC
	    /* This may remove other cache entries, hence the iteration caveat */
	    if (is_corosync_cluster() && !is_set(node->flags, crm_remote_node)) {
	        crm_remove_conflicting_peer(node);
	    }
	#endif
	}

	/*!
	* \internal
	* \brief Update a node's process information (and potentially state)
	*
	* \param[in] source Caller's function name (for log messages)
	* \param[in] node Node object to update
	* \param[in] flag Bitmask of new process information
	* \param[in] status node status (online, offline, etc.)
	*
	* \return NULL if any node was reaped from peer caches, value of node otherwise
	*
	* \note If this function returns NULL, the supplied node object was likely
	* freed and should not be used again. This function should not be
	* called within a cache iteration if reaping is possible, otherwise
	* reaping could invalidate the iterator.
	*/
	crm_node_t *
	crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
	{
	    uint32_t last = 0;
	    gboolean changed = FALSE;

	    CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
	                                    source, peer2text(flag), status); return NULL);

	    /* Pacemaker doesn't spawn processes on remote nodes */
	    if (is_set(node->flags, crm_remote_node)) {
	        return node;
	    }

	    last = node->processes;
	    if (status == NULL) {
	        /* No status: flag replaces the whole process bitmask */
	        node->processes = flag;
	        if (node->processes != last) {
	            changed = TRUE;
	        }

	    } else if (safe_str_eq(status, ONLINESTATUS)) {
	        /* Online: add the given process bits */
	        if ((node->processes & flag) != flag) {
	            set_bit(node->processes, flag);
	            changed = TRUE;
	        }

	    } else if (node->processes & flag) {
	        /* Any other status: remove the given process bits */
	        clear_bit(node->processes, flag);
	        changed = TRUE;
	    }

	    if (changed) {
	        if (status == NULL && flag <= crm_proc_none) {
	            crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
	                     node->id);
	        } else {
	            crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
	                     peer2text(flag), status);
	        }

	        /* Call the client callback first, then update the peer state,
	         * in case the node will be reaped
	         */
	        if (crm_status_callback) {
	            crm_status_callback(crm_status_processes, node, &last);
	        }

	        /* The client callback shouldn't touch the peer caches,
	         * but as a safety net, bail if the peer cache was destroyed.
	         */
	        if (crm_peer_cache == NULL) {
	            return NULL;
	        }

	        if (crm_autoreap) {
	            node = crm_update_peer_state(__FUNCTION__, node,
	                                         is_set(node->processes, crm_get_cluster_proc())?
	                                         CRM_NODE_MEMBER : CRM_NODE_LOST, 0);
	        }
	    } else {
	        crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
	                  peer2text(flag), status);
	    }
	    return node;
	}

	/*!
	 * \brief Update a node's expected state
	 *
	 * \param[in] source    Caller's function name (for log messages)
	 * \param[in] node      Node object to update
	 * \param[in] expected  Node's new expected state (NULL leaves it unchanged)
	 *
	 * \note Remote nodes are left untouched.
	 */
	void
	crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected)
	{
	    char *last = NULL;
	    gboolean changed = FALSE;

	    CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
	              return);

	    /* Remote nodes don't participate in joins */
	    if (is_set(node->flags, crm_remote_node)) {
	        return;
	    }

	    last = node->expected;
	    if (expected != NULL && safe_str_neq(node->expected, expected)) {
	        node->expected = strdup(expected);
	        changed = TRUE;
	    }

	    if (changed) {
	        crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
	                 expected, last);
	        /* The previous value is released only once it has been replaced */
	        free(last);
	    } else {
	        crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
	                  node->id, expected);
	    }
	}

	/*!
	* \internal
	* \brief Update a node's state and membership information
	*
	* \param[in] source Caller's function name (for log messages)
	* \param[in] node Node object to update
	* \param[in] state Node's new state
	* \param[in] membership Node's new membership ID
	* \param[in] iter If not NULL, pointer to node's peer cache iterator
	*
	* \return NULL if any node was reaped, value of node otherwise
	*
	* \note If this function returns NULL, the supplied node object was likely
	* freed and should not be used again. This function may be called from
	* within a peer cache iteration if the iterator is supplied.
	*/
	static crm_node_t *
	crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
	{
	    gboolean is_member;

	    CRM_CHECK(node != NULL,
	              crm_err("Could not set state for unknown host to %s"
	                      CRM_XS " source=%s", state, source);
	              return NULL);

	    is_member = safe_str_eq(state, CRM_NODE_MEMBER);
	    if (is_member) {
	        node->when_lost = 0;
	        /* A membership of 0 means "not supplied": keep the old last_seen */
	        if (membership) {
	            node->last_seen = membership;
	        }
	    }

	    if (state && safe_str_neq(node->state, state)) {
	        char *last = node->state;

	        node->state = strdup(state);
	        crm_notice("Node %s state is now %s " CRM_XS
	                   " nodeid=%u previous=%s source=%s", node->uname, state,
	                   node->id, (last? last : "unknown"), source);
	        /* The callback receives the previous state, so free it afterwards */
	        if (crm_status_callback) {
	            crm_status_callback(crm_status_nstate, node, last);
	        }
	        free(last);

	        if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
	            /* We only autoreap from the peer cache, not the remote peer cache,
	             * because the latter should be managed only by
	             * crm_remote_peer_cache_refresh().
	             */
	            if(iter) {
	                /* Remove via the caller's iterator so it stays valid */
	                crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
	                g_hash_table_iter_remove(iter);

	            } else {
	                reap_crm_member(node->id, node->uname);
	            }
	            node = NULL;
	        }

	    } else {
	        crm_trace("Node %s state is unchanged (%s) " CRM_XS
	                  " nodeid=%u source=%s", node->uname, state, node->id, source);
	    }
	    return node;
	}

	/*!
	* \brief Update a node's state and membership information
	*
	* \param[in] source Caller's function name (for log messages)
	* \param[in] node Node object to update
	* \param[in] state Node's new state
	* \param[in] membership Node's new membership ID
	*
	* \return NULL if any node was reaped, value of node otherwise
	*
	* \note If this function returns NULL, the supplied node object was likely
	* freed and should not be used again. This function should not be
	* called within a cache iteration if reaping is possible,
	* otherwise reaping could invalidate the iterator.
	*/
	crm_node_t *
	crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership)
	{
	    /* No iterator is available here, so any reaping goes through
	     * reap_crm_member() instead of an in-place iterator removal.
	     */
	    return crm_update_peer_state_iter(source, node, state, membership, NULL);
	}

	/*!
	* \internal
	* \brief Reap all nodes from cache whose membership information does not match
	*
	* \param[in] membership Membership ID of nodes to keep
	*/
	void
	crm_reap_unseen_nodes(uint64_t membership)
	{
	    GHashTableIter cache_iter;
	    crm_node_t *peer = NULL;

	    crm_trace("Reaping unseen nodes...");
	    g_hash_table_iter_init(&cache_iter, crm_peer_cache);
	    while (g_hash_table_iter_next(&cache_iter, NULL, (gpointer *)&peer)) {
	        /* Peers seen in the given membership are kept as they are */
	        if (peer->last_seen == membership) {
	            continue;
	        }

	        /* Peers that never had a state are only reported */
	        if (!peer->state) {
	            crm_info("State of node %s[%u] is still unknown",
	                     peer->uname, peer->id);
	            continue;
	        }

	        /* Passing our iterator lets the callee remove the peer from
	         * crm_peer_cache without invalidating this loop.
	         */
	        crm_update_peer_state_iter(__FUNCTION__, peer, CRM_NODE_LOST, membership, &cache_iter);
	    }
	}

	/*!
	 * \brief Request that a node be kicked via stonith_api_kick()
	 *
	 * \param[in] nodeid  ID of node to kick
	 * \param[in] uname   Name of node to kick
	 * \param[in] unused  Ignored
	 *
	 * \return Return value of stonith_api_kick()
	 */
	int
	crm_terminate_member(int nodeid, const char *uname, void *unused)
	{
	    /* Always use the synchronous, non-mainloop version */
	    return stonith_api_kick(nodeid, uname, 120, TRUE);
	}

	/*!
	 * \brief Request that a node be kicked via stonith_api_kick()
	 *
	 * \param[in] nodeid      ID of node to kick
	 * \param[in] uname       Name of node to kick
	 * \param[in] connection  Ignored
	 *
	 * \return Return value of stonith_api_kick()
	 */
	int
	crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
	{
	    /* Same call and arguments as crm_terminate_member() */
	    return stonith_api_kick(nodeid, uname, 120, TRUE);
	}
	+
	+/*!
	+ * \internal
	+ * \brief Search the known peer cache by node ID string and/or node name
	+ *
	+ * \param[in] id     Node ID string, compared against each entry's uuid
	+ *                   (NULL if unknown)
	+ * \param[in] uname  Node name (NULL if unknown)
	+ *
	+ * \return Matching cache entry, or NULL if none matches or the ID and name
	+ *         matches cannot be reconciled
	+ *
	+ * \note Unlike crm_find_peer(), this never modifies the cache.
	+ */
	+static crm_node_t *
	+crm_find_known_peer(const char *id, const char *uname)
	+{
	+    GHashTableIter iter;
	+    crm_node_t *node = NULL;
	+    crm_node_t *by_id = NULL;
	+    crm_node_t *by_name = NULL;
	+
	+    if (uname) {
	+        g_hash_table_iter_init(&iter, crm_known_peer_cache);
	+        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	+            if (node->uname && strcasecmp(node->uname, uname) == 0) {
	+                crm_trace("Name match: %s = %p", node->uname, node);
	+                by_name = node;
	+                break;
	+            }
	+        }
	+    }
	+
	+    if (id) {
	+        g_hash_table_iter_init(&iter, crm_known_peer_cache);
	+        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
	+            /* Guard against entries without a uuid, as is done for uname */
	+            if (node->uuid && strcasecmp(node->uuid, id) == 0) {
	+                crm_trace("ID match: %s= %p", id, node);
	+                by_id = node;
	+                break;
	+            }
	+        }
	+    }
	+
	+    node = by_id; /* Good default */
	+    if (by_id == by_name) {
	+        /* Nothing to do if they match (both NULL counts) */
	+        crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
	+
	+    } else if (by_id == NULL && by_name) {
	+        crm_trace("Only one: %p for %s/%s", by_name, id, uname);
	+
	+        /* A name-only match is rejected when the caller also gave an ID */
	+        if (id) {
	+            node = NULL;
	+
	+        } else {
	+            node = by_name;
	+        }
	+
	+    } else if (by_name == NULL && by_id) {
	+        crm_trace("Only one: %p for %s/%s", by_id, id, uname);
	+
	+        /* An ID-only match is rejected when the caller also gave a name */
	+        if (uname) {
	+            node = NULL;
	+        }
	+
	+    /* From here on, by_id and by_name are both set and differ */
	+    } else if (uname && by_id->uname
	+               && safe_str_eq(uname, by_id->uname)) {
	+        /* Multiple nodes have the same uname in the CIB.
	+         * Return by_id. */
	+
	+    } else if (id && by_name->uuid
	+               && safe_str_eq(id, by_name->uuid)) {
	+        /* Multiple nodes have the same id in the CIB.
	+         * Return by_name. */
	+        node = by_name;
	+
	+    } else {
	+        node = NULL;
	+    }
	+
	+    if (node == NULL) {
	+        crm_debug("Couldn't find node%s%s%s%s",
	+                  id? " " : "",
	+                  id? id : "",
	+                  uname? " with name " : "",
	+                  uname? uname : "");
	+    }
	+
	+    return node;
	+}
	+
	+/*!
	+ * \internal
	+ * \brief Add or refresh one known peer cache entry from a CIB node element
	+ *
	+ * \param[in] xml_node   XML element supplying the node's ID and uname
	+ * \param[in] user_data  Ignored
	+ */
	+static void
	+known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data)
	+{
	+    const char *id = crm_element_value(xml_node, XML_ATTR_ID);
	+    const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
	+    crm_node_t * node = NULL;
	+
	+    CRM_CHECK(id != NULL && uname !=NULL, return);
	+    node = crm_find_known_peer(id, uname);
	+
	+    if (node == NULL) {
	+        /* Not cached yet: create an entry under a generated unique key */
	+        char *uniqueid = crm_generate_uuid();
	+
	+        node = calloc(1, sizeof(crm_node_t));
	+        CRM_ASSERT(node != NULL);
	+
	+        node->uname = strdup(uname);
	+        CRM_ASSERT(node->uname != NULL);
	+
	+        node->uuid = strdup(id);
	+        CRM_ASSERT(node->uuid != NULL);
	+
	+        g_hash_table_replace(crm_known_peer_cache, uniqueid, node);
	+
	+    } else if (is_set(node->flags, crm_node_dirty)) {
	+        /* Adopt the spelling of the name found in the XML if it differs */
	+        if (safe_str_neq(uname, node->uname)) {
	+            free(node->uname);
	+            node->uname = strdup(uname);
	+            CRM_ASSERT(node->uname != NULL);
	+        }
	+
	+        /* Node is in cache and hasn't been updated already, so mark it clean */
	+        clear_bit(node->flags, crm_node_dirty);
	+    }
	+
	+}
	+
	+/* XPath selecting CIB node entries that have no type attribute or whose
	+ * type is 'member'
	+ */
	+#define XPATH_MEMBER_NODE_CONFIG \
	+ "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \
	+ "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']"
	+
	+/*!
	+ * \internal
	+ * \brief Rebuild the known peer cache from the node entries of a CIB
	+ *
	+ * \param[in] cib  CIB XML to search for node entries
	+ */
	+static void
	+crm_known_peer_cache_refresh(xmlNode *cib)
	+{
	+ crm_peer_init();
	+
	+ /* NOTE(review): mark_dirty is defined elsewhere; presumably it sets
	+ * crm_node_dirty on every entry so that unseen entries can be swept below
	+ */
	+ g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL);
	+
	+ /* The helper adds missing entries and clears crm_node_dirty on those found */
	+ crm_foreach_xpath_result(cib, XPATH_MEMBER_NODE_CONFIG,
	+ known_peer_cache_refresh_helper, NULL);
	+
	+ /* Remove all old cache entries that weren't seen in the CIB */
	+ g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL);
	+}
	+
	+/*!
	+ * \brief Refresh the remote peer cache and the known peer cache from a CIB
	+ *
	+ * \param[in] cib  CIB XML passed to both refresh functions
	+ */
	+void
	+crm_peer_caches_refresh(xmlNode *cib)
	+{
	+ crm_remote_peer_cache_refresh(cib);
	+ crm_known_peer_cache_refresh(cib);
	+}
	+
	+/*!
	+ * \brief Look up a node in the peer caches, falling back to the known peer cache
	+ *
	+ * \param[in] id     Cluster node ID to search for (0 if unknown)
	+ * \param[in] uname  Node name to search for (NULL if unknown)
	+ * \param[in] flags  Bitmask of CRM_GET_PEER_REMOTE and/or CRM_GET_PEER_CLUSTER
	+ *
	+ * \return Matching cache entry, or NULL if none was found
	+ *
	+ * \note At least one of \p id and \p uname must be supplied. The known peer
	+ *       cache is consulted only when CRM_GET_PEER_CLUSTER is set.
	+ */
	+crm_node_t *
	+crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
	+{
	+    crm_node_t *node = NULL;
	+    char *id_str = NULL;
	+
	+    CRM_ASSERT(id > 0 || uname != NULL);
	+
	+    node = crm_find_peer_full(id, uname, flags);
	+
	+    if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
	+        return node;
	+    }
	+
	+    /* The known peer cache is searched by ID string, not by number */
	+    if (id > 0) {
	+        id_str = crm_strdup_printf("%u", id);
	+    }
	+
	+    node = crm_find_known_peer(id_str, uname);
	+
	+    free(id_str);
	+    return node;
	+}

File Metadata

Mime Type: text/x-diff
Expires: Thu, Oct 16, 12:32 AM (14 h, 34 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 2530891
Default Alt Text: (318 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions