diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 3ce7470442..48225ac58a 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -1,340 +1,340 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <string.h>
#include <controld_fsa.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crm/cib.h>
#include <pacemaker-controld.h>
#include <controld_messages.h>
#include <controld_callbacks.h>
#include <controld_lrm.h>
#include <controld_fencing.h>
#include <controld_transition.h>
#include <controld_membership.h>
/* From join_dc... */
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
void
crmd_ha_msg_filter(xmlNode * msg)
{
if (AM_I_DC) {
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
if (safe_str_eq(sys_from, CRM_SYSTEM_DC)) {
const char *from = crm_element_value(msg, F_ORIG);
if (safe_str_neq(from, fsa_our_uname)) {
int level = LOG_INFO;
const char *op = crm_element_value(msg, F_CRM_TASK);
/* make sure the election happens NOW */
if (fsa_state != S_ELECTION) {
ha_msg_input_t new_input;
level = LOG_WARNING;
new_input.msg = msg;
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
__FUNCTION__);
}
do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
goto done;
}
}
} else {
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
if (safe_str_eq(sys_to, CRM_SYSTEM_DC)) {
return;
}
}
/* crm_log_xml_trace("HA[inbound]", msg); */
route_message(C_HA_MESSAGE, msg);
done:
trigger_fsa(fsa_source);
}
/*!
* \internal
* \brief Check whether a node is online
*
* \param[in] node Node to check
*
* \retval -1 if completely dead
* \retval 0 if partially alive
* \retval 1 if completely alive
*/
static int
node_alive(const crm_node_t *node)
{
if (is_set(node->flags, crm_remote_node)) {
// Pacemaker Remote nodes can't be partially alive
return safe_str_eq(node->state, CRM_NODE_MEMBER)? 1: -1;
} else if (crm_is_peer_active(node)) {
// Completely up cluster node: both cluster member and peer
return 1;
} else if (is_not_set(node->processes, crm_get_cluster_proc())
&& safe_str_neq(node->state, CRM_NODE_MEMBER)) {
// Completely down cluster node: neither cluster member nor peer
return -1;
}
// Partially up cluster node: only cluster member or only peer
return 0;
}
#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
uint32_t old = 0;
bool appeared = FALSE;
bool is_remote = is_set(node->flags, crm_remote_node);
/* The controller waits to receive some information from the membership
* layer before declaring itself operational. If this is being called for a
* cluster node, indicate that we have it.
*/
if (!is_remote) {
set_bit(fsa_input_register, R_PEER_DATA);
}
if (node->uname == NULL) {
return;
}
switch (type) {
case crm_status_uname:
/* If we've never seen the node, then it also won't be in the status section */
crm_info("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
node->uname, state_text(node->state));
return;
case crm_status_nstate:
/* This callback should not be called unless the state actually
* changed, but here's a failsafe just in case.
*/
CRM_CHECK(safe_str_neq(data, node->state), return);
crm_info("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
node->uname, state_text(node->state), state_text(data));
if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
appeared = TRUE;
if (!is_remote) {
remove_stonith_cleanup(node->uname);
}
} else {
controld_remove_voter(node->uname);
}
crmd_alert_node_event(node);
break;
case crm_status_processes:
CRM_CHECK(data != NULL, return);
old = *(const uint32_t *)data;
appeared = is_set(node->processes, crm_get_cluster_proc());
crm_info("Node %s is %s a peer " CRM_XS " DC=%s old=0x%07x new=0x%07x",
node->uname, (appeared? "now" : "no longer"),
(AM_I_DC? "true" : (fsa_our_dc? fsa_our_dc : "<none>")),
old, node->processes);
if (is_not_set((node->processes ^ old), crm_get_cluster_proc())) {
/* Peer status did not change. This should not be possible,
* since we don't track process flags other than peer status.
*/
crm_trace("Process flag 0x%7x did not change from 0x%7x to 0x%7x",
crm_get_cluster_proc(), old, node->processes);
return;
}
if (!appeared) {
controld_remove_voter(node->uname);
}
if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
crm_trace("Ignoring peer status change because not connected to CIB");
return;
} else if (fsa_state == S_STOPPING) {
crm_trace("Ignoring peer status change because stopping");
return;
}
if (safe_str_eq(node->uname, fsa_our_uname) && !appeared) {
/* Did we get evicted? */
crm_notice("Our peer connection failed");
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
} else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
/* Did the DC leave us? */
crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
/* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
* want to fence it. Newer DCs will send their shutdown request
* to all peers, who will update the DC's expected state to
* down, thus avoiding fencing. We can safely erase the DC's
* transient attributes when it leaves in that case. However,
* the only way to avoid fencing older DCs is to leave the
* transient attributes intact until it rejoins.
*/
if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
}
} else if(AM_I_DC) {
if (appeared) {
- te_trigger_stonith_history_sync();
+ te_trigger_stonith_history_sync(FALSE);
} else {
erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
}
}
break;
}
if (AM_I_DC) {
xmlNode *update = NULL;
int flags = node_update_peer;
int alive = node_alive(node);
crm_action_t *down = match_down_event(node->uuid);
crm_trace("Alive=%d, appeared=%d, down=%d",
alive, appeared, (down? down->id : -1));
if (appeared && (alive > 0) && !is_remote) {
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
if (down) {
const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
if (safe_str_eq(task, CRM_OP_FENCE)) {
/* tengine_stonith_callback() confirms fence actions */
crm_trace("Updating CIB %s fencer reported fencing of %s complete",
(down->confirmed? "after" : "before"), node->uname);
} else if (!appeared && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
// Shutdown actions are immediately confirmed (i.e. no_wait)
if (!is_remote) {
flags |= node_update_join | node_update_expected;
crmd_peer_down(node, FALSE);
check_join_state(fsa_state, __FUNCTION__);
}
if (alive >= 0) {
crm_info("%s of peer %s is in progress " CRM_XS " action=%d",
task, node->uname, down->id);
} else {
crm_notice("%s of peer %s is complete " CRM_XS " action=%d",
task, node->uname, down->id);
update_graph(transition_graph, down);
trigger_graph();
}
} else {
crm_trace("Node %s is %s, was expected to %s (op %d)",
node->uname,
((alive > 0)? "alive" :
((alive < 0)? "dead" : "partially alive")),
task, down->id);
}
} else if (appeared == FALSE) {
crm_warn("Stonith/shutdown of node %s was not expected",
node->uname);
if (!is_remote) {
crm_update_peer_join(__FUNCTION__, node, crm_join_none);
check_join_state(fsa_state, __FUNCTION__);
}
abort_transition(INFINITY, tg_restart, "Node failure", NULL);
fail_incompletable_actions(transition_graph, node->uuid);
} else {
crm_trace("Node %s came up, was not expected to be down",
node->uname);
}
if (is_remote) {
/* A pacemaker_remote node won't have its cluster status updated
* in the CIB by membership-layer callbacks, so do it here.
*/
flags |= node_update_cluster;
/* Trigger resource placement on newly integrated nodes */
if (appeared) {
abort_transition(INFINITY, tg_restart,
"pacemaker_remote node integrated", NULL);
}
}
/* Update the CIB node state */
update = create_node_state_update(node, flags, NULL, __FUNCTION__);
if (update == NULL) {
crm_debug("Node state update not yet possible for %s", node->uname);
} else {
fsa_cib_anon_update(XML_CIB_TAG_STATUS, update);
}
free_xml(update);
}
trigger_fsa(fsa_source);
}
void
crmd_cib_connection_destroy(gpointer user_data)
{
CRM_CHECK(user_data == fsa_cib_conn,;);
crm_trace("Invoked");
trigger_fsa(fsa_source);
fsa_cib_conn->state = cib_disconnected;
if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Connection to the CIB manager terminated");
return;
}
// @TODO This should trigger a reconnect, not a shutdown
crm_crit("Lost connection to the CIB manager, shutting down");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit(fsa_input_register, R_CIB_CONNECTED);
return;
}
gboolean
crm_fsa_trigger(gpointer user_data)
{
crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue));
s_crmd_fsa(C_FSA_INTERNAL);
crm_trace("Exited (queue len: %d)", g_list_length(fsa_message_queue));
return TRUE;
}
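
/* Note on the change above: the only functional edit in controld_callbacks.c is that
 * peer_update_callback() now passes FALSE to te_trigger_stonith_history_sync(), selecting
 * the short sync delay. The fragment below is an illustrative condensation (not part of
 * the patch) of how the two call sites in this series use the new boolean parameter;
 * 'peer_appeared' and 'fencer_just_connected' are hypothetical stand-ins for the
 * surrounding controller state.
 */
if (AM_I_DC && peer_appeared) {
    /* DC saw a node join: batch a cluster-wide history sync within ~5s */
    te_trigger_stonith_history_sync(FALSE);
} else if (fencer_just_connected) {
    /* fresh fencer connection: arm a 30s fallback in case no DC-driven
     * sync arrives in the meantime */
    te_trigger_stonith_history_sync(TRUE);
}
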
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index e99d605988..f3bb20f7db 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,878 +1,880 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/common/ipcs.h>
#include <pacemaker-controld.h>
#include <controld_fsa.h>
#include <controld_messages.h>
#include <controld_callbacks.h>
#include <controld_lrm.h>
#include <controld_fencing.h>
#include <controld_alerts.h>
#include <controld_metadata.h>
#include <controld_transition.h>
#include <controld_throttle.h>
#include <sys/types.h>
#include <sys/stat.h>
qb_ipcs_service_t *ipcs = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);
gboolean fsa_has_quorum = FALSE;
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
bool no_quorum_suicide_escalation = FALSE;
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = calloc(1, sizeof(crm_cluster_t));
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
set_bit(fsa_input_register, R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
registered = crm_connect_corosync(cluster);
#endif
}
if (registered == TRUE) {
controld_election_init(cluster->uname);
fsa_our_uname = cluster->uname;
fsa_our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (registered == FALSE) {
set_bit(fsa_input_register, R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __FUNCTION__);
clear_bit(fsa_input_register, R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
set_bit(fsa_input_register, R_SHUTDOWN);
controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
set_bit(fsa_input_register, R_SHUTDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
(fsa_our_dc? fsa_our_dc : "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
/* set_bit(fsa_input_register, R_STAYDOWN); */
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
void
crmd_fast_exit(crm_exit_t exit_code)
{
if (is_set(fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& is_set(fsa_input_register, R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GListPtr gIter = NULL;
GMainLoop *mloop = crmd_mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
set_bit(fsa_input_register, R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
pe_subsystem_free();
controld_disconnect_fencer(TRUE);
if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
fsa_data_t *fsa_data = gIter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
clear_bit(fsa_input_register, R_MEMBERSHIP);
g_list_free(fsa_message_queue); fsa_message_queue = NULL;
metadata_cache_fini();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
cib_free_callbacks(fsa_cib_conn);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
verify_stopped(fsa_state, LOG_WARNING);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
lrm_state_destroy_all();
/* This basically will not work, since mainloop has a reference to it */
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
mainloop_destroy_trigger(config_read); config_read = NULL;
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
crm_client_cleanup();
crm_peer_destroy();
crm_timer_stop(transition_timer);
crm_timer_stop(integration_timer);
crm_timer_stop(finalization_timer);
crm_timer_stop(election_trigger);
crm_timer_stop(shutdown_escalation_timer);
crm_timer_stop(wait_timer);
crm_timer_stop(recheck_timer);
+ te_cleanup_stonith_history_sync(NULL, TRUE);
+
free(transition_timer); transition_timer = NULL;
free(integration_timer); integration_timer = NULL;
free(finalization_timer); finalization_timer = NULL;
free(election_trigger); election_trigger = NULL;
free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
free(wait_timer); wait_timer = NULL;
free(recheck_timer); recheck_timer = NULL;
free(fsa_our_dc_version); fsa_our_dc_version = NULL;
free(fsa_our_uname); fsa_our_uname = NULL;
free(fsa_our_uuid); fsa_our_uuid = NULL;
free(fsa_our_dc); fsa_our_dc = NULL;
free(fsa_cluster_name); fsa_cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(fsa_pe_ref); fsa_pe_ref = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free(max_generation_from); max_generation_from = NULL;
free_xml(max_generation_xml); max_generation_xml = NULL;
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
/* Don't re-enter this block */
crmd_mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
int log_level = LOG_INFO;
const char *exit_type = "gracefully";
if (action & A_EXIT_1) {
log_level = LOG_ERR;
exit_type = "forcefully";
exit_code = CRM_EX_ERROR;
}
verify_stopped(cur_state, LOG_ERR);
do_crm_log(log_level, "Performing %s - %s exiting the controller",
fsa_action2string(action), exit_type);
crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int was_error = 0;
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
crm_debug("Creating CIB manager and executor objects");
fsa_cib_conn = cib_new();
lrm_state_init_local();
/* set up the timers */
transition_timer = calloc(1, sizeof(fsa_timer_t));
integration_timer = calloc(1, sizeof(fsa_timer_t));
finalization_timer = calloc(1, sizeof(fsa_timer_t));
election_trigger = calloc(1, sizeof(fsa_timer_t));
shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
wait_timer = calloc(1, sizeof(fsa_timer_t));
recheck_timer = calloc(1, sizeof(fsa_timer_t));
if (election_trigger != NULL) {
election_trigger->source_id = 0;
election_trigger->period_ms = -1;
election_trigger->fsa_input = I_DC_TIMEOUT;
election_trigger->callback = crm_timer_popped;
election_trigger->repeat = FALSE;
} else {
was_error = TRUE;
}
if (transition_timer != NULL) {
transition_timer->source_id = 0;
transition_timer->period_ms = -1;
transition_timer->fsa_input = I_PE_CALC;
transition_timer->callback = crm_timer_popped;
transition_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (integration_timer != NULL) {
integration_timer->source_id = 0;
integration_timer->period_ms = -1;
integration_timer->fsa_input = I_INTEGRATED;
integration_timer->callback = crm_timer_popped;
integration_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (finalization_timer != NULL) {
finalization_timer->source_id = 0;
finalization_timer->period_ms = -1;
finalization_timer->fsa_input = I_FINALIZED;
finalization_timer->callback = crm_timer_popped;
finalization_timer->repeat = FALSE;
/* for possible enabling... a bug in the join protocol left
* a slave in S_PENDING while we think it's in S_NOT_DC
*
* raising I_FINALIZED put us into a transition loop which is
* never resolved.
* in this loop we continually send probes which the node
* NACK's because it's in S_PENDING
*
* if we have nodes where the cluster layer is active but the
* CRM is not... then this will be handled in the
* integration phase
*/
finalization_timer->fsa_input = I_ELECTION;
} else {
was_error = TRUE;
}
if (shutdown_escalation_timer != NULL) {
shutdown_escalation_timer->source_id = 0;
shutdown_escalation_timer->period_ms = -1;
shutdown_escalation_timer->fsa_input = I_STOP;
shutdown_escalation_timer->callback = crm_timer_popped;
shutdown_escalation_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (wait_timer != NULL) {
wait_timer->source_id = 0;
wait_timer->period_ms = 2000;
wait_timer->fsa_input = I_NULL;
wait_timer->callback = crm_timer_popped;
wait_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (recheck_timer != NULL) {
recheck_timer->source_id = 0;
recheck_timer->period_ms = -1;
recheck_timer->fsa_input = I_PE_CALC;
recheck_timer->callback = crm_timer_popped;
recheck_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (was_error) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
static int32_t
crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
crmd_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
static int32_t
crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *client = crm_client_get(c);
xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags);
crm_trace("Invoked: %s", crm_client_name(client));
crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
}
#if ENABLE_ACL
CRM_ASSERT(client->user != NULL);
crm_acl_get_set_user(msg, F_CRM_USER, client->user);
#endif
crm_trace("Processing msg from %s", crm_client_name(client));
crm_log_xml_trace(msg, "controller[inbound]");
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (crmd_authorize_message(msg, client, NULL)) {
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa(fsa_source);
free_xml(msg);
return 0;
}
static int32_t
crmd_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), crm_client_name(client),
c, client);
free(client->userdata);
crm_client_destroy(client);
trigger_fsa(fsa_source);
}
return 0;
}
static void
crmd_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
crmd_ipc_closed(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = crmd_ipc_accept,
.connection_created = crmd_ipc_created,
.msg_process = crmd_ipc_dispatch,
.connection_closed = crmd_ipc_closed,
.connection_destroyed = crmd_ipc_destroy
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = crmd_ipc_server_init(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
controld_trigger_fencer_connect();
crm_notice("Pacemaker controller successfully started and accepting connections");
clear_bit(fsa_input_register, R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
set_bit(fsa_input_register, R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* *INDENT-OFF* */
static pe_cluster_option crmd_opts[] = {
/* name, old-name, validate, values, default, short description, long description */
{ "dc-version", NULL, "string", NULL, "none", NULL,
"Version of Pacemaker on the cluster's DC.",
"Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes."
},
{ "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
"The messaging stack on which Pacemaker is currently running.",
"Used for informational and diagnostic purposes." },
{ XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "20s", &check_time,
"How long to wait for a response from other nodes during startup.",
"The \"correct\" value will depend on the speed/load of your network and the type of switches used."
},
{ XML_CONFIG_ATTR_RECHECK, NULL, "time",
"Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
"15min", &check_timer,
"Polling interval for time based changes to options, resource parameters and constraints.",
"The Cluster is primarily event driven, however the configuration can have elements that change based on time."
" To ensure these changes take effect, we can optionally poll the cluster's status for changes."
},
{ "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
"The maximum amount of system resources that should be used by nodes in the cluster",
"The cluster will slow down its recovery process when the amount of system resources used"
" (currently CPU) approaches this limit",
},
{ "node-action-limit", NULL, "integer", NULL, "0", &check_number,
"The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
{ XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{
"join-integration-timeout", "crmd-integration-timeout",
"time", NULL, "3min", &check_timer,
"*** Advanced Use Only ***",
"If need to adjust this value, it probably indicates the presence of a bug"
},
{
"join-finalization-timeout", "crmd-finalization-timeout",
"time", NULL, "30min", &check_timer,
"*** Advanced Use Only ***",
"If you need to adjust this value, it probably indicates the presence of a bug"
},
{
"transition-delay", "crmd-transition-delay",
"time", NULL, "0s", &check_timer,
"*** Advanced Use Only *** Enabling this option will slow down cluster recovery under all conditions",
"Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
"Useful if your configuration is sensitive to the order in which ping updates arrive."
},
{ "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout,
"How long to wait before we can assume nodes are safely down", NULL
},
{ "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number,
"How many times stonith can fail before it will no longer be attempted on a target"
},
{ "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
};
/* *INDENT-ON* */
void
crmd_metadata(void)
{
config_metadata("pacemaker-controld", "1.0",
"controller properties",
"Cluster properties used by Pacemaker's controller,"
" formerly known as crmd",
crmd_opts, DIMOF(crmd_opts));
}
static void
verify_crmd_options(GHashTable * options)
{
verify_all_options(options, crmd_opts, DIMOF(crmd_opts));
}
static const char *
crmd_pref(GHashTable * options, const char *name)
{
return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
set_bit(fsa_input_register, R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig) &&
(crm_element_name(crmconfig)) &&
(strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = crm_str_table_new();
unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, now);
verify_crmd_options(config_hash);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_trigger->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
throttle_update_job_max(value);
value = crmd_pref(config_hash, "load-threshold");
if(value) {
throttle_set_load_target(strtof(value, NULL) / 100.0);
}
value = crmd_pref(config_hash, "no-quorum-policy");
if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) {
no_quorum_suicide_escalation = TRUE;
}
value = crmd_pref(config_hash,"stonith-max-attempts");
update_stonith_max_attempts(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_get_msec(value);
/* How long to declare an election over - even if not everyone voted */
crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
controld_set_election_period(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_timer->period_ms = crm_get_msec(value);
crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms);
value = crmd_pref(config_hash, "transition-delay");
transition_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "join-integration-timeout");
integration_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "join-finalization-timeout");
finalization_timer->period_ms = crm_get_msec(value);
free(fsa_cluster_name);
fsa_cluster_name = NULL;
value = g_hash_table_lookup(config_hash, "cluster-name");
if (value) {
fsa_cluster_name = strdup(value);
}
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
set_bit(fsa_input_register, R_READ_CONFIG);
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
gboolean
crm_read_options(gpointer user_data)
{
int call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn,
"//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath | cib_scope_local);
fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
mainloop_set_trigger(config_read);
}
void
crm_shutdown(int nsig)
{
if (crmd_mainloop != NULL && g_main_loop_is_running(crmd_mainloop)) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating the shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
} else {
set_bit(fsa_input_register, R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
if (shutdown_escalation_timer->period_ms < 1) {
const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
int msec = crm_get_msec(value);
crm_debug("Using default shutdown escalation: %dms", msec);
shutdown_escalation_timer->period_ms = msec;
}
/* can't rely on this... */
crm_notice("Shutting down cluster resource manager " CRM_XS
" limit=%dms", shutdown_escalation_timer->period_ms);
crm_timer_start(shutdown_escalation_timer);
}
} else {
crm_info("exit from shutdown");
crmd_exit(CRM_EX_OK);
}
}
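
/* Note on the change above: in controld_control.c the patch only adds
 * te_cleanup_stonith_history_sync(NULL, TRUE) to crmd_exit(), releasing the history-sync
 * timers alongside the other FSA timers. A minimal sketch of the helper's two calling
 * modes as used elsewhere in this patch (illustrative, not new code):
 *
 * - at controller exit the timers are deleted outright and there is no fencer
 *   connection to touch, hence the NULL first argument;
 * - on a fencer disconnect the timers are only stopped, and the HISTORY_SYNCED
 *   notification callback is dropped from that client.
 */
te_cleanup_stonith_history_sync(NULL, TRUE);          /* exit path: delete timers */
te_cleanup_stonith_history_sync(stonith_api, FALSE);  /* disconnect path: stop timers */
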
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index b925bc5a03..22fa727e77 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,886 +1,948 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <controld_transition.h>
#include <controld_fsa.h>
#include <controld_lrm.h>
#include <controld_fencing.h>
#ifdef HAVE_SYS_REBOOT_H
# include <unistd.h>
# include <sys/reboot.h>
#endif
+static void
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
+
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and the most we'll restart a
* transition for.
*/
struct st_fail_rec {
int count;
};
static unsigned long int stonith_max_attempts = 10;
static GHashTable *stonith_failures = NULL;
void
update_stonith_max_attempts(const char *value)
{
if (safe_str_eq(value, CRM_INFINITY_S)) {
stonith_max_attempts = CRM_SCORE_INFINITY;
} else {
stonith_max_attempts = crm_int_helper(value, NULL);
}
}
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *value = NULL;
if (stonith_failures == NULL) {
return FALSE;
}
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;
too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}
/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
if (stonith_failures == NULL) {
return;
}
if (target) {
struct st_fail_rec *rec = NULL;
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}
static void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
if (stonith_failures == NULL) {
stonith_failures = crm_str_table_new();
}
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count++;
} else {
rec = malloc(sizeof(struct st_fail_rec));
if(rec == NULL) {
return;
}
rec->count = 1;
g_hash_table_insert(stonith_failures, strdup(target), rec);
}
}
/* end stonith fail count functions */
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Fencing update %d for %s: failed - %s (%d)",
call_id, (char *)user_data, pcmk_strerror(rc), rc);
crm_log_xml_warn(msg, "Failed update");
abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
} else {
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
}
}
static void
send_stonith_update(crm_action_t *action, const char *target, const char *uuid)
{
int rc = pcmk_ok;
crm_node_t *peer = NULL;
/* We (usually) rely on the membership layer to do node_update_cluster,
* and the peer status callback to do node_update_peer, because the node
* might have already rejoined before we get the stonith result here.
*/
int flags = node_update_join | node_update_expected;
/* zero out the node-status & remove all LRM status info */
xmlNode *node_state = NULL;
CRM_CHECK(target != NULL, return);
CRM_CHECK(uuid != NULL, return);
/* Make sure the membership and join caches are accurate */
peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= node_update_cluster;
}
if (peer->uuid == NULL) {
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
peer->uuid = strdup(uuid);
}
crmd_peer_down(peer, TRUE);
/* Generate a node state update for the CIB */
node_state = create_node_state_update(peer, flags, NULL, __FUNCTION__);
/* we have to mark whether or not remote nodes have already been fenced */
if (peer->flags & crm_remote_node) {
time_t now = time(NULL);
char *now_s = crm_itoa(now);
crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
free(now_s);
}
/* Force our known ID */
crm_xml_add(node_state, XML_ATTR_UUID, uuid);
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
cib_quorum_override | cib_scope_local | cib_can_create);
/* Delay processing the trigger until the update completes */
crm_debug("Sending fencing update %d for %s", rc, target);
fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
/* Make sure it sticks */
/* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
free_xml(node_state);
return;
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum transition_action abort_action,
const char *target, xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != tg_stop) && too_many_st_failures(target)) {
abort_action = tg_stop;
}
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GListPtr stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] target  Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GListPtr iter = stonith_cleanup_list;
while (iter != NULL) {
GListPtr tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (safe_str_eq(target, iter_name)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup()
{
if (stonith_cleanup_list) {
GListPtr iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup()
{
GListPtr iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_node_t *target_node = crm_get_peer(0, target);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
static stonith_t *stonith_api = NULL;
static crm_trigger_t *stonith_reconnect = NULL;
static char *te_client_id = NULL;
static gboolean
fail_incompletable_stonith(crm_graph_t *graph)
{
GListPtr lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GListPtr lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (synapse->confirmed) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
if (action->type != action_type_crm || action->confirmed) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && safe_str_eq(task, CRM_OP_FENCE)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
+ te_cleanup_stonith_history_sync(st, FALSE);
+
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
if (stonith_api) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
- stonith_api->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
- stonith_api->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(st);
}
+ stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
+ stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE);
+ stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED);
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
static void
tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
{
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (st_event == NULL) {
crm_err("Notify data not found");
return;
}
crmd_alert_fencing_op(st_event);
if ((st_event->result == pcmk_ok) && safe_str_eq("on", st_event->action)) {
crm_notice("%s was successfully unfenced by %s (at the request of %s)",
st_event->target,
st_event->executioner? st_event->executioner : "<anyone>",
st_event->origin);
/* TODO: Hook up st_event->device */
return;
} else if (safe_str_eq("on", st_event->action)) {
crm_err("Unfencing of %s by %s failed: %s (%d)",
st_event->target,
st_event->executioner? st_event->executioner : "<anyone>",
pcmk_strerror(st_event->result), st_event->result);
return;
} else if ((st_event->result == pcmk_ok)
&& crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
crm_crit("We were allegedly just fenced by %s for %s!",
st_event->executioner? st_event->executioner : "<anyone>",
st_event->origin); /* Dumps blackbox if enabled */
qb_log_fini(); /* Try to get the above log message to disk - somehow */
/* Get out ASAP and do not come back up.
*
* Triggering a reboot is also not the worst idea either since
* the rest of the cluster thinks we're safely down
*/
#ifdef RB_HALT_SYSTEM
reboot(RB_HALT_SYSTEM);
#endif
/*
* If reboot() fails or is not supported, coming back up will
* probably lead to a situation where the other nodes set our
* status to 'lost' because of the fencing callback and will
* discard subsequent election votes with:
*
* Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
*
* So just stay dead, something is seriously messed up anyway.
*
*/
exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
return;
}
/* Update the count of stonith failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
if (st_event->result == pcmk_ok) {
st_fail_count_reset(st_event->target);
} else {
st_fail_count_increment(st_event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
CRM_XS " initiator=%s ref=%s",
st_event->target, st_event->result == pcmk_ok ? "" : " not",
st_event->action,
st_event->executioner ? st_event->executioner : "<anyone>",
(st_event->client_origin? st_event->client_origin : "<unknown>"),
pcmk_strerror(st_event->result),
st_event->origin, st_event->id);
if (st_event->result == pcmk_ok) {
crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
const char *uuid = NULL;
gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
if(AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, st_event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the PE creating fence pseudo-events for the guests.
*/
if (st_event->client_origin
&& safe_str_neq(st_event->client_origin, te_client_id)) {
/* Abort the current transition graph if it wasn't us
* that invoked stonith to fence someone
*/
crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
}
/* Assume it was our leader if we don't currently have one */
} else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
&& is_not_set(peer->flags, crm_remote_node)) {
crm_notice("Target %s our leader %s (recorded: %s)",
fsa_our_dc ? "was" : "may have been", st_event->target,
fsa_our_dc ? fsa_our_dc : "<unset>");
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (we_are_executioner) {
send_stonith_update(NULL, st_event->target, uuid);
}
add_stonith_cleanup(st_event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(st_event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
*
* \return TRUE
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
static gboolean
te_connect_stonith(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return TRUE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (rc != pcmk_ok) {
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_err("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return TRUE;
}
}
if (rc == pcmk_ok) {
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_FENCE,
tengine_stonith_notify);
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_HISTORY_SYNCED,
+ tengine_stonith_history_synced);
+ te_trigger_stonith_history_sync(TRUE);
}
+
return TRUE;
}
/*!
\internal
\brief Schedule fencer connection attempt in main loop
*/
void
controld_trigger_fencer_connect()
{
if (stonith_reconnect == NULL) {
stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
te_connect_stonith,
GINT_TO_POINTER(TRUE));
}
set_bit(fsa_input_register, R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
}
void
controld_disconnect_fencer(bool destroy)
{
if (stonith_api) {
// Prevent fencer connection from coming up again
clear_bit(fsa_input_register, R_ST_REQUIRED);
- stonith_api->cmds->disconnect(stonith_api);
+ if (stonith_api->state != stonith_disconnected) {
+ stonith_api->cmds->disconnect(stonith_api);
+ }
+ stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
+ stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE);
+ stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED);
}
if (destroy) {
if (stonith_api) {
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
if (stonith_reconnect) {
mainloop_destroy_trigger(stonith_reconnect);
stonith_reconnect = NULL;
}
if (te_client_id) {
free(te_client_id);
te_client_id = NULL;
}
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
+ te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
crm_action_t *action = NULL;
int call_id = data->call_id;
int rc = data->rc;
char *userdata = data->userdata;
CRM_CHECK(userdata != NULL, return);
crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
pcmk_strerror(rc), rc);
if (AM_I_DC == FALSE) {
return;
}
/* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
/* op->call_id, op->optype, op->node_name, op->op_result, */
/* (char *)op->node_list, op->private_data); */
/* filter out old STONITH actions */
CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
goto bail);
if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
|| transition_graph->id != transition_id) {
crm_info("Ignoring STONITH action initiated outside of the current transition");
goto bail;
}
action = get_action(stonith_id, FALSE);
if (action == NULL) {
crm_err("Stonith action not matched");
goto bail;
}
stop_te_timer(action->timer);
if (rc == pcmk_ok) {
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *op = crm_meta_value(action->params, "stonith_action");
crm_info("Stonith operation %d for %s passed", call_id, target);
if (action->confirmed == FALSE) {
te_action_confirmed(action);
if (safe_str_eq("on", op)) {
const char *value = NULL;
char *now = crm_itoa(time(NULL));
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
free(now);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
} else if (action->sent_update == FALSE) {
send_stonith_update(action, target, uuid);
action->sent_update = TRUE;
}
}
st_fail_count_reset(target);
} else {
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
enum transition_action abort_action = tg_restart;
action->failed = TRUE;
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
call_id, target, pcmk_strerror(rc));
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (rc == -ENODEV) {
crm_warn("No devices found in cluster to fence %s, giving up",
target);
abort_action = tg_stop;
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in tengine_stonith_notify().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
update_graph(transition_graph, action);
trigger_graph();
bail:
free(userdata);
free(uuid);
return;
}
gboolean
te_fence_node(crm_graph_t *graph, crm_action_t *action)
{
int rc = 0;
const char *id = NULL;
const char *uuid = NULL;
const char *target = NULL;
const char *type = NULL;
gboolean invalid_action = FALSE;
enum stonith_call_options options = st_opt_none;
id = ID(action->xml);
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
type = crm_meta_value(action->params, "stonith_action");
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return FALSE;
}
crm_notice("Requesting fencing (%s) of node %s "
CRM_XS " action=%s timeout=%d",
type, target, id, transition_graph->stonith_timeout);
/* Passing NULL means block until we can connect... */
te_connect_stonith(NULL);
if (crmd_join_phase_count(crm_join_confirmed) == 1) {
options |= st_opt_allow_suicide;
}
rc = stonith_api->cmds->fence(stonith_api, options, target, type,
transition_graph->stonith_timeout / 1000, 0);
stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000,
st_opt_timeout_updates,
generate_transition_key(transition_graph->id, action->id,
0, te_uuid),
"tengine_stonith_callback", tengine_stonith_callback);
return TRUE;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
static crm_trigger_t *stonith_history_sync_trigger = NULL;
-static mainloop_timer_t *stonith_history_sync_timer = NULL;
+static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
+static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
+
+void
+te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
+{
+ if (free_timers) {
+ mainloop_timer_del(stonith_history_sync_timer_short);
+ stonith_history_sync_timer_short = NULL;
+ mainloop_timer_del(stonith_history_sync_timer_long);
+ stonith_history_sync_timer_long = NULL;
+ } else {
+ mainloop_timer_stop(stonith_history_sync_timer_short);
+ mainloop_timer_stop(stonith_history_sync_timer_long);
+ }
+
+ if (st) {
+ st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
+ }
+}
+
+static void
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
+{
+ te_cleanup_stonith_history_sync(st, FALSE);
+ crm_debug("Fence-history synced - cancel all timers");
+}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
-te_trigger_stonith_history_sync(void)
+te_trigger_stonith_history_sync(bool long_timeout)
{
/* trigger a sync in 5s to give more nodes the
* chance to show up so that we don't create
* unnecessary stonith-history-sync traffic
+ *
+ * the long timeout of 30s is there as a fallback
+ * so that after a successful connection to fenced
+ * we will wait for 30s for the DC to trigger a
+ * history-sync
+ * if this doesn't happen we trigger a sync locally
+ * (e.g. fenced segfaults and is restarted by pacemakerd)
*/
/* as we are finally checking the stonith-connection
* in do_stonith_history_sync we should be fine
* leaving stonith_history_sync_time & stonith_history_sync_trigger
* around
*/
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
- if(stonith_history_sync_timer == NULL) {
- stonith_history_sync_timer =
- mainloop_timer_add("history_sync", 5000,
- FALSE, stonith_history_sync_set_trigger,
- NULL);
+ if (long_timeout) {
+ if(stonith_history_sync_timer_long == NULL) {
+ stonith_history_sync_timer_long =
+ mainloop_timer_add("history_sync_long", 30000,
+ FALSE, stonith_history_sync_set_trigger,
+ NULL);
+ }
+ crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
+ mainloop_timer_start(stonith_history_sync_timer_long);
+ } else {
+ if(stonith_history_sync_timer_short == NULL) {
+ stonith_history_sync_timer_short =
+ mainloop_timer_add("history_sync_short", 5000,
+ FALSE, stonith_history_sync_set_trigger,
+ NULL);
+ }
+ crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
+ mainloop_timer_start(stonith_history_sync_timer_short);
}
- crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
- mainloop_timer_start(stonith_history_sync_timer);
+
}
/* end stonith history synchronization functions */
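
/* Taken together, the changes in controld_fencing.c implement a small timer-based
 * handshake around fence-history synchronization: a 5s timer batches DC-driven syncs,
 * a 30s timer acts as a fallback armed right after connecting to the fencer, and the
 * T_STONITH_NOTIFY_HISTORY_SYNCED notification cancels whichever timer is still pending.
 * The self-contained sketch below models that pattern with plain GLib timeouts; the real
 * code uses Pacemaker's mainloop_timer_* wrappers, so intervals and names mirror the
 * patch but everything else is illustrative.
 */
#include <glib.h>

static guint sync_timer_short = 0;   /* ~5000 ms, DC-driven batching */
static guint sync_timer_long = 0;    /* ~30000 ms, post-connect fallback */

static gboolean
request_history_sync(gpointer data)
{
    (void) data;
    /* corresponds to do_stonith_history_sync(): broadcast a history query */
    return G_SOURCE_REMOVE;          /* one-shot, like repeat = FALSE */
}

static void
trigger_history_sync(gboolean long_timeout)
{
    if (long_timeout) {
        if (sync_timer_long == 0) {
            sync_timer_long = g_timeout_add(30000, request_history_sync, NULL);
        }
    } else if (sync_timer_short == 0) {
        sync_timer_short = g_timeout_add(5000, request_history_sync, NULL);
    }
}

static void
history_synced(void)                 /* T_STONITH_NOTIFY_HISTORY_SYNCED analogue */
{
    if (sync_timer_short) { g_source_remove(sync_timer_short); sync_timer_short = 0; }
    if (sync_timer_long)  { g_source_remove(sync_timer_long);  sync_timer_long = 0; }
}
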
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
index 8f7f19b40c..2fe6d88d4a 100644
--- a/daemons/controld/controld_fencing.h
+++ b/daemons/controld/controld_fencing.h
@@ -1,34 +1,35 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CONTROLD_FENCING__H
# define CONTROLD_FENCING__H
#include <stdbool.h> // bool
#include <pacemaker-internal.h> // crm_graph_t, crm_action_t
// stonith fail counts
void st_fail_count_reset(const char * target);
void update_stonith_max_attempts(const char* value);
// stonith API client
void controld_trigger_fencer_connect(void);
void controld_disconnect_fencer(bool destroy);
gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action);
// stonith cleanup list
void add_stonith_cleanup(const char *target);
void remove_stonith_cleanup(const char *target);
void purge_stonith_cleanup(void);
void execute_stonith_cleanup(void);
// stonith history synchronization
-void te_trigger_stonith_history_sync(void);
+void te_trigger_stonith_history_sync(bool long_timeout);
+void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers);
#endif