diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index fdc238375e..a18076f7a3 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -1,284 +1,476 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <glib.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static attribute_t *
attrd_create_attribute(xmlNode *xml)
{
int is_private = 0;
long long dampen = 0;
const char *name = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *set_type = crm_element_value(xml, PCMK__XA_ATTR_SET_TYPE);
const char *dampen_s = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING);
attribute_t *a = NULL;
if (set_type == NULL) {
set_type = PCMK_XE_INSTANCE_ATTRIBUTES;
}
/* Set type is meaningful only when writing to the CIB. Private
* attributes are not written.
*/
crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &is_private);
if (!is_private && !pcmk__str_any_of(set_type,
PCMK_XE_INSTANCE_ATTRIBUTES,
PCMK_XE_UTILIZATION, NULL)) {
crm_warn("Ignoring attribute %s with invalid set type %s",
pcmk__s(name, "(unidentified)"), set_type);
return NULL;
}
a = pcmk__assert_alloc(1, sizeof(attribute_t));
a->id = pcmk__str_copy(name);
a->set_type = pcmk__str_copy(set_type);
a->set_id = crm_element_value_copy(xml, PCMK__XA_ATTR_SET);
a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER);
a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value);
if (is_private) {
attrd_set_attr_flags(a, attrd_attr_is_private);
}
if (dampen_s != NULL) {
dampen = crm_get_msec(dampen_s);
}
if (dampen > 0) {
a->timeout_ms = (int) QB_MIN(dampen, INT_MAX);
a->timer = attrd_add_timer(a->id, a->timeout_ms, a);
} else if (dampen < 0) {
crm_warn("Ignoring invalid delay %s for attribute %s", dampen_s, a->id);
}
crm_trace("Created attribute %s with %s write delay and %s CIB user",
a->id,
((dampen > 0)? pcmk__readable_interval(a->timeout_ms) : "no"),
pcmk__s(a->user, "default"));
g_hash_table_replace(attributes, a->id, a);
return a;
}
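An illustrative aside, not part of the patch: the dampening string above is parsed by crm_get_msec(), which accepts interval specifications with unit suffixes and returns a negative value for unparseable input, which is what the dampen < 0 check relies on. The example values here are hypothetical:
long long ms;
ms = crm_get_msec("5s");    /* 5000: seconds converted to milliseconds */
ms = crm_get_msec("750ms"); /* 750: already in milliseconds */
ms = crm_get_msec("bogus"); /* negative: treated as an invalid delay above */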
static int
attrd_update_dampening(attribute_t *a, xmlNode *xml, const char *attr)
{
const char *dvalue = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING);
long long dampen = 0;
if (dvalue == NULL) {
crm_warn("Could not update %s: peer did not specify value for delay",
attr);
return EINVAL;
}
dampen = crm_get_msec(dvalue);
if (dampen < 0) {
crm_warn("Could not update %s: invalid delay value %dms (%s)",
attr, dampen, dvalue);
return EINVAL;
}
if (a->timeout_ms != dampen) {
mainloop_timer_del(a->timer);
a->timeout_ms = (int) QB_MIN(dampen, INT_MAX);
if (dampen > 0) {
a->timer = attrd_add_timer(attr, a->timeout_ms, a);
crm_info("Update attribute %s delay to %dms (%s)",
attr, dampen, dvalue);
} else {
a->timer = NULL;
crm_info("Update attribute %s to remove delay", attr);
}
/* If dampening changed, do an immediate write-out,
* otherwise repeated dampening changes would prevent write-outs
*/
attrd_write_or_elect_attribute(a);
}
return pcmk_rc_ok;
}
GHashTable *attributes = NULL;
/*!
* \internal
* \brief Create an XML representation of an attribute for use in peer messages
*
* \param[in,out] parent Create attribute XML as child element of this
* \param[in] a Attribute to represent
* \param[in] v Attribute value to represent
* \param[in] force_write If true, value should be written even if unchanged
*
* \return XML representation of attribute
*/
xmlNode *
attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
const attribute_value_t *v, bool force_write)
{
xmlNode *xml = pcmk__xe_create(parent, __func__);
crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type);
crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
crm_xml_add(xml, PCMK__XA_ATTR_HOST, v->nodename);
/* @COMPAT Prior to 2.1.10 and 3.0.1, the node's cluster ID was added
* instead of its XML ID. For Corosync and Pacemaker Remote nodes, those are
* the same, but if we ever support node XML IDs that differ from their
* cluster IDs, we will have to drop support for rolling upgrades from
* versions before those.
*/
crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_get_node_xml_id(v->nodename));
crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING,
pcmk__timeout_ms2s(a->timeout_ms));
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE,
pcmk_is_set(a->flags, attrd_attr_is_private));
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE,
pcmk_is_set(v->flags, attrd_value_remote));
crm_xml_add_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, force_write);
return xml;
}
void
attrd_clear_value_seen(void)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a;
attribute_value_t *v = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
attrd_clear_value_flags(v, attrd_value_from_peer);
}
}
}
attribute_t *
attrd_populate_attribute(xmlNode *xml, const char *attr)
{
attribute_t *a = NULL;
bool update_both = false;
const char *op = crm_element_value(xml, PCMK_XA_TASK);
// NULL because PCMK__ATTRD_CMD_SYNC_RESPONSE has no PCMK_XA_TASK
update_both = pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH,
pcmk__str_null_matches);
// Look up or create attribute entry
a = g_hash_table_lookup(attributes, attr);
if (a == NULL) {
if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) {
a = attrd_create_attribute(xml);
if (a == NULL) {
return NULL;
}
} else {
crm_warn("Could not update %s: attribute not found", attr);
return NULL;
}
}
// Update attribute dampening
if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
int rc = attrd_update_dampening(a, xml, attr);
if (rc != pcmk_rc_ok || !update_both) {
return NULL;
}
}
return a;
}
/*!
* \internal
* \brief Get the XML ID used to write out an attribute set
*
* \param[in] attr Attribute to get set ID for
* \param[in] node_state_id XML ID of node state that attribute value is for
*
* \return Newly allocated string with XML ID to use for \p attr set
*/
char *
attrd_set_id(const attribute_t *attr, const char *node_state_id)
{
char *set_id = NULL;
pcmk__assert((attr != NULL) && (node_state_id != NULL));
if (pcmk__str_empty(attr->set_id)) {
/* @COMPAT This should really take the set type into account. Currently
* we use the same XML ID for transient attributes and utilization
* attributes. It doesn't cause problems because the status section is
* not limited by the schema in any way, but it's still unfortunate.
* For backward compatibility reasons, we can't change this.
*/
set_id = crm_strdup_printf("%s-%s", PCMK_XE_STATUS, node_state_id);
} else {
/* @COMPAT When the user specifies a set ID for an attribute, it is the
* same for every node. That is less than ideal, but again, the schema
* doesn't enforce anything for the status section. We couldn't change
* it without allowing the set ID to vary per value rather than per
* attribute, which would break backward compatibility, pose design
* challenges, and potentially cause problems in rolling upgrades.
*/
set_id = pcmk__str_copy(attr->set_id);
}
pcmk__xml_sanitize_id(set_id);
return set_id;
}
/*!
* \internal
* \brief Get the XML ID used to write out an attribute value
*
* \param[in] attr Attribute to get value XML ID for
* \param[in] node_state_id UUID of node that attribute value is for
*
* \return Newly allocated string with XML ID of \p attr value
*/
char *
attrd_nvpair_id(const attribute_t *attr, const char *node_state_id)
{
char *nvpair_id = NULL;
if (attr->set_id != NULL) {
nvpair_id = crm_strdup_printf("%s-%s", attr->set_id, attr->id);
} else {
nvpair_id = crm_strdup_printf(PCMK_XE_STATUS "-%s-%s",
node_state_id, attr->id);
}
pcmk__xml_sanitize_id(nvpair_id);
return nvpair_id;
}
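An illustrative sketch, not part of the patch, of the IDs these two helpers produce; the attribute name and node state ID are hypothetical:
attribute_t attr = { .id = "my-attr", .set_id = NULL };
char *set_id = attrd_set_id(&attr, "node1");       /* "status-node1" */
char *nvpair_id = attrd_nvpair_id(&attr, "node1"); /* "status-node1-my-attr" */
/* If attr.set_id were "my-set", the results would instead be "my-set" and
* "my-set-my-attr", identical for every node.
*/
free(set_id);
free(nvpair_id);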
+
+/*!
+ * \internal
+ * \brief Check whether an attribute is one that must be written to the CIB
+ *
+ * \param[in] a Attribute to check
+ *
+ * \return false if we are in standalone mode or \p a is private, otherwise true
+ */
+bool
+attrd_for_cib(const attribute_t *a)
+{
+ return !stand_alone && (a != NULL)
+ && !pcmk_is_set(a->flags, attrd_attr_is_private);
+}
+
+/*!
+ * \internal
+ * \brief Drop NULL attribute values as indicated by given function
+ *
+ * Drop all NULL node attribute values that a given function indicates should
+ * be dropped, based on the XML ID of an element that was removed from the CIB.
+ *
+ * \param[in] cib_id ID of XML element that was removed from CIB
+ * (a name/value pair, an attribute set, or a node state)
+ * \param[in] set_type If not NULL, drop only attributes with this set type
+ * \param[in] func Call this function for every attribute/value
+ * combination; keep the value if it returns 0, drop
+ * the value and keep checking the attribute's other
+ * values if it returns 1, or drop the value and stop
+ * checking entirely if it returns 2
+ */
+static void
+drop_removed_values(const char *cib_id, const char *set_type,
+ int (*func)(const attribute_t *, const attribute_value_t *,
+ const char *))
+{
+ attribute_t *a = NULL;
+ GHashTableIter attr_iter;
+ const char *entry_type = pcmk__s(set_type, "status entry"); // for log
+
+ CRM_CHECK((cib_id != NULL) && (func != NULL), return);
+
+ // Check every attribute ...
+ g_hash_table_iter_init(&attr_iter, attributes);
+ while (g_hash_table_iter_next(&attr_iter, NULL, (gpointer *) &a)) {
+ attribute_value_t *v = NULL;
+ GHashTableIter value_iter;
+
+ if (!attrd_for_cib(a)
+ || ((set_type != NULL)
+ && !pcmk__str_eq(a->set_type, set_type, pcmk__str_none))) {
+ continue;
+ }
+
+ // Check every value of the attribute ...
+ g_hash_table_iter_init(&value_iter, a->values);
+ while (g_hash_table_iter_next(&value_iter, NULL, (gpointer *) &v)) {
+ int rc = 0;
+
+ if (v->current != NULL) {
+ continue;
+ }
+
+ if (attrd_get_node_xml_id(v->nodename) == NULL) {
+ /* This shouldn't be a significant issue, since we will know the
+ * XML ID if *any* attribute for the node has ever been written.
+ */
+ crm_trace("Ignoring %s[%s] after CIB erasure of %s %s because "
+ "its node XML ID is unknown (possibly attribute was "
+ "never written to CIB)",
+ a->id, v->nodename, entry_type, cib_id);
+ continue;
+ }
+
+ rc = func(a, v, cib_id);
+ if (rc > 0) {
+ crm_debug("Dropping %s[%s] after CIB erasure of %s %s",
+ a->id, v->nodename, entry_type, cib_id);
+ g_hash_table_iter_remove(&value_iter);
+ if (rc > 1) {
+ return;
+ }
+ }
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether an attribute value has a given XML ID
+ *
+ * \param[in] a Attribute being checked
+ * \param[in] v Attribute value being checked
+ * \param[in] cib_id ID of name/value pair element that was removed from CIB
+ *
+ * \return 2 if value matches XML ID, otherwise 0
+ */
+static int
+nvpair_matches(const attribute_t *a, const attribute_value_t *v,
+ const char *cib_id)
+{
+ char *id = attrd_nvpair_id(a, attrd_get_node_xml_id(v->nodename));
+
+ /* The attribute manager doesn't enforce uniqueness for value XML IDs
+ * (schema validation could be disabled), but in practice they should be,
+ * so we can stop looping if we find a match.
+ */
+ int rc = pcmk__str_eq(id, cib_id, pcmk__str_none)? 2 : 0;
+
+ free(id);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Drop attribute value corresponding to given removed CIB entry
+ *
+ * \param[in] cib_id ID of name/value pair element that was removed from CIB
+ */
+void
+attrd_drop_removed_value(const char *cib_id)
+{
+ drop_removed_values(cib_id, NULL, nvpair_matches);
+}
+
+/*!
+ * \internal
+ * \brief Check whether an attribute value has a given attribute set ID
+ *
+ * \param[in] a Attribute being checked
+ * \param[in] v Attribute value being checked
+ * \param[in] cib_id ID of attribute set that was removed from CIB
+ *
+ * \return 1 if value matches XML ID, otherwise 0
+ */
+static int
+set_id_matches(const attribute_t *a, const attribute_value_t *v,
+ const char *cib_id)
+{
+ char *id = attrd_set_id(a, attrd_get_node_xml_id(v->nodename));
+ int rc = pcmk__str_eq(id, cib_id, pcmk__str_none)? 1: 0;
+
+ free(id);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Drop all removed attribute values for an attribute set
+ *
+ * \param[in] set_type XML element name of set that was removed
+ * \param[in] cib_id ID of attribute set that was removed from CIB
+ */
+void
+attrd_drop_removed_set(const char *set_type, const char *cib_id)
+{
+ drop_removed_values(cib_id, set_type, set_id_matches);
+}
+
+/*!
+ * \internal
+ * \brief Check whether an attribute value has a given node state XML ID
+ *
+ * \param[in] a Attribute being checked
+ * \param[in] v Attribute value being checked
+ * \param[in] cib_id ID of node state that was removed from CIB
+ *
+ * \return 1 if value matches removed ID, otherwise 0
+ */
+static int
+node_matches(const attribute_t *a, const attribute_value_t *v,
+ const char *cib_id)
+{
+ if (pcmk__str_eq(cib_id, attrd_get_node_xml_id(v->nodename),
+ pcmk__str_none)) {
+ return 1;
+ }
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Drop all removed attribute values for a node
+ *
+ * \param[in] cib_id ID of node state that was removed from CIB
+ */
+void
+attrd_drop_removed_values(const char *cib_id)
+{
+ drop_removed_values(cib_id, NULL, node_matches);
+}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 4231e4a668..588e779348 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -1,712 +1,871 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
+#include <sys/types.h> // for regex.h
#include <errno.h>
+#include <string.h> // strndup()
+#include <regex.h> // regcomp(), regexec(), regex_t, regmatch_t, regoff_t
#include <stdbool.h>
#include <stdlib.h>
#include <glib.h>
#include <crm/cib/internal.h> // cib__*
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h> // pcmk__get_node()
#include "pacemaker-attrd.h"
static int last_cib_op_done = 0;
static void write_attribute(attribute_t *a, bool ignore_delay);
static void
attrd_cib_destroy_cb(gpointer user_data)
{
cib_t *cib = user_data;
cib->cmds->signoff(cib);
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Disconnected from the CIB manager");
} else {
// @TODO This should trigger a reconnect, not a shutdown
crm_crit("Lost connection to the CIB manager, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
}
+/* In a CIB patchset, deletions have the XPath to the deleted element, like:
+ *
+ * /cib/status/node_state[@id='X']
+ *
+ * This regular expression checks whether a node state was deleted, or transient
+ * attributes beneath that, or an attribute set beneath that, or a name/value
+ * pair beneath that.
+ */
+#define ID_REGEX "\\[@" PCMK_XA_ID "='([^']+)'\\]"
+#define DELETION_REGEX "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
+ "/" PCMK__XE_NODE_STATE ID_REGEX \
+ "(/" PCMK__XE_TRANSIENT_ATTRIBUTES ID_REGEX ")?" \
+ "(/([^[/]+)" ID_REGEX ")?" "(/" PCMK_XE_NVPAIR ID_REGEX ")?$"
+
+// Number of parenthesized submatches in DELETION_REGEX plus 1 for entire match
+#define DELETION_NMATCH 9
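As a sketch of what the pattern accepts (all IDs here are hypothetical, and instance_attributes could equally be utilization):
/* /cib/status/node_state[@id='1']
*     entire node state removed: submatch 1 = node state ID
* /cib/status/node_state[@id='1']/transient_attributes[@id='1']
*     all of the node's transient attributes removed
* /cib/status/node_state[@id='1']/transient_attributes[@id='1']
*         /instance_attributes[@id='status-1']
*     one attribute set removed: submatches 5 and 6 = set element name and ID
* /cib/status/node_state[@id='1']/transient_attributes[@id='1']
*         /instance_attributes[@id='status-1']/nvpair[@id='status-1-foo']
*     single name/value pair removed: submatch 8 = nvpair ID
*/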
+
+/*!
+ * \internal
+ * \brief Duplicate a regular expression submatch
+ *
+ * \param[in] string String being matched against a regular expression
+ * \param[in] matches Submatches, as determined by regexec()
+ * \param[in] submatch Desired index into \p matches
+ *
+ * \return Newly allocated string with desired submatch
+ * \note This asserts on allocation failure, so the result is guaranteed to be
+ * non-NULL.
+ */
+static char *
+re_submatch(const char *string, regmatch_t matches[], size_t submatch)
+{
+ const regoff_t start = matches[submatch].rm_so;
+ char *match = strndup(string + start, matches[submatch].rm_eo - start);
+
+ pcmk__mem_assert(match);
+ return match;
+}
+
+/*!
+ * \internal
+ * \brief Check a patchset change for deletion of node attribute values
+ *
+ * \param[in] xml Patchset change element
+ * \param[in] data Ignored
+ *
+ * \return pcmk_rc_ok (to always continue to next patchset change)
+ */
+static int
+drop_values_in_deletion(xmlNode *xml, void *data)
+{
+ const char *value = NULL;
+ char *id = NULL;
+ regmatch_t matches[DELETION_NMATCH];
+
+ static regex_t re;
+ static bool re_compiled = false;
+
+ // Skip this change if it does not look like a deletion
+ value = crm_element_value(xml, PCMK_XA_OPERATION);
+ if (!pcmk__str_eq(value, "delete", pcmk__str_none)) {
+ return pcmk_rc_ok;
+ }
+ value = crm_element_value(xml, PCMK_XA_PATH);
+ if (value == NULL) {
+ crm_warn("Ignoring malformed deletion in "
+ "CIB change notification: No " PCMK_XA_PATH);
+ return pcmk_rc_ok;
+ }
+
+ // Check whether deleted XPath could contain node attribute values
+ if (!re_compiled) {
+ CRM_CHECK(regcomp(&re, DELETION_REGEX, REG_EXTENDED) == 0,
+ return pcmk_rc_ok);
+ re_compiled = true;
+ }
+ if (regexec(&re, value, DELETION_NMATCH, matches, 0) != 0) {
+ return pcmk_rc_ok; // This is not an attribute deletion
+ }
+
+ /* matches[0] = entire node state match
+ * matches[1] = node state ID
+ *
+ * Optional:
+ * matches[2] = transient attributes element with ID
+ * matches[3] = transient attributes ID
+ * matches[4] = attribute set element with ID
+ * matches[5] = attribute set element name
+ * matches[6] = attribute set ID
+ * matches[7] = name/value pair element with ID
+ * matches[8] = name/value pair ID
+ */
+
+ // If we get here, we must have matched at least node state and its ID
+ CRM_CHECK((matches[0].rm_so == 0) && (matches[1].rm_so > 0),
+ return pcmk_rc_ok);
+
+ /* Check whether all of node's attributes were deleted (if no matches[2],
+ * entire node state was deleted; if no matches[4], entire
+ * transient attributes section was deleted)
+ */
+ if ((matches[2].rm_so < 0) || (matches[4].rm_so < 0)) {
+ id = re_submatch(value, matches, 1);
+ attrd_drop_removed_values(id);
+ free(id);
+ return pcmk_rc_ok;
+ }
+
+ /* We matched transient attributes, so we must have its ID, as well as an
+ * attribute set, so we must have its element name and ID
+ */
+ CRM_CHECK((matches[3].rm_so > 0) && (matches[5].rm_so > 0)
+ && (matches[6].rm_so > 0), return pcmk_rc_ok);
+
+ // Check whether the entire set was deleted
+ if (matches[7].rm_so < 0) {
+ char *set_type = re_submatch(value, matches, 5);
+
+ id = re_submatch(value, matches, 6);
+ attrd_drop_removed_set(set_type, id);
+ free(id);
+ free(set_type);
+ return pcmk_rc_ok;
+ }
+
+ // We matched a single name/value pair, so we must have its ID
+ CRM_CHECK(matches[8].rm_so > 0, return pcmk_rc_ok);
+
+ // Drop the one value
+ id = re_submatch(value, matches, 8);
+ attrd_drop_removed_value(id);
+ free(id);
+ return pcmk_rc_ok;
+}
+
static void
attrd_cib_updated_cb(const char *event, xmlNode *msg)
{
const xmlNode *patchset = NULL;
const char *client_name = NULL;
bool status_changed = false;
if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) {
return;
}
if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS)) {
- if (attrd_shutting_down(true)) {
+ if (attrd_shutting_down()) {
crm_debug("Ignoring alerts change in CIB during shutdown");
} else {
mainloop_set_trigger(attrd_config_read);
}
}
status_changed = pcmk__cib_element_in_patchset(patchset, PCMK_XE_STATUS);
client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME);
if (!cib__client_triggers_refresh(client_name)) {
/* This change came from a source that ensured the CIB is consistent
* with our attributes table, so we don't need to write anything out.
+ * If a removed attribute has been erased, we can forget it now.
*/
+ int format = 1;
+
+ if ((crm_element_value_int(patchset, PCMK_XA_FORMAT, &format) != 0)
+ || (format != 2)) {
+ crm_warn("Can't handle CIB patch format %d", format);
+ return;
+ }
+
+ /* This won't modify patchset, but we need to break const to match the
+ * function signature.
+ */
+ pcmk__xe_foreach_child((xmlNode *) patchset, PCMK_XE_CHANGE,
+ drop_values_in_deletion, NULL);
return;
}
if (!attrd_election_won()) {
// Don't write attributes if we're not the writer
return;
}
if (status_changed
|| pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES)) {
- if (attrd_shutting_down(true)) {
+ if (attrd_shutting_down()) {
crm_debug("Ignoring node change in CIB during shutdown");
return;
}
/* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS
* section. Write transient attributes to ensure they're up-to-date in
* the CIB.
*/
if (client_name == NULL) {
client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTID);
}
crm_notice("Updating all attributes after %s event triggered by %s",
event, pcmk__s(client_name, "unidentified client"));
attrd_write_attributes(attrd_write_all);
}
}
int
attrd_cib_connect(int max_retry)
{
static int attempts = 0;
int rc = -ENOTCONN;
the_cib = cib_new();
if (the_cib == NULL) {
return -ENOTCONN;
}
do {
if (attempts > 0) {
sleep(attempts);
}
attempts++;
crm_debug("Connection attempt %d to the CIB manager", attempts);
rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command);
} while ((rc != pcmk_ok) && (attempts < max_retry));
if (rc != pcmk_ok) {
crm_err("Connection to the CIB manager failed: %s " QB_XS " rc=%d",
pcmk_strerror(rc), rc);
goto cleanup;
}
crm_debug("Connected to the CIB manager after %d attempts", attempts);
rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb);
if (rc != pcmk_ok) {
crm_err("Could not set disconnection callback");
goto cleanup;
}
rc = the_cib->cmds->add_notify_callback(the_cib,
PCMK__VALUE_CIB_DIFF_NOTIFY,
attrd_cib_updated_cb);
if (rc != pcmk_ok) {
crm_err("Could not set CIB notification callback");
goto cleanup;
}
return pcmk_ok;
cleanup:
cib__clean_up_connection(&the_cib);
return -ENOTCONN;
}
void
attrd_cib_disconnect(void)
{
CRM_CHECK(the_cib != NULL, return);
the_cib->cmds->del_notify_callback(the_cib, PCMK__VALUE_CIB_DIFF_NOTIFY,
attrd_cib_updated_cb);
cib__clean_up_connection(&the_cib);
mainloop_destroy_trigger(attrd_config_read);
}
static void
attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
const char *node = pcmk__s((const char *) user_data, "a node");
if (rc == pcmk_ok) {
crm_info("Cleared transient node attributes for %s from CIB", node);
} else {
crm_err("Unable to clear transient node attributes for %s from CIB: %s",
node, pcmk_strerror(rc));
}
}
#define XPATH_TRANSIENT "//" PCMK__XE_NODE_STATE \
"[@" PCMK_XA_UNAME "='%s']" \
"/" PCMK__XE_TRANSIENT_ATTRIBUTES
/*!
* \internal
* \brief Wipe all transient node attributes for a node from the CIB
*
* \param[in] node Node to clear attributes for
*/
void
attrd_cib_erase_transient_attrs(const char *node)
{
int call_id = 0;
char *xpath = NULL;
CRM_CHECK(node != NULL, return);
xpath = crm_strdup_printf(XPATH_TRANSIENT, node);
crm_debug("Clearing transient node attributes for %s from CIB using %s",
node, xpath);
call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath);
free(xpath);
the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE,
pcmk__str_copy(node),
"attrd_erase_cb", attrd_erase_cb,
free);
}
/*!
* \internal
* \brief Prepare the CIB after cluster is connected
*/
void
attrd_cib_init(void)
{
/* We have no attribute values in memory, so wipe the CIB to match. This is
- * normally done by the DC's controller when this node leaves the cluster, but
- * this handles the case where the node restarted so quickly that the
+ * normally done by the writer when this node leaves the cluster, but this
+ * handles the case where the node restarted so quickly that the
* cluster layer didn't notice.
*
* \todo If the attribute manager respawns after crashing (see
* PCMK_ENV_RESPAWNED), ideally we'd skip this and sync our attributes
* from the writer. However, currently we reject any values for us
* that the writer has, in attrd_peer_update().
*/
attrd_cib_erase_transient_attrs(attrd_cluster->priv->node_name);
// Set a trigger for reading the CIB (for the alerts section)
attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL);
// Always read the CIB at start-up
mainloop_set_trigger(attrd_config_read);
}
static gboolean
attribute_timer_cb(gpointer data)
{
attribute_t *a = data;
crm_trace("Dampen interval expired for %s", a->id);
attrd_write_or_elect_attribute(a);
return FALSE;
}
static void
attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data)
{
int level = LOG_ERR;
GHashTableIter iter;
const char *peer = NULL;
attribute_value_t *v = NULL;
char *name = user_data;
attribute_t *a = g_hash_table_lookup(attributes, name);
if(a == NULL) {
crm_info("Attribute %s no longer exists", name);
return;
}
a->update = 0;
if (rc == pcmk_ok && call_id < 0) {
rc = call_id;
}
switch (rc) {
case pcmk_ok:
level = LOG_INFO;
last_cib_op_done = call_id;
if (a->timer && !a->timeout_ms) {
// Remove temporary dampening for failed writes
mainloop_timer_del(a->timer);
a->timer = NULL;
}
break;
case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */
case -ETIME: /* When an attr changes while there is a DC election */
case -ENXIO: /* When an attr changes while the CIB is syncing a
* newer config from a node that just came up
*/
level = LOG_WARNING;
break;
}
do_crm_log(level, "CIB update %d result for %s: %s " QB_XS " rc=%d",
call_id, a->id, pcmk_strerror(rc), rc);
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) {
if (rc == pcmk_ok) {
crm_info("* Wrote %s[%s]=%s",
a->id, peer, pcmk__s(v->requested, "(unset)"));
pcmk__str_update(&(v->requested), NULL);
} else {
do_crm_log(level, "* Could not write %s[%s]=%s",
a->id, peer, pcmk__s(v->requested, "(unset)"));
/* Reattempt write below if we are still the writer */
attrd_set_attr_flags(a, attrd_attr_changed);
}
}
if (pcmk_is_set(a->flags, attrd_attr_changed) && attrd_election_won()) {
if (rc == pcmk_ok) {
/* We deferred a write of a new update because this update was in
* progress. Write out the new value without additional delay.
*/
crm_debug("Pending update for %s can be written now", a->id);
write_attribute(a, false);
/* We're re-attempting a write because the original failed; delay
* the next attempt so we don't potentially flood the CIB manager
* and logs with a zillion attempts per second.
*
* @TODO We could elect a new writer instead. However, we'd have to
* somehow downgrade our vote, and we'd still need something like this
* if all peers similarly fail to write this attribute (which may
* indicate a corrupted attribute entry rather than a CIB issue).
*/
} else if (a->timer) {
// Attribute has a dampening value, so use that as delay
if (!mainloop_timer_running(a->timer)) {
crm_trace("Delayed re-attempted write for %s by %s",
name, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
}
} else {
/* Set a temporary dampening of 2 seconds (timer will continue
* to exist until the attribute's dampening gets set or the
* write succeeds).
*/
a->timer = attrd_add_timer(a->id, 2000, a);
mainloop_timer_start(a->timer);
}
}
}
/*!
* \internal
* \brief Add a set-attribute update request to the current CIB transaction
*
* \param[in] attr Attribute to update
* \param[in] attr_id ID of attribute to update
* \param[in] node_id ID of node for which to update attribute value
* \param[in] set_id ID of attribute set
* \param[in] value New value for attribute
*
* \return Standard Pacemaker return code
*/
static int
add_set_attr_update(const attribute_t *attr, const char *attr_id,
const char *node_id, const char *set_id, const char *value)
{
xmlNode *update = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
xmlNode *child = update;
int rc = ENOMEM;
crm_xml_add(child, PCMK_XA_ID, node_id);
child = pcmk__xe_create(child, PCMK__XE_TRANSIENT_ATTRIBUTES);
crm_xml_add(child, PCMK_XA_ID, node_id);
child = pcmk__xe_create(child, attr->set_type);
crm_xml_add(child, PCMK_XA_ID, set_id);
child = pcmk__xe_create(child, PCMK_XE_NVPAIR);
crm_xml_add(child, PCMK_XA_ID, attr_id);
crm_xml_add(child, PCMK_XA_NAME, attr->id);
crm_xml_add(child, PCMK_XA_VALUE, value);
rc = the_cib->cmds->modify(the_cib, PCMK_XE_STATUS, update,
cib_can_create|cib_transaction);
rc = pcmk_legacy2rc(rc);
pcmk__xml_free(update);
return rc;
}
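A sketch of the status update this function queues, with hypothetical IDs (the set element name comes from attr->set_type, typically instance_attributes):
/* <node_state id="1">
*   <transient_attributes id="1">
*     <instance_attributes id="status-1">
*       <nvpair id="status-1-my-attr" name="my-attr" value="5"/>
*     </instance_attributes>
*   </transient_attributes>
* </node_state>
*/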
/*!
* \internal
* \brief Add an unset-attribute update request to the current CIB transaction
*
* \param[in] attr Attribute to update
* \param[in] attr_id ID of attribute to update
* \param[in] node_id ID of node for which to update attribute value
* \param[in] set_id ID of attribute set
*
* \return Standard Pacemaker return code
*/
static int
add_unset_attr_update(const attribute_t *attr, const char *attr_id,
const char *node_id, const char *set_id)
{
char *xpath = crm_strdup_printf("/" PCMK_XE_CIB
"/" PCMK_XE_STATUS
"/" PCMK__XE_NODE_STATE
"[@" PCMK_XA_ID "='%s']"
"/" PCMK__XE_TRANSIENT_ATTRIBUTES
"[@" PCMK_XA_ID "='%s']"
"/%s[@" PCMK_XA_ID "='%s']"
"/" PCMK_XE_NVPAIR
"[@" PCMK_XA_ID "='%s' "
"and @" PCMK_XA_NAME "='%s']",
node_id, node_id, attr->set_type, set_id,
attr_id, attr->id);
int rc = the_cib->cmds->remove(the_cib, xpath, NULL,
cib_xpath|cib_transaction);
free(xpath);
return pcmk_legacy2rc(rc);
}
/*!
* \internal
* \brief Add an attribute update request to the current CIB transaction
*
* \param[in] attr Attribute to update
* \param[in] value New value for attribute
* \param[in] node_id ID of node for which to update attribute value
*
* \return Standard Pacemaker return code
*/
static int
add_attr_update(const attribute_t *attr, const char *value, const char *node_id)
{
char *set_id = attrd_set_id(attr, node_id);
char *nvpair_id = attrd_nvpair_id(attr, node_id);
int rc = pcmk_rc_ok;
if (value == NULL) {
rc = add_unset_attr_update(attr, nvpair_id, node_id, set_id);
} else {
rc = add_set_attr_update(attr, nvpair_id, node_id, set_id, value);
}
free(set_id);
free(nvpair_id);
return rc;
}
static void
send_alert_attributes_value(attribute_t *a, GHashTable *t)
{
int rc = 0;
attribute_value_t *at = NULL;
GHashTableIter vIter;
g_hash_table_iter_init(&vIter, t);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) {
const char *node_xml_id = attrd_get_node_xml_id(at->nodename);
rc = attrd_send_attribute_alert(at->nodename, node_xml_id,
a->id, at->current);
crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s "
"(%s agents failed)",
a->id, at->nodename, at->current,
pcmk__s(node_xml_id, "unknown"),
((rc == 0)? "no" : ((rc == -1)? "some" : "all")));
}
}
static void
set_alert_attribute_value(GHashTable *t, attribute_value_t *v)
{
attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
a_v->nodename = pcmk__str_copy(v->nodename);
a_v->current = pcmk__str_copy(v->current);
g_hash_table_replace(t, a_v->nodename, a_v);
}
mainloop_timer_t *
attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr)
{
return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr);
}
/*!
* \internal
* \brief Write an attribute's values to the CIB if appropriate
*
* \param[in,out] a Attribute to write
* \param[in] ignore_delay If true, write attribute now regardless of any
* configured delay
*/
static void
write_attribute(attribute_t *a, bool ignore_delay)
{
int private_updates = 0, cib_updates = 0;
attribute_value_t *v = NULL;
GHashTableIter iter;
GHashTable *alert_attribute_value = NULL;
int rc = pcmk_ok;
- bool should_write = true;
+ bool should_write = attrd_for_cib(a);
if (a == NULL) {
return;
}
- // Private attributes (or any in standalone mode) are not written to the CIB
- if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
- should_write = false;
- }
-
- /* If this attribute will be written to the CIB ... */
if (should_write) {
/* Defer the write if now's not a good time */
if (a->update && (a->update < last_cib_op_done)) {
crm_info("Write out of '%s' continuing: update %d considered lost",
a->id, a->update);
a->update = 0; // Don't log this message again
} else if (a->update) {
crm_info("Write out of '%s' delayed: update %d in progress",
a->id, a->update);
goto done;
} else if (mainloop_timer_running(a->timer)) {
if (ignore_delay) {
mainloop_timer_stop(a->timer);
crm_debug("Overriding '%s' write delay", a->id);
} else {
crm_info("Delaying write of '%s'", a->id);
goto done;
}
}
// Initiate a transaction for all the peer value updates
CRM_CHECK(the_cib != NULL, goto done);
the_cib->cmds->set_user(the_cib, a->user);
rc = the_cib->cmds->init_transaction(the_cib);
if (rc != pcmk_ok) {
crm_err("Failed to write %s (set %s): Could not initiate "
"CIB transaction",
a->id, pcmk__s(a->set_id, "unspecified"));
goto done;
}
}
/* The changed and force-write flags apply only to the next write,
* which this is, so clear them now. Also clear the "node unknown" flag
* because we will check whether it is known below and reset if appropriate.
*/
attrd_clear_attr_flags(a, attrd_attr_changed
|attrd_attr_force_write
|attrd_attr_node_unknown);
/* Build a table of the values written, for sending alerts later */
alert_attribute_value = pcmk__strikey_table(NULL,
attrd_free_attribute_value);
/* Iterate over each peer value of this attribute */
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
const char *node_xml_id = NULL;
const char *prev_xml_id = NULL;
+ pcmk__node_status_t *peer = NULL;
if (!should_write) {
private_updates++;
continue;
}
/* We need the node's CIB XML ID to write out its attributes, so look
* for it now. Check the node caches first, even if the ID was
* previously known (in case it changed), but use any previous value as
* a fallback.
*/
prev_xml_id = attrd_get_node_xml_id(v->nodename);
if (pcmk_is_set(v->flags, attrd_value_remote)) {
// A Pacemaker Remote node's XML ID is the same as its name
node_xml_id = v->nodename;
+ } else if (v->current == NULL) {
+ /* If a value was removed, check the caches for the node XML ID,
+ * but don't create a new cache entry. We don't want to re-create a
+ * purged node.
+ */
+ peer = pcmk__search_node_caches(0, v->nodename, prev_xml_id,
+ pcmk__node_search_any
+ |pcmk__node_search_cluster_cib);
+ node_xml_id = pcmk__cluster_get_xml_id(peer);
+ if (node_xml_id == NULL) {
+ node_xml_id = prev_xml_id;
+ }
+
} else {
// This creates a cluster node cache entry if none exists
- pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename,
- prev_xml_id,
- pcmk__node_search_any);
-
+ peer = pcmk__get_node(0, v->nodename, prev_xml_id,
+ pcmk__node_search_any);
node_xml_id = pcmk__cluster_get_xml_id(peer);
if (node_xml_id == NULL) {
node_xml_id = prev_xml_id;
}
}
// Defer write if this is a cluster node that's never been seen
if (node_xml_id == NULL) {
attrd_set_attr_flags(a, attrd_attr_node_unknown);
crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID "
"is unknown (will retry if learned)",
a->id, v->nodename, v->current);
continue;
}
if (!pcmk__str_eq(prev_xml_id, node_xml_id, pcmk__str_none)) {
crm_trace("Setting %s[%s] node XML ID to %s (was %s)",
a->id, v->nodename, node_xml_id,
pcmk__s(prev_xml_id, "unknown"));
attrd_set_node_xml_id(v->nodename, node_xml_id);
}
// Update this value as part of the CIB transaction we're building
rc = add_attr_update(a, v->current, node_xml_id);
if (rc != pcmk_rc_ok) {
crm_err("Couldn't add %s[%s]='%s' to CIB transaction: %s "
QB_XS " node XML ID %s",
- a->id, v->nodename, v->current, pcmk_rc_str(rc),
- node_xml_id);
+ a->id, v->nodename, pcmk__s(v->current, "(unset)"),
+ pcmk_rc_str(rc), node_xml_id);
continue;
}
crm_debug("Added %s[%s]=%s to CIB transaction (node XML ID %s)",
a->id, v->nodename, pcmk__s(v->current, "(unset)"),
node_xml_id);
cib_updates++;
/* Save the value so an alert can be sent for it */
set_alert_attribute_value(alert_attribute_value, v);
// Save this value so we can log it when write completes
pcmk__str_update(&(v->requested), v->current);
}
if (private_updates) {
crm_info("Processed %d private change%s for %s (set %s)",
private_updates, pcmk__plural_s(private_updates),
a->id, pcmk__s(a->set_id, "unspecified"));
}
if (cib_updates > 0) {
char *id = pcmk__str_copy(a->id);
// Commit transaction
a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none);
crm_info("Sent CIB request %d with %d change%s for %s (set %s)",
a->update, cib_updates, pcmk__plural_s(cib_updates),
a->id, pcmk__s(a->set_id, "unspecified"));
if (the_cib->cmds->register_callback_full(the_cib, a->update,
CIB_OP_TIMEOUT_S, FALSE, id,
"attrd_cib_callback",
attrd_cib_callback, free)) {
// Send alerts for the attribute values
+ // @TODO Do this in callback only if write was successful
send_alert_attributes_value(a, alert_attribute_value);
}
}
done:
// Discard transaction (if any)
if (the_cib != NULL) {
the_cib->cmds->end_transaction(the_cib, false, cib_none);
the_cib->cmds->set_user(the_cib, NULL);
}
if (alert_attribute_value != NULL) {
g_hash_table_destroy(alert_attribute_value);
}
}
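Condensed to its CIB manager calls, the transaction pattern used above looks like this sketch (iteration and error handling omitted):
the_cib->cmds->set_user(the_cib, a->user);
the_cib->cmds->init_transaction(the_cib);
/* ... queue one modify() or remove() per value with cib_transaction set ... */
a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none); /* commit */
/* in the done path, discard anything still queued and reset the ACL user */
the_cib->cmds->end_transaction(the_cib, false, cib_none);
the_cib->cmds->set_user(the_cib, NULL);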
/*!
* \internal
* \brief Write out attributes
*
* \param[in] options Group of enum attrd_write_options
*/
void
attrd_write_attributes(uint32_t options)
{
GHashTableIter iter;
attribute_t *a = NULL;
crm_debug("Writing out %s attributes",
pcmk_is_set(options, attrd_write_all)? "all" : "changed");
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
if (!pcmk_is_set(options, attrd_write_all)
&& pcmk_is_set(a->flags, attrd_attr_node_unknown)) {
// Try writing this attribute again, in case peer ID was learned
attrd_set_attr_flags(a, attrd_attr_changed);
} else if (pcmk_is_set(a->flags, attrd_attr_force_write)) {
/* If the force_write flag is set, write the attribute. */
attrd_set_attr_flags(a, attrd_attr_changed);
}
if (pcmk_is_set(options, attrd_write_all) ||
pcmk_is_set(a->flags, attrd_attr_changed)) {
bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay);
if (pcmk_is_set(a->flags, attrd_attr_force_write)) {
// Always ignore delay when forced write flag is set
ignore_delay = true;
}
write_attribute(a, ignore_delay);
} else {
crm_trace("Skipping unchanged attribute %s", a->id);
}
}
}
void
attrd_write_or_elect_attribute(attribute_t *a)
{
if (attrd_election_won()) {
write_attribute(a, false);
} else {
attrd_start_election_if_needed();
}
}
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index e681fb24d4..af4bc015de 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -1,633 +1,645 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static xmlNode *
attrd_confirmation(int callid)
{
xmlNode *node = pcmk__xe_create(NULL, __func__);
crm_xml_add(node, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(node, PCMK__XA_SRC, pcmk__cluster_local_node_name());
crm_xml_add(node, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM);
crm_xml_add_int(node, PCMK__XA_CALL_ID, callid);
return node;
}
static void
attrd_peer_message(pcmk__node_status_t *peer, xmlNode *xml)
{
const char *election_op = crm_element_value(xml, PCMK__XA_CRM_TASK);
if (election_op) {
attrd_handle_election_op(peer, xml);
return;
}
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
/* If we're shutting down, we want to continue responding to election
* ops as long as we're a cluster member (because our vote may be
* needed). Ignore all other messages.
*/
return;
} else {
pcmk__request_t request = {
.ipc_client = NULL,
.ipc_id = 0,
.ipc_flags = 0,
.peer = peer->name,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
CRM_CHECK(request.op != NULL, return);
attrd_handle_request(&request);
/* Having finished handling the request, check to see if the originating
* peer requested confirmation. If so, send that confirmation back now.
*/
if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) &&
!pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
int callid = 0;
xmlNode *reply = NULL;
/* Add the confirmation ID for the message we are confirming to the
* response so the originating peer knows what they're a confirmation
* for.
*/
crm_element_value_int(xml, PCMK__XA_CALL_ID, &callid);
reply = attrd_confirmation(callid);
/* And then send the confirmation back to the originating peer. This
* ends up right back in this same function (attrd_peer_message) on the
* peer where it will have to do something with a PCMK__XA_CONFIRM type
* message.
*/
crm_debug("Sending %s a confirmation", peer->name);
attrd_send_message(peer, reply, false);
pcmk__xml_free(reply);
}
pcmk__reset_request(&request);
}
}
static void
attrd_cpg_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);
if(data == NULL) {
return;
}
xml = pcmk__xml_parse(data);
if (xml == NULL) {
crm_err("Bad message received from %s[%" PRIu32 "]: '%.120s'",
from, nodeid, data);
} else {
attrd_peer_message(pcmk__get_node(nodeid, from, NULL,
pcmk__node_search_cluster_member),
xml);
}
pcmk__xml_free(xml);
free(data);
}
static void
attrd_cpg_destroy(gpointer unused)
{
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Disconnected from Corosync process group");
} else {
crm_crit("Lost connection to Corosync process group, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
}
/*!
* \internal
* \brief Broadcast an update for a single attribute value
*
* \param[in] a Attribute to broadcast
* \param[in] v Attribute value to broadcast
*/
void
attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v)
{
xmlNode *op = pcmk__xe_create(NULL, PCMK_XE_OP);
crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
attrd_add_value_xml(op, a, v, false);
attrd_send_message(NULL, op, false);
pcmk__xml_free(op);
}
#define state_text(state) pcmk__s((state), "in unknown state")
static void
attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer,
const void *data)
{
bool gone = false;
bool is_remote = pcmk_is_set(peer->flags, pcmk__node_status_remote);
switch (kind) {
case pcmk__node_update_name:
crm_debug("%s node %s[%" PRIu32 "] is now %s",
(is_remote? "Remote" : "Cluster"),
pcmk__s(peer->name, "unknown"), peer->cluster_layer_id,
state_text(peer->state));
break;
case pcmk__node_update_processes:
if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
gone = true;
}
crm_debug("Node %s[%" PRIu32 "] is %s a peer",
pcmk__s(peer->name, "unknown"), peer->cluster_layer_id,
(gone? "no longer" : "now"));
break;
case pcmk__node_update_state:
crm_debug("%s node %s[%" PRIu32 "] is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
pcmk__s(peer->name, "unknown"), peer->cluster_layer_id,
state_text(peer->state), state_text(data));
if (pcmk__str_eq(peer->state, PCMK_VALUE_MEMBER, pcmk__str_none)) {
/* If we're the writer, send new peers a list of all attributes
* (unless it's a remote node, which doesn't run its own attrd)
*/
if (!is_remote) {
if (attrd_election_won()) {
attrd_peer_sync(peer);
} else {
// Send a message anyway, so that the peer learns our name
attrd_send_protocol(peer);
}
}
} else {
// Remove all attribute values associated with lost nodes
if (peer->name != NULL) {
attrd_peer_remove(peer->name, false, "loss");
}
gone = true;
}
break;
}
// Remove votes from cluster nodes that leave, in case election in progress
if (gone && !is_remote && peer->name != NULL) {
attrd_remove_voter(peer);
attrd_remove_peer_protocol_ver(peer->name);
attrd_do_not_expect_from_peer(peer->name);
}
}
#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")
#define readable_peer(p) \
(((p) == NULL)? "all peers" : pcmk__s((p)->name, "unknown peer"))
static void
update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
const xmlNode *xml, const char *attr, const char *value,
const char *host, bool filter)
{
int is_remote = 0;
bool changed = false;
attribute_value_t *v = NULL;
const char *prev_xml_id = NULL;
const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID);
// Create entry for value if not already existing
v = g_hash_table_lookup(a->values, host);
if (v == NULL) {
v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
v->nodename = pcmk__str_copy(host);
g_hash_table_replace(a->values, v->nodename, v);
}
/* If update doesn't contain the node XML ID, fall back to any previously
* known value (for logging)
*/
prev_xml_id = attrd_get_node_xml_id(v->nodename);
if (node_xml_id == NULL) {
node_xml_id = prev_xml_id;
}
// If value is for a Pacemaker Remote node, remember that
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
if (is_remote) {
attrd_set_value_flags(v, attrd_value_remote);
pcmk__assert(pcmk__cluster_lookup_remote_node(host) != NULL);
}
// Check whether the value changed
changed = !pcmk__str_eq(v->current, value, pcmk__str_casei);
if (changed && filter
&& pcmk__str_eq(host, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
/* Broadcast the local value for an attribute that differs from the
* value provided in a peer's attribute synchronization response. This
* ensures a node's values for itself take precedence and all peers are
* kept in sync.
*/
v = g_hash_table_lookup(a->values, attrd_cluster->priv->node_name);
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
attr, host, readable_value(v), value, peer->name);
attrd_broadcast_value(a, v);
} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
QB_XS " from %s with %s write delay and node XML ID %s",
attr, host, a->set_type ? " in " : "",
pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->name,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms),
pcmk__s(node_xml_id, "unknown"));
pcmk__str_update(&v->current, value);
attrd_set_attr_flags(a, attrd_attr_changed);
- if (pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei)
- && pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) {
-
- if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
- attrd_set_requesting_shutdown();
-
- } else {
- attrd_clear_requesting_shutdown();
- }
- }
-
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
crm_trace("Delaying write of %s %s for dampening",
attr, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
} else {
attrd_write_or_elect_attribute(a);
}
} else {
int is_force_write = 0;
crm_element_value_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE,
&is_force_write);
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Remember the forced write and set the change flag; the attribute
* is actually written by the writer after the election. */
crm_trace("%s[%s] from %s is unchanged (%s), forcing write",
attr, host, peer->name, pcmk__s(value, "unset"));
attrd_set_attr_flags(a, attrd_attr_force_write);
} else {
crm_trace("%s[%s] from %s is unchanged (%s)",
attr, host, peer->name, pcmk__s(value, "unset"));
}
}
// This allows us to later detect local values that peer doesn't know about
attrd_set_value_flags(v, attrd_value_from_peer);
// Remember node's XML ID if we're just learning it
if ((node_xml_id != NULL)
&& !pcmk__str_eq(node_xml_id, prev_xml_id, pcmk__str_none)) {
// Remember node's name in case unknown in the membership cache
pcmk__node_status_t *known_peer =
pcmk__get_node(0, host, node_xml_id,
pcmk__node_search_cluster_member);
crm_trace("Learned %s[%s] node XML ID is %s (was %s)",
a->id, known_peer->name, node_xml_id,
pcmk__s(prev_xml_id, "unknown"));
attrd_set_node_xml_id(v->nodename, node_xml_id);
if (attrd_election_won()) {
// In case we couldn't write a value missing the XML ID before
attrd_write_attributes(attrd_write_changed);
}
}
}
static void
attrd_peer_update_one(const pcmk__node_status_t *peer, xmlNode *xml,
bool filter)
{
attribute_t *a = NULL;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
if (attr == NULL) {
crm_warn("Could not update attribute: peer did not specify name");
return;
}
a = attrd_populate_attribute(xml, attr);
if (a == NULL) {
return;
}
if (host == NULL) {
// If no host was specified, update all hosts
GHashTableIter vIter;
crm_debug("Setting %s for all hosts to %s", attr, value);
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID);
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
} else {
// Update attribute value for the given host
update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
/* If this is a message from some attrd instance broadcasting its protocol
* version, check to see if it's a new minimum version.
*/
if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) {
attrd_update_minimum_protocol_ver(peer->name, value);
}
}
static void
broadcast_unseen_local_values(void)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
if (!pcmk_is_set(v->flags, attrd_value_from_peer)
&& pcmk__str_eq(v->nodename, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
crm_trace("* %s[%s]='%s' is local-only",
a->id, v->nodename, readable_value(v));
if (sync == NULL) {
sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
}
attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer);
}
}
}
if (sync != NULL) {
crm_debug("Broadcasting local-only values");
attrd_send_message(NULL, sync, false);
pcmk__xml_free(sync);
}
}
int
attrd_cluster_connect(void)
{
int rc = pcmk_rc_ok;
attrd_cluster = pcmk_cluster_new();
pcmk_cluster_set_destroy_fn(attrd_cluster, attrd_cpg_destroy);
pcmk_cpg_set_deliver_fn(attrd_cluster, attrd_cpg_dispatch);
pcmk_cpg_set_confchg_fn(attrd_cluster, pcmk__cpg_confchg_cb);
pcmk__cluster_set_status_callback(&attrd_peer_change_cb);
rc = pcmk_cluster_connect(attrd_cluster);
rc = pcmk_rc2legacy(rc);
if (rc != pcmk_ok) {
crm_err("Cluster connection failed");
return rc;
}
return pcmk_ok;
}
void
attrd_peer_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
const char *op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
const char *interval_spec = crm_element_value(xml,
PCMK__XA_ATTR_CLEAR_INTERVAL);
guint interval_ms = 0U;
char *attr = NULL;
GHashTableIter iter;
regex_t regex;
pcmk__node_status_t *peer =
pcmk__get_node(0, request->peer, NULL,
pcmk__node_search_cluster_member);
pcmk_parse_interval_spec(interval_spec, &interval_ms);
if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) {
crm_info("Ignoring invalid request to clear failures for %s",
pcmk__s(rsc, "all resources"));
return;
}
crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Make sure value is not set, so we delete */
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) {
if (regexec(&regex, attr, 0, NULL, 0) == 0) {
crm_trace("Matched %s when clearing %s",
attr, pcmk__s(rsc, "all resources"));
crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr);
attrd_peer_update(peer, xml, host, false);
}
}
regfree(&regex);
}
/*!
* \internal
* \brief Load attributes from a peer sync response
*
* \param[in] peer Peer that sent sync response
* \param[in] peer_won Whether peer is the attribute writer
* \param[in,out] xml Request XML
*/
void
attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won,
xmlNode *xml)
{
crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
peer->name);
if (peer_won) {
/* Initialize the "seen" flag for all attributes to cleared, so we can
* detect attributes that local node has but the writer doesn't.
*/
attrd_clear_value_seen();
}
// Process each attribute update in the sync response
for (xmlNode *child = pcmk__xe_first_child(xml, NULL, NULL, NULL);
child != NULL; child = pcmk__xe_next(child, NULL)) {
attrd_peer_update(peer, child,
crm_element_value(child, PCMK__XA_ATTR_HOST), true);
}
if (peer_won) {
/* If any attributes are still not marked as seen, the writer doesn't
* know about them, so send all peers an update with them.
*/
broadcast_unseen_local_values();
}
}
/*!
* \internal
* \brief Remove all attributes and optionally peer cache entries for a node
*
* \param[in] host Name of node to purge
* \param[in] uncache If true, remove node from peer caches
* \param[in] source Who requested removal (only used for logging)
*/
void
attrd_peer_remove(const char *host, bool uncache, const char *source)
{
attribute_t *a = NULL;
GHashTableIter aIter;
CRM_CHECK(host != NULL, return);
crm_notice("Removing all %s attributes for node %s "
QB_XS " %s reaping node from cache",
host, source, (uncache? "and" : "without"));
g_hash_table_iter_init(&aIter, attributes);
- while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
- if(g_hash_table_remove(a->values, host)) {
- crm_debug("Removed %s[%s] for peer %s", a->id, host, source);
+ while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) &a)) {
+ /* If the attribute won't be written to the CIB, we can drop the value
+ * now. Otherwise we need to set it NULL and wait for a notification
+ * that it was erased, because if there's no writer or the current
+ * writer fails to write it then leaves, we may become the writer and
+ * need to do it.
+ */
+ if (attrd_for_cib(a)) {
+ attribute_value_t *v = g_hash_table_lookup(a->values, host);
+
+ if ((v != NULL) && (v->current != NULL)) {
+ crm_debug("Removed %s[%s] (by setting NULL) for %s",
+ a->id, host, source);
+ pcmk__str_update(&(v->current), NULL);
+ attrd_set_attr_flags(a, attrd_attr_changed);
+ }
+ } else if (g_hash_table_remove(a->values, host)) {
+ crm_debug("Removed %s[%s] immediately for %s",
+ a->id, host, source);
}
}
+ if (attrd_election_won()) {
+ attrd_cib_erase_transient_attrs(host); // Wipe from CIB
+ } else {
+ attrd_start_election_if_needed(); // Make sure CIB gets updated
+ }
+
+ // Remove node from caches if requested
if (uncache) {
pcmk__purge_node_from_cache(host, 0);
attrd_forget_node_xml_id(host);
}
}
/*!
* \internal
* \brief Send all known attributes and values to a peer
*
* \param[in] peer Peer to send sync to (if NULL, broadcast to all peers)
*/
void
attrd_peer_sync(pcmk__node_status_t *peer)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
crm_debug("Syncing %s[%s]='%s' to %s",
a->id, v->nodename, readable_value(v),
readable_peer(peer));
attrd_add_value_xml(sync, a, v, false);
}
}
crm_debug("Syncing values to %s", readable_peer(peer));
attrd_send_message(peer, sync, false);
pcmk__xml_free(sync);
}
void
attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml,
const char *host, bool filter)
{
bool handle_sync_point = false;
CRM_CHECK((peer != NULL) && (xml != NULL), return);
if (xml->children != NULL) {
for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL);
child != NULL; child = pcmk__xe_next(child, PCMK_XE_OP)) {
pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
attrd_peer_update_one(peer, child, filter);
if (attrd_request_has_sync_point(child)) {
handle_sync_point = true;
}
}
} else {
attrd_peer_update_one(peer, xml, filter);
if (attrd_request_has_sync_point(xml)) {
handle_sync_point = true;
}
}
/* If the update XML specified that the client wanted to wait for a sync
* point, process that now.
*/
if (handle_sync_point) {
crm_trace("Hit local sync point for attribute update");
attrd_ack_waitlist_clients(attrd_sync_point_local, xml);
}
}
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index 281ec12c2f..1a79a93df2 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -1,176 +1,176 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/cluster.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static char *peer_writer = NULL;
static void
attrd_election_cb(pcmk_cluster_t *cluster)
{
attrd_declare_winner();
/* Update the peers after an election */
attrd_peer_sync(NULL);
/* After winning an election, update the CIB with the values of all
* attributes as the winner knows them.
*/
attrd_write_attributes(attrd_write_all);
}
void
attrd_election_init(void)
{
election_init(attrd_cluster, attrd_election_cb);
}
void
attrd_start_election_if_needed(void)
{
if ((peer_writer == NULL)
&& (election_state(attrd_cluster) != election_in_progress)
- && !attrd_shutting_down(false)) {
+ && !attrd_shutting_down()) {
crm_info("Starting an election to determine the writer");
election_vote(attrd_cluster);
}
}
bool
attrd_election_won(void)
{
return (election_state(attrd_cluster) == election_won);
}
void
attrd_handle_election_op(const pcmk__node_status_t *peer, xmlNode *xml)
{
enum election_result rc = 0;
enum election_result previous = election_state(attrd_cluster);
crm_xml_add(xml, PCMK__XA_SRC, peer->name);
// Don't become writer if we're shutting down
- rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down(false));
+ rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down());
switch(rc) {
case election_start:
crm_debug("Unsetting writer (was %s) and starting new election",
peer_writer? peer_writer : "unset");
free(peer_writer);
peer_writer = NULL;
election_vote(attrd_cluster);
break;
case election_lost:
/* The election API should really distinguish between "we just lost
* to this peer" and "we already lost previously, and we are
* discarding this vote for some reason", but it doesn't.
*
* In the first case, we want to tentatively set the peer writer to
* this peer, even though another peer may eventually win (which we
* will learn via attrd_check_for_new_writer()), so
* attrd_start_election_if_needed() doesn't start a new election.
*
* Approximate a test for that case as best we can.
*/
if ((peer_writer == NULL) || (previous != election_lost)) {
pcmk__str_update(&peer_writer, peer->name);
crm_debug("Election lost, presuming %s is writer for now",
peer_writer);
}
break;
case election_in_progress:
election_check(attrd_cluster);
break;
default:
crm_info("Ignoring election op from %s due to error", peer->name);
break;
}
}
bool
attrd_check_for_new_writer(const pcmk__node_status_t *peer, const xmlNode *xml)
{
int peer_state = 0;
crm_element_value_int(xml, PCMK__XA_ATTR_WRITER, &peer_state);
if (peer_state == election_won) {
if ((election_state(attrd_cluster) == election_won)
&& !pcmk__str_eq(peer->name, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
crm_notice("Detected another attribute writer (%s), starting new "
"election",
peer->name);
election_vote(attrd_cluster);
} else if (!pcmk__str_eq(peer->name, peer_writer, pcmk__str_casei)) {
crm_notice("Recorded new attribute writer: %s (was %s)",
peer->name, pcmk__s(peer_writer, "unset"));
pcmk__str_update(&peer_writer, peer->name);
}
}
return (peer_state == election_won);
}
void
attrd_declare_winner(void)
{
crm_notice("Recorded local node as attribute writer (was %s)",
(peer_writer? peer_writer : "unset"));
pcmk__str_update(&peer_writer, attrd_cluster->priv->node_name);
}
void
attrd_remove_voter(const pcmk__node_status_t *peer)
{
election_remove(attrd_cluster, peer->name);
if ((peer_writer != NULL)
&& pcmk__str_eq(peer->name, peer_writer, pcmk__str_casei)) {
free(peer_writer);
peer_writer = NULL;
crm_notice("Lost attribute writer %s", peer->name);
/* Clear any election dampening in effect. Otherwise, if the lost writer
* had just won, the election could fizzle out with no new writer.
*/
election_clear_dampening(attrd_cluster);
/* If the writer received attribute updates during its shutdown, it will
* not have written them to the CIB. Ensure we get a new writer so they
* are written out. This means that every node that sees the writer
* leave will start a new election, but that's better than losing
* attributes.
*/
attrd_start_election_if_needed();
/* If an election is in progress, we need to call election_check(), in case
* this lost peer is the only one that hasn't voted, otherwise the election
* would be pending until it's timed out.
*/
} else if (election_state(attrd_cluster) == election_in_progress) {
crm_debug("Checking election status upon loss of voter %s", peer->name);
election_check(attrd_cluster);
}
}
void
attrd_xml_add_writer(xmlNode *xml)
{
crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, election_state(attrd_cluster));
}
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index 07fe84399a..4e83f7623a 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -1,630 +1,630 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h> // PRIu32
#include <sys/types.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/acl_internal.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static qb_ipcs_service_t *ipcs = NULL;
/*!
* \internal
* \brief Build the XML reply to a client query
*
* \param[in] attr Name of requested attribute
* \param[in] host Name of requested host (or NULL for all hosts)
*
* \return New XML reply
* \note Caller is responsible for freeing the resulting XML
*/
static xmlNode *build_query_reply(const char *attr, const char *host)
{
xmlNode *reply = pcmk__xe_create(NULL, __func__);
attribute_t *a;
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(reply, PCMK__XA_SUBT, PCMK__ATTRD_CMD_QUERY);
crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);
/* If desired attribute exists, add its value(s) to the reply */
a = g_hash_table_lookup(attributes, attr);
if (a) {
attribute_value_t *v;
xmlNode *host_value;
crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr);
/* Allow caller to use "localhost" to refer to local node */
if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) {
host = attrd_cluster->priv->node_name;
crm_trace("Mapped localhost to %s", host);
}
/* If a specific node was requested, add its value */
if (host) {
v = g_hash_table_lookup(a->values, host);
host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
crm_xml_add(host_value, PCMK__XA_ATTR_HOST, host);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE,
(v? v->current : NULL));
/* Otherwise, add all nodes' values */
} else {
GHashTableIter iter;
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
crm_xml_add(host_value, PCMK__XA_ATTR_HOST, v->nodename);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current);
}
}
}
return reply;
}
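/* A sketch of the reply shape (host and attribute names are illustrative,
* and the literal XML attribute names are whatever the PCMK__XA_* constants
* expand to):
*
*   <build_query_reply t="attrd" subt="query" name="my-attr">
*     <node host="node1" value="5"/>
*     <node host="node2" value="7"/>
*   </build_query_reply>
*/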
xmlNode *
attrd_client_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc, *op, *interval_spec;
if (minimum_protocol_version >= 2) {
/* Propagate to all peers (including ourselves).
* This ends up at attrd_peer_message().
*/
attrd_send_message(NULL, xml, false);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
interval_spec = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_INTERVAL);
/* Map this to an update */
crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Add regular expression matching desired attributes */
if (rsc) {
char *pattern;
if (op == NULL) {
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
} else {
guint interval_ms = 0U;
pcmk_parse_interval_spec(interval_spec, &interval_ms);
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
rsc, op, interval_ms);
}
crm_xml_add(xml, PCMK__XA_ATTR_REGEX, pattern);
free(pattern);
} else {
crm_xml_add(xml, PCMK__XA_ATTR_REGEX, ATTRD_RE_CLEAR_ALL);
}
/* Make sure attribute and value are not set, so we delete via regex */
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_NAME);
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);
return attrd_client_update(request);
}
xmlNode *
attrd_client_peer_remove(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
// Host and ID are not used in combination; rather, host has precedence
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
char *host_alloc = NULL;
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
if (host == NULL) {
int nodeid = 0;
crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID, &nodeid);
if (nodeid > 0) {
pcmk__node_status_t *node = NULL;
node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_cluster_member);
if ((node != NULL) && (node->name != NULL)) {
// Use cached name if available
host = node->name;
} else {
// Otherwise ask cluster layer
host_alloc = pcmk__cluster_node_name(nodeid);
host = host_alloc;
}
crm_xml_add(xml, PCMK__XA_ATTR_HOST, host);
}
}
if (host) {
crm_info("Client %s is requesting all values for %s be removed",
pcmk__client_name(request->ipc_client), host);
attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
free(host_alloc);
} else {
crm_info("Ignoring request by client %s to remove all peer values without specifying peer",
pcmk__client_name(request->ipc_client));
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
xmlNode *
attrd_client_query(pcmk__request_t *request)
{
xmlNode *query = request->xml;
xmlNode *reply = NULL;
const char *attr = NULL;
crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client));
/* Request must specify attribute name to query */
attr = crm_element_value(query, PCMK__XA_ATTR_NAME);
if (attr == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Ignoring malformed query from %s (no attribute name given)",
pcmk__client_name(request->ipc_client));
return NULL;
}
/* Build the XML reply */
reply = build_query_reply(attr,
crm_element_value(query, PCMK__XA_ATTR_HOST));
if (reply == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Could not respond to query from %s: could not create XML reply",
pcmk__client_name(request->ipc_client));
return NULL;
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
request->ipc_client->request_id = 0;
return reply;
}
xmlNode *
attrd_client_refresh(pcmk__request_t *request)
{
crm_info("Updating all attributes");
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
attrd_write_attributes(attrd_write_all|attrd_write_no_delay);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
static void
handle_missing_host(xmlNode *xml)
{
if (crm_element_value(xml, PCMK__XA_ATTR_HOST) == NULL) {
crm_trace("Inferring local node %s with XML ID %s",
attrd_cluster->priv->node_name,
attrd_cluster->priv->node_xml_id);
crm_xml_add(xml, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name);
crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID,
attrd_cluster->priv->node_xml_id);
}
}
/* Convert a single IPC message with a regex into one with multiple children, one
* for each regex match.
*/
static int
expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex)
{
if (attr == NULL && regex) {
bool matched = false;
GHashTableIter aIter;
regex_t r_patt;
crm_debug("Setting %s to %s", regex, value);
if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) {
return EINVAL;
}
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
int status = regexec(&r_patt, attr, 0, NULL, 0);
if (status == 0) {
xmlNode *child = pcmk__xe_create(xml, PCMK_XE_OP);
crm_trace("Matched %s with %s", attr, regex);
matched = true;
/* Copy all the non-conflicting attributes from the parent over,
* but remove the regex and replace it with the name.
*/
pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
pcmk__xe_remove_attr(child, PCMK__XA_ATTR_REGEX);
crm_xml_add(child, PCMK__XA_ATTR_NAME, attr);
}
}
regfree(&r_patt);
/* Return a code if we never matched anything. This should not be treated
* as an error: there was a valid regex, it simply did not match anything,
* and the caller should not continue doing any regex-related processing.
*/
if (!matched) {
return pcmk_rc_op_unsatisfied;
}
} else if (attr == NULL) {
return pcmk_rc_bad_nvpair;
}
return pcmk_rc_ok;
}
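/* For example (a sketch; element and attribute names are illustrative): an
* update selecting attributes by regex, such as
*
*   <op task="update" regex="^fail-count-" value="0"/>
*
* that matches two known attributes would be rewritten in place to carry one
* PCMK_XE_OP child per match, each inheriting the parent's attributes but
* with the regex replaced by a concrete name:
*
*   <op task="update" regex="^fail-count-" value="0">
*     <op task="update" value="0" name="fail-count-rsc1#start_0"/>
*     <op task="update" value="0" name="fail-count-rsc2#monitor_10000"/>
*   </op>
*/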
static int
handle_regexes(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
int rc = pcmk_rc_ok;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);
rc = expand_regexes(xml, attr, value, regex);
if (rc == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Bad regex '%s' for update from client %s", regex,
pcmk__client_name(request->ipc_client));
} else if (rc == pcmk_rc_bad_nvpair) {
crm_err("Update request did not specify attribute or regular expression");
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Client %s update request did not specify attribute or regular expression",
pcmk__client_name(request->ipc_client));
}
return rc;
}
static int
handle_value_expansion(const char **value, xmlNode *xml, const char *op,
const char *attr)
{
attribute_t *a = g_hash_table_lookup(attributes, attr);
if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
return EINVAL;
}
if (*value && attrd_value_needs_expansion(*value)) {
int int_value;
attribute_value_t *v = NULL;
if (a) {
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
v = g_hash_table_lookup(a->values, host);
}
int_value = attrd_expand_value(*value, (v? v->current : NULL));
crm_info("Expanded %s=%s to %d", attr, *value, int_value);
crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value);
/* Replacing the value frees the previous memory, so re-query it */
*value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
}
return pcmk_rc_ok;
}
static void
send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml)
{
if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) {
/* The client is waiting on the cluster-wide sync point. In this case,
* the response ACK is not sent until this attrd broadcasts the update
* and receives its own confirmation back from all peers.
*/
attrd_expect_confirmations(request, attrd_cluster_sync_point_update);
attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */
} else {
/* The client is either waiting on the local sync point or was not
* waiting on any sync point at all. For the local sync point, the
* response ACK is sent in attrd_peer_update. For clients not
* waiting on any sync point, the response ACK is sent in
* handle_update_request immediately before this function was called.
*/
attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
}
}
static int
send_child_update(xmlNode *child, void *data)
{
pcmk__request_t *request = (pcmk__request_t *) data;
/* Calling pcmk__set_result is handled by one of these calls to
* attrd_client_update, so no need to do it again here.
*/
request->xml = child;
attrd_client_update(request);
return pcmk_rc_ok;
}
xmlNode *
attrd_client_update(pcmk__request_t *request)
{
xmlNode *xml = NULL;
const char *attr, *value, *regex;
CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL);
xml = request->xml;
/* If the message has children, that means it is a message from a newer
* client that supports sending multiple operations at a time. There are
* two ways we can handle that.
*/
if (xml->children != NULL) {
if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
/* First, if all peers support a certain protocol version, we can
* just broadcast the big message and they'll handle it. However,
* we also need to apply all the transformations in this function
* to the children since they don't happen anywhere else.
*/
for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL,
NULL);
child != NULL; child = pcmk__xe_next(child, PCMK_XE_OP)) {
attr = crm_element_value(child, PCMK__XA_ATTR_NAME);
value = crm_element_value(child, PCMK__XA_ATTR_VALUE);
handle_missing_host(child);
if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
}
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
/* Save the original xml node pointer so it can be restored after iterating
* over all the children.
*/
xmlNode *orig_xml = request->xml;
/* Second, if they do not support that protocol version, split it
* up into individual messages and call attrd_client_update on
* each one.
*/
pcmk__xe_foreach_child(xml, PCMK_XE_OP, send_child_update, request);
request->xml = orig_xml;
}
return NULL;
}
attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);
if (handle_regexes(request) != pcmk_rc_ok) {
/* Error handling was already dealt with in handle_regexes, so just return. */
return NULL;
} else if (regex) {
/* Recursively call attrd_client_update on the new message with regexes
* expanded. If supported by the attribute daemon, this means that all
* matches can also be handled atomically.
*/
return attrd_client_update(request);
}
handle_missing_host(xml);
if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
crm_debug("Broadcasting %s[%s]=%s%s",
attr, crm_element_value(xml, PCMK__XA_ATTR_HOST),
value, (attrd_election_won()? " (writer)" : ""));
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
/*!
* \internal
* \brief Accept a new client IPC connection
*
* \param[in,out] c New connection
* \param[in] uid Client user id
* \param[in] gid Client group id
*
* \return pcmk_ok on success, -errno otherwise
*/
static int32_t
attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("New client connection %p", c);
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Ignoring new connection from pid %d during shutdown",
pcmk__client_pid(c));
return -ECONNREFUSED;
}
if (pcmk__new_client(c, uid, gid) == NULL) {
return -ENOMEM;
}
return pcmk_ok;
}
/*!
* \internal
* \brief Destroy a client IPC connection
*
* \param[in] c Connection to destroy
*
* \return FALSE (i.e. do not re-run this callback)
*/
static int32_t
attrd_ipc_closed(qb_ipcs_connection_t *c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
crm_trace("Ignoring request to clean up unknown connection %p", c);
} else {
crm_trace("Cleaning up closed client connection %p", c);
/* Remove the client from the sync point waitlist if it's present. */
attrd_remove_client_from_waitlist(client);
/* And no longer wait for confirmations from any peers. */
attrd_do_not_wait_for_client(client);
pcmk__free_client(client);
}
return FALSE;
}
/*!
* \internal
* \brief Destroy a client IPC connection
*
* \param[in,out] c Connection to destroy
*
* \note We handle a destroyed connection the same as a closed one,
* but we need a separate handler because the return type is different.
*/
static void
attrd_ipc_destroy(qb_ipcs_connection_t *c)
{
crm_trace("Destroying client connection %p", c);
attrd_ipc_closed(c);
}
static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *xml = NULL;
// Sanity-check, and parse XML from IPC data
CRM_CHECK((c != NULL) && (client != NULL), return 0);
if (data == NULL) {
crm_debug("No IPC data from PID %d", pcmk__client_pid(c));
return 0;
}
xml = pcmk__client_data2xml(client, data, &id, &flags);
if (xml == NULL) {
crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c));
pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_PROTOCOL);
return 0;
} else {
pcmk__request_t request = {
.ipc_client = client,
.ipc_id = id,
.ipc_flags = flags,
.peer = NULL,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
pcmk__assert(client->user != NULL);
pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user);
request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
CRM_CHECK(request.op != NULL, return 0);
attrd_handle_request(&request);
pcmk__reset_request(&request);
}
pcmk__xml_free(xml);
return 0;
}
static struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = attrd_ipc_accept,
.connection_created = NULL,
.msg_process = attrd_ipc_dispatch,
.connection_closed = attrd_ipc_closed,
.connection_destroyed = attrd_ipc_destroy
};
void
attrd_ipc_fini(void)
{
if (ipcs != NULL) {
pcmk__drop_all_clients(ipcs);
qb_ipcs_destroy(ipcs);
ipcs = NULL;
}
attrd_unregister_handlers();
pcmk__client_cleanup();
}
/*!
* \internal
* \brief Set up attrd IPC communication
*/
void
attrd_init_ipc(void)
{
pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks);
}
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
index f219b8862d..f6c6f0ac52 100644
--- a/daemons/attrd/attrd_utils.c
+++ b/daemons/attrd/attrd_utils.c
@@ -1,306 +1,277 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <glib.h>
#include <regex.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
cib_t *the_cib = NULL;
-static bool requesting_shutdown = false;
static bool shutting_down = false;
static GMainLoop *mloop = NULL;
/* A hash table storing information on the protocol version of each peer attrd.
* The key is the peer's uname, and the value is the protocol version number.
*/
GHashTable *peer_protocol_vers = NULL;
-/*!
- * \internal
- * \brief Set requesting_shutdown state
- */
-void
-attrd_set_requesting_shutdown(void)
-{
- requesting_shutdown = true;
-}
-
-/*!
- * \internal
- * \brief Clear requesting_shutdown state
- */
-void
-attrd_clear_requesting_shutdown(void)
-{
- requesting_shutdown = false;
-}
-
/*!
* \internal
* \brief Check whether local attribute manager is shutting down
*
- * \param[in] if_requested If \c true, also consider presence of
- * \c PCMK__NODE_ATTR_SHUTDOWN attribute
- *
- * \return \c true if local attribute manager has begun shutdown sequence
- * or (if \p if_requested is \c true) whether local node has a nonzero
- * \c PCMK__NODE_ATTR_SHUTDOWN attribute set, otherwise \c false
- * \note Most callers should pass \c false for \p if_requested, because the
- * attribute manager needs to continue performing while the controller is
- * shutting down, and even needs to be eligible for election in case all
- * nodes are shutting down.
+ * \return \c true if local attribute manager has begun shutdown sequence,
+ * otherwise \c false
*/
bool
-attrd_shutting_down(bool if_requested)
+attrd_shutting_down(void)
{
- return shutting_down || (if_requested && requesting_shutdown);
+ return shutting_down;
}
/*!
* \internal
* \brief Exit (using mainloop or not, as appropriate)
*
* \param[in] nsig Ignored
*/
void
attrd_shutdown(int nsig)
{
// Tell various functions not to do anything
shutting_down = true;
// Don't respond to signals while shutting down
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGCHLD);
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGUSR2);
mainloop_destroy_signal(SIGTRAP);
attrd_free_waitlist();
attrd_free_confirmations();
if (peer_protocol_vers != NULL) {
g_hash_table_destroy(peer_protocol_vers);
peer_protocol_vers = NULL;
}
if ((mloop == NULL) || !g_main_loop_is_running(mloop)) {
/* If there's no main loop active, just exit. This should be possible
* only if we get SIGTERM in brief windows at start-up and shutdown.
*/
crm_exit(CRM_EX_OK);
} else {
g_main_loop_quit(mloop);
g_main_loop_unref(mloop);
}
}
/*!
* \internal
* \brief Create a main loop for attrd
*/
void
attrd_init_mainloop(void)
{
mloop = g_main_loop_new(NULL, FALSE);
}
/*!
* \internal
* \brief Run attrd main loop
*/
void
attrd_run_mainloop(void)
{
g_main_loop_run(mloop);
}
/* strlen("value") */
#define plus_plus_len (5)
/*!
* \internal
* \brief Check whether an attribute value should be expanded
*
* \param[in] value Attribute value to check
*
* \return true if value needs expansion, false otherwise
*/
bool
attrd_value_needs_expansion(const char *value)
{
return ((strlen(value) >= (plus_plus_len + 2))
&& (value[plus_plus_len] == '+')
&& ((value[plus_plus_len + 1] == '+')
|| (value[plus_plus_len + 1] == '=')));
}
/*!
* \internal
* \brief Expand an increment expression into an integer
*
* \param[in] value Attribute increment expression to expand
* \param[in] old_value Previous value of attribute
*
* \return Expanded value
*/
int
attrd_expand_value(const char *value, const char *old_value)
{
int increment = 1;
int score = 0;
if (pcmk_parse_score(old_value, &score, 0) != pcmk_rc_ok) {
return 0; // Original value is not a score
}
// value++ means increment by one, value+=OFFSET means increment by OFFSET
if ((value[plus_plus_len + 1] != '+')
&& (pcmk_parse_score(value + plus_plus_len + 2, &increment,
0) != pcmk_rc_ok)) {
increment = 0; // Invalid increment
}
if (increment < 0) {
return QB_MAX(score + increment, -PCMK_SCORE_INFINITY);
}
return QB_MIN(score + increment, PCMK_SCORE_INFINITY);
}
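/* For example (hypothetical values, not part of this code): given a previous
* value of "5", "value++" expands to 6 and "value+=10" expands to 15, with
* results clamped to +/- PCMK_SCORE_INFINITY:
*
*   attrd_expand_value("value++", "5");    // returns 6
*   attrd_expand_value("value+=10", "5");  // returns 15
*   attrd_expand_value("value++", "junk"); // returns 0 (old value not a score)
*/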
/*!
* \internal
* \brief Create regular expression matching failure-related attributes
*
* \param[out] regex Where to store created regular expression
* \param[in] rsc Name of resource to clear (or NULL for all)
* \param[in] op Operation to clear if rsc is specified (or NULL for all)
* \param[in] interval_ms Interval of operation to clear if op is specified
*
* \return pcmk_ok on success, -EINVAL if arguments are invalid
*
* \note The caller is responsible for freeing the result with regfree().
*/
int
attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
guint interval_ms)
{
char *pattern = NULL;
int rc;
/* Create a pattern that matches desired attributes */
if (rsc == NULL) {
pattern = pcmk__str_copy(ATTRD_RE_CLEAR_ALL);
} else if (op == NULL) {
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
} else {
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms);
}
/* Compile pattern into regular expression */
crm_trace("Clearing attributes matching %s", pattern);
rc = regcomp(regex, pattern, REG_EXTENDED|REG_NOSUB);
free(pattern);
return (rc == 0)? pcmk_ok : -EINVAL;
}
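/* A minimal usage sketch (hypothetical resource and operation names): build a
* pattern for the 10-second monitor of "myrsc", then test candidate attribute
* names against it. With these arguments, the compiled pattern should match
* names such as "fail-count-myrsc#monitor_10000":
*
*   regex_t regex;
*
*   if (attrd_failure_regex(&regex, "myrsc", "monitor", 10000) == pcmk_ok) {
*       if (regexec(&regex, attr_name, 0, NULL, 0) == 0) {
*           // attr_name is a failure attribute for that operation
*       }
*       regfree(&regex);
*   }
*/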
void
attrd_free_attribute_value(gpointer data)
{
attribute_value_t *v = data;
free(v->nodename);
free(v->current);
free(v->requested);
free(v);
}
void
attrd_free_attribute(gpointer data)
{
attribute_t *a = data;
if(a) {
free(a->id);
free(a->set_id);
free(a->set_type);
free(a->user);
mainloop_timer_del(a->timer);
g_hash_table_destroy(a->values);
free(a);
}
}
/*!
* \internal
* \brief When a peer node leaves the cluster, stop tracking its protocol version.
*
* \param[in] host The peer node's uname to be removed
*/
void
attrd_remove_peer_protocol_ver(const char *host)
{
if (peer_protocol_vers != NULL) {
g_hash_table_remove(peer_protocol_vers, host);
}
}
/*!
* \internal
* \brief When a peer node broadcasts a message with its protocol version, keep
* track of that information.
*
* We keep track of each peer's protocol version so we know which peers to
* expect confirmation messages from when handling cluster-wide sync points.
* We additionally keep track of the lowest protocol version supported by all
* peers so we know when we can send IPC messages containing more than one
* request.
*
* \param[in] host The peer node's uname to be tracked
* \param[in] value The peer node's protocol version
*/
void
attrd_update_minimum_protocol_ver(const char *host, const char *value)
{
int ver;
if (peer_protocol_vers == NULL) {
peer_protocol_vers = pcmk__strkey_table(free, NULL);
}
pcmk__scan_min_int(value, &ver, 0);
if (ver > 0) {
/* Record the peer attrd's protocol version. */
g_hash_table_insert(peer_protocol_vers, pcmk__str_copy(host),
GINT_TO_POINTER(ver));
/* If the protocol version is a new minimum, record it as such. */
if (minimum_protocol_version == -1 || ver < minimum_protocol_version) {
minimum_protocol_version = ver;
crm_trace("Set minimum attrd protocol version to %d",
minimum_protocol_version);
}
}
}
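/* For example (hypothetical peers): after receiving version "5" from node1
* and "4" from node2, minimum_protocol_version is 4, so multi-request IPC
* messages are allowed (ATTRD_SUPPORTS_MULTI_MESSAGE requires 4) but
* confirmations are not (ATTRD_SUPPORTS_CONFIRMATION requires 5).
*/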
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index d9423c8915..627dd97976 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -1,260 +1,262 @@
/*
* Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#ifndef PACEMAKER_ATTRD__H
# define PACEMAKER_ATTRD__H
#include <regex.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/cluster.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/messages_internal.h>
#include <crm/cib/cib_types.h>
/*
* Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used
* with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned.
*
* With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every
* peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will
* also set a private attribute for itself with its version, so any attrd can
* determine the minimum version supported by all peers.
*
* Protocol Pacemaker Significant changes
* -------- --------- -------------------
* 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only),
* PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH,
* "flush", PCMK__ATTRD_CMD_SYNC_RESPONSE
* 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_REGEX),
* PCMK__ATTRD_CMD_QUERY
* 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH,
* PCMK__ATTRD_CMD_UPDATE_DELAY
* 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE
* 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
* 4 2.1.5 Multiple attributes can be updated in a single IPC
* message
* 5 2.1.5 Peers can request confirmation of a sent message
* 6 2.1.7 PCMK__ATTRD_CMD_PEER_REMOVE supports PCMK__XA_REAP
* 7 3.0.0 "flush" support dropped
*/
#define ATTRD_PROTOCOL_VERSION "7"
#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4)
#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5)
#define attrd_send_ack(client, id, flags) \
pcmk__ipc_send_ack((client), (id), (flags), PCMK__XE_ACK, \
ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE)
void attrd_init_mainloop(void);
void attrd_run_mainloop(void);
-void attrd_set_requesting_shutdown(void);
-void attrd_clear_requesting_shutdown(void);
void attrd_free_waitlist(void);
-bool attrd_shutting_down(bool if_requested);
+bool attrd_shutting_down(void);
void attrd_shutdown(int nsig);
void attrd_init_ipc(void);
void attrd_ipc_fini(void);
int attrd_cib_connect(int max_retry);
void attrd_cib_disconnect(void);
void attrd_cib_init(void);
void attrd_cib_erase_transient_attrs(const char *node);
bool attrd_value_needs_expansion(const char *value);
int attrd_expand_value(const char *value, const char *old_value);
/* regular expression to clear failures of all resources */
#define ATTRD_RE_CLEAR_ALL \
"^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-"
/* regular expression to clear failure of all operations for one resource
* (format takes resource name)
*/
#define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s#.+_[0-9]+$"
/* regular expression to clear failure of one operation for one resource
* (format takes resource name, operation name, and interval)
*/
#define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s#%s_%u$"
int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
guint interval_ms);
extern cib_t *the_cib;
extern crm_exit_t attrd_exit_status;
/* Alerts */
extern lrmd_t *the_lrmd;
extern crm_trigger_t *attrd_config_read;
void attrd_lrmd_disconnect(void);
gboolean attrd_read_options(gpointer user_data);
int attrd_send_attribute_alert(const char *node, const char *node_xml_id,
const char *attr, const char *value);
// Elections
void attrd_election_init(void);
void attrd_start_election_if_needed(void);
bool attrd_election_won(void);
void attrd_handle_election_op(const pcmk__node_status_t *peer, xmlNode *xml);
bool attrd_check_for_new_writer(const pcmk__node_status_t *peer,
const xmlNode *xml);
void attrd_declare_winner(void);
void attrd_remove_voter(const pcmk__node_status_t *peer);
void attrd_xml_add_writer(xmlNode *xml);
enum attrd_attr_flags {
attrd_attr_none = 0U,
// At least one of attribute's values has changed since last write
attrd_attr_changed = (1U << 0),
// At least one of attribute's values has an unknown node XML ID
attrd_attr_node_unknown = (1U << 1),
// This attribute should never be written to the CIB
attrd_attr_is_private = (1U << 2),
// Ignore any configured delay for next write of this attribute
attrd_attr_force_write = (1U << 3),
};
typedef struct attribute_s {
char *id; // Attribute name
char *set_type; // PCMK_XE_INSTANCE_ATTRIBUTES or PCMK_XE_UTILIZATION
char *set_id; // Set's XML ID to use when writing
char *user; // ACL user to use for CIB writes
int update; // Call ID of pending write
int timeout_ms; // How long to wait for more changes before writing
uint32_t flags; // Group of enum attrd_attr_flags
GHashTable *values; // Key: node name, value: attribute_value_t
mainloop_timer_t *timer; // Timer to use for timeout_ms
} attribute_t;
#define attrd_set_attr_flags(attr, flags_to_set) do { \
(attr)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Value for attribute", (attr)->id, \
(attr)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define attrd_clear_attr_flags(attr, flags_to_clear) do { \
(attr)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Value for attribute", (attr)->id, \
(attr)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
enum attrd_value_flags {
attrd_value_none = 0U,
attrd_value_remote = (1U << 0), // Value is for Pacemaker Remote node
attrd_value_from_peer = (1U << 1), // Value is from peer sync response
};
typedef struct attribute_value_s {
char *nodename; // Node that this value is for
char *current; // Attribute value
char *requested; // Value specified in pending CIB write, if any
uint32_t flags; // Group of attrd_value_flags
} attribute_value_t;
#define attrd_set_value_flags(attr_value, flags_to_set) do { \
(attr_value)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Value for node", (attr_value)->nodename, \
(attr_value)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define attrd_clear_value_flags(attr_value, flags_to_clear) do { \
(attr_value)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Value for node", (attr_value)->nodename, \
(attr_value)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
extern pcmk_cluster_t *attrd_cluster;
extern GHashTable *attributes;
extern GHashTable *peer_protocol_vers;
#define CIB_OP_TIMEOUT_S 120
int attrd_cluster_connect(void);
void attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v);
void attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml,
const char *host, bool filter);
void attrd_peer_sync(pcmk__node_status_t *peer);
void attrd_peer_remove(const char *host, bool uncache, const char *source);
void attrd_peer_clear_failure(pcmk__request_t *request);
void attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won,
xmlNode *xml);
void attrd_send_protocol(const pcmk__node_status_t *peer);
xmlNode *attrd_client_peer_remove(pcmk__request_t *request);
xmlNode *attrd_client_clear_failure(pcmk__request_t *request);
xmlNode *attrd_client_update(pcmk__request_t *request);
xmlNode *attrd_client_refresh(pcmk__request_t *request);
xmlNode *attrd_client_query(pcmk__request_t *request);
gboolean attrd_send_message(const pcmk__node_status_t *node, xmlNode *data,
bool confirm);
xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
const attribute_value_t *v, bool force_write);
void attrd_clear_value_seen(void);
void attrd_free_attribute(gpointer data);
void attrd_free_attribute_value(gpointer data);
attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
char *attrd_set_id(const attribute_t *attr, const char *node_state_id);
char *attrd_nvpair_id(const attribute_t *attr, const char *node_state_id);
+bool attrd_for_cib(const attribute_t *a);
+void attrd_drop_removed_value(const char *cib_id);
+void attrd_drop_removed_set(const char *set_type, const char *cib_id);
+void attrd_drop_removed_values(const char *cib_id);
enum attrd_write_options {
attrd_write_changed = 0,
attrd_write_all = (1 << 0),
attrd_write_no_delay = (1 << 1),
};
void attrd_write_attributes(uint32_t options);
void attrd_write_or_elect_attribute(attribute_t *a);
extern int minimum_protocol_version;
void attrd_remove_peer_protocol_ver(const char *host);
void attrd_update_minimum_protocol_ver(const char *host, const char *value);
mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr);
void attrd_unregister_handlers(void);
void attrd_handle_request(pcmk__request_t *request);
enum attrd_sync_point {
attrd_sync_point_local,
attrd_sync_point_cluster,
};
typedef int (*attrd_confirmation_action_fn)(xmlNode *);
void attrd_add_client_to_waitlist(pcmk__request_t *request);
void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml);
int attrd_cluster_sync_point_update(xmlNode *xml);
void attrd_do_not_expect_from_peer(const char *host);
void attrd_do_not_wait_for_client(pcmk__client_t *client);
void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn);
void attrd_free_confirmations(void);
void attrd_handle_confirmation(int callid, const char *host);
void attrd_remove_client_from_waitlist(pcmk__client_t *client);
const char *attrd_request_sync_point(xmlNode *xml);
bool attrd_request_has_sync_point(xmlNode *xml);
extern gboolean stand_alone;
// Node utilities (from attrd_nodes.c)
const char *attrd_get_node_xml_id(const char *node_name);
void attrd_set_node_xml_id(const char *node_name, const char *node_xml_id);
void attrd_forget_node_xml_id(const char *node_name);
void attrd_cleanup_xml_ids(void);
#endif /* PACEMAKER_ATTRD__H */
diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
index eff8070818..32fcbe2568 100644
--- a/daemons/controld/controld_attrd.c
+++ b/daemons/controld/controld_attrd.c
@@ -1,158 +1,165 @@
/*
- * Copyright 2006-2024 the Pacemaker project contributors
+ * Copyright 2006-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/attrs_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_attrd_internal.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
static pcmk_ipc_api_t *attrd_api = NULL;
void
controld_close_attrd_ipc(void)
{
if (attrd_api != NULL) {
crm_trace("Closing connection to " PCMK__SERVER_ATTRD);
pcmk_disconnect_ipc(attrd_api);
pcmk_free_ipc_api(attrd_api);
attrd_api = NULL;
}
}
static inline const char *
node_type(bool is_remote)
{
return is_remote? "Pacemaker Remote" : "cluster";
}
static inline const char *
when(void)
{
return pcmk_is_set(controld_globals.fsa_input_register,
R_SHUTDOWN)? " at shutdown" : "";
}
static void
handle_attr_error(void)
{
if (AM_I_DC) {
/* We are unable to provide accurate information to the
* scheduler, so allow another node to take over DC.
* @TODO Should we do this unconditionally on any failure?
*/
crmd_exit(CRM_EX_FATAL);
} else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
// Fast-track shutdown since unable to request via attribute
register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
void
update_attrd(const char *host, const char *name, const char *value,
const char *user_name, gboolean is_remote_node)
{
int rc = pcmk_rc_ok;
if (attrd_api == NULL) {
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
uint32_t attrd_opts = pcmk__node_attr_value;
if (is_remote_node) {
pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
}
rc = pcmk__attrd_api_update(attrd_api, host, name, value,
NULL, NULL, user_name, attrd_opts);
}
if (rc != pcmk_rc_ok) {
do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
"Could not update attribute %s=%s for %s node %s%s: %s "
QB_XS " rc=%d", name, value, node_type(is_remote_node),
host, when(), pcmk_rc_str(rc), rc);
handle_attr_error();
}
}
void
update_attrd_list(GList *attrs, uint32_t opts)
{
int rc = pcmk_rc_ok;
if (attrd_api == NULL) {
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
rc = pcmk__attrd_api_update_list(attrd_api, attrs, NULL, NULL, NULL,
opts | pcmk__node_attr_value);
}
if (rc != pcmk_rc_ok) {
do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
"Could not update multiple node attributes: %s "
QB_XS " rc=%d", pcmk_rc_str(rc), rc);
handle_attr_error();
}
}
+/*!
+ * \internal
+ * \brief Ask attribute manager to purge a node and its transient attributes
+ *
+ * \param[in] node_name Node to purge
+ * \param[in] from_cache If true, purge from node caches as well
+ */
void
-update_attrd_remote_node_removed(const char *host, const char *user_name)
+controld_purge_node_attrs(const char *node_name, bool from_cache)
{
int rc = pcmk_rc_ok;
if (attrd_api == NULL) {
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
- crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
- host);
- rc = pcmk__attrd_api_purge(attrd_api, host, true);
+ crm_debug("Asking %s to purge transient attributes%s for %s ",
+ pcmk_ipc_name(attrd_api, true),
+ (from_cache? " and node cache" : ""), node_name);
+ rc = pcmk__attrd_api_purge(attrd_api, node_name, from_cache);
}
if (rc != pcmk_rc_ok) {
- crm_err("Could not purge Pacemaker Remote node %s "
- "in attribute manager%s: %s " QB_XS " rc=%d",
- host, when(), pcmk_rc_str(rc), rc);
+ crm_err("Could not purge node %s from attribute manager%s: %s "
+ QB_XS " rc=%d", node_name, when(), pcmk_rc_str(rc), rc);
}
}
void
update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
const char *interval_spec, gboolean is_remote_node)
{
int rc = pcmk_rc_ok;
if (attrd_api == NULL) {
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
uint32_t attrd_opts = pcmk__node_attr_none;
if (is_remote_node) {
pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
}
rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op,
interval_spec, NULL, attrd_opts);
}
if (rc != pcmk_rc_ok) {
const char *interval_desc = "all";
if (op != NULL) {
interval_desc = pcmk__s(interval_spec, "nonrecurring");
}
crm_err("Could not clear failure of %s %s for %s on %s node %s%s: %s "
QB_XS " rc=%d", interval_desc, pcmk__s(op, "operations"),
pcmk__s(rsc, "all resources"), node_type(is_remote_node), host,
when(), pcmk_rc_str(rc), rc);
}
}
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 48c255ed19..cd4b077dd5 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -1,396 +1,382 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h> // bool
#include <stdint.h> // uint32_t
#include <stdio.h> // NULL
#include <sys/param.h>
#include <string.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crm/cib.h>
#include <pacemaker-controld.h>
/* From join_dc... */
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
void
crmd_ha_msg_filter(xmlNode * msg)
{
if (AM_I_DC) {
const char *sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
const char *from = crm_element_value(msg, PCMK__XA_SRC);
if (!controld_is_local_node(from)) {
int level = LOG_INFO;
const char *op = crm_element_value(msg, PCMK__XA_CRM_TASK);
/* make sure the election happens NOW */
if (controld_globals.fsa_state != S_ELECTION) {
ha_msg_input_t new_input;
level = LOG_WARNING;
new_input.msg = msg;
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
__func__);
}
do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
goto done;
}
}
} else {
const char *sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
return;
}
}
/* crm_log_xml_trace(msg, "HA[inbound]"); */
route_message(C_HA_MESSAGE, msg);
done:
controld_trigger_fsa();
}
/*!
* \internal
* \brief Check whether a node is online
*
* \param[in] node Node to check
*
* \retval -1 if completely dead
* \retval 0 if partially alive
* \retval 1 if completely alive
*/
static int
node_alive(const pcmk__node_status_t *node)
{
if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
// Pacemaker Remote nodes can't be partially alive
if (pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)) {
return 1;
}
return -1;
} else if (pcmk__cluster_is_node_active(node)) {
// Completely up cluster node: both cluster member and peer
return 1;
} else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
&& !pcmk__str_eq(node->state, PCMK_VALUE_MEMBER,
pcmk__str_none)) {
// Completely down cluster node: neither cluster member nor peer
return -1;
}
// Partially up cluster node: only cluster member or only peer
return 0;
}
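/* A sketch of how a caller can interpret the tri-state result (illustrative;
* the actual callers below compare against 0 directly):
*
*   switch (node_alive(node)) {
*       case 1:   // cluster member and peer (or active remote node)
*           break;
*       case 0:   // partially alive: member or peer, but not both
*           break;
*       default:  // -1: neither member nor peer, completely down
*           break;
*   }
*/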
#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
// @TODO This is insanely long, and some parts should be functionized
void
peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
const void *data)
{
uint32_t old = 0;
bool appeared = FALSE;
bool is_remote = pcmk_is_set(node->flags, pcmk__node_status_remote);
controld_node_pending_timer(node);
/* The controller waits to receive some information from the membership
* layer before declaring itself operational. If this is being called for a
* cluster node, indicate that we have it.
*/
if (!is_remote) {
controld_set_fsa_input_flags(R_PEER_DATA);
}
if ((type == pcmk__node_update_processes)
&& pcmk_is_set(node->processes, crm_get_cluster_proc())
&& !AM_I_DC
&& !is_remote) {
/* relay_message() on the recipient ignores these messages, but
* libcrmcluster will have cached the node name by then
*/
xmlNode *query = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD,
NULL, CRM_SYSTEM_CRMD, CRM_OP_HELLO,
NULL);
crm_debug("Sending hello to node %" PRIu32 " so that it learns our "
"node name",
node->cluster_layer_id);
pcmk__cluster_send_message(node, pcmk_ipc_controld, query);
pcmk__xml_free(query);
}
if (node->name == NULL) {
return;
}
switch (type) {
case pcmk__node_update_name:
/* If we've never seen the node, then it also won't be in the status section */
crm_info("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
node->name, state_text(node->state));
return;
case pcmk__node_update_state:
/* This callback should not be called unless the state actually
* changed, but here's a failsafe just in case.
*/
CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
return);
crm_info("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
node->name, state_text(node->state), state_text(data));
if (pcmk__str_eq(PCMK_VALUE_MEMBER, node->state, pcmk__str_none)) {
appeared = TRUE;
if (!is_remote) {
remove_stonith_cleanup(node->name);
}
} else {
controld_remove_failed_sync_node(node->name);
controld_remove_voter(node->name);
}
crmd_alert_node_event(node);
break;
case pcmk__node_update_processes:
CRM_CHECK(data != NULL, return);
old = *(const uint32_t *)data;
appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
{
const char *dc_s = controld_globals.dc_name;
if ((dc_s == NULL) && AM_I_DC) {
dc_s = PCMK_VALUE_TRUE;
}
crm_info("Node %s is %s a peer " QB_XS
" DC=%s old=%#07x new=%#07x",
node->name, (appeared? "now" : "no longer"),
pcmk__s(dc_s, "<none>"), old, node->processes);
}
if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
/* Peer status did not change. This should not be possible,
* since we don't track process flags other than peer status.
*/
crm_trace("Process flag %#7x did not change from %#7x to %#7x",
crm_get_cluster_proc(), old, node->processes);
return;
}
if (!appeared) {
node->peer_lost = time(NULL);
controld_remove_failed_sync_node(node->name);
controld_remove_voter(node->name);
}
if (!pcmk_is_set(controld_globals.fsa_input_register,
R_CIB_CONNECTED)) {
crm_trace("Ignoring peer status change because not connected to CIB");
return;
} else if (controld_globals.fsa_state == S_STOPPING) {
crm_trace("Ignoring peer status change because stopping");
return;
}
if (!appeared && controld_is_local_node(node->name)) {
/* Did we get evicted? */
crm_notice("Our peer connection failed");
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
} else if (pcmk__str_eq(node->name, controld_globals.dc_name,
pcmk__str_casei)
&& !pcmk__cluster_is_node_active(node)) {
- /* The DC has left, so delete its transient attributes and
- * trigger a new election.
- *
- * A DC sends its shutdown request to all peers, who update the
- * DC's expected state to down. This avoids fencing upon
- * deletion of its transient attributes.
- */
+ // The DC has left, so trigger a new election
crm_notice("Our peer on the DC (%s) is dead",
controld_globals.dc_name);
-
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
- controld_delete_node_state(node->name, controld_section_attrs,
- cib_none);
-
} else if (AM_I_DC
|| pcmk_is_set(controld_globals.flags, controld_dc_left)
|| (controld_globals.dc_name == NULL)) {
/* This only needs to be done once, so normally the DC should do
* it. However if there is no DC, every node must do it, since
* there is no other way to ensure that at least one node does it.
*/
if (appeared) {
te_trigger_stonith_history_sync(FALSE);
- } else {
- controld_delete_node_state(node->name,
- controld_section_attrs,
- cib_none);
}
}
break;
}
if (AM_I_DC) {
xmlNode *update = NULL;
uint32_t flags = controld_node_update_peer;
int alive = node_alive(node);
pcmk__graph_action_t *down = match_down_event(node->xml_id);
crm_trace("Alive=%d, appeared=%d, down=%d",
alive, appeared, (down? down->id : -1));
if (appeared && (alive > 0) && !is_remote) {
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
if (down) {
const char *task = crm_element_value(down->xml, PCMK_XA_OPERATION);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
const bool confirmed =
pcmk_is_set(down->flags, pcmk__graph_action_confirmed);
/* tengine_stonith_callback() confirms fence actions */
crm_trace("Updating CIB %s fencer reported fencing of %s complete",
(confirmed? "after" : "before"), node->name);
} else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN,
pcmk__str_casei)) {
// Shutdown actions are immediately confirmed (i.e. no_wait)
if (!is_remote) {
flags |= controld_node_update_join
|controld_node_update_expected;
crmd_peer_down(node, FALSE);
check_join_state(controld_globals.fsa_state, __func__);
}
if (alive >= 0) {
crm_info("%s of peer %s is in progress " QB_XS " action=%d",
task, node->name, down->id);
} else {
crm_notice("%s of peer %s is complete " QB_XS " action=%d",
task, node->name, down->id);
pcmk__update_graph(controld_globals.transition_graph, down);
trigger_graph();
}
} else {
const char *liveness = "alive";
if (alive == 0) {
liveness = "partially alive";
} else if (alive < 0) {
liveness = "dead";
}
crm_trace("Node %s is %s, was expected to %s (op %d)",
node->name, liveness, task, down->id);
}
} else if (appeared == FALSE) {
if ((controld_globals.transition_graph == NULL)
|| (controld_globals.transition_graph->id <= 0)) {
crm_info("Stonith/shutdown of node %s is unknown to the "
"current DC", node->name);
} else {
crm_warn("Stonith/shutdown of node %s was not expected",
node->name);
}
if (!is_remote) {
crm_update_peer_join(__func__, node, controld_join_none);
check_join_state(controld_globals.fsa_state, __func__);
}
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Node failure", NULL);
fail_incompletable_actions(controld_globals.transition_graph,
node->xml_id);
} else {
crm_trace("Node %s came up, was not expected to be down",
node->name);
}
if (is_remote) {
/* A pacemaker_remote node won't have its cluster status updated
* in the CIB by membership-layer callbacks, so do it here.
*/
flags |= controld_node_update_cluster;
/* Trigger resource placement on newly integrated nodes */
if (appeared) {
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Pacemaker Remote node integrated", NULL);
}
}
if (!appeared && (type == pcmk__node_update_processes)
&& (node->when_member > 1)) {
/* The node left CPG but is still a cluster member. Set its
* membership time to 1 to record it in the cluster state as a
* boolean, so we don't fence it due to
* PCMK_OPT_NODE_PENDING_TIMEOUT.
*/
node->when_member = 1;
flags |= controld_node_update_cluster;
controld_node_pending_timer(node);
}
/* Update the CIB node state */
update = create_node_state_update(node, flags, NULL, __func__);
if (update == NULL) {
crm_debug("Node state update not yet possible for %s", node->name);
} else {
fsa_cib_anon_update(PCMK_XE_STATUS, update);
}
pcmk__xml_free(update);
}
controld_trigger_fsa();
}
gboolean
crm_fsa_trigger(gpointer user_data)
{
crm_trace("Invoked (queue len: %d)",
g_list_length(controld_globals.fsa_message_queue));
s_crmd_fsa(C_FSA_INTERNAL);
crm_trace("Exited (queue len: %d)",
g_list_length(controld_globals.fsa_message_queue));
return TRUE;
}
diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c
index e2a0d50179..b5e6a29146 100644
--- a/daemons/controld/controld_cib.c
+++ b/daemons/controld/controld_cib.c
@@ -1,1061 +1,1028 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <unistd.h> /* sleep */
#include <crm/common/alerts_internal.h>
#include <crm/common/xml.h>
#include <crm/crm.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-controld.h>
// Call ID of the most recent in-progress CIB resource update (or 0 if none)
static int pending_rsc_update = 0;
/*!
* \internal
* \brief Respond to a dropped CIB connection
*
* \param[in] user_data CIB connection that dropped
*/
static void
handle_cib_disconnect(gpointer user_data)
{
CRM_LOG_ASSERT(user_data == controld_globals.cib_conn);
controld_trigger_fsa();
controld_globals.cib_conn->state = cib_disconnected;
if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
// @TODO This should trigger a reconnect, not a shutdown
crm_crit("Lost connection to the CIB manager, shutting down");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
controld_clear_fsa_input_flags(R_CIB_CONNECTED);
} else { // Expected
crm_info("Disconnected from the CIB manager");
}
}
static void
do_cib_updated(const char *event, xmlNode * msg)
{
const xmlNode *patchset = NULL;
const char *client_name = NULL;
crm_debug("Received CIB diff notification: DC=%s", pcmk__btoa(AM_I_DC));
if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) {
return;
}
if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS)
|| pcmk__cib_element_in_patchset(patchset, PCMK_XE_CRM_CONFIG)) {
controld_trigger_config();
}
if (!AM_I_DC) {
// We're not in control of the join sequence
return;
}
client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME);
if (!cib__client_triggers_refresh(client_name)) {
// The CIB is still accurate
return;
}
if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES)
|| pcmk__cib_element_in_patchset(patchset, PCMK_XE_STATUS)) {
/* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS
* section. Ensure the node list is up-to-date, and start the join
* process again so we get everyone's current resource history.
*/
if (client_name == NULL) {
client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTID);
}
crm_notice("Populating nodes and starting an election after %s event "
"triggered by %s",
event, pcmk__s(client_name, "(unidentified client)"));
populate_cib_nodes(controld_node_update_quick|controld_node_update_all,
__func__);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
}
}
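/* Editor's summary of the dispatch above: changes to the alerts or
 * crm_config sections always trigger a configuration re-read; changes to the
 * nodes or status sections matter only on the DC, and only when made by a
 * client that cib__client_triggers_refresh() says can leave the CIB
 * inaccurate, in which case the DC repopulates the node list and starts a
 * new election.
 */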
void
controld_disconnect_cib_manager(void)
{
cib_t *cib_conn = controld_globals.cib_conn;
pcmk__assert(cib_conn != NULL);
crm_debug("Disconnecting from the CIB manager");
controld_clear_fsa_input_flags(R_CIB_CONNECTED);
cib_conn->cmds->del_notify_callback(cib_conn, PCMK__VALUE_CIB_DIFF_NOTIFY,
do_cib_updated);
cib_free_callbacks(cib_conn);
if (cib_conn->state != cib_disconnected) {
cib_conn->cmds->set_secondary(cib_conn, cib_discard_reply);
cib_conn->cmds->signoff(cib_conn);
}
}
/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */
void
do_cib_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static int cib_retries = 0;
cib_t *cib_conn = controld_globals.cib_conn;
void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect;
void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated;
int rc = pcmk_ok;
pcmk__assert(cib_conn != NULL);
if (pcmk_is_set(action, A_CIB_STOP)) {
if ((cib_conn->state != cib_disconnected)
&& (pending_rsc_update != 0)) {
crm_info("Waiting for resource update %d to complete",
pending_rsc_update);
crmd_fsa_stall(FALSE);
return;
}
controld_disconnect_cib_manager();
}
if (!pcmk_is_set(action, A_CIB_START)) {
return;
}
if (cur_state == S_STOPPING) {
crm_err("Ignoring request to connect to the CIB manager after "
"shutdown");
return;
}
rc = cib_conn->cmds->signon(cib_conn, crm_system_name,
cib_command_nonblocking);
if (rc != pcmk_ok) {
// A short wait that usually avoids stalling the FSA
sleep(1);
rc = cib_conn->cmds->signon(cib_conn, crm_system_name,
cib_command_nonblocking);
}
if (rc != pcmk_ok) {
crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc));
} else if (cib_conn->cmds->set_connection_dnotify(cib_conn,
dnotify_fn) != pcmk_ok) {
crm_err("Could not set dnotify callback");
} else if (cib_conn->cmds->add_notify_callback(cib_conn,
PCMK__VALUE_CIB_DIFF_NOTIFY,
update_cb) != pcmk_ok) {
crm_err("Could not set CIB notification callback (update)");
} else {
controld_set_fsa_input_flags(R_CIB_CONNECTED);
cib_retries = 0;
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
cib_retries++;
if (cib_retries < 30) {
crm_warn("Couldn't complete CIB registration %d times... "
"pause and retry", cib_retries);
controld_start_wait_timer();
crmd_fsa_stall(FALSE);
} else {
crm_err("Could not complete CIB registration %d times... "
"hard error", cib_retries);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
}
#define MIN_CIB_OP_TIMEOUT (30)
/*!
* \internal
* \brief Get the timeout (in seconds) that should be used with CIB operations
*
* \return The maximum of 30 seconds, or 10 seconds times one more than the
* combined number of active cluster nodes and remote nodes
*/
unsigned int
cib_op_timeout(void)
{
unsigned int calculated_timeout = 10U * (pcmk__cluster_num_active_nodes()
+ pcmk__cluster_num_remote_nodes()
+ 1U);
calculated_timeout = QB_MAX(calculated_timeout, MIN_CIB_OP_TIMEOUT);
crm_trace("Calculated timeout: %s",
pcmk__readable_interval(calculated_timeout * 1000));
if (controld_globals.cib_conn) {
controld_globals.cib_conn->call_timeout = calculated_timeout;
}
return calculated_timeout;
}
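/* Worked example (editor's illustration, not part of the change): with 3
 * active cluster nodes and 2 Pacemaker Remote nodes, the calculation yields
 * 10 * (3 + 2 + 1) = 60 seconds; a single-node cluster yields
 * 10 * (1 + 0 + 1) = 20 seconds, which QB_MAX() raises to the 30-second
 * floor.
 */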
/*!
* \internal
* \brief Get CIB call options to use local scope if primary is unavailable
*
* \return CIB call options
*/
int
crmd_cib_smart_opt(void)
{
int call_opt = cib_none;
if ((controld_globals.fsa_state == S_ELECTION)
|| (controld_globals.fsa_state == S_PENDING)) {
crm_info("Sending update to local CIB in state: %s",
fsa_state2string(controld_globals.fsa_state));
cib__set_call_options(call_opt, "update", cib_none);
}
return call_opt;
}
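/* Usage sketch (editor's addition): callers fetch the options once and pass
 * them along with a CIB request, as controld_update_resource_history() does
 * below:
 *
 * int call_opt = crmd_cib_smart_opt();
 * controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback);
 *
 * so that updates made during an election or while pending are applied
 * locally rather than waiting on an unreachable primary.
 */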
static void
cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
char *desc = user_data;
if (rc == 0) {
crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id);
} else {
crm_warn("Deletion of %s (via CIB call %d) failed: %s " QB_XS " rc=%d",
desc, call_id, pcmk_strerror(rc), rc);
}
}
// Searches for various portions of PCMK__XE_NODE_STATE to delete
// Match a particular node's PCMK__XE_NODE_STATE (takes node name 1x)
#define XPATH_NODE_STATE "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
// Node's lrm section (name 1x)
#define XPATH_NODE_LRM XPATH_NODE_STATE "/" PCMK__XE_LRM
/* Node's PCMK__XE_LRM_RSC_OP entries and PCMK__XE_LRM_RESOURCE entries without
* unexpired lock
* (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x)
*/
#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" PCMK__XE_LRM_RSC_OP \
"|" XPATH_NODE_STATE \
"//" PCMK__XE_LRM_RESOURCE \
"[not(@" PCMK_OPT_SHUTDOWN_LOCK ") " \
"or " PCMK_OPT_SHUTDOWN_LOCK "<%lld]"
-// Node's PCMK__XE_TRANSIENT_ATTRIBUTES section (name 1x)
-#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" PCMK__XE_TRANSIENT_ATTRIBUTES
-
-// Everything under PCMK__XE_NODE_STATE (name 1x)
-#define XPATH_NODE_ALL XPATH_NODE_STATE "/*"
-
-/* Unlocked history + transient attributes
- * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x, name 1x)
- */
-#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS
-
/*!
* \internal
- * \brief Get the XPath and description of a node state section to be deleted
+ * \brief Get the XPath and description of resource history to be deleted
*
- * \param[in] uname Desired node
- * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to be deleted
- * \param[out] xpath Where to store XPath of \p section
- * \param[out] desc If not \c NULL, where to store description of \p section
+ * \param[in] uname Name of node to delete resource history for
+ * \param[in] unlocked_only If true, delete history of only unlocked resources
+ * \param[out] xpath Where to store XPath for history deletion
+ * \param[out] desc If not NULL, where to store loggable description
*/
void
-controld_node_state_deletion_strings(const char *uname,
- enum controld_section_e section,
- char **xpath, char **desc)
+controld_node_history_deletion_strings(const char *uname, bool unlocked_only,
+ char **xpath, char **desc)
{
const char *desc_pre = NULL;
// Shutdown locks that started before this time are expired
long long expire = (long long) time(NULL)
- controld_globals.shutdown_lock_limit;
- switch (section) {
- case controld_section_lrm:
- *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
- desc_pre = "resource history";
- break;
- case controld_section_lrm_unlocked:
- *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED,
- uname, uname, expire);
- desc_pre = "resource history (other than shutdown locks)";
- break;
- case controld_section_attrs:
- *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
- desc_pre = "transient attributes";
- break;
- case controld_section_all:
- *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
- desc_pre = "all state";
- break;
- case controld_section_all_unlocked:
- *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED,
- uname, uname, expire, uname);
- desc_pre = "all state (other than shutdown locks)";
- break;
- default:
- // We called this function incorrectly
- pcmk__assert(false);
- break;
+ if (unlocked_only) {
+ *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED,
+ uname, uname, expire);
+ desc_pre = "resource history (other than shutdown locks)";
+ } else {
+ *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
+ desc_pre = "resource history";
}
if (desc != NULL) {
*desc = crm_strdup_printf("%s for node %s", desc_pre, uname);
}
}
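/* Example output (editor's illustration, node name hypothetical): for node
 * "node1" with unlocked_only=false, *xpath matches
 * //node_state[@uname='node1']/lrm and *desc becomes "resource history for
 * node node1"; with unlocked_only=true, the XPath instead matches the
 * lrm_rsc_op entries plus any lrm_resource entries whose shutdown lock is
 * missing or already expired.
 */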
+
/*!
* \internal
- * \brief Delete subsection of a node's CIB \c PCMK__XE_NODE_STATE
+ * \brief Delete a node's resource history from the CIB
*
- * \param[in] uname Desired node
- * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to delete
- * \param[in] options CIB call options to use
+ * \param[in] uname Desired node
+ * \param[in] unlocked_only If true, delete history of only unlocked resources
+ * \param[in] options CIB call options to use
*/
void
-controld_delete_node_state(const char *uname, enum controld_section_e section,
- int options)
+controld_delete_node_history(const char *uname, bool unlocked_only, int options)
{
cib_t *cib = controld_globals.cib_conn;
char *xpath = NULL;
char *desc = NULL;
int cib_rc = pcmk_ok;
pcmk__assert((uname != NULL) && (cib != NULL));
- controld_node_state_deletion_strings(uname, section, &xpath, &desc);
-
+ controld_node_history_deletion_strings(uname, unlocked_only, &xpath, &desc);
cib__set_call_options(options, "node state deletion",
cib_xpath|cib_multiple);
cib_rc = cib->cmds->remove(cib, xpath, NULL, options);
fsa_register_cib_callback(cib_rc, desc, cib_delete_callback);
crm_info("Deleting %s (via CIB call %d) " QB_XS " xpath=%s",
desc, cib_rc, xpath);
// CIB library handles freeing desc
free(xpath);
}
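/* Usage sketch (editor's addition, values hypothetical): to wipe a node's
 * resource history while preserving unexpired shutdown locks, a caller could
 * do
 *
 * controld_delete_node_history("node1", true, cib_none);
 *
 * The deletion runs asynchronously, and cib_delete_callback() logs the
 * outcome.
 */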
// Takes node name and resource ID
#define XPATH_RESOURCE_HISTORY "//" PCMK__XE_NODE_STATE \
"[@" PCMK_XA_UNAME "='%s']/" \
PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \
"/" PCMK__XE_LRM_RESOURCE \
"[@" PCMK_XA_ID "='%s']"
// @TODO could add "and @PCMK_OPT_SHUTDOWN_LOCK" to limit to locks
/*!
* \internal
* \brief Clear resource history from CIB for a given resource and node
*
* \param[in] rsc_id ID of resource to be cleared
* \param[in] node Node whose resource history should be cleared
* \param[in] user_name ACL user name to use
* \param[in] call_options CIB call options
*
* \return Standard Pacemaker return code
*/
int
controld_delete_resource_history(const char *rsc_id, const char *node,
const char *user_name, int call_options)
{
char *desc = NULL;
char *xpath = NULL;
int rc = pcmk_rc_ok;
cib_t *cib = controld_globals.cib_conn;
CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL);
desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node);
if (cib == NULL) {
crm_err("Unable to clear %s: no CIB connection", desc);
free(desc);
return ENOTCONN;
}
// Ask CIB to delete the entry
xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id);
cib->cmds->set_user(cib, user_name);
rc = cib->cmds->remove(cib, xpath, NULL, call_options|cib_xpath);
cib->cmds->set_user(cib, NULL);
if (rc < 0) {
rc = pcmk_legacy2rc(rc);
crm_err("Could not delete resource status of %s on %s%s%s: %s "
QB_XS " rc=%d", rsc_id, node,
(user_name? " for user " : ""), (user_name? user_name : ""),
pcmk_rc_str(rc), rc);
free(desc);
free(xpath);
return rc;
}
if (pcmk_is_set(call_options, cib_sync_call)) {
if (pcmk_is_set(call_options, cib_dryrun)) {
crm_debug("Deletion of %s would succeed", desc);
} else {
crm_debug("Deletion of %s succeeded", desc);
}
free(desc);
} else {
crm_info("Clearing %s (via CIB call %d) " QB_XS " xpath=%s",
desc, rc, xpath);
fsa_register_cib_callback(rc, desc, cib_delete_callback);
// CIB library handles freeing desc
}
free(xpath);
return pcmk_rc_ok;
}
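/* Editor's illustration: for a hypothetical resource "rsc1" on node "node1",
 * XPATH_RESOURCE_HISTORY expands to
 * //node_state[@uname='node1']/lrm/lrm_resources/lrm_resource[@id='rsc1'],
 * so the remove() call deletes that resource's entire lrm_resource entry.
 */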
/*!
* \internal
* \brief Build XML and string of parameters meeting some criteria, for digest
*
* \param[in] op Executor event with parameter table to use
* \param[in] metadata Parsed meta-data for executed resource agent
* \param[in] param_type Flag used for selection criteria
* \param[out] result Will be set to newly created XML with selected
* parameters as attributes
*
* \return Newly allocated space-separated string of parameter names
* \note Selection criteria varies by param_type: for the restart digest, we
* want parameters that are *not* marked reloadable (OCF 1.1) or that
* *are* marked unique (pre-1.1), for both string and XML results; for the
* secure digest, we want parameters that *are* marked private for the
* string, but parameters that are *not* marked private for the XML.
* \note It is the caller's responsibility to free the string return value with
* \p g_string_free() and the XML result with \p pcmk__xml_free().
*/
static GString *
build_parameter_list(const lrmd_event_data_t *op,
const struct ra_metadata_s *metadata,
enum ra_param_flags_e param_type, xmlNode **result)
{
GString *list = NULL;
*result = pcmk__xe_create(NULL, PCMK_XE_PARAMETERS);
/* Consider all parameters except private ones, to be consistent with what
* the scheduler does with calculate_secure_digest().
*/
if (param_type == ra_param_private
&& compare_version(controld_globals.dc_version, "3.16.0") >= 0) {
g_hash_table_foreach(op->params, hash2field, *result);
pcmk__filter_op_for_digest(*result);
}
for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) {
struct ra_param_s *param = (struct ra_param_s *) iter->data;
bool accept_for_list = false;
bool accept_for_xml = false;
switch (param_type) {
case ra_param_reloadable:
accept_for_list = !pcmk_is_set(param->rap_flags, param_type);
accept_for_xml = accept_for_list;
break;
case ra_param_unique:
accept_for_list = pcmk_is_set(param->rap_flags, param_type);
accept_for_xml = accept_for_list;
break;
case ra_param_private:
accept_for_list = pcmk_is_set(param->rap_flags, param_type);
accept_for_xml = !accept_for_list;
break;
}
if (accept_for_list) {
crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
if (list == NULL) {
// We will later search for " WORD ", so start list with a space
pcmk__add_word(&list, 256, " ");
}
pcmk__add_word(&list, 0, param->rap_name);
} else {
crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
}
if (accept_for_xml) {
const char *v = g_hash_table_lookup(op->params, param->rap_name);
if (v != NULL) {
crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
crm_xml_add(*result, param->rap_name, v);
}
} else {
crm_trace("Removing attr %s from the xml result", param->rap_name);
pcmk__xe_remove_attr(*result, param->rap_name);
}
}
if (list != NULL) {
// We will later search for " WORD ", so end list with a space
pcmk__add_word(&list, 0, " ");
}
return list;
}
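/* Editor's restatement of the selection note above: for ra_param_reloadable
 * and ra_param_unique, the string list and the XML digest input agree
 * (parameters that are not reloadable, respectively that are unique); for
 * ra_param_private, the string lists the private parameters while the XML
 * keeps only the non-private ones.
 */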
static void
append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
GString *list = NULL;
char *digest = NULL;
xmlNode *restart = NULL;
CRM_LOG_ASSERT(op->params != NULL);
if (op->interval_ms > 0) {
/* monitors are not reloadable */
return;
}
if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) {
/* Add parameters not marked reloadable to the PCMK__XA_OP_FORCE_RESTART
* list
*/
list = build_parameter_list(op, metadata, ra_param_reloadable,
&restart);
} else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) {
/* @COMPAT pre-OCF-1.1 resource agents
*
* Before OCF 1.1, Pacemaker abused "unique=0" to indicate
* reloadability. Add any parameters with unique="1" to the
* PCMK__XA_OP_FORCE_RESTART list.
*/
list = build_parameter_list(op, metadata, ra_param_unique, &restart);
} else {
// Resource does not support agent reloads
return;
}
digest = pcmk__digest_operation(restart);
/* Add PCMK__XA_OP_FORCE_RESTART and PCMK__XA_OP_RESTART_DIGEST to indicate
* that the resource supports reload, regardless of whether it actually has
* any reloadable parameters
*/
crm_xml_add(update, PCMK__XA_OP_FORCE_RESTART,
(list == NULL)? "" : (const char *) list->str);
crm_xml_add(update, PCMK__XA_OP_RESTART_DIGEST, digest);
if ((list != NULL) && (list->len > 0)) {
crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str);
} else {
crm_trace("%s: %s", op->rsc_id, digest);
}
if (list != NULL) {
g_string_free(list, TRUE);
}
pcmk__xml_free(restart);
free(digest);
}
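/* Editor's illustration with hypothetical parameters: if an OCF 1.1 agent
 * marks "ip" reloadable but not "netmask", the update gains attributes along
 * the lines of op-force-restart=" netmask " and an op-restart-digest hashed
 * over the XML that contains only "netmask", so a later change to "ip" can
 * be handled by reload while a "netmask" change forces a restart.
 */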
static void
append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
GString *list = NULL;
char *digest = NULL;
xmlNode *secure = NULL;
CRM_LOG_ASSERT(op->params != NULL);
/* To keep PCMK__XA_OP_SECURE_PARAMS short, we want it to contain the secure
* parameters but PCMK__XA_OP_SECURE_DIGEST to be based on the insecure ones
*/
list = build_parameter_list(op, metadata, ra_param_private, &secure);
if (list != NULL) {
digest = pcmk__digest_operation(secure);
crm_xml_add(update, PCMK__XA_OP_SECURE_PARAMS,
(const char *) list->str);
crm_xml_add(update, PCMK__XA_OP_SECURE_DIGEST, digest);
crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str);
g_string_free(list, TRUE);
} else {
crm_trace("%s: no secure parameters", op->rsc_id);
}
pcmk__xml_free(secure);
free(digest);
}
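/* Editor's illustration: for a hypothetical agent with a private "password"
 * parameter, op-secure-params would list " password " while op-secure-digest
 * is computed from the remaining non-private parameters, letting tools
 * compare configurations without exposing the secret value.
 */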
/*!
* \internal
* \brief Create XML for a resource history entry
*
* \param[in] func Function name of caller
* \param[in,out] parent XML to add entry to
* \param[in] rsc Affected resource
* \param[in,out] op Action to add an entry for (or NULL to do nothing)
* \param[in] node_name Node where action occurred
*/
void
controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op,
const char *node_name)
{
int target_rc = 0;
xmlNode *xml_op = NULL;
struct ra_metadata_s *metadata = NULL;
const char *caller_version = NULL;
lrm_state_t *lrm_state = NULL;
if (op == NULL) {
return;
}
target_rc = rsc_op_expected_rc(op);
caller_version = g_hash_table_lookup(op->params, PCMK_XA_CRM_FEATURE_SET);
CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET);
xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
controld_globals.cluster->priv->node_name,
func);
if (xml_op == NULL) {
return;
}
if ((rsc == NULL) || (op->params == NULL)
|| !crm_op_needs_metadata(rsc->standard, op->op_type)) {
crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
op->op_type, op->rsc_id, op->params, rsc);
return;
}
lrm_state = controld_get_executor_state(node_name, false);
if (lrm_state == NULL) {
crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT
" because we have no connection to executor for %s",
op->rsc_id, op->op_type, op->interval_ms, node_name);
return;
}
/* Ideally the metadata is cached, and the agent is just a fallback.
*
* @TODO Go through all callers and ensure they get metadata asynchronously
* first.
*/
metadata = controld_get_rsc_metadata(lrm_state, rsc,
controld_metadata_from_agent
|controld_metadata_from_cache);
if (metadata == NULL) {
return;
}
crm_trace("Including additional digests for %s:%s:%s",
rsc->standard, rsc->provider, rsc->type);
append_restart_list(op, metadata, xml_op, caller_version);
append_secure_list(op, metadata, xml_op, caller_version);
return;
}
/*!
* \internal
* \brief Record an action as pending in the CIB, if appropriate
*
* \param[in] node_name Node where the action is pending
* \param[in] rsc Resource that action is for
* \param[in,out] op Pending action
*
* \return true if action was recorded in CIB, otherwise false
*/
bool
controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op)
{
const char *record_pending = NULL;
CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL),
return false);
// Never record certain operation types as pending
if ((op->op_type == NULL) || (op->params == NULL)
|| !controld_action_is_recordable(op->op_type)) {
return false;
}
// Check action's PCMK_META_RECORD_PENDING meta-attribute (defaults to true)
record_pending = crm_meta_value(op->params, PCMK_META_RECORD_PENDING);
if ((record_pending != NULL) && !crm_is_true(record_pending)) {
pcmk__warn_once(pcmk__wo_record_pending,
"The " PCMK_META_RECORD_PENDING " option (for example, "
"for the %s resource's %s operation) is deprecated and "
"will be removed in a future release",
rsc->id, op->op_type);
return false;
}
op->call_id = -1;
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
crm_debug("Recording pending %s-interval %s for %s on %s in the CIB",
pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
node_name);
controld_update_resource_history(node_name, rsc, op, 0);
return true;
}
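/* Editor's sketch of the recorded entry (values illustrative): the resulting
 * lrm_rsc_op history entry carries call-id="-1", the PCMK_OCF_UNKNOWN return
 * code, and PCMK_EXEC_PENDING status, which readers of the CIB interpret as
 * an action still in flight rather than a completed result.
 */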
static void
cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
switch (rc) {
case pcmk_ok:
case -pcmk_err_diff_failed:
case -pcmk_err_diff_resync:
crm_trace("Resource history update completed (call=%d rc=%d)",
call_id, rc);
break;
default:
if (call_id > 0) {
crm_warn("Resource history update %d failed: %s "
QB_XS " rc=%d", call_id, pcmk_strerror(rc), rc);
} else {
crm_warn("Resource history update failed: %s " QB_XS " rc=%d",
pcmk_strerror(rc), rc);
}
}
if (call_id == pending_rsc_update) {
pending_rsc_update = 0;
controld_trigger_fsa();
}
}
/* Only successful stops, and probes that found the resource inactive, get locks
* recorded in the history. This ensures the resource stays locked to the node
* until it is active there again after the node comes back up.
*/
static bool
should_preserve_lock(lrmd_event_data_t *op)
{
if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
return false;
}
if (!strcmp(op->op_type, PCMK_ACTION_STOP) && (op->rc == PCMK_OCF_OK)) {
return true;
}
if (!strcmp(op->op_type, PCMK_ACTION_MONITOR)
&& (op->rc == PCMK_OCF_NOT_RUNNING)) {
return true;
}
return false;
}
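/* Editor's summary of the rule above: a lock survives only a successful stop
 * (rc == PCMK_OCF_OK) or a probe that found the resource inactive
 * (rc == PCMK_OCF_NOT_RUNNING); any other result clears the lock.
 */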
/*!
* \internal
* \brief Request a CIB update
*
* \param[in] section Section of CIB to update
* \param[in] data New XML of CIB section to update
* \param[in] options CIB call options
* \param[in] callback If not \c NULL, set this as the operation callback
*
* \return Standard Pacemaker return code
*
* \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is
* stored in \p pending_rsc_update on success.
*/
int
controld_update_cib(const char *section, xmlNode *data, int options,
void (*callback)(xmlNode *, int, int, xmlNode *, void *))
{
cib_t *cib = controld_globals.cib_conn;
int cib_rc = -ENOTCONN;
pcmk__assert(data != NULL);
if (cib != NULL) {
cib_rc = cib->cmds->modify(cib, section, data, options);
if (cib_rc >= 0) {
crm_debug("Submitted CIB update %d for %s section",
cib_rc, section);
}
}
if (callback == NULL) {
if (cib_rc < 0) {
crm_err("Failed to update CIB %s section: %s",
section, pcmk_rc_str(pcmk_legacy2rc(cib_rc)));
}
} else {
if ((cib_rc >= 0) && (callback == cib_rsc_callback)) {
/* Checking for a particular callback is a little hacky, but it
* didn't seem worth adding an output argument for cib_rc for just
* one use case.
*/
pending_rsc_update = cib_rc;
}
fsa_register_cib_callback(cib_rc, NULL, callback);
}
return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc);
}
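/* Usage sketch (editor's addition): resource history updates pass
 * cib_rsc_callback so that the call ID lands in pending_rsc_update, e.g.
 *
 * controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback);
 *
 * while callers that only need error logging may pass a NULL callback.
 */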
/*!
* \internal
* \brief Update resource history entry in CIB
*
* \param[in] node_name Node where action occurred
* \param[in] rsc Resource that action is for
* \param[in,out] op Action to record
* \param[in] lock_time If nonzero, when resource was locked to node
*
* \note On success, the CIB update's call ID will be stored in
* pending_rsc_update.
*/
void
controld_update_resource_history(const char *node_name,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op, time_t lock_time)
{
xmlNode *update = NULL;
xmlNode *xml = NULL;
int call_opt = crmd_cib_smart_opt();
const char *node_id = NULL;
const char *container = NULL;
CRM_CHECK((node_name != NULL) && (op != NULL), return);
if (rsc == NULL) {
crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id);
return;
}
// <status>
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
// <node_state ...>
xml = pcmk__xe_create(update, PCMK__XE_NODE_STATE);
if (controld_is_local_node(node_name)) {
node_id = controld_globals.our_uuid;
} else {
node_id = node_name;
pcmk__xe_set_bool_attr(xml, PCMK_XA_REMOTE_NODE, true);
}
crm_xml_add(xml, PCMK_XA_ID, node_id);
crm_xml_add(xml, PCMK_XA_UNAME, node_name);
crm_xml_add(xml, PCMK_XA_CRM_DEBUG_ORIGIN, __func__);
// <lrm ...>
xml = pcmk__xe_create(xml, PCMK__XE_LRM);
crm_xml_add(xml, PCMK_XA_ID, node_id);
// <lrm_resources>
xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCES);
// <lrm_resource ...>
xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCE);
crm_xml_add(xml, PCMK_XA_ID, op->rsc_id);
crm_xml_add(xml, PCMK_XA_CLASS, rsc->standard);
crm_xml_add(xml, PCMK_XA_PROVIDER, rsc->provider);
crm_xml_add(xml, PCMK_XA_TYPE, rsc->type);
if (lock_time != 0) {
/* Actions on a locked resource should either preserve the lock by
* recording it with the action result, or clear it.
*/
if (!should_preserve_lock(op)) {
lock_time = 0;
}
crm_xml_add_ll(xml, PCMK_OPT_SHUTDOWN_LOCK, (long long) lock_time);
}
if (op->params != NULL) {
container = g_hash_table_lookup(op->params,
CRM_META "_" PCMK__META_CONTAINER);
if (container != NULL) {
crm_trace("Resource %s is a part of container resource %s",
op->rsc_id, container);
crm_xml_add(xml, PCMK__META_CONTAINER, container);
}
}
// <lrm_resource_op ...> (possibly more than one)
controld_add_resource_history_xml(xml, rsc, op, node_name);
/* Update the CIB asynchronously. Even if the update fails, the resource
* state should be discovered during the next election. Worst case, the node
* is wrongly fenced for a resource it isn't actually running.
*/
crm_log_xml_trace(update, __func__);
controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback);
pcmk__xml_free(update);
}
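/* Editor's sketch of the XML built above (attribute values hypothetical):
 *
 * <status>
 *   <node_state id="1" uname="node1" crm-debug-origin="...">
 *     <lrm id="1">
 *       <lrm_resources>
 *         <lrm_resource id="rsc1" class="ocf" provider="heartbeat"
 *                       type="IPaddr2">
 *           <lrm_rsc_op ... />
 */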
/*!
* \internal
* \brief Erase an LRM history entry from the CIB, given the operation data
*
* \param[in] op Operation whose history should be deleted
*/
void
controld_delete_action_history(const lrmd_event_data_t *op)
{
xmlNode *xml_top = NULL;
CRM_CHECK(op != NULL, return);
xml_top = pcmk__xe_create(NULL, PCMK__XE_LRM_RSC_OP);
crm_xml_add_int(xml_top, PCMK__XA_CALL_ID, op->call_id);
crm_xml_add(xml_top, PCMK__XA_TRANSITION_KEY, op->user_data);
if (op->interval_ms > 0) {
char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
/* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
crm_xml_add(xml_top, PCMK_XA_ID, op_id);
free(op_id);
}
crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)",
op->rsc_id, op->op_type, op->interval_ms, op->call_id);
controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn,
PCMK_XE_STATUS, xml_top, cib_none);
crm_log_xml_trace(xml_top, "op:cancel");
pcmk__xml_free(xml_top);
}
/* Define xpath to find LRM resource history entry by node and resource */
#define XPATH_HISTORY \
"/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
"/" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" \
"/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \
"/" PCMK__XE_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']" \
"/" PCMK__XE_LRM_RSC_OP
/* ... and also by operation key */
#define XPATH_HISTORY_ID XPATH_HISTORY "[@" PCMK_XA_ID "='%s']"
/* ... and also by operation key and operation call ID */
#define XPATH_HISTORY_CALL XPATH_HISTORY \
"[@" PCMK_XA_ID "='%s' and @" PCMK__XA_CALL_ID "='%d']"
/* ... and also by operation key and original operation key */
#define XPATH_HISTORY_ORIG XPATH_HISTORY \
"[@" PCMK_XA_ID "='%s' and @" PCMK__XA_OPERATION_KEY "='%s']"
/*!
* \internal
* \brief Delete a last_failure resource history entry from the CIB
*
* \param[in] rsc_id Name of resource to clear history for
* \param[in] node Name of node to clear history for
* \param[in] action If specified, delete only if this was failed action
* \param[in] interval_ms If \p action is specified, it has this interval
*/
void
controld_cib_delete_last_failure(const char *rsc_id, const char *node,
const char *action, guint interval_ms)
{
char *xpath = NULL;
char *last_failure_key = NULL;
CRM_CHECK((rsc_id != NULL) && (node != NULL), return);
// Generate XPath to match desired entry
last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0);
if (action == NULL) {
xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id,
last_failure_key);
} else {
char *action_key = pcmk__op_key(rsc_id, action, interval_ms);
xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id,
last_failure_key, action_key);
free(action_key);
}
free(last_failure_key);
controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
NULL, cib_xpath);
free(xpath);
}
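/* Editor's illustration: operation keys follow the rsc_action_interval
 * pattern, so for resource "rsc1" the last_failure key is
 * "rsc1_last_failure_0", and clearing a failed 10-second monitor would use
 * XPATH_HISTORY_ORIG with the original key "rsc1_monitor_10000".
 */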
/*!
* \internal
* \brief Delete resource history entry from the CIB, given operation key
*
* \param[in] rsc_id Name of resource to clear history for
* \param[in] node Name of node to clear history for
* \param[in] key Operation key of operation to clear history for
* \param[in] call_id If specified, delete entry only if it has this call ID
*/
void
controld_delete_action_history_by_key(const char *rsc_id, const char *node,
const char *key, int call_id)
{
char *xpath = NULL;
CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return);
if (call_id > 0) {
xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key,
call_id);
} else {
xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key);
}
controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
NULL, cib_xpath);
free(xpath);
}
diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h
index b8622d5228..116db64924 100644
--- a/daemons/controld/controld_cib.h
+++ b/daemons/controld/controld_cib.h
@@ -1,121 +1,112 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CONTROLD_CIB__H
#define PCMK__CONTROLD_CIB__H
#include <crm_internal.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cib/internal.h> // cib__*
#include "controld_globals.h" // controld_globals.cib_conn
static inline void
fsa_cib_anon_update(const char *section, xmlNode *data) {
if (controld_globals.cib_conn == NULL) {
crm_err("No CIB connection available");
} else {
controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
section, data, cib_can_create);
}
}
static inline void
fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) {
if (controld_globals.cib_conn == NULL) {
crm_err("No CIB connection available");
} else {
controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
section, data,
cib_can_create
|cib_discard_reply);
}
}
int controld_update_cib(const char *section, xmlNode *data, int options,
void (*callback)(xmlNode *, int, int, xmlNode *,
void *));
unsigned int cib_op_timeout(void);
-// Subsections of PCMK__XE_NODE_STATE
-enum controld_section_e {
- controld_section_lrm,
- controld_section_lrm_unlocked,
- controld_section_attrs,
- controld_section_all,
- controld_section_all_unlocked
-};
-
-void controld_node_state_deletion_strings(const char *uname,
- enum controld_section_e section,
- char **xpath, char **desc);
-void controld_delete_node_state(const char *uname,
- enum controld_section_e section, int options);
+void controld_node_history_deletion_strings(const char *uname,
+ bool unlocked_only,
+ char **xpath, char **desc);
+void controld_delete_node_history(const char *uname, bool unlocked_only,
+ int options);
int controld_delete_resource_history(const char *rsc_id, const char *node,
const char *user_name, int call_options);
/* Convenience macro for registering a CIB callback
* (assumes that data can be freed with free())
*/
# define fsa_register_cib_callback(id, data, fn) do { \
cib_t *cib_conn = controld_globals.cib_conn; \
\
pcmk__assert(cib_conn != NULL); \
cib_conn->cmds->register_callback_full(cib_conn, id, cib_op_timeout(), \
FALSE, data, #fn, fn, free); \
} while(0)
void controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op,
const char *node_name);
#define controld_add_resource_history_xml(parent, rsc, op, node_name) \
controld_add_resource_history_xml_as(__func__, (parent), (rsc), \
(op), (node_name))
bool controld_record_pending_op(const char *node_name,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op);
void controld_update_resource_history(const char *node_name,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op, time_t lock_time);
void controld_delete_action_history(const lrmd_event_data_t *op);
void controld_cib_delete_last_failure(const char *rsc_id, const char *node,
const char *action, guint interval_ms);
void controld_delete_action_history_by_key(const char *rsc_id, const char *node,
const char *key, int call_id);
void controld_disconnect_cib_manager(void);
int crmd_cib_smart_opt(void);
/*!
* \internal
* \brief Check whether an action type should be recorded in the CIB
*
* \param[in] action Action type
*
* \return true if action should be recorded, false otherwise
*/
static inline bool
controld_action_is_recordable(const char *action)
{
return !pcmk__str_any_of(action, PCMK_ACTION_CANCEL, PCMK_ACTION_DELETE,
PCMK_ACTION_NOTIFY, PCMK_ACTION_META_DATA, NULL);
}
#endif // PCMK__CONTROLD_CIB__H
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 2ec689394d..977acf01ab 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1,2406 +1,2405 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <regex.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_rsc_info_t, etc.
#include <crm/services.h>
#include <crm/common/xml.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
#define START_DELAY_THRESHOLD 5 * 60 * 1000
#define MAX_LRM_REG_FAILS 30
struct delete_event_s {
int rc;
const char *rsc;
lrm_state_t *lrm_state;
};
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
const xmlNode *rsc_op,
const char *rsc_id,
const char *operation);
static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
xmlNode *msg, struct ra_metadata_s *md);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static void
lrm_connection_destroy(void)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("Lost connection to local executor");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
}
}
static char *
make_stop_id(const char *rsc, int call_id)
{
return crm_strdup_printf("%s:%d", rsc, call_id);
}
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") == NULL) {
pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
}
}
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") != NULL) {
pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
}
}
/*!
* \internal
* \brief Remove a recurring operation from a resource's history
*
* \param[in,out] history Resource history to modify
* \param[in] op Operation to remove
*
* \return TRUE if the operation was found and removed, FALSE otherwise
*/
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_event_data_t *existing = iter->data;
if ((op->interval_ms == existing->interval_ms)
&& pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
&& pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
lrmd_free_event(existing);
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Free all recurring operations in resource history
*
* \param[in,out] history Resource history to modify
*/
static void
history_free_recurring_ops(rsc_history_t *history)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_free_event(iter->data);
}
g_list_free(history->recurring_op_list);
history->recurring_op_list = NULL;
}
/*!
* \internal
* \brief Free resource history
*
* \param[in,out] history Resource history to free
*/
void
history_free(gpointer data)
{
rsc_history_t *history = (rsc_history_t*)data;
if (history->stop_params) {
g_hash_table_destroy(history->stop_params);
}
/* Don't need to free history->rsc.id because it's set to history->id */
free(history->rsc.type);
free(history->rsc.standard);
free(history->rsc.provider);
lrmd_free_event(history->failed);
lrmd_free_event(history->last);
free(history->id);
history_free_recurring_ops(history);
free(history);
}
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
NULL, crmd_cib_smart_opt());
return;
}
if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
return;
}
crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
if (entry == NULL && rsc) {
entry = pcmk__assert_alloc(1, sizeof(rsc_history_t));
entry->id = pcmk__str_copy(op->rsc_id);
g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = pcmk__str_copy(rsc->type);
entry->rsc.standard = pcmk__str_copy(rsc->standard);
entry->rsc.provider = pcmk__str_copy(rsc->provider);
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
entry->last_callid = op->call_id;
target_rc = rsc_op_expected_rc(op);
if (op->op_status == PCMK_EXEC_CANCELLED) {
if (op->interval_ms > 0) {
crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
history_remove_recurring_op(entry, op);
return;
} else {
crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
op->rsc_id, op->op_type, op->interval_ms, op->rc,
op->op_status);
}
} else if (did_rsc_op_fail(op, target_rc)) {
/* Store failed monitors here, otherwise the block below will cause them
* to be forgotten when a stop happens.
*/
if (entry->failed) {
lrmd_free_event(entry->failed);
}
entry->failed = lrmd_copy_event(op);
} else if (op->interval_ms == 0) {
if (entry->last) {
lrmd_free_event(entry->last);
}
entry->last = lrmd_copy_event(op);
if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START,
PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_MONITOR, NULL)) {
if (entry->stop_params) {
g_hash_table_destroy(entry->stop_params);
}
entry->stop_params = pcmk__strkey_table(free, free);
g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
}
}
if (op->interval_ms > 0) {
/* Ensure there are no duplicates */
history_remove_recurring_op(entry, op);
crm_trace("Adding recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
} else if ((entry->recurring_op_list != NULL)
&& !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
g_list_length(entry->recurring_op_list), op->rsc_id,
op->op_type, op->interval_ms);
history_free_recurring_ops(entry);
}
}
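/* Editor's summary of the caching rules above: each resource keeps at most
 * one failed result, one last non-recurring result (plus its non-meta
 * parameters for use by later stops), and a list of active recurring
 * results. A cancellation removes one recurring entry, while any new
 * non-recurring result other than a monitor drops all recurring entries for
 * that resource.
 */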
/*!
* \internal
* \brief Send a direct OK ack for a resource task
*
* \param[in] lrm_state LRM connection
* \param[in] input Input message being ack'ed
* \param[in] rsc_id ID of affected resource
* \param[in] rsc Affected resource (if available)
* \param[in] task Operation task being ack'ed
* \param[in] ack_host Name of host to send ack to
* \param[in] ack_sys IPC system name to ack
*/
static void
send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
const char *rsc_id, const lrmd_rsc_info_t *rsc,
const char *task, const char *ack_host, const char *ack_sys)
{
lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
lrmd_free_event(op);
}
static inline const char *
op_node_name(lrmd_event_data_t *op)
{
return pcmk__s(op->remote_nodename,
controld_globals.cluster->priv->node_name);
}
void
lrm_op_callback(lrmd_event_data_t * op)
{
CRM_CHECK(op != NULL, return);
switch (op->type) {
case lrmd_event_disconnect:
if (op->remote_nodename == NULL) {
/* If this is the local executor IPC connection, set the right
* bits in the controller when the connection goes down.
*/
lrm_connection_destroy();
}
break;
case lrmd_event_exec_complete:
{
lrm_state_t *lrm_state =
controld_get_executor_state(op_node_name(op), false);
pcmk__assert(lrm_state != NULL);
process_lrm_event(lrm_state, op, NULL, NULL);
}
break;
default:
break;
}
}
static void
try_local_executor_connect(long long action, fsa_data_t *msg_data,
lrm_state_t *lrm_state)
{
int rc = pcmk_rc_ok;
crm_debug("Connecting to the local executor");
// If we can connect, great
rc = controld_connect_local_executor(lrm_state);
if (rc == pcmk_rc_ok) {
controld_set_fsa_input_flags(R_LRM_CONNECTED);
crm_info("Connection to the local executor established");
return;
}
// Otherwise, if we can try again, set a timer to do so
if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
crm_warn("Failed to connect to the local executor %d time%s "
"(%d max): %s", lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
controld_start_wait_timer();
crmd_fsa_stall(FALSE);
return;
}
// Otherwise give up
crm_err("Failed to connect to the executor the max allowed "
"%d time%s: %s", lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
pcmk_rc_str(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/* A_LRM_CONNECT */
void
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* This only pertains to local executor connections. Remote connections are
* handled as resources within the scheduler. Connecting and disconnecting
* from remote executor instances is handled differently.
*/
lrm_state_t *lrm_state = NULL;
if (controld_globals.cluster->priv->node_name == NULL) {
return; // Shouldn't be possible
}
lrm_state = controld_get_executor_state(NULL, true);
if (lrm_state == NULL) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (action & A_LRM_DISCONNECT) {
if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
if (action == A_LRM_DISCONNECT) {
crmd_fsa_stall(FALSE);
return;
}
}
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_disconnect(lrm_state);
lrm_state_reset_tables(lrm_state, FALSE);
}
if (action & A_LRM_CONNECT) {
try_local_executor_connect(action, msg_data, lrm_state);
}
if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
const char *when = "lrm disconnect";
GHashTableIter gIter;
const char *key = NULL;
rsc_history_t *entry = NULL;
active_op_t *pending = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
when = "shutdown";
} else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
when = "shutdown... waiting";
}
if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
stop_recurring_actions,
lrm_state);
guint nremaining = g_hash_table_size(lrm_state->active_ops);
if (removed || nremaining) {
crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
removed, pcmk__plural_s(removed), when, nremaining);
}
}
if (lrm_state->active_ops != NULL) {
g_hash_table_iter_init(&gIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
/* Ignore recurring actions in the shutdown calculations */
if (pending->interval_ms == 0) {
counter++;
}
}
}
if (counter > 0) {
do_crm_log(log_level, "%d pending executor operation%s at %s",
counter, pcmk__plural_s(counter), when);
if ((cur_state == S_TERMINATE)
|| !pcmk_is_set(controld_globals.fsa_input_register,
R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
}
} else {
rc = FALSE;
}
return rc;
}
if (lrm_state->resource_history == NULL) {
return rc;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
/* At this point we're not waiting, we're just shutting down */
when = "shutdown";
}
counter = 0;
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
if (is_rsc_active(lrm_state, entry->id) == FALSE) {
continue;
}
counter++;
if (log_level == LOG_ERR) {
crm_info("Found %s active at %s", entry->id, when);
} else {
crm_trace("Found %s active at %s", entry->id, when);
}
if (lrm_state->active_ops != NULL) {
GHashTableIter hIter;
g_hash_table_iter_init(&hIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
crm_notice("%sction %s (%s) incomplete at %s",
pending->interval_ms == 0 ? "A" : "Recurring a",
key, pending->op_key, when);
}
}
}
}
if (counter) {
crm_err("%d resource%s active at %s",
counter, (counter == 1)? " was" : "s were", when);
}
return rc;
}
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
rsc_history_t *entry = NULL;
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->last == NULL) {
return FALSE;
}
crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
entry->last->interval_ms, entry->last->rc);
if ((entry->last->rc == PCMK_OCF_OK)
&& pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP,
pcmk__str_casei)) {
return FALSE;
} else if (entry->last->rc == PCMK_OCF_OK
&& pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO,
pcmk__str_casei)) {
// A stricter check is too complex ... leave that to the scheduler
return FALSE;
} else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
return FALSE;
} else if ((entry->last->interval_ms == 0)
&& (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
/* Badly configured resources can't be reliably stopped */
return FALSE;
}
return TRUE;
}
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
GHashTableIter iter;
rsc_history_t *entry = NULL;
g_hash_table_iter_init(&iter, lrm_state->resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
GList *gIter = NULL;
xmlNode *xml_rsc = pcmk__xe_create(rsc_list, PCMK__XE_LRM_RESOURCE);
crm_xml_add(xml_rsc, PCMK_XA_ID, entry->id);
crm_xml_add(xml_rsc, PCMK_XA_TYPE, entry->rsc.type);
crm_xml_add(xml_rsc, PCMK_XA_CLASS, entry->rsc.standard);
crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, entry->rsc.provider);
if (entry->last && entry->last->params) {
static const char *name = CRM_META "_" PCMK__META_CONTAINER;
const char *container = g_hash_table_lookup(entry->last->params,
name);
if (container) {
crm_trace("Resource %s is a part of container resource %s", entry->id, container);
crm_xml_add(xml_rsc, PCMK__META_CONTAINER, container);
}
}
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
lrm_state->node_name);
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
lrm_state->node_name);
for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
lrm_state->node_name);
}
}
return FALSE;
}
xmlNode *
controld_query_executor_state(void)
{
// @TODO Ensure all callers handle NULL returns
xmlNode *xml_state = NULL;
xmlNode *xml_data = NULL;
xmlNode *rsc_list = NULL;
pcmk__node_status_t *peer = NULL;
lrm_state_t *lrm_state = controld_get_executor_state(NULL, false);
if (!lrm_state) {
crm_err("Could not get executor state for local node");
return NULL;
}
peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return NULL);
xml_state = create_node_state_update(peer,
controld_node_update_cluster
|controld_node_update_peer,
NULL, __func__);
if (xml_state == NULL) {
return NULL;
}
xml_data = pcmk__xe_create(xml_state, PCMK__XE_LRM);
crm_xml_add(xml_data, PCMK_XA_ID, peer->xml_id);
rsc_list = pcmk__xe_create(xml_data, PCMK__XE_LRM_RESOURCES);
// Build a list of active (not necessarily running) resources
build_active_RAs(lrm_state, rsc_list);
crm_log_xml_trace(xml_state, "Current executor state");
return xml_state;
}
/*!
* \internal
* \brief Map standard Pacemaker return code to operation status and OCF code
*
* \param[out] event Executor event whose status and return code should be set
* \param[in] rc Standard Pacemaker return code
*/
void
controld_rc2event(lrmd_event_data_t *event, int rc)
{
/* This is called for cleanup requests from controller peers/clients, not
* for resource actions, so no exit reason is needed.
*/
switch (rc) {
case pcmk_rc_ok:
lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
break;
case EACCES:
lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
PCMK_EXEC_ERROR, NULL);
break;
default:
lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
NULL);
break;
}
}
/*!
* \internal
* \brief Trigger a new transition after CIB status was deleted
*
* If a CIB status delete was not expected (as part of the transition graph),
* trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
* cluster property.
*
* \param[in] from_sys IPC name that requested the delete
* \param[in] rsc_id Resource whose status was deleted (for logging only)
*/
void
controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
{
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
cib__update_node_attr(controld_globals.logger_out,
controld_globals.cib_conn, cib_none,
PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL,
"last-lrm-refresh", now_s, NULL, NULL);
free(now_s);
}
}
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
lrmd_event_data_t *op = NULL;
const char *from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
const char *from_host = crm_element_value(input->msg, PCMK__XA_SRC);
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "localhost"), rsc_id,
((rc == pcmk_ok)? "" : " not"));
op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE);
controld_rc2event(op, pcmk_legacy2rc(rc));
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
struct delete_event_s *event = user_data;
struct pending_deletion_op_s *op = value;
if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
return TRUE;
}
return FALSE;
}
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
const char *rsc = user_data;
active_op_t *pending = value;
if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
crm_info("Removing op %s:%d for deleted resource %s",
pending->op_key, pending->call_id, rsc);
return TRUE;
}
return FALSE;
}
static void
delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
const char *rsc_id, GHashTableIter *rsc_iter, int rc,
const char *user_name, bool from_cib)
{
struct delete_event_s event;
CRM_CHECK(rsc_id != NULL, return);
if (rc == pcmk_ok) {
char *rsc_id_copy = pcmk__str_copy(rsc_id);
if (rsc_iter) {
g_hash_table_iter_remove(rsc_iter);
} else {
g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
}
if (from_cib) {
controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
user_name, crmd_cib_smart_opt());
}
g_hash_table_foreach_remove(lrm_state->active_ops,
lrm_remove_deleted_op, rsc_id_copy);
free(rsc_id_copy);
}
if (input) {
notify_deleted(lrm_state, input, rsc_id, rc);
}
event.rc = rc;
event.rsc = rsc_id;
event.lrm_state = lrm_state;
g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
}
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
if (entry == NULL) {
return FALSE;
}
if (op == NULL) {
return TRUE;
}
return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
&& (interval_ms == entry->failed->interval_ms));
}
/*!
* \internal
* \brief Clear a resource's last failure
*
* Erase a resource's last failure on a particular node from both the
* LRM resource history in the CIB, and the resource history remembered
* for the LRM state.
*
* \param[in] rsc_id Resource name
* \param[in] node_name Node name
* \param[in] operation If specified, only clear if matching this operation
* \param[in] interval_ms If operation is specified, it has this interval
*/
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
const char *operation, guint interval_ms)
{
lrm_state_t *lrm_state = controld_get_executor_state(node_name, false);
if (lrm_state == NULL) {
return;
}
if (lrm_state->resource_history != NULL) {
rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
rsc_id);
if (last_failed_matches_op(entry, operation, interval_ms)) {
lrmd_free_event(entry->failed);
entry->failed = NULL;
}
}
}
/* Returns: gboolean - cancellation is in progress */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
int rc = pcmk_ok;
char *local_key = NULL;
active_op_t *pending = NULL;
CRM_CHECK(op != 0, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
if (key == NULL) {
local_key = make_stop_id(rsc_id, op);
key = local_key;
}
pending = g_hash_table_lookup(lrm_state->active_ops, key);
if (pending) {
if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
controld_set_active_op_flags(pending, active_op_remove);
crm_debug("Scheduling %s for removal", key);
}
if (pcmk_is_set(pending->flags, active_op_cancelled)) {
crm_debug("Operation %s already cancelled", key);
free(local_key);
return FALSE;
}
controld_set_active_op_flags(pending, active_op_cancelled);
} else {
crm_info("No pending op found for %s", key);
free(local_key);
return FALSE;
}
crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
pending->interval_ms);
if (rc == pcmk_ok) {
crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
free(local_key);
return TRUE;
}
crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
/* The caller is responsible for removing the entry from the active
* operations list, usually by returning TRUE from the worker function
* supplied to g_hash_table_foreach_remove().
*
* Leaving the entry in the active operations list would block the node
* from shutting down.
*/
free(local_key);
return FALSE;
}
struct cancel_data {
gboolean done;
gboolean remove;
const char *key;
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
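/*!
* \internal
* \brief Cancel an active operation matching a given key (hash table helper)
*
* \param[in] key Stop ID of active operation
* \param[in] value Active operation (active_op_t)
* \param[in,out] user_data Cancellation data (struct cancel_data)
*
* \return TRUE if operation should be removed from the active-ops table
*/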
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct cancel_data *data = user_data;
active_op_t *op = value;
if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
data->done = TRUE;
remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
}
return remove;
}
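/*!
* \internal
* \brief Cancel any active operation for a resource with a given operation key
*
* \param[in,out] lrm_state Executor state containing the operations
* \param[in] rsc Resource that operations are for
* \param[in] key Operation key to match
* \param[in] remove If TRUE, mark matching operations for removal
*
* \return TRUE if any matching operation was found, otherwise FALSE
*/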
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
guint removed = 0;
struct cancel_data data;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(key != NULL, return FALSE);
data.key = key;
data.rsc = rsc;
data.done = FALSE;
data.remove = remove;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->active_ops,
cancel_action_by_key, &data);
crm_trace("Removed %u op cache entries, new size: %u",
removed, g_hash_table_size(lrm_state->active_ops));
return data.done;
}
/*!
* \internal
* \brief Retrieve resource information from LRM
*
* \param[in,out] lrm_state Executor connection state to use
* \param[in] rsc_xml XML containing resource configuration
* \param[in] do_create If true, register resource if not already registered
* \param[out] rsc_info Where to store information obtained from executor
*
* \retval pcmk_ok Success (and rsc_info holds newly allocated result)
* \retval -EINVAL Required information is missing from arguments
* \retval -ENOTCONN No active connection to LRM
* \retval -ENODEV Resource not found
* \retval -errno Error communicating with executor when registering resource
*
* \note Caller is responsible for freeing result on success.
*/
static int
get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
gboolean do_create, lrmd_rsc_info_t **rsc_info)
{
const char *id = pcmk__xe_id(rsc_xml);
CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
CRM_CHECK(id, return -EINVAL);
if (lrm_state_is_connected(lrm_state) == FALSE) {
return -ENOTCONN;
}
crm_trace("Retrieving resource information for %s from the executor", id);
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
// If resource isn't known by ID, try clone name, if provided
if (!*rsc_info) {
const char *long_id = crm_element_value(rsc_xml, PCMK__XA_LONG_ID);
if (long_id) {
*rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
}
}
if ((*rsc_info == NULL) && do_create) {
const char *class = crm_element_value(rsc_xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(rsc_xml, PCMK_XA_PROVIDER);
const char *type = crm_element_value(rsc_xml, PCMK_XA_TYPE);
int rc;
crm_trace("Registering resource %s with the executor", id);
rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
lrmd_opt_drop_recurring);
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Could not register resource %s with the executor on %s: %s "
QB_XS " rc=%d",
id, lrm_state->node_name, pcmk_strerror(rc), rc);
/* Register this as an internal error if this involves the local
* executor. Otherwise, we're likely dealing with an unresponsive
* remote node, which is not an FSA failure.
*/
if (lrm_state_is_local(lrm_state) == TRUE) {
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
return rc;
}
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
}
return *rsc_info? pcmk_ok : -ENODEV;
}
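/*!
* \internal
* \brief Remove a resource from the executor and forget its history
*
* \param[in,out] lrm_state Executor state containing the resource
* \param[in] id ID of resource to remove
* \param[in] rsc Resource information (NULL to skip unregistering)
* \param[in,out] iter If not NULL, iterator positioned at history entry
* \param[in] sys IPC name of requester (for logging)
* \param[in] user ACL user making the request (if any)
* \param[in,out] request Request to reply to (if any)
* \param[in] unregister If true, unregister the resource with the executor
* \param[in] from_cib If true, also erase resource history from the CIB
*/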
static void
delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
GHashTableIter *iter, const char *sys, const char *user,
ha_msg_input_t *request, bool unregister, bool from_cib)
{
int rc = pcmk_ok;
crm_info("Removing resource %s from executor for %s%s%s",
id, sys, (user? " as " : ""), (user? user : ""));
if (rsc && unregister) {
rc = lrm_state_unregister_rsc(lrm_state, id, 0);
}
if (rc == pcmk_ok) {
crm_trace("Resource %s deleted from executor", id);
} else if (rc == -EINPROGRESS) {
crm_info("Deletion of resource '%s' from executor is pending", id);
if (request) {
struct pending_deletion_op_s *op = NULL;
char *ref = crm_element_value_copy(request->msg, PCMK_XA_REFERENCE);
op = pcmk__assert_alloc(1, sizeof(struct pending_deletion_op_s));
op->rsc = pcmk__str_copy(rsc->id);
op->input = copy_ha_msg_input(request);
g_hash_table_insert(lrm_state->deletion_ops, ref, op);
}
return;
} else {
crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
QB_XS " rc=%d", id, sys, (user? " as " : ""),
(user? user : ""), pcmk_strerror(rc), rc);
}
delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
}
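/*!
* \internal
* \brief Choose a call ID for a synthesized operation result
*
* \param[in] lrm_state Executor state with resource history (if any)
* \param[in] rsc_id ID of resource the result is for
*
* \return Call ID greater than the resource's last recorded call ID (or an
*         arbitrarily high value if the resource has no recorded history)
*/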
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
int call_id = 999999999;
rsc_history_t *entry = NULL;
if(lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* Make sure the call ID is greater than that of the last successful
* operation; otherwise the failure could appear to predate the successful
* start and would not trigger recovery of the resource. */
if (entry) {
call_id = entry->last_callid + 1;
}
if (call_id < 0) {
call_id = 1;
}
return call_id;
}
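/*!
* \internal
* \brief Fill in a synthesized result for an executor event
*
* \param[in] lrm_state Executor state (for choosing a call ID)
* \param[in,out] op Executor event to update (run time is set to now)
* \param[in] op_status Execution status to set
* \param[in] op_exitcode Exit code to set
* \param[in] exit_reason Exit reason to set (if any)
*/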
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
enum ocf_exitcode op_exitcode, const char *exit_reason)
{
op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
lrmd__set_result(op, op_exitcode, op_status, exit_reason);
}
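/*!
* \internal
* \brief Clear a node's entire resource history to force re-detection
*
* \param[in,out] lrm_state Executor state for node to reprobe
* \param[in] from_sys IPC name of requester
* \param[in] from_host Host of requester
* \param[in] user_name ACL user making the request (if any)
* \param[in] is_remote_node Whether node is a Pacemaker Remote node
* \param[in] reprobe_all_nodes Whether to reprobe connected remote nodes too
*/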
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node, bool reprobe_all_nodes)
{
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_info("Clearing resource history on node %s", lrm_state->node_name);
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
/* Only unregister the resource during a reprobe if it is not a remote
* connection resource; otherwise, unregistering the connection would
* terminate remote node membership. */
bool unregister = true;
if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
unregister = false;
if (reprobe_all_nodes) {
lrm_state_t *remote_lrm_state =
controld_get_executor_state(entry->id, false);
if (remote_lrm_state != NULL) {
/* If reprobing all nodes, be sure to reprobe the remote
* node before clearing its connection resource
*/
force_reprobe(remote_lrm_state, from_sys, from_host,
user_name, TRUE, reprobe_all_nodes);
}
}
}
/* Don't delete from the CIB, since we'll delete the whole node's LRM
* state from the CIB soon
*/
delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
user_name, NULL, unregister, false);
}
/* Now delete the copy in the CIB */
- controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
- cib_none);
+ controld_delete_node_history(lrm_state->node_name, false, cib_none);
}
/*!
* \internal
* \brief Fail a requested action without actually executing it
*
* For an action that can't be executed, process it similarly to an actual
* execution result, with specified error status (except for notify actions,
* which will always be treated as successful).
*
* \param[in,out] lrm_state Executor connection that action is for
* \param[in] action Action XML from request
* \param[in] rc Desired return code to use
* \param[in] op_status Desired operation status to use
* \param[in] exit_reason Human-friendly detail, if error
*/
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
int op_status, enum ocf_exitcode rc,
const char *exit_reason)
{
lrmd_event_data_t *op = NULL;
const char *operation = crm_element_value(action, PCMK_XA_OPERATION);
const char *target_node = crm_element_value(action, PCMK__META_ON_NODE);
xmlNode *xml_rsc = pcmk__xe_first_child(action, PCMK_XE_PRIMITIVE, NULL,
NULL);
if ((xml_rsc == NULL) || (pcmk__xe_id(xml_rsc) == NULL)) {
/* @TODO Should we do something else, like direct ack? */
crm_info("Can't fake %s failure (%d) on %s without resource configuration",
crm_element_value(action, PCMK__XA_OPERATION_KEY), rc,
target_node);
return;
} else if(operation == NULL) {
/* This probably came from crm_resource -C, nothing to do */
crm_info("Can't fake %s failure (%d) on %s without operation",
pcmk__xe_id(xml_rsc), rc, target_node);
return;
}
op = construct_op(lrm_state, action, pcmk__xe_id(xml_rsc), operation);
if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
// Notifications can't fail
fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
} else {
fake_op_status(lrm_state, op, op_status, rc, exit_reason);
}
crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
// Process the result as if it came from the LRM
process_lrm_event(lrm_state, op, NULL, action);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Get target of an LRM operation (replacing \p NULL with local node
* name)
*
* \param[in] xml LRM operation data XML
*
* \return LRM operation target node name (local node or Pacemaker Remote node)
*/
static const char *
lrm_op_target(const xmlNode *xml)
{
const char *target = NULL;
if (xml) {
target = crm_element_value(xml, PCMK__META_ON_NODE);
}
if (target == NULL) {
target = controld_globals.cluster->priv->node_name;
}
return target;
}
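/*!
* \internal
* \brief Fail a resource by synthesizing a failed monitor result
*
* \param[in] xml Request XML containing the resource to fail
* \param[in,out] lrm_state Executor state for the resource's node
* \param[in] user_name ACL user making the request (if any)
* \param[in] from_host Host the request originated from
* \param[in] from_sys IPC name the request originated from
*/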
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
lrmd_event_data_t *op = NULL;
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_first_child(xml, PCMK_XE_PRIMITIVE, NULL, NULL);
CRM_CHECK(xml_rsc != NULL, return);
/* The executor simply executes operations and reports the results, without
* any concept of success or failure, so to fail a resource, we must fake
* what a failure looks like.
*
* To do this, we create a fake executor operation event for the resource,
* and pass that event to the executor client callback so it will be
* processed as if it came from the executor.
*/
op = construct_op(lrm_state, xml, pcmk__xe_id(xml_rsc), "asyncmon");
free((char*) op->user_data);
op->user_data = NULL;
op->interval_ms = 0;
if (user_name && !pcmk__is_privileged(user_name)) {
crm_err("%s does not have permission to fail %s",
user_name, pcmk__xe_id(xml_rsc));
fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
PCMK_OCF_INSUFFICIENT_PRIV,
"Unprivileged user cannot fail resources");
controld_ack_event_directly(from_host, from_sys, NULL, op,
pcmk__xe_id(xml_rsc));
lrmd_free_event(op);
return;
}
if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
crm_info("Failing resource %s...", rsc->id);
fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
"Simulated failure");
process_lrm_event(lrm_state, op, NULL, xml);
op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
lrmd_free_rsc_info(rsc);
} else {
crm_info("Cannot find/create resource in order to fail it...");
crm_log_xml_warn(xml, "bad input");
fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
"Cannot fail unknown resource");
}
controld_ack_event_directly(from_host, from_sys, NULL, op,
pcmk__xe_id(xml_rsc));
lrmd_free_event(op);
}
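/*!
* \internal
* \brief Handle a reprobe request
*
* \param[in,out] lrm_state Executor state for node to reprobe
* \param[in,out] msg Request XML (replied to unless from the scheduler)
* \param[in] from_sys IPC name of requester
* \param[in] from_host Host of requester
* \param[in] user_name ACL user making the request (if any)
* \param[in] is_remote_node Whether node is a Pacemaker Remote node
* \param[in] reprobe_all_nodes Whether to reprobe connected remote nodes too
*/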
static void
handle_reprobe_op(lrm_state_t *lrm_state, xmlNode *msg, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node, bool reprobe_all_nodes)
{
crm_notice("Forcing the status of all resources to be redetected");
force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
reprobe_all_nodes);
if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
xmlNode *reply = pcmk__new_reply(msg, NULL);
crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
pcmk__xml_free(reply);
}
}
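/*!
* \internal
* \brief Cancel an operation at the scheduler's request
*
* \param[in] input Request XML specifying the operation to cancel
* \param[in,out] lrm_state Executor state for the operation's node
* \param[in,out] rsc Resource the operation is for
* \param[in] from_host Host the request originated from
* \param[in] from_sys IPC name the request originated from
*
* \return FALSE if required information was missing from the request,
*         otherwise TRUE
*/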
static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
guint interval_ms = 0;
gboolean in_progress = FALSE;
xmlNode *params = pcmk__xe_first_child(input->xml, PCMK__XE_ATTRIBUTES,
NULL, NULL);
CRM_CHECK(params != NULL, return FALSE);
meta_key = crm_meta_name(PCMK_XA_OPERATION);
op_task = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(op_task != NULL, return FALSE);
meta_key = crm_meta_name(PCMK_META_INTERVAL);
if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
free(meta_key);
return FALSE;
}
free(meta_key);
op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
meta_key = crm_meta_name(PCMK__XA_CALL_ID);
call_id = crm_element_value(params, meta_key);
free(meta_key);
crm_debug("Scheduler requested op %s (call=%s) be cancelled",
op_key, (call_id? call_id : "NA"));
pcmk__scan_min_int(call_id, &call, 0);
if (call == 0) {
// Normal case when the scheduler cancels a recurring op
in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
} else {
// Normal case when the scheduler cancels an orphan op
in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
}
// Acknowledge cancellation operation if for a remote connection resource
if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
char *op_id = make_stop_id(rsc->id, call);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
crm_info("Nothing known about operation %d for %s", call, op_key);
}
controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
op_key, call);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
/* Needed at least for cancellation of a remote operation */
if (lrm_state->active_ops != NULL) {
g_hash_table_remove(lrm_state->active_ops, op_id);
}
free(op_id);
}
free(op_key);
return TRUE;
}
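/*!
* \internal
* \brief Handle a request to delete a resource's history
*
* \param[in,out] input Request XML
* \param[in,out] lrm_state Executor state for the resource's node
* \param[in,out] rsc Resource to delete
* \param[in] from_sys IPC name of requester
* \param[in] from_host Host of requester
* \param[in] crm_rsc_delete Whether request came from crm_resource
* \param[in] user_name ACL user making the request (if any)
*/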
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
bool crm_rsc_delete, const char *user_name)
{
bool unregister = true;
int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
user_name,
cib_dryrun|cib_sync_call);
if (cib_rc != pcmk_rc_ok) {
lrmd_event_data_t *op = NULL;
op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE);
/* These are resource clean-ups, not actions, so no exit reason is
* needed.
*/
lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
lrmd_free_event(op);
return;
}
if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
unregister = false;
}
delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
user_name, input, unregister, true);
}
// User data for asynchronous metadata execution
struct metadata_cb_data {
lrmd_rsc_info_t *rsc; // Copy of resource information
xmlNode *input_xml; // Copy of FSA input XML
};
static struct metadata_cb_data *
new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
{
struct metadata_cb_data *data = NULL;
data = pcmk__assert_alloc(1, sizeof(struct metadata_cb_data));
data->input_xml = pcmk__xml_copy(NULL, input_xml);
data->rsc = lrmd_copy_rsc_info(rsc);
return data;
}
static void
free_metadata_cb_data(struct metadata_cb_data *data)
{
lrmd_free_rsc_info(data->rsc);
pcmk__xml_free(data->input_xml);
free(data);
}
/*!
* \internal
* \brief Execute an action after metadata has been retrieved
*
* \param[in] pid Ignored
* \param[in] result Result of metadata action
* \param[in] user_data Metadata callback data
*/
static void
metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
{
struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
struct ra_metadata_s *md = NULL;
lrm_state_t *lrm_state =
controld_get_executor_state(lrm_op_target(data->input_xml), false);
if ((lrm_state != NULL) && pcmk__result_ok(result)) {
md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
result->action_stdout);
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
}
free_metadata_cb_data(data);
}
/* A_LRM_INVOKE */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
lrm_state_t *lrm_state = NULL;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
const char *target_node = lrm_op_target(input->xml);
gboolean is_remote_node = FALSE;
bool crm_rsc_delete = FALSE;
// Message routed to the local node is targeting a specific, non-local node
is_remote_node = !controld_is_local_node(target_node);
lrm_state = controld_get_executor_state(target_node, false);
if ((lrm_state == NULL) && is_remote_node) {
crm_err("Failing action because local node has never had connection to remote node %s",
target_node);
synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR,
"Local node has no connection to remote");
return;
}
pcmk__assert(lrm_state != NULL);
user_name = pcmk__update_acl_user(input->msg, PCMK__XA_CRM_USER, NULL);
crm_op = crm_element_value(input->msg, PCMK__XA_CRM_TASK);
from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
from_host = crm_element_value(input->msg, PCMK__XA_SRC);
}
if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
crm_rsc_delete = TRUE; // from crm_resource
}
operation = PCMK_ACTION_DELETE;
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, PCMK_XA_OPERATION);
}
CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
crm_trace("'%s' execution request from %s as %s user",
pcmk__s(crm_op, operation),
pcmk__s(from_sys, "unknown subsystem"),
pcmk__s(user_name, "current"));
if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
from_sys);
} else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
|| pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
const char *raw_target = NULL;
if (input->xml != NULL) {
// For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
raw_target = crm_element_value(input->xml, PCMK__META_ON_NODE);
}
handle_reprobe_op(lrm_state, input->msg, from_sys, from_host, user_name,
is_remote_node, (raw_target == NULL));
} else if (operation != NULL) {
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_first_child(input->xml, PCMK_XE_PRIMITIVE,
NULL, NULL);
gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE,
pcmk__str_none);
int rc;
// We can't return anything meaningful without a resource ID
CRM_CHECK((xml_rsc != NULL) && (pcmk__xe_id(xml_rsc) != NULL), return);
rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
if (rc == -ENOTCONN) {
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR,
"Not connected to remote executor");
return;
} else if ((rc < 0) && !create_rsc) {
/* Delete of malformed or nonexistent resource
* (deleting something that does not exist is a success)
*/
crm_debug("Not registering resource '%s' for a %s event "
QB_XS " get-rc=%d (%s) transition-key=%s",
pcmk__xe_id(xml_rsc), operation,
rc, pcmk_strerror(rc), pcmk__xe_id(input->xml));
delete_rsc_entry(lrm_state, input, pcmk__xe_id(xml_rsc), NULL,
pcmk_ok, user_name, true);
return;
} else if (rc == -EINVAL) {
// Resource operation on malformed resource
crm_err("Invalid resource definition for %s", pcmk__xe_id(xml_rsc));
crm_log_xml_warn(input->msg, "invalid resource");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
PCMK_OCF_NOT_CONFIGURED, // fatal error
"Invalid resource definition");
return;
} else if (rc < 0) {
// Error communicating with the executor
crm_err("Could not register resource '%s' with executor: %s "
QB_XS " rc=%d",
pcmk__xe_id(xml_rsc), pcmk_strerror(rc), rc);
crm_log_xml_warn(input->msg, "failed registration");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
PCMK_OCF_INVALID_PARAM, // hard error
"Could not register resource with executor");
return;
}
if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) {
if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
crm_log_xml_warn(input->xml, "Bad command");
}
} else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE,
pcmk__str_none)) {
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
crm_rsc_delete, user_name);
} else {
struct ra_metadata_s *md = NULL;
/* Getting metadata from cache is OK except for start actions --
* always refresh from the agent for those, in case the resource
* agent was updated.
*
* @TODO Only refresh metadata for starts if the agent actually
* changed (using something like inotify, or a hash or modification
* time of the agent executable).
*/
if (strcmp(operation, PCMK_ACTION_START) != 0) {
md = controld_get_rsc_metadata(lrm_state, rsc,
controld_metadata_from_cache);
}
if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
operation)) {
/* Most likely, we'll need the agent metadata to record the
* pending operation and the operation result. Get it now rather
* than wait until then, so the metadata action doesn't eat into
* the real action's timeout.
*
* @TODO Metadata is retrieved via direct execution of the
* agent, which has a couple of related issues: the executor
* should execute agents, not the controller; and metadata for
* Pacemaker Remote nodes should be collected on those nodes,
* not locally.
*/
struct metadata_cb_data *data = NULL;
data = new_metadata_cb_data(rsc, input->xml);
crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
rsc->id, rsc->standard,
((rsc->provider == NULL)? "" : ":"),
((rsc->provider == NULL)? "" : rsc->provider),
rsc->type);
(void) lrmd__metadata_async(rsc, metadata_complete,
(void *) data);
} else {
do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
}
}
lrmd_free_rsc_info(rsc);
} else {
crm_err("Invalid execution request: unknown command '%s' (bug?)",
crm_op);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
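/*!
* \internal
* \brief Create a new executor event for a resource action
*
* \param[in] lrm_state Executor state (for cached stop parameters)
* \param[in] rsc_op Action XML to construct event from (if any)
* \param[in] rsc_id ID of resource the event is for
* \param[in] operation Name of action the event is for
*
* \return Newly constructed executor event
* \note The caller is responsible for freeing the result with
*       lrmd_free_event().
*/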
static lrmd_event_data_t *
construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
const char *rsc_id, const char *operation)
{
lrmd_event_data_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
GHashTable *params = NULL;
xmlNode *primitive = NULL;
const char *class = NULL;
const char *transition = NULL;
pcmk__assert((rsc_id != NULL) && (operation != NULL));
op = lrmd_new_event(rsc_id, operation, 0);
op->type = lrmd_event_exec_complete;
op->timeout = 0;
op->start_delay = 0;
lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
if (rsc_op == NULL) {
CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP,
pcmk__str_casei));
op->user_data = NULL;
/* This is the stop_all_resources() case: by definition there is no DC
* (or it would be shutting us down), so record our own feature set here.
*/
op->params = pcmk__strkey_table(free, free);
pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
params = xml2list(rsc_op);
g_hash_table_remove(params, CRM_META "_" PCMK__META_OP_TARGET_RC);
op_delay = crm_meta_value(params, PCMK_META_START_DELAY);
pcmk__scan_min_int(op_delay, &op->start_delay, 0);
op_timeout = crm_meta_value(params, PCMK_META_TIMEOUT);
pcmk__scan_min_int(op_timeout, &op->timeout, 0);
if (pcmk__guint_from_hash(params, CRM_META "_" PCMK_META_INTERVAL, 0,
&(op->interval_ms)) != pcmk_rc_ok) {
op->interval_ms = 0;
}
/* For a stonith resource's recurring monitor, use pcmk_monitor_timeout
* instead of the meta-attribute timeout, if set */
primitive = pcmk__xe_first_child(rsc_op, PCMK_XE_PRIMITIVE, NULL, NULL);
class = crm_element_value(primitive, PCMK_XA_CLASS);
if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
&& pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei)
&& (op->interval_ms > 0)) {
op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
if (op_timeout != NULL) {
long long timeout_ms = crm_get_msec(op_timeout);
op->timeout = (int) QB_MIN(timeout_ms, INT_MAX);
}
}
if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) {
op->params = params;
} else {
rsc_history_t *entry = NULL;
if (lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* If we do not have stop parameters cached, use
* whatever we are given */
if (!entry || !entry->stop_params) {
op->params = params;
} else {
/* Copy the cached parameter list so that we stop the resource
* with the old attributes, not the new ones */
op->params = pcmk__strkey_table(free, free);
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
}
/* sanity */
if (op->timeout <= 0) {
op->timeout = op->interval_ms;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, PCMK__XA_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = pcmk__str_copy(transition);
if (op->interval_ms != 0) {
if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP,
NULL)) {
crm_err("Start and Stop actions cannot have an interval: %u",
op->interval_ms);
op->interval_ms = 0;
}
}
crm_trace("Constructed %s op for %s: interval=%u",
operation, rsc_id, op->interval_ms);
return op;
}
/*!
* \internal
* \brief Send a (synthesized) event result
*
* Reply with a synthesized event result directly, as opposed to going through
* the executor.
*
* \param[in] to_host Host to send result to
* \param[in] to_sys IPC name to send result (NULL for transition engine)
* \param[in] rsc Type information about resource the result is for
* \param[in,out] op Event with result to send
* \param[in] rsc_id ID of resource the result is for
*/
void
controld_ack_event_directly(const char *to_host, const char *to_sys,
const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
const char *rsc_id)
{
xmlNode *reply = NULL;
xmlNode *update, *iter;
pcmk__node_status_t *peer = NULL;
CRM_CHECK(op != NULL, return);
if (op->rsc_id == NULL) {
// op->rsc_id is a (const char *) but lrmd_free_event() frees it
pcmk__assert(rsc_id != NULL);
op->rsc_id = pcmk__str_copy(rsc_id);
}
if (to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
peer = controld_get_local_node_status();
update = create_node_state_update(peer, controld_node_update_none, NULL,
__func__);
iter = pcmk__xe_create(update, PCMK__XE_LRM);
crm_xml_add(iter, PCMK_XA_ID, controld_globals.our_uuid);
iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCES);
iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCE);
crm_xml_add(iter, PCMK_XA_ID, op->rsc_id);
controld_add_resource_history_xml(iter, rsc, op,
controld_globals.cluster->priv->node_name);
/* We don't have the original message ID, so use "direct-ack" (we just need
* something non-NULL for this to create a reply)
*
* @TODO It would be better to use the server, message ID, and task from the
* original request when callers have it available
*/
reply = pcmk__new_message(pcmk_ipc_controld, "direct-ack", CRM_SYSTEM_LRMD,
to_host, to_sys, CRM_OP_INVOKE_LRM, update);
crm_log_xml_trace(update, "[direct ACK]");
crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
op->rsc_id, op->op_type, op->interval_ms, op->user_data,
crm_element_value(reply, PCMK_XA_REFERENCE));
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
pcmk__xml_free(update);
pcmk__xml_free(reply);
}
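/*!
* \internal
* \brief Check whether all executor connections have stopped all resources
*
* \param[in] cur_state Current FSA state (for logging)
* \param[in] log_level Log level to use for resources not yet stopped
*
* \return TRUE if all resources on all executor connections are stopped,
*         otherwise FALSE
*/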
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
gboolean res = TRUE;
GList *lrm_state_list = lrm_state_get_list();
GList *state_entry;
for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
lrm_state_t *lrm_state = state_entry->data;
if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
/* Keep iterating through all executor states even after a failure */
res = FALSE;
}
}
controld_set_fsa_input_flags(R_SENT_RSC_STOP);
g_list_free(lrm_state_list);
lrm_state_list = NULL;
return res;
}
struct stop_recurring_action_s {
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
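/*!
* \internal
* \brief Cancel a recurring operation for a given resource (hash table helper)
*
* \param[in] key Stop ID of active operation
* \param[in] value Active operation (active_op_t)
* \param[in,out] user_data Resource and executor state to match against
*
* \return TRUE if operation should be removed from the active-ops table
*/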
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct stop_recurring_action_s *event = user_data;
active_op_t *op = value;
if ((op->interval_ms != 0)
&& pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
}
return remove;
}
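/*!
* \internal
* \brief Cancel all recurring operations for a node (hash table helper)
*
* \param[in] key Stop ID of active operation
* \param[in] value Active operation (active_op_t)
* \param[in,out] user_data Executor state for the node
*
* \return TRUE if operation should be removed from the active-ops table
*/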
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
lrm_state_t *lrm_state = user_data;
active_op_t *op = value;
if (op->interval_ms != 0) {
crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
(const char *) key);
remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
}
return remove;
}
/*!
* \internal
* \brief Check whether recurring actions should be cancelled before an action
*
* \param[in] rsc_id Resource that action is for
* \param[in] action Action being performed
* \param[in] interval_ms Operation interval of \p action (in milliseconds)
*
* \return true if recurring actions should be cancelled, otherwise false
*/
static bool
should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
{
if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
&& (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) {
/* Don't stop monitoring a migrating Pacemaker Remote connection
* resource until the entire migration has completed. We must detect if
* the connection is unexpectedly severed, even during a migration.
*/
return false;
}
// Cancel recurring actions before changing resource state
return (interval_ms == 0)
&& !pcmk__str_any_of(action, PCMK_ACTION_MONITOR,
PCMK_ACTION_NOTIFY, NULL);
}
/*!
* \internal
* \brief Check whether an action should not be performed at this time
*
* \param[in] operation Action to be performed
*
* \return Readable description of why action should not be performed,
* or NULL if it should be performed
*/
static const char *
should_nack_action(const char *action)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
&& pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) {
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
return "Not attempting start due to shutdown in progress";
}
switch (controld_globals.fsa_state) {
case S_NOT_DC:
case S_POLICY_ENGINE: // Recalculating
case S_TRANSITION_ENGINE:
break;
default:
if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) {
return "Controller cannot attempt actions at this time";
}
break;
}
return NULL;
}
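/*!
* \internal
* \brief Initiate a requested resource action via the executor
*
* \param[in,out] lrm_state Executor state to perform the action with
* \param[in,out] rsc Information about the resource to act on
* \param[in,out] msg Action XML from request
* \param[in,out] md Resource agent metadata (if cached)
*/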
static void
do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
struct ra_metadata_s *md)
{
int rc;
int call_id = 0;
char *op_id = NULL;
lrmd_event_data_t *op = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
const char *operation = NULL;
const char *nack_reason = NULL;
CRM_CHECK((rsc != NULL) && (msg != NULL), return);
operation = crm_element_value(msg, PCMK_XA_OPERATION);
CRM_CHECK(!pcmk__str_empty(operation), return);
transition = crm_element_value(msg, PCMK__XA_TRANSITION_KEY);
if (pcmk__str_empty(transition)) {
crm_log_xml_err(msg, "Missing transition number");
}
if (lrm_state == NULL) {
// This shouldn't be possible, but provide a failsafe just in case
crm_err("Cannot execute %s of %s: No executor connection "
QB_XS " transition_key=%s",
operation, rsc->id, pcmk__s(transition, ""));
synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
PCMK_OCF_UNKNOWN_ERROR,
"No executor connection");
return;
}
if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
* will schedule reload-agent actions only. In either case, we need
* to map that to whatever the resource agent actually supports.
* Default to the OCF 1.1 name.
*/
if ((md != NULL)
&& pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
operation = PCMK_ACTION_RELOAD;
} else {
operation = PCMK_ACTION_RELOAD_AGENT;
}
}
op = construct_op(lrm_state, msg, rsc->id, operation);
CRM_CHECK(op != NULL, return);
if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
guint removed = 0;
struct stop_recurring_action_s data;
data.rsc = rsc;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->active_ops,
stop_recurring_action_by_rsc,
&data);
if (removed) {
crm_debug("Stopped %u recurring operation%s in preparation for "
PCMK__OP_FMT, removed, pcmk__plural_s(removed),
rsc->id, operation, op->interval_ms);
}
}
nack_reason = should_nack_action(operation);
if (nack_reason != NULL) {
crm_notice("Not requesting local execution of %s operation for %s on %s"
" in state %s: %s",
pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
lrm_state->node_name,
fsa_state2string(controld_globals.fsa_state), nack_reason);
lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
nack_reason);
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
lrmd_free_event(op);
free(op_id);
return;
}
crm_notice("Requesting local execution of %s operation for %s on %s "
QB_XS " transition %s",
pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
lrm_state->node_name, pcmk__s(transition, ""));
controld_record_pending_op(lrm_state->node_name, rsc, op);
op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
if (op->interval_ms > 0) {
/* cancel it so we can then restart it without conflict */
cancel_op_key(lrm_state, rsc, op_id, FALSE);
}
rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
op->user_data, op->interval_ms,
op->timeout, op->start_delay,
op->params, &call_id);
if (rc == pcmk_rc_ok) {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
active_op_t *pending = NULL;
pending = pcmk__assert_alloc(1, sizeof(active_op_t));
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
pending->call_id = call_id;
pending->interval_ms = op->interval_ms;
pending->op_type = pcmk__str_copy(operation);
pending->op_key = pcmk__str_copy(op_id);
pending->rsc_id = pcmk__str_copy(rsc->id);
pending->start_time = time(NULL);
pending->user_data = pcmk__str_copy(op->user_data);
if (crm_element_value_epoch(msg, PCMK_OPT_SHUTDOWN_LOCK,
&(pending->lock_time)) != pcmk_ok) {
pending->lock_time = 0;
}
g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
if ((op->interval_ms > 0)
&& (op->start_delay > START_DELAY_THRESHOLD)) {
int target_rc = PCMK_OCF_OK;
crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
}
pending->params = op->params;
op->params = NULL;
} else if (lrm_state_is_local(lrm_state)) {
crm_err("Could not initiate %s action for resource %s locally: %s "
QB_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
process_lrm_event(lrm_state, op, NULL, NULL);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else {
crm_err("Could not initiate %s action for resource %s remotely on %s: "
"%s " QB_XS " rc=%d",
operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
process_lrm_event(lrm_state, op, NULL, NULL);
}
free(op_id);
lrmd_free_event(op);
}
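/*!
* \internal
* \brief Replace escaped newlines ("\n") in a string with real ones
*
* \param[in] string String to unescape
*
* \return Newly allocated copy of \p string with newlines unescaped, or
*         NULL if \p string is NULL
* \note Each two-character escape is replaced by a newline and a space,
*       so the string length is unchanged.
*/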
static char *
unescape_newlines(const char *string)
{
char *pch = NULL;
char *ret = NULL;
static const char *escaped_newline = "\\n";
if (!string) {
return NULL;
}
ret = pcmk__str_copy(string);
pch = strstr(ret, escaped_newline);
while (pch != NULL) {
/* Replace newline escape pattern with actual newline (and a space so we
* don't have to shuffle the rest of the buffer)
*/
pch[0] = '\n';
pch[1] = ' ';
pch = strstr(pch, escaped_newline);
}
return ret;
}
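/*!
* \internal
* \brief Check whether a resource's last recorded failure matches an operation
*
* \param[in] lrm_state Executor state containing resource history
* \param[in] rsc_id ID of resource to check
* \param[in] op_type Action name to match
* \param[in] interval_ms Action interval (in milliseconds) to match
*
* \return TRUE if the resource's last failure matches, otherwise FALSE
*/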
static bool
did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
const char * op_type, guint interval_ms)
{
rsc_history_t *entry = NULL;
CRM_CHECK(lrm_state != NULL, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
CRM_CHECK(op_type != NULL, return FALSE);
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->failed == NULL) {
return FALSE;
}
if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
&& pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
&& entry->failed->interval_ms == interval_ms) {
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Log the result of an executor action (actual or synthesized)
*
* \param[in] op Executor action to log result for
* \param[in] op_key Operation key for action
* \param[in] node_name Name of node action was performed on, if known
* \param[in] confirmed Whether to log that graph action was confirmed
*/
static void
log_executor_event(const lrmd_event_data_t *op, const char *op_key,
const char *node_name, gboolean confirmed)
{
int log_level = LOG_ERR;
GString *str = g_string_sized_new(100); // reasonable starting size
pcmk__g_strcat(str,
"Result of ",
pcmk__readable_action(op->op_type, op->interval_ms),
" operation for ", op->rsc_id, NULL);
if (node_name != NULL) {
pcmk__g_strcat(str, " on ", node_name, NULL);
}
switch (op->op_status) {
case PCMK_EXEC_DONE:
log_level = LOG_NOTICE;
pcmk__g_strcat(str, ": ", crm_exit_str((crm_exit_t) op->rc), NULL);
break;
case PCMK_EXEC_TIMEOUT:
pcmk__g_strcat(str,
": ", pcmk_exec_status_str(op->op_status), " after ",
pcmk__readable_interval(op->timeout), NULL);
break;
case PCMK_EXEC_CANCELLED:
log_level = LOG_INFO;
pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
NULL);
break;
default:
pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
NULL);
break;
}
if ((op->exit_reason != NULL)
&& ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
}
g_string_append(str, " " QB_XS);
g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
(confirmed? "" : "un"), op->call_id, op_key);
if (op->op_status == PCMK_EXEC_DONE) {
g_string_append_printf(str, " rc=%d", op->rc);
}
do_crm_log(log_level, "%s", str->str);
g_string_free(str, TRUE);
/* The services library has already logged the output at info or debug
* level, so just raise to notice if it looks like a failure.
*/
if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
op->rsc_id, op->op_type,
op->interval_ms, node_name);
crm_log_output(LOG_NOTICE, prefix, op->output);
free(prefix);
}
}
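/*!
* \internal
* \brief Process the result of an executor operation (actual or synthesized)
*
* \param[in,out] lrm_state Executor state for the event's node (if available)
* \param[in,out] op Executor event to process
* \param[in,out] pending Pending operation that the event completes (if known)
* \param[in] action_xml Action XML for synthesized events (if any)
*/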
void
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
active_op_t *pending, const xmlNode *action_xml)
{
char *op_id = NULL;
char *op_key = NULL;
gboolean remove = FALSE;
gboolean removed = FALSE;
bool need_direct_ack = FALSE;
lrmd_rsc_info_t *rsc = NULL;
const char *node_name = NULL;
CRM_CHECK(op != NULL, return);
CRM_CHECK(op->rsc_id != NULL, return);
// Remap new status codes for older DCs
if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
switch (op->op_status) {
case PCMK_EXEC_NOT_CONNECTED:
lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
PCMK_EXEC_ERROR, op->exit_reason);
break;
case PCMK_EXEC_INVALID:
lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
op->exit_reason);
break;
default:
break;
}
}
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
// Get resource info if available (from executor state or action XML)
if (lrm_state) {
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
}
if ((rsc == NULL) && action_xml) {
xmlNode *xml = pcmk__xe_first_child(action_xml, PCMK_XE_PRIMITIVE, NULL,
NULL);
const char *standard = crm_element_value(xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(xml, PCMK_XA_PROVIDER);
const char *type = crm_element_value(xml, PCMK_XA_TYPE);
if (standard && type) {
crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
op->rsc_id, standard,
(provider? ":" : ""), (provider? provider : ""), type);
rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
} else {
crm_err("Can't process %s result because %s agent information not cached or in XML",
op_key, op->rsc_id);
}
}
// Get node name if available (from executor state or action XML)
if (lrm_state) {
node_name = lrm_state->node_name;
} else if (action_xml) {
node_name = crm_element_value(action_xml, PCMK__META_ON_NODE);
}
if(pending == NULL) {
remove = TRUE;
if (lrm_state) {
pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
}
}
if (op->op_status == PCMK_EXEC_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_PROMOTED:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_PROMOTED:
// Leave it to the TE/scheduler to decide if this is an error
op->op_status = PCMK_EXEC_DONE;
break;
default:
/* Nothing to do */
break;
}
}
if (op->op_status != PCMK_EXEC_CANCELLED) {
/* We might not record the result, so directly acknowledge it to the
* originator instead, so it doesn't time out waiting for the result
* (especially important if part of a transition).
*/
need_direct_ack = TRUE;
if (controld_action_is_recordable(op->op_type)) {
if (node_name && rsc) {
// We should record the result, and happily, we can
time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
controld_update_resource_history(node_name, rsc, op, lock_time);
need_direct_ack = FALSE;
} else if (op->rsc_deleted) {
/* We shouldn't record the result (likely the resource was
* refreshed, cleaned, or removed while this operation was
* in flight).
*/
crm_notice("Not recording %s result in CIB because "
"resource information was removed since it was initiated",
op_key);
} else {
/* This shouldn't be possible; the executor didn't consider the
* resource deleted, but we couldn't find resource or node
* information.
*/
crm_err("Unable to record %s result in CIB: %s", op_key,
(node_name? "No resource information" : "No node name"));
}
}
} else if (op->interval_ms == 0) {
/* A non-recurring operation was cancelled. Most likely, the
* never-initiated action was removed from the executor's pending
* operations list upon resource removal.
*/
need_direct_ack = TRUE;
} else if (pending == NULL) {
/* This recurring operation was cancelled, but was not pending. No
* transition actions are waiting on it, nothing needs to be done.
*/
} else if (op->user_data == NULL) {
/* This recurring operation was cancelled and pending, but we don't
* have a transition key. This should never happen.
*/
crm_err("Recurring operation %s was cancelled without transition information",
op_key);
} else if (pcmk_is_set(pending->flags, active_op_remove)) {
/* This recurring operation was cancelled (by us) and pending, and we
* have been waiting for it to finish.
*/
if (lrm_state) {
controld_delete_action_history(op);
}
/* Directly acknowledge failed recurring actions here. The above call to
* controld_delete_action_history() will not erase any corresponding
* last_failure entry, which means that the DC won't confirm the
* cancellation via process_op_deletion(), and the transition would
* otherwise wait for the action timer to pop.
*/
if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
pending->op_type, pending->interval_ms)) {
need_direct_ack = TRUE;
}
} else if (op->rsc_deleted) {
/* This recurring operation was cancelled (but not by us, and the
* executor does not have resource information, likely due to resource
* cleanup, refresh, or removal) and pending.
*/
crm_debug("Recurring op %s was cancelled due to resource deletion",
op_key);
need_direct_ack = TRUE;
} else {
/* This recurring operation was cancelled (but not by us, likely by the
* executor before stopping the resource) and pending. We don't need to
* do anything special.
*/
}
if (need_direct_ack) {
controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
}
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if (lrm_state && ((op->interval_ms == 0)
|| (op->op_status == PCMK_EXEC_CANCELLED))) {
gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
if (op->interval_ms != 0) {
removed = TRUE;
} else if (found) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id,
g_hash_table_size(lrm_state->active_ops));
}
}
log_executor_event(op, op_key, node_name, removed);
if (lrm_state) {
if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA,
pcmk__str_casei)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (rsc && (op->rc == PCMK_OCF_OK)) {
char *metadata = unescape_newlines(op->output);
controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
if (lrm_state) {
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
true);
}
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
controld_trigger_fsa();
if (lrm_state && rsc) {
update_history_cache(lrm_state, rsc, op);
}
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 4304ae799e..3d0e017355 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1113 +1,1117 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and stop restarting the
* transition once a target reaches the maximum number of attempts.
*/
struct st_fail_rec {
int count;
};
#define DEFAULT_STONITH_MAX_ATTEMPTS 10
static bool fence_reaction_panic = false;
static unsigned long int stonith_max_attempts = DEFAULT_STONITH_MAX_ATTEMPTS;
static GHashTable *stonith_failures = NULL;
/*!
* \internal
* \brief Update max fencing attempts before giving up
*
* \param[in] value New max fencing attempts
*/
static void
update_stonith_max_attempts(const char *value)
{
int score = 0;
int rc = pcmk_parse_score(value, &score, DEFAULT_STONITH_MAX_ATTEMPTS);
// The option validator should make invalid values impossible here
CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return);
if (stonith_max_attempts != score) {
crm_debug("Maximum fencing attempts per transition is now %d (was %lu)",
score, stonith_max_attempts);
}
stonith_max_attempts = score;
}
/*!
* \internal
* \brief Configure reaction to notification of local node being fenced
*
* \param[in] reaction_s Reaction type
*/
static void
set_fence_reaction(const char *reaction_s)
{
if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
fence_reaction_panic = true;
} else {
if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {
crm_warn("Invalid value '%s' for %s, using 'stop'",
reaction_s, PCMK_OPT_FENCE_REACTION);
}
fence_reaction_panic = false;
}
}
/*!
* \internal
* \brief Configure fencing options based on the CIB
*
* \param[in,out] options Name/value pairs for configured options
*/
void
controld_configure_fencing(GHashTable *options)
{
const char *value = NULL;
value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION);
set_fence_reaction(value);
value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
update_stonith_max_attempts(value);
}
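/*!
* \internal
* \brief Check whether a fencing target has failed too many times
*
* \param[in] target Name of node to check, or NULL to check all nodes
*
* \return TRUE if \p target (or any node, if \p target is NULL) has reached
*         the maximum number of failed fencing attempts, otherwise FALSE
*/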
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *value = NULL;
if (stonith_failures == NULL) {
return FALSE;
}
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;
too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}
/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
if (stonith_failures == NULL) {
return;
}
if (target) {
struct st_fail_rec *rec = NULL;
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}
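/*!
* \internal
* \brief Increment a fencing target's failure count
*
* \param[in] target Name of node to increment failure count for
*/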
static void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
if (stonith_failures == NULL) {
stonith_failures = pcmk__strkey_table(free, free);
}
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count++;
} else {
rec = malloc(sizeof(struct st_fail_rec));
if(rec == NULL) {
return;
}
rec->count = 1;
g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec);
}
}
/* end stonith fail count functions */
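/*!
* \internal
* \brief Log the result of a CIB update after fencing, aborting the
*        transition on failure (CIB callback)
*
* \param[in] msg CIB update request (logged on failure)
* \param[in] call_id CIB call ID
* \param[in] rc Result of CIB update
* \param[in] output Ignored
* \param[in] user_data Name of fenced node
*/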
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Fencing update %d for %s: failed - %s (%d)",
call_id, (char *)user_data, pcmk_strerror(rc), rc);
crm_log_xml_warn(msg, "Failed update");
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
"CIB update failed", NULL);
} else {
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
}
}
/*!
* \internal
* \brief Update a fencing target's node state
*
* \param[in] target Node that was successfully fenced
* \param[in] target_xml_id CIB XML ID of target
*/
static void
update_node_state_after_fencing(const char *target, const char *target_xml_id)
{
int rc = pcmk_ok;
pcmk__node_status_t *peer = NULL;
xmlNode *node_state = NULL;
/* We (usually) rely on the membership layer to do
* controld_node_update_cluster, and the peer status callback to do
* controld_node_update_peer, because the node might have already rejoined
* before we get the stonith result here.
*/
uint32_t flags = controld_node_update_join|controld_node_update_expected;
CRM_CHECK((target != NULL) && (target_xml_id != NULL), return);
// Ensure target is cached
peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= controld_node_update_cluster;
}
if (peer->xml_id == NULL) {
crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target);
peer->xml_id = pcmk__str_copy(target_xml_id);
}
crmd_peer_down(peer, TRUE);
node_state = create_node_state_update(peer, flags, NULL, __func__);
crm_xml_add(node_state, PCMK_XA_ID, target_xml_id);
if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
char *now_s = pcmk__ttoa(time(NULL));
crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
free(now_s);
}
rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
PCMK_XE_STATUS, node_state,
cib_can_create);
pcmk__xml_free(node_state);
crm_debug("Updating node state for %s after fencing (call %d)", target, rc);
fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
- controld_delete_node_state(peer->name, controld_section_all, cib_none);
+ // Delete node's resource history from CIB
+ controld_delete_node_history(peer->name, false, cib_none);
+
+ // Ask attribute manager to delete node's transient attributes
+ controld_purge_node_attrs(peer->name, false);
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum pcmk__graph_next abort_action,
const char *target, const xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
abort_action = pcmk__graph_wait;
}
abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GList *stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list,
pcmk__str_copy(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] target Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GList *iter = stonith_cleanup_list;
while (iter != NULL) {
GList *tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup(void)
{
if (stonith_cleanup_list) {
GList *iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup(void)
{
GList *iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
pcmk__node_status_t *target_node =
pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
const char *uuid = pcmk__cluster_get_xml_id(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
update_node_state_after_fencing(target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
static stonith_t *stonith_api = NULL;
static mainloop_timer_t *controld_fencer_connect_timer = NULL;
static char *te_client_id = NULL;
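/*!
* \internal
* \brief Fail all unconfirmed fencing actions in a transition graph
*
* \param[in,out] graph Transition graph to check
*
* \return TRUE if any action was failed (and the transition aborted),
*         otherwise FALSE
*/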
static gboolean
fail_incompletable_stonith(pcmk__graph_t *graph)
{
GList *lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GList *lpc2 = NULL;
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
if ((action->type != pcmk__cluster_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
}
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
last_action = action->xml;
pcmk__update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, pcmk__xe_id(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
te_cleanup_stonith_history_sync(st, FALSE);
if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
crm_err("Lost fencer connection (will attempt to reconnect)");
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
} else {
crm_info("Disconnected from fencer");
}
if (stonith_api) {
/* The client API won't properly re-establish notifications
* if they are still in the table, so remove them
*/
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(st);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (AM_I_DC) {
fail_incompletable_stonith(controld_globals.transition_graph);
trigger_graph();
}
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* \param[in] st Fencing API connection (ignored)
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded && controld_is_local_node(event->target)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic("Notified of own fencing");
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " QB_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
if (succeeded) {
const uint32_t flags = pcmk__node_search_any
|pcmk__node_search_cluster_cib;
pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target,
NULL, flags);
const char *uuid = NULL;
if (peer == NULL) {
return;
}
uuid = pcmk__cluster_get_xml_id(peer);
if (AM_I_DC) {
/* The DC always sends updates */
update_node_state_after_fencing(event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via refresh_remote_nodes(). For now, we rely on the
* scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"External Fencing Operation", NULL);
}
} else if (pcmk__str_eq(controld_globals.dc_name, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
// Assume the target was our DC if we don't currently have one
if (controld_globals.dc_name != NULL) {
crm_notice("Fencing target %s was our DC", event->target);
} else {
crm_notice("Fencing target %s may have been our DC",
event->target);
}
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (controld_is_local_node(event->executioner)) {
update_node_state_after_fencing(event->target, uuid);
}
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer
*
* \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
gboolean
controld_timer_fencer_connect(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return G_SOURCE_REMOVE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return G_SOURCE_REMOVE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (controld_fencer_connect_timer == NULL) {
controld_fencer_connect_timer =
mainloop_timer_add("controld_fencer_connect", 1000,
TRUE, controld_timer_fencer_connect,
GINT_TO_POINTER(TRUE));
}
if (rc != pcmk_ok) {
if (pcmk_is_set(controld_globals.fsa_input_register,
R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
return G_SOURCE_CONTINUE;
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
}
return G_SOURCE_REMOVE;
}
}
if (rc == pcmk_ok) {
stonith_api_operations_t *cmds = stonith_api->cmds;
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
handle_fence_notification);
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return G_SOURCE_REMOVE;
}
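/* Illustration only (not part of the original file): the two invocation modes
* of the callback above, per its doc comment.
*
*     controld_timer_fencer_connect(NULL);                  // blocking: retry now,
*                                                           // up to 30 attempts
*     controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); // non-blocking: retry
*                                                           // via 1s mainloop timer
*/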
void
controld_disconnect_fencer(bool destroy)
{
if (stonith_api) {
// Prevent fencer connection from coming up again
controld_clear_fsa_input_flags(R_ST_REQUIRED);
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(stonith_api);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (destroy) {
if (stonith_api) {
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
if (controld_fencer_connect_timer) {
mainloop_timer_del(controld_fencer_connect_timer);
controld_fencer_connect_timer = NULL;
}
if (te_client_id) {
free(te_client_id);
te_client_id = NULL;
}
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
pcmk__graph_action_t *action = NULL;
const char *target = NULL;
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence operation %d result: "
"No transition key given (bug?)",
((data == NULL)? -1 : data->call_id));
return;
}
if (!AM_I_DC) {
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
reason = pcmk_exec_status_str(stonith__execution_status(data));
}
crm_notice("Result of fence operation %d: %d (%s) " QB_XS " key=%s",
data->call_id, stonith__exit_status(data), reason,
(const char *) data->userdata);
return;
}
CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
&stonith_id, NULL),
goto bail);
if (controld_globals.transition_graph->complete || (stonith_id < 0)
|| !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
|| (controld_globals.transition_graph->id != transition_id)) {
crm_info("Ignoring fence operation %d result: "
"Not from current transition " QB_XS
" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
data->call_id,
pcmk__btoa(controld_globals.transition_graph->complete),
stonith_id, uuid, controld_globals.te_uuid, transition_id,
controld_globals.transition_graph->id);
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Ignoring fence operation %d result: "
"Action %d not found in transition graph (bug?) "
QB_XS " uuid=%s transition=%d",
data->call_id, stonith_id, uuid, transition_id);
goto bail;
}
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
if (target == NULL) {
crm_err("Ignoring fence operation %d result: No target given (bug?)",
data->call_id);
goto bail;
}
stop_te_timer(action);
if (stonith__exit_status(data) == CRM_EX_OK) {
const char *uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
const char *op = crm_meta_value(action->params,
PCMK__META_STONITH_ACTION);
crm_info("Fence operation %d for %s succeeded", data->call_id, target);
if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
te_action_confirmed(action, NULL);
if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
const char *value = NULL;
char *now = pcmk__ttoa(time(NULL));
gboolean is_remote_node = FALSE;
/* This check is not 100% reliable, since this node is not
* guaranteed to have the remote node cached. However, it
* doesn't have to be reliable, since the attribute manager can
* learn a node's "remoteness" by other means sooner or later.
* This allows it to learn more quickly if this node does have
* the information.
*/
if (g_hash_table_lookup(pcmk__remote_peer_cache,
uuid) != NULL) {
is_remote_node = TRUE;
}
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
is_remote_node);
free(now);
value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
is_remote_node);
value = crm_meta_value(action->params,
PCMK__META_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
update_node_state_after_fencing(target, uuid);
pcmk__set_graph_action_flags(action,
pcmk__graph_action_sent_update);
}
}
st_fail_count_reset(target);
} else {
enum pcmk__graph_next abort_action = pcmk__graph_restart;
int status = stonith__execution_status(data);
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
if (status == PCMK_EXEC_DONE) {
reason = "Agent returned error";
} else {
reason = pcmk_exec_status_str(status);
}
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
crm_warn("Fence operation %d for %s failed: %s "
"(aborting transition and giving up for now)",
data->call_id, target, reason);
abort_action = pcmk__graph_wait;
} else {
crm_notice("Fence operation %d for %s failed: %s "
"(aborting transition)", data->call_id, target, reason);
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in
* handle_fence_notification().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
bail:
free(data->userdata);
free(uuid);
return;
}
static int
fence_with_delay(const char *target, const char *type, int delay)
{
uint32_t options = st_opt_none; // Group of enum stonith_call_options
int timeout_sec = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
if (crmd_join_phase_count(controld_join_confirmed) == 1) {
stonith__set_call_options(options, target, st_opt_allow_self_fencing);
}
return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
type, timeout_sec, 0, delay);
}
/*!
* \internal
* \brief Execute a fencing action from a transition graph
*
* \param[in] graph Transition graph being executed (ignored)
* \param[in] action Fencing action to execute
*
* \return Standard Pacemaker return code
*/
int
controld_execute_fence_action(pcmk__graph_t *graph,
pcmk__graph_action_t *action)
{
int rc = 0;
const char *id = pcmk__xe_id(action->xml);
const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *type = crm_meta_value(action->params,
PCMK__META_STONITH_ACTION);
char *transition_key = NULL;
const char *priority_delay = NULL;
int delay_i = 0;
gboolean invalid_action = FALSE;
int stonith_timeout = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return EPROTO;
}
priority_delay = crm_meta_value(action->params,
PCMK_OPT_PRIORITY_FENCING_DELAY);
crm_notice("Requesting fencing (%s) targeting node %s "
QB_XS " action=%s timeout=%i%s%s",
type, target, id, stonith_timeout,
priority_delay ? " priority_delay=" : "",
priority_delay ? priority_delay : "");
/* Passing NULL means block until we can connect... */
controld_timer_fencer_connect(NULL);
pcmk__scan_min_int(priority_delay, &delay_i, 0);
rc = fence_with_delay(target, type, delay_i);
transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, 0,
controld_globals.te_uuid),
stonith_api->cmds->register_callback(stonith_api, rc,
(stonith_timeout
+ (delay_i > 0 ? delay_i : 0)),
st_opt_timeout_updates, transition_key,
"tengine_stonith_callback",
tengine_stonith_callback);
return pcmk_rc_ok;
}
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
long long st_timeout = (value != NULL)? crm_get_msec(value) : 0;
const char *our_nodename = controld_globals.cluster->priv->node_name;
if (st_timeout == 0
|| (stonith_api && (stonith_api->state != stonith_disconnected) &&
stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
our_nodename))) {
return pcmk__valid_stonith_watchdog_timeout(value);
}
return true;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
static crm_trigger_t *stonith_history_sync_trigger = NULL;
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
if (free_timers) {
mainloop_timer_del(stonith_history_sync_timer_short);
stonith_history_sync_timer_short = NULL;
mainloop_timer_del(stonith_history_sync_timer_long);
stonith_history_sync_timer_long = NULL;
} else {
mainloop_timer_stop(stonith_history_sync_timer_short);
mainloop_timer_stop(stonith_history_sync_timer_long);
}
if (st) {
st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
}
}
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
te_cleanup_stonith_history_sync(st, FALSE);
crm_debug("Fence-history synced - cancel all timers");
}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
te_trigger_stonith_history_sync(bool long_timeout)
{
/* Trigger a sync in 5s, to give more nodes a chance to show up so that
* we don't create unnecessary stonith-history-sync traffic.
*
* The long timeout of 30s is a fallback: after a successful connection
* to the fencer, we wait up to 30s for the DC to trigger a history sync.
* If that doesn't happen (e.g. the fencer segfaulted and was restarted
* by pacemakerd), we trigger a sync locally.
*/
/* Since do_stonith_history_sync() checks the fencer connection anyway,
* we are fine leaving the sync timers and stonith_history_sync_trigger
* around.
*/
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
if (long_timeout) {
if (stonith_history_sync_timer_long == NULL) {
stonith_history_sync_timer_long =
mainloop_timer_add("history_sync_long", 30000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
mainloop_timer_start(stonith_history_sync_timer_long);
} else {
if (stonith_history_sync_timer_short == NULL) {
stonith_history_sync_timer_short =
mainloop_timer_add("history_sync_short", 5000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
mainloop_timer_start(stonith_history_sync_timer_short);
}
}
/* end stonith history synchronization functions */
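/* Illustration only (not part of the original file): expected timing of the
* trigger function above.
*
*     te_trigger_stonith_history_sync(FALSE); // DC-initiated sync within 5s
*     te_trigger_stonith_history_sync(TRUE);  // fallback sync within 30s,
*                                             // armed after fencer connect
*/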
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index a91fbfa3a7..8c494b97e0 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -1,1096 +1,1095 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h> // bool, true, false
#include <stdio.h> // NULL
#include <stdlib.h> // free(), etc.
#include <glib.h> // gboolean, etc.
#include <libxml/tree.h> // xmlNode
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-controld.h>
static char *max_generation_from = NULL;
static xmlNodePtr max_generation_xml = NULL;
/*!
* \internal
* \brief Nodes from which a CIB sync has failed since the peer joined
*
* This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
* the name of a client node from which a CIB \p sync_from() call has failed in
* \p do_dc_join_finalize() since the client joined the cluster as a peer.
* \p join_id is the ID of the join round in which the \p sync_from() failed,
* and is intended for use in nack log messages.
*/
static GHashTable *failed_sync_nodes = NULL;
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Numeric counter used to identify join rounds (an unsigned int would be
* appropriate, except we get and set it in XML as int)
*/
static int current_join_id = 0;
/*!
* \internal
* \brief Get log-friendly string equivalent of a controller group join phase
*
* \param[in] phase Join phase
*
* \return Log-friendly string equivalent of \p phase
*/
static const char *
join_phase_text(enum controld_join_phase phase)
{
switch (phase) {
case controld_join_nack:
return "nack";
case controld_join_none:
return "none";
case controld_join_welcomed:
return "welcomed";
case controld_join_integrated:
return "integrated";
case controld_join_finalized:
return "finalized";
case controld_join_confirmed:
return "confirmed";
default:
return "invalid";
}
}
/*!
* \internal
* \brief Destroy the hash table containing failed sync nodes
*/
void
controld_destroy_failed_sync_table(void)
{
if (failed_sync_nodes != NULL) {
g_hash_table_destroy(failed_sync_nodes);
failed_sync_nodes = NULL;
}
}
/*!
* \internal
* \brief Remove a node from the failed sync nodes table if present
*
* \param[in] node_name Node name to remove
*/
void
controld_remove_failed_sync_node(const char *node_name)
{
if (failed_sync_nodes != NULL) {
g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
}
}
/*!
* \internal
* \brief Add to a hash table a node whose CIB failed to sync
*
* \param[in] node_name Name of node whose CIB failed to sync
* \param[in] join_id Join round when the failure occurred
*/
static void
record_failed_sync_node(const char *node_name, gint join_id)
{
if (failed_sync_nodes == NULL) {
failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
}
/* If the node is already in the table then we failed to nack it during the
* filter offer step
*/
CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
GINT_TO_POINTER(join_id)));
}
/*!
* \internal
* \brief Look up a node name in the failed sync table
*
* \param[in] node_name Name of node to look up
* \param[out] join_id Where to store the join ID of when the sync failed
*
* \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
* node name was found, or \p pcmk_rc_node_unknown otherwise.
* \note \p *join_id is set to -1 if the node is not found.
*/
static int
lookup_failed_sync_node(const char *node_name, gint *join_id)
{
*join_id = -1;
if (failed_sync_nodes != NULL) {
gpointer result = g_hash_table_lookup(failed_sync_nodes,
(gchar *) node_name);
if (result != NULL) {
*join_id = GPOINTER_TO_INT(result);
return pcmk_rc_ok;
}
}
return pcmk_rc_node_unknown;
}
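/* Illustration only (not part of the original file): expected semantics of
* the table above, assuming record_failed_sync_node("nodeA", 3) was called
* earlier. "nodeA" and "nodeB" are hypothetical node names.
*
*     gint join_id = 0;
*     lookup_failed_sync_node("nodeA", &join_id); // pcmk_rc_ok, join_id == 3
*     lookup_failed_sync_node("nodeB", &join_id); // pcmk_rc_node_unknown,
*                                                 // join_id == -1
*/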
void
crm_update_peer_join(const char *source, pcmk__node_status_t *node,
enum controld_join_phase phase)
{
enum controld_join_phase last = controld_get_join_phase(node);
CRM_CHECK(node != NULL, return);
/* Remote nodes do not participate in joins */
if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
return;
}
if (phase == last) {
crm_trace("Node %s join-%d phase is still %s "
QB_XS " nodeid=%" PRIu32 " source=%s",
node->name, current_join_id, join_phase_text(last),
node->cluster_layer_id, source);
return;
}
if ((phase <= controld_join_none) || (phase == (last + 1))) {
struct controld_node_status_data *data = NULL;
if (node->user_data == NULL) {
node->user_data =
pcmk__assert_alloc(1, sizeof(struct controld_node_status_data));
}
data = node->user_data;
data->join_phase = phase;
crm_trace("Node %s join-%d phase is now %s (was %s) "
QB_XS " nodeid=%" PRIu32 " source=%s",
node->name, current_join_id, join_phase_text(phase),
join_phase_text(last), node->cluster_layer_id,
source);
return;
}
crm_warn("Rejecting join-%d phase update for node %s because can't go from "
"%s to %s " QB_XS " nodeid=%" PRIu32 " source=%s",
current_join_id, node->name, join_phase_text(last),
join_phase_text(phase), node->cluster_layer_id, source);
}
static void
start_join_round(void)
{
GHashTableIter iter;
pcmk__node_status_t *peer = NULL;
crm_debug("Starting new join round join-%d", current_join_id);
g_hash_table_iter_init(&iter, pcmk__peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
crm_update_peer_join(__func__, peer, controld_join_none);
}
if (max_generation_from != NULL) {
free(max_generation_from);
max_generation_from = NULL;
}
if (max_generation_xml != NULL) {
pcmk__xml_free(max_generation_xml);
max_generation_xml = NULL;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
}
/*!
* \internal
* \brief Create a join message from the DC
*
* \param[in] join_op Join operation name
* \param[in] host_to Recipient of message
*/
static xmlNode *
create_dc_message(const char *join_op, const char *host_to)
{
xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to,
CRM_SYSTEM_CRMD, join_op, NULL);
/* Identify which election this is a part of */
crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
/* Add a field specifying whether the DC is shutting down. This keeps the
* joining node from fencing the old DC if it becomes the new DC.
*/
pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
pcmk_is_set(controld_globals.fsa_input_register,
R_SHUTDOWN));
return msg;
}
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
/* @TODO We don't use user_data except to distinguish one particular call
* from others. Make this clearer.
*/
xmlNode *offer = NULL;
pcmk__node_status_t *member = (pcmk__node_status_t *) value;
pcmk__assert(member != NULL);
if (!pcmk__cluster_is_node_active(member)) {
crm_info("Not making join-%d offer to inactive node %s",
current_join_id, pcmk__s(member->name, "with unknown name"));
if ((member->expected == NULL)
&& pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) {
/* You would think this unsafe, but in fact this plus an
* active resource is what causes it to be fenced.
*
* Yes, this does mean that any node that dies at the same
* time as the old DC and is not running resource (still)
* won't be fenced.
*
* I'm not happy about this either.
*/
pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
}
return;
}
if (member->name == NULL) {
crm_info("Not making join-%d offer to node uuid %s with unknown name",
current_join_id, member->xml_id);
return;
}
if (controld_globals.membership_id != controld_globals.peer_seq) {
controld_globals.membership_id = controld_globals.peer_seq;
crm_info("Making join-%d offers based on membership event %llu",
current_join_id, controld_globals.peer_seq);
}
if (user_data != NULL) {
enum controld_join_phase phase = controld_get_join_phase(member);
if (phase > controld_join_none) {
crm_info("Not making join-%d offer to already known node %s (%s)",
current_join_id, member->name, join_phase_text(phase));
return;
}
}
crm_update_peer_join(__func__, (pcmk__node_status_t*) member,
controld_join_none);
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name);
// Advertise our feature set so the joining node can bail if not compatible
crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_info("Sending join-%d offer to %s", current_join_id, member->name);
pcmk__cluster_send_message(member, pcmk_ipc_controld, offer);
pcmk__xml_free(offer);
crm_update_peer_join(__func__, member, controld_join_welcomed);
}
/* A_DC_JOIN_OFFER_ALL */
void
do_dc_join_offer_all(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int count;
/* Reset everyone's status back to down or in_ccm in the CIB.
* Any nodes that are active in the CIB but not in the cluster membership
* will be seen as offline by the scheduler anyway.
*/
current_join_id++;
start_join_round();
update_dc(NULL);
if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
crm_info("A new node joined the cluster");
}
g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL);
count = crmd_join_phase_count(controld_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
/* A_DC_JOIN_OFFER_ONE */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
pcmk__node_status_t *member = NULL;
ha_msg_input_t *welcome = NULL;
int count;
const char *join_to = NULL;
if (msg_data->data == NULL) {
crm_info("Making join-%d offers to any unconfirmed nodes "
"because an unknown node joined", current_join_id);
g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member);
check_join_state(cur_state, __func__);
return;
}
welcome = fsa_typed_data(fsa_dt_ha_msg);
if (welcome == NULL) {
// fsa_typed_data() already logged an error
return;
}
join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
if (join_to == NULL) {
crm_err("Can't make join-%d offer to unknown node", current_join_id);
return;
}
member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
/* It is possible that a node will have been sick or starting up when the
* original offer was made. However, it will either re-announce itself in
* due course, or we can re-store the original offer on the client.
*/
crm_update_peer_join(__func__, member, controld_join_none);
join_make_offer(NULL, member, NULL);
/* If the offer isn't to the local node, make an offer to the local node as
* well, to ensure the correct value for max_generation_from.
*/
if (!controld_is_local_node(join_to)) {
member = controld_get_local_node_status();
join_make_offer(NULL, member, NULL);
}
/* This was a genuine join request; cancel any existing transition and
* invoke the scheduler.
*/
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
NULL);
count = crmd_join_phase_count(controld_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
const char *elem_l = crm_element_value(left, field);
const char *elem_r = crm_element_value(right, field);
long long int_elem_l;
long long int_elem_r;
int rc = pcmk_rc_ok;
rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
if (rc != pcmk_rc_ok) { // Shouldn't be possible
crm_warn("Comparing current CIB %s as -1 "
"because '%s' is not an integer", field, elem_l);
}
rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
if (rc != pcmk_rc_ok) { // Shouldn't be possible
crm_warn("Comparing joining node's CIB %s as -1 "
"because '%s' is not an integer", field, elem_r);
}
if (int_elem_l < int_elem_r) {
return -1;
} else if (int_elem_l > int_elem_r) {
return 1;
}
return 0;
}
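/* Illustration only (not part of the original file): with the helper above,
* do_dc_join_filter_offer() effectively compares CIB generations
* lexicographically by (admin_epoch, epoch, num_updates). For example,
* (1, 0, 99) < (1, 1, 0), because the attribute loop stops at the first
* nonzero comparison.
*/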
/* A_DC_JOIN_PROCESS_REQ */
void
do_dc_join_filter_offer(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *generation = NULL;
int cmp = 0;
int join_id = -1;
int count = 0;
gint value = 0;
gboolean ack_nack_bool = TRUE;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
const char *join_version = crm_element_value(join_ack->msg,
PCMK_XA_CRM_FEATURE_SET);
pcmk__node_status_t *join_node = NULL;
if (join_from == NULL) {
crm_err("Ignoring invalid join request without node name");
return;
}
join_node = pcmk__get_node(0, join_from, NULL,
pcmk__node_search_cluster_member);
crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
if (join_id != current_join_id) {
crm_debug("Ignoring join-%d request from %s because we are on join-%d",
join_id, join_from, current_join_id);
check_join_state(cur_state, __func__);
return;
}
generation = join_ack->xml;
if (max_generation_xml != NULL && generation != NULL) {
int lpc = 0;
const char *attributes[] = {
PCMK_XA_ADMIN_EPOCH,
PCMK_XA_EPOCH,
PCMK_XA_NUM_UPDATES,
};
/* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
* element from the join client. The "if" guard is for clarity.
*/
if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
cmp = compare_int_fields(max_generation_xml, generation,
attributes[lpc]);
}
} else { // Should always be PCMK__XE_GENERATION_TUPLE
CRM_LOG_ASSERT(false);
}
}
if (ref == NULL) {
ref = "none"; // for logging only
}
if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
crm_err("Rejecting join-%d request from node %s because we failed to "
"sync its CIB in join-%d " QB_XS " ref=%s",
join_id, join_from, value, ref);
ack_nack_bool = FALSE;
} else if (!pcmk__cluster_is_node_active(join_node)) {
if (match_down_event(join_from) != NULL) {
/* The join request was received after the node was fenced or
* otherwise shutdown in a way that we're aware of. No need to log
* an error in this rare occurrence; we know the client was recently
* shut down, and receiving a lingering in-flight request is not
* cause for alarm.
*/
crm_debug("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
} else {
crm_err("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
}
ack_nack_bool = FALSE;
} else if (generation == NULL) {
crm_err("Rejecting invalid join-%d request from node %s "
"missing CIB generation " QB_XS " ref=%s",
join_id, join_from, ref);
ack_nack_bool = FALSE;
} else if ((join_version == NULL)
|| !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
crm_err("Rejecting join-%d request from node %s because feature set %s"
" is incompatible with ours (%s) " QB_XS " ref=%s",
join_id, join_from, (join_version? join_version : "pre-3.1.0"),
CRM_FEATURE_SET, ref);
ack_nack_bool = FALSE;
} else if (max_generation_xml == NULL) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with first CIB "
"generation) due to %s schema version %s " QB_XS " ref=%s",
join_id, join_from,
((validation == NULL)? "missing" : "unknown"),
pcmk__s(validation, ""), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with first CIB "
"generation) " QB_XS " ref=%s",
join_id, join_from, ref);
max_generation_xml = pcmk__xml_copy(NULL, generation);
pcmk__str_update(&max_generation_from, join_from);
}
} else if ((cmp < 0)
|| ((cmp == 0) && controld_is_local_node(join_from))) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with better CIB "
"generation than current best from %s) due to %s "
"schema version %s " QB_XS " ref=%s",
join_id, join_from, max_generation_from,
((validation == NULL)? "missing" : "unknown"),
pcmk__s(validation, ""), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with better CIB "
"generation than current best from %s) " QB_XS " ref=%s",
join_id, join_from, max_generation_from, ref);
crm_log_xml_debug(max_generation_xml, "Old max generation");
crm_log_xml_debug(generation, "New max generation");
pcmk__xml_free(max_generation_xml);
max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
pcmk__str_update(&max_generation_from, join_from);
}
} else {
crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s",
join_id, join_from, ref);
}
if (!ack_nack_bool) {
crm_update_peer_join(__func__, join_node, controld_join_nack);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
} else {
crm_update_peer_join(__func__, join_node, controld_join_integrated);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
}
count = crmd_join_phase_count(controld_join_integrated);
crm_debug("%d node%s currently integrated in join-%d",
count, pcmk__plural_s(count), join_id);
if (check_join_state(cur_state, __func__) == FALSE) {
// Don't waste time by invoking the scheduler yet
count = crmd_join_phase_count(controld_join_welcomed);
crm_debug("Waiting on join-%d requests from %d outstanding node%s",
join_id, count, pcmk__plural_s(count));
}
}
/* A_DC_JOIN_FINALIZE */
void
do_dc_join_finalize(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
char *sync_from = NULL;
int rc = pcmk_ok;
int count_welcomed = crmd_join_phase_count(controld_join_welcomed);
int count_finalizable = crmd_join_phase_count(controld_join_integrated)
+ crmd_join_phase_count(controld_join_nack);
/* We can do this straight away, to avoid clients timing us out
* while we compute the latest CIB
*/
if (count_welcomed != 0) {
crm_debug("Waiting on join-%d requests from %d outstanding node%s "
"before finalizing join", current_join_id, count_welcomed,
pcmk__plural_s(count_welcomed));
crmd_join_phase_log(LOG_DEBUG);
/* crmd_fsa_stall(FALSE); Needed? */
return;
} else if (count_finalizable == 0) {
crm_debug("Finalization not needed for join-%d at the current time",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
check_join_state(controld_globals.fsa_state, __func__);
return;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
if ((max_generation_from == NULL)
|| controld_is_local_node(max_generation_from)) {
controld_set_fsa_input_flags(R_HAVE_CIB);
}
if (!controld_globals.transition_graph->complete) {
crm_warn("Delaying join-%d finalization while transition in progress",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
crmd_fsa_stall(FALSE);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
// Send our CIB out to everyone
sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name);
} else {
// Ask for the agreed best CIB
sync_from = pcmk__str_copy(max_generation_from);
}
crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB %s.%s.%s "
"with schema %s and feature set %s from %s)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable),
crm_element_value(max_generation_xml, PCMK_XA_ADMIN_EPOCH),
crm_element_value(max_generation_xml, PCMK_XA_EPOCH),
crm_element_value(max_generation_xml, PCMK_XA_NUM_UPDATES),
crm_element_value(max_generation_xml, PCMK_XA_VALIDATE_WITH),
crm_element_value(max_generation_xml, PCMK_XA_CRM_FEATURE_SET),
sync_from);
crmd_join_phase_log(LOG_DEBUG);
rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
sync_from, NULL, cib_none);
fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
}
void
free_max_generation(void)
{
free(max_generation_from);
max_generation_from = NULL;
pcmk__xml_free(max_generation_xml);
max_generation_xml = NULL;
}
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
CRM_LOG_ASSERT(-EPERM != rc);
if (rc != pcmk_ok) {
const char *sync_from = (const char *) user_data;
do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
"Could not sync CIB from %s in join-%d: %s",
sync_from, current_join_id, pcmk_strerror(rc));
if (rc != -pcmk_err_old_data) {
record_failed_sync_node(sync_from, current_join_id);
}
/* restart the whole join process */
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
__func__);
} else if (!AM_I_DC) {
crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
} else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
"(%s)", current_join_id,
fsa_state2string(controld_globals.fsa_state));
} else {
controld_set_fsa_input_flags(R_HAVE_CIB);
/* make sure dc_uuid is re-set to us */
if (!check_join_state(controld_globals.fsa_state, __func__)) {
int count_finalizable = 0;
count_finalizable = crmd_join_phase_count(controld_join_integrated)
+ crmd_join_phase_count(controld_join_nack);
crm_debug("Notifying %d node%s of join-%d results",
count_finalizable, pcmk__plural_s(count_finalizable),
current_join_id);
g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL);
}
}
}
static void
join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data)
{
const char *node = user_data;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL; // for register_fsa_error() macro
crm_crit("join-%d node history update (via CIB call %d) for node %s "
"failed: %s",
current_join_id, call_id, node, pcmk_strerror(rc));
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
crm_debug("join-%d node history update (via CIB call %d) for node %s "
"complete",
current_join_id, call_id, node);
check_join_state(controld_globals.fsa_state, __func__);
}
/* A_DC_JOIN_PROCESS_ACK */
void
do_dc_join_ack(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int join_id = -1;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
pcmk__node_status_t *peer = NULL;
enum controld_join_phase phase = controld_join_none;
- enum controld_section_e section = controld_section_lrm;
+ const bool unlocked_only = pcmk_is_set(controld_globals.flags,
+ controld_shutdown_lock_enabled);
char *xpath = NULL;
xmlNode *state = join_ack->xml;
xmlNode *execd_state = NULL;
cib_t *cib = controld_globals.cib_conn;
int rc = pcmk_ok;
// Sanity checks
if (join_from == NULL) {
crm_warn("Ignoring message received without node identification");
goto done;
}
if (op == NULL) {
crm_warn("Ignoring message received from %s without task", join_from);
goto done;
}
if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
op, join_from, CRM_OP_JOIN_CONFIRM);
goto done;
}
if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
crm_warn("Ignoring join confirmation from %s without valid join ID",
join_from);
goto done;
}
peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
phase = controld_get_join_phase(peer);
if (phase != controld_join_finalized) {
crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
"(currently %s not %s)",
join_id, join_from, join_phase_text(phase),
join_phase_text(controld_join_finalized));
goto done;
}
if (join_id != current_join_id) {
crm_err("Rejecting join-%d confirmation from %s "
"because currently on join-%d",
join_id, join_from, current_join_id);
crm_update_peer_join(__func__, peer, controld_join_nack);
goto done;
}
crm_update_peer_join(__func__, peer, controld_join_confirmed);
/* Update CIB with node's current executor state. A new transition will be
* triggered later, when the CIB manager notifies us of the change.
*
* The delete and modify requests are part of an atomic transaction.
*/
rc = cib->cmds->init_transaction(cib);
if (rc != pcmk_ok) {
goto done;
}
// Delete relevant parts of node's current executor state from CIB
- if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
- section = controld_section_lrm_unlocked;
- }
- controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
+ controld_node_history_deletion_strings(join_from, unlocked_only, &xpath,
+ NULL);
rc = cib->cmds->remove(cib, xpath, NULL,
cib_xpath|cib_multiple|cib_transaction);
if (rc != pcmk_ok) {
goto done;
}
// Update CIB with node's latest known executor state
if (controld_is_local_node(join_from)) {
// Use the latest possible state if processing our own join ack
execd_state = controld_query_executor_state();
if (execd_state != NULL) {
crm_debug("Updating local node history for join-%d from query "
"result",
current_join_id);
state = execd_state;
} else {
crm_warn("Updating local node history from join-%d confirmation "
"because query failed",
current_join_id);
}
} else {
crm_debug("Updating node history for %s from join-%d confirmation",
join_from, current_join_id);
}
rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
cib_can_create|cib_transaction);
pcmk__xml_free(execd_state);
if (rc != pcmk_ok) {
goto done;
}
// Commit the transaction
rc = cib->cmds->end_transaction(cib, true, cib_none);
fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
if (rc > 0) {
// join_from will be freed after callback
join_from = NULL;
rc = pcmk_ok;
}
done:
if (rc != pcmk_ok) {
crm_crit("join-%d node history update for node %s failed: %s",
current_join_id, join_from, pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free(join_from);
free(xpath);
}
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *acknak = NULL;
xmlNode *tmp1 = NULL;
pcmk__node_status_t *join_node = value;
const char *join_to = join_node->name;
enum controld_join_phase phase = controld_get_join_phase(join_node);
bool integrated = false;
switch (phase) {
case controld_join_integrated:
integrated = true;
break;
case controld_join_nack:
break;
default:
crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
"for join-%d",
join_to, join_phase_text(phase), current_join_id);
return;
}
/* Update the <node> element with the node's name and UUID, in case they
* weren't known before
*/
crm_trace("Updating node name and UUID in CIB for %s", join_to);
tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node));
crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
pcmk__xml_free(tmp1);
join_node = pcmk__get_node(0, join_to, NULL,
pcmk__node_search_cluster_member);
if (!pcmk__cluster_is_node_active(join_node)) {
/*
* NACK'ing nodes that the membership layer doesn't know about yet
* simply creates more churn
*
* Better to leave them waiting and let the join restart when
* the new membership event comes in
*
* All other NACKs (due to versions etc) should still be processed
*/
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
return;
}
// Acknowledge or nack node's join request
crm_debug("%sing join-%d request from %s",
integrated? "Acknowledg" : "Nack", current_join_id, join_to);
acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
if (integrated) {
// No change needed for a nacked node
crm_update_peer_join(__func__, join_node, controld_join_finalized);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
/* Iterate through the remote peer cache and add information on which
* node hosts each to the ACK message. This keeps new controllers in
* sync with what has already happened.
*/
if (pcmk__cluster_num_remote_nodes() > 0) {
GHashTableIter iter;
pcmk__node_status_t *node = NULL;
xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *remote = NULL;
if (!node->conn_host) {
continue;
}
remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
pcmk__xe_set_props(remote,
PCMK_XA_ID, node->name,
PCMK__XA_NODE_STATE, node->state,
PCMK__XA_CONNECTION_HOST, node->conn_host,
NULL);
}
}
}
pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak);
pcmk__xml_free(acknak);
return;
}
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
static unsigned long long highest_seq = 0;
if (controld_globals.membership_id != controld_globals.peer_seq) {
crm_debug("join-%d: Membership changed from %llu to %llu "
QB_XS " highest=%llu state=%s for=%s",
current_join_id, controld_globals.membership_id,
controld_globals.peer_seq, highest_seq,
fsa_state2string(cur_state), source);
if (highest_seq < controld_globals.peer_seq) {
/* Don't spam the FSA with duplicates */
highest_seq = controld_globals.peer_seq;
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
} else if (cur_state == S_INTEGRATION) {
if (crmd_join_phase_count(controld_join_welcomed) == 0) {
int count = crmd_join_phase_count(controld_join_integrated);
crm_debug("join-%d: Integration of %d peer%s complete "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
return TRUE;
}
} else if (cur_state == S_FINALIZE_JOIN) {
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
crm_debug("join-%d: Delaying finalization until we have CIB "
QB_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
return TRUE;
} else if (crmd_join_phase_count(controld_join_welcomed) != 0) {
int count = crmd_join_phase_count(controld_join_welcomed);
crm_debug("join-%d: Still waiting on %d welcomed node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(controld_join_integrated) != 0) {
int count = crmd_join_phase_count(controld_join_integrated);
crm_debug("join-%d: Still waiting on %d integrated node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(controld_join_finalized) != 0) {
int count = crmd_join_phase_count(controld_join_finalized);
crm_debug("join-%d: Still waiting on %d finalized node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else {
crm_debug("join-%d: Complete " QB_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
return TRUE;
}
}
return FALSE;
}
void
do_dc_join_final(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
crm_update_quorum(pcmk__cluster_has_quorum(), TRUE);
}
int crmd_join_phase_count(enum controld_join_phase phase)
{
int count = 0;
pcmk__node_status_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, pcmk__peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
if (controld_get_join_phase(peer) == phase) {
count++;
}
}
return count;
}
void crmd_join_phase_log(int level)
{
pcmk__node_status_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, pcmk__peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name,
join_phase_text(controld_get_join_phase(peer)));
}
}
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 1cc4ae008b..1c52477689 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1493 +1,1472 @@
/*
* Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>
#include <libxml/xpath.h> // xmlXPathObject, etc.
#include <pacemaker-controld.h>
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry */
#define MAX_START_TIMEOUT_MS 10000
#define cmd_set_flags(cmd, flags_to_set) do { \
(cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define cmd_clear_flags(cmd, flags_to_clear) do { \
(cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_cmd_status {
cmd_reported_success = (1 << 0),
cmd_cancel = (1 << 1),
};
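/* Illustration only (not part of the original file): how the flag macros
* above are typically used with this enum. "cmd" stands for any
* remote_ra_cmd_t pointer.
*
*     cmd_set_flags(cmd, cmd_cancel);   // mark an in-flight command cancelled
*     if (pcmk_is_set(cmd->status, cmd_cancel)) {
*         // skip reporting results for cancelled commands
*     }
*     cmd_clear_flags(cmd, cmd_reported_success);
*/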
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
int monitor_timeout_id;
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
pcmk__action_result_t result;
int call_id;
time_t start_time;
uint32_t status;
} remote_ra_cmd_t;
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_status {
expect_takeover = (1 << 0),
takeover_complete = (1 << 1),
remote_active = (1 << 2),
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
remote_in_maint = (1 << 3),
/* Similarly for whether we are controlling a guest node or a remote
* node: a meta-attribute is already in the transition, and since the
* situation doesn't change over time, we can record the information at
* resource start for later use, when the attributes aren't at hand.
*/
controlling_guest = (1 << 4),
};
typedef struct remote_ra_data_s {
crm_trigger_t *work;
remote_ra_cmd_t *cur_cmd;
GList *cmds;
GList *recurring_cmds;
uint32_t status;
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
static void
free_cmd(gpointer user_data)
{
remote_ra_cmd_t *cmd = user_data;
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
}
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
}
if (cmd->takeover_timeout_id) {
g_source_remove(cmd->takeover_timeout_id);
}
free(cmd->owner);
free(cmd->rsc_id);
free(cmd->action);
free(cmd->userdata);
pcmk__reset_result(&(cmd->result));
lrmd_key_value_freeall(cmd->params);
free(cmd);
}
static int
generate_callid(void)
{
static int remote_ra_callid = 0;
remote_ra_callid++;
if (remote_ra_callid <= 0) {
remote_ra_callid = 1;
}
return remote_ra_callid;
}
static gboolean
recurring_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->interval_id = 0;
connection_rsc = controld_get_executor_state(cmd->rsc_id, false);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
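/*!
 * \internal
 * \brief Timer callback for when a command's start delay has passed
 *
 * Clear the delay timer and trigger the command queue to be processed.
 *
 * \param[in,out] data Delayed command
 *
 * \return FALSE (to indicate the timer should not be rescheduled)
 */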
static gboolean
start_delay_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->delay_id = 0;
connection_rsc = controld_get_executor_state(cmd->rsc_id, false);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
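/*!
 * \internal
 * \brief Check whether a remote node's transient attributes should be purged
 *
 * Purging is skipped only when the remote daemon has been running since
 * before its connection host was noticed as lost (with 20s of fuzz), meaning
 * the node itself likely stayed up and its attributes are still valid. On
 * any error condition, default to purging.
 *
 * \param[in] node Remote node to check
 *
 * \return true if the node's transient attributes should be purged
 */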
static bool
should_purge_attributes(pcmk__node_status_t *node)
{
pcmk__node_status_t *conn_node = NULL;
lrm_state_t *connection_rsc = NULL;
if ((node->conn_host == NULL) || (node->name == NULL)) {
return true;
}
/* Get the node that was hosting the remote connection resource from the
* peer cache. That's the one we really care about here.
*/
conn_node = pcmk__get_node(0, node->conn_host, NULL,
pcmk__node_search_cluster_member);
if (conn_node == NULL) {
return true;
}
/* Check the uptime of connection_rsc. If it hasn't been running long
* enough, set purge=true. "Long enough" means it started running earlier
* than the timestamp when we noticed it went away in the first place.
*/
connection_rsc = controld_get_executor_state(node->name, false);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
time_t uptime = lrmd__uptime(lrm);
time_t now = time(NULL);
/* Add 20s of fuzziness to give corosync a while to notice the remote
* host is gone. On various error conditions (failure to get uptime,
* peer_lost isn't set) we default to purging.
*/
if (uptime > 0 &&
conn_node->peer_lost > 0 &&
uptime + 20 >= now - conn_node->peer_lost) {
return false;
}
}
return true;
}
-static enum controld_section_e
-section_to_delete(bool purge)
-{
- if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
- if (purge) {
- return controld_section_all_unlocked;
- } else {
- return controld_section_lrm_unlocked;
- }
- } else {
- if (purge) {
- return controld_section_all;
- } else {
- return controld_section_lrm;
- }
- }
-}
-
static void
purge_remote_node_attrs(int call_opt, pcmk__node_status_t *node)
{
- bool purge = should_purge_attributes(node);
- enum controld_section_e section = section_to_delete(purge);
+ const bool unlocked_only = pcmk_is_set(controld_globals.flags,
+ controld_shutdown_lock_enabled);
- /* Purge node from attrd's memory */
- if (purge) {
- update_attrd_remote_node_removed(node->name, NULL);
+ // Purge node's transient attributes (from attribute manager and CIB)
+ if (should_purge_attributes(node)) {
+ controld_purge_node_attrs(node->name, true);
}
- controld_delete_node_state(node->name, section, call_opt);
+ controld_delete_node_history(node->name, unlocked_only, call_opt);
}
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node joining
*
* \param[in] node_name Name of newly integrated pacemaker_remote node
*/
static void
remote_node_up(const char *node_name)
{
int call_opt;
xmlNode *update, *state;
pcmk__node_status_t *node = NULL;
lrm_state_t *connection_rsc = NULL;
CRM_CHECK(node_name != NULL, return);
crm_info("Announcing Pacemaker Remote node %s", node_name);
call_opt = crmd_cib_smart_opt();
/* Delete node's CRM_OP_PROBED attribute. Deleting any attribute ensures
* that the attribute manager learns the node is remote. Deletion of this
* specific attribute is a holdover from when it had special meaning.
*
* @COMPAT Find another way to tell attrd that the node is remote, without
* risking deletion or overwrite of an arbitrary attribute. Then work on
* deprecating CRM_OP_PROBED.
*/
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
/* Ensure node is in the remote peer cache with member status */
node = pcmk__cluster_lookup_remote_node(node_name);
CRM_CHECK((node != NULL) && (node->name != NULL), return);
purge_remote_node_attrs(call_opt, node);
pcmk__update_peer_state(__func__, node, PCMK_VALUE_MEMBER, 0);
/* Apply any start state that we were given from the environment on the
* remote node.
*/
connection_rsc = controld_get_executor_state(node->name, false);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
const char *start_state = lrmd__node_start_state(lrm);
if (start_state) {
set_join_state(start_state, node->name, node->xml_id, true);
}
}
/* pacemaker_remote nodes don't participate in the membership layer,
* so cluster nodes don't automatically get notified when they come and go.
* We send a cluster message to the DC, and update the CIB node state entry,
* so the DC will get it sooner (via message) or later (via CIB refresh),
* and any other interested parties can query the CIB.
*/
broadcast_remote_state_message(node_name, true);
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
state = create_node_state_update(node, controld_node_update_cluster, update,
__func__);
/* Clear the PCMK__XA_NODE_FENCED flag in the node state. If the node ever
* needs to be fenced, this flag will allow various actions to determine
* whether the fencing has happened yet.
*/
crm_xml_add(state, PCMK__XA_NODE_FENCED, "0");
/* TODO: If the remote connection drops, and this (async) CIB update either
* failed or has not yet completed, later actions could mistakenly think the
* node has already been fenced (if the PCMK__XA_NODE_FENCED attribute was
* previously set, because it won't have been cleared). This could prevent
* actual fencing or allow recurring monitor failures to be cleared too
* soon. Ideally, we wouldn't rely on the CIB for the fenced status.
*/
controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
pcmk__xml_free(update);
}
enum down_opts {
DOWN_KEEP_LRM,
DOWN_ERASE_LRM
};
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
* \param[in] opts Whether to keep or erase LRM history
*/
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
xmlNode *update;
int call_opt = crmd_cib_smart_opt();
pcmk__node_status_t *node = NULL;
- /* Purge node from attrd's memory */
- update_attrd_remote_node_removed(node_name, NULL);
+ // Purge node's transient attributes (from attribute manager and CIB)
+ controld_purge_node_attrs(node_name, true);
- /* Normally, only node attributes should be erased, and the resource history
- * should be kept until the node comes back up. However, after a successful
- * fence, we want to clear the history as well, so we don't think resources
- * are still running on the node.
+ /* Normally, the resource history should be kept until the node comes back
+ * up. However, after a successful fence, clear the history so we don't
+ * think resources are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
- controld_delete_node_state(node_name, controld_section_all, call_opt);
- } else {
- controld_delete_node_state(node_name, controld_section_attrs, call_opt);
+ controld_delete_node_history(node_name, false, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
node = pcmk__cluster_lookup_remote_node(node_name);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, PCMK__VALUE_LOST, 0);
/* Notify DC */
broadcast_remote_state_message(node_name, false);
/* Update CIB node state */
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
create_node_state_update(node, controld_node_update_cluster, update,
__func__);
controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
pcmk__xml_free(update);
}
/*!
* \internal
* \brief Handle effects of a remote RA command on node state
*
* \param[in] cmd Completed remote RA command
*/
static void
check_remote_node_state(const remote_ra_cmd_t *cmd)
{
/* Only successful actions can change node state */
if (!pcmk__result_ok(&(cmd->result))) {
return;
}
if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
remote_node_up(cmd->rsc_id);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_casei)) {
/* After a successful migration, we don't need to do remote_node_up()
* because the DC already knows the node is up, and we don't want to
* clear LRM history etc. We do need to add the remote node to this
* host's remote peer cache, because (unless it happens to be DC)
* it hasn't been tracking the remote node, and other code relies on
* the cache to distinguish remote nodes from unseen cluster nodes.
*/
pcmk__node_status_t *node =
pcmk__cluster_lookup_remote_node(cmd->rsc_id);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, PCMK_VALUE_MEMBER, 0);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
lrm_state_t *lrm_state = controld_get_executor_state(cmd->rsc_id,
false);
remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
if (ra_data) {
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Stop means down if we didn't successfully migrate elsewhere */
remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
* un-cache the node, so we don't have stale info
*/
pcmk__cluster_forget_remote_node(cmd->rsc_id);
}
}
}
/* We don't do anything for successful monitors, which is correct for
* routine recurring monitors, and for monitors on nodes where the
* connection isn't supposed to be (the cluster will stop the connection in
* that case). However, if the initial probe finds the connection already
* active on the node where we want it, we probably should do
* remote_node_up(). Unfortunately, we can't distinguish that case here.
* Given that connections have to be initiated by the cluster, the chance of
* that should be close to zero.
*/
}
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
lrmd_event_data_t op = { 0, };
check_remote_node_state(cmd);
op.type = lrmd_event_exec_complete;
op.rsc_id = cmd->rsc_id;
op.op_type = cmd->action;
op.user_data = cmd->userdata;
op.timeout = cmd->timeout;
op.interval_ms = cmd->interval_ms;
op.t_run = cmd->start_time;
op.t_rcchange = cmd->start_time;
lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
cmd->result.exit_reason);
if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
op.t_rcchange = time(NULL);
/* This edge case should never occur, but if it did, the result would be
 * that a failure was not processed correctly. It is only remotely
 * possible because we can detect that a connection resource's TCP
 * connection has failed at any moment after start completes, while the
 * actual recurring operation is just a connectivity ping.
 *
 * Basically, we are not guaranteed that the first successful monitor op
 * and a subsequent failed monitor op won't have the same timestamp, but
 * we have to make it look like they occurred at separate times. */
if (op.t_rcchange == op.t_run) {
op.t_rcchange++;
}
}
if (cmd->params) {
lrmd_key_value_t *tmp;
op.params = pcmk__strkey_table(free, free);
for (tmp = cmd->params; tmp; tmp = tmp->next) {
pcmk__insert_dup(op.params, tmp->key, tmp->value);
}
}
op.call_id = cmd->call_id;
op.remote_nodename = cmd->owner;
lrm_op_callback(&op);
if (op.params) {
g_hash_table_destroy(op.params);
}
lrmd__reset_result(&op);
}
/*!
* \internal
* \brief Return a remote command's remaining timeout in seconds
*
* \param[in] cmd Remote command to check
*
* \return Command's remaining timeout in seconds
*/
static int
remaining_timeout_sec(const remote_ra_cmd_t *cmd)
{
return pcmk__timeout_ms2s(cmd->timeout) - (time(NULL) - cmd->start_time);
}
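/*!
 * \internal
 * \brief Timer callback to retry connecting for an in-flight start command
 *
 * Retry the remote connection with whatever timeout remains; if not enough
 * time remains, report the command as timed out.
 *
 * \param[in,out] data Executor state for the remote connection
 *
 * \return FALSE (to indicate the timer should not be rescheduled)
 */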
static gboolean
retry_start_cmd_cb(gpointer data)
{
lrm_state_t *lrm_state = data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = ETIME;
int remaining = 0;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
}
cmd = ra_data->cur_cmd;
if (!pcmk__is_up_action(cmd->action)) {
return FALSE;
}
remaining = remaining_timeout_sec(cmd);
if (remaining > 0) {
rc = handle_remote_ra_start(lrm_state, cmd, remaining * 1000);
} else {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Not enough time remains to retry remote connection");
}
if (rc != pcmk_rc_ok) {
report_remote_ra_result(cmd);
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
ra_data->cur_cmd = NULL;
free_cmd(cmd);
} else {
/* wait for connection event */
}
return FALSE;
}
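/*!
 * \internal
 * \brief Timer callback for when an expected takeover did not occur in time
 *
 * Proceed with a full stop of the connection, as if no takeover had been
 * expected.
 *
 * \param[in,out] data Stop command that was waiting on the takeover
 *
 * \return FALSE (to indicate the timer should not be rescheduled)
 */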
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
crm_info("takeover event timed out for node %s", cmd->rsc_id);
cmd->takeover_timeout_id = 0;
lrm_state = controld_get_executor_state(cmd->rsc_id, false);
handle_remote_ra_stop(lrm_state, cmd);
free_cmd(cmd);
return FALSE;
}
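/*!
 * \internal
 * \brief Timer callback for when a monitor poke response was not received
 *
 * Report the monitor as timed out and drop the connection.
 *
 * \param[in,out] data Monitor command that timed out
 *
 * \return FALSE (to indicate the timer should not be rescheduled)
 */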
static gboolean
monitor_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
lrm_state = controld_get_executor_state(cmd->rsc_id, false);
crm_info("Timed out waiting for remote poke response from %s%s",
cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
"Remote executor did not respond");
if (lrm_state && lrm_state->remote_ra_data) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (ra_data->cur_cmd == cmd) {
ra_data->cur_cmd = NULL;
}
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
}
report_remote_ra_result(cmd);
free_cmd(cmd);
if (lrm_state) {
// @TODO Should we move this before reporting the result above?
lrm_state_disconnect(lrm_state);
}
return FALSE;
}
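/*!
 * \internal
 * \brief Fake a successful executor event for an action that never ran
 *
 * Build an execution-complete event with an OK result, and process it as if
 * the executor had reported it.
 *
 * \param[in,out] lrm_state Executor state to process the event with
 *                          (or NULL to use the local executor state)
 * \param[in]     rsc_id    ID of resource the event is for
 * \param[in]     op_type   Action name for the event
 */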
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
lrmd_event_data_t op = { 0, };
if (lrm_state == NULL) {
/* If lrm_state is not given, assume the local executor state */
lrm_state = controld_get_executor_state(NULL, false);
}
pcmk__assert(lrm_state != NULL);
op.type = lrmd_event_exec_complete;
op.rsc_id = rsc_id;
op.op_type = op_type;
op.t_run = time(NULL);
op.t_rcchange = op.t_run;
op.call_id = generate_callid();
lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
process_lrm_event(lrm_state, &op, NULL, NULL);
}
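/*!
 * \internal
 * \brief Handle an event from the remote connection
 *
 * Handle client takeovers and unexpected disconnects, pass completed
 * executor actions up to the usual callback, and match connect, poke, and
 * disconnect events against the currently in-flight remote command.
 *
 * \param[in,out] op Event to handle
 */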
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
CRM_CHECK((op != NULL) && (op->remote_nodename != NULL), return);
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
crm_exit_str((crm_exit_t) op->rc), op->rc,
pcmk_exec_status_str(op->op_status), op->op_status);
lrm_state = controld_get_executor_state(op->remote_nodename, false);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (pcmk_is_set(ra_data->status, expect_takeover)) {
// Great, we knew this was coming
lrm_remote_clear_flags(lrm_state, expect_takeover);
lrm_remote_set_flags(lrm_state, takeover_complete);
} else {
crm_err("Disconnecting from Pacemaker Remote node %s due to "
"unexpected client takeover", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the
 * control of mainloop. Do not free lrm_state->conn yet; it will be freed
 * in the following stop action.
 */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* Pass all exec-complete events up, unless another node took over the
 * connection */
if (op->type == lrmd_event_exec_complete) {
if (pcmk_is_set(ra_data->status, takeover_complete)) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (!pcmk_is_set(ra_data->status, remote_active)) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name,
PCMK_ACTION_STOP);
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start and migrate_from actions complete only after the connection
 * comes back to us. */
if ((op->type == lrmd_event_connect) && pcmk__is_up_action(cmd->action)) {
if (op->connection_rc < 0) {
int remaining = remaining_timeout_sec(cmd);
if ((op->connection_rc == -ENOKEY)
|| (op->connection_rc == -EKEYREJECTED)) {
// Hard error, don't retry
pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
PCMK_EXEC_ERROR,
pcmk_strerror(op->connection_rc));
} else if (remaining > 3) {
crm_trace("Rescheduling start (%ds remains before timeout)",
remaining);
pcmk__create_timer(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("Not enough time before timeout (%ds) "
"to reschedule start", remaining);
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"%s without enough time to retry",
pcmk_strerror(op->connection_rc));
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
lrm_remote_set_flags(lrm_state, remote_active);
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_poke)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time; after that, only worry about
 * failures. For this function, getting the poke back is always a success.
 * Pokes only fail if the send fails or the response times out. */
if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
cmd_set_flags(cmd, cmd_reported_success);
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = pcmk__create_timer(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_disconnect)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (pcmk_is_set(ra_data->status, remote_active) &&
!pcmk_is_set(cmd->status, cmd_cancel)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Remote connection unexpectedly dropped "
"during monitor");
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
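/*!
 * \internal
 * \brief Handle a stop of the remote connection resource
 *
 * Discard pending operations (or, if the connection was taken over, the
 * resource history), disconnect, free all queued commands, and report a
 * successful result for \p cmd if one is given.
 *
 * \param[in,out] lrm_state Executor state for the remote connection
 * \param[in,out] cmd       Stop command to report a result for (may be NULL)
 */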
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
remote_ra_data_t *ra_data = NULL;
pcmk__assert(lrm_state != NULL);
ra_data = lrm_state->remote_ra_data;
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Delete pending ops whenever the remote connection is intentionally
 * stopped */
g_hash_table_remove_all(lrm_state->active_ops);
} else {
/* We no longer hold the history if this connection has been migrated;
 * however, we keep the metadata cache for future use */
lrm_state_reset_tables(lrm_state, FALSE);
}
lrm_remote_clear_flags(lrm_state, remote_active);
lrm_state_disconnect(lrm_state);
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
ra_data->cmds = NULL;
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
if (cmd) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
}
/*!
 * \internal
 * \brief Initiate an asynchronous remote connection for a start command
 *
 * \param[in,out] lrm_state  Executor state for the remote connection
 * \param[in,out] cmd        Start command being executed
 * \param[in]     timeout_ms Connection timeout (in milliseconds), capped at
 *                           MAX_START_TIMEOUT_MS
 *
 * \return Standard Pacemaker return code
 */
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
int rc = pcmk_rc_ok;
for (tmp = cmd->params; tmp; tmp = tmp->next) {
if (pcmk__strcase_any_of(tmp->key,
PCMK_REMOTE_RA_ADDR, PCMK_REMOTE_RA_SERVER,
NULL)) {
server = tmp->value;
} else if (pcmk__str_eq(tmp->key, PCMK_REMOTE_RA_PORT,
pcmk__str_none)) {
port = atoi(tmp->value);
} else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER,
pcmk__str_none)) {
lrm_remote_set_flags(lrm_state, controlling_guest);
}
}
rc = controld_connect_remote_executor(lrm_state, server, port,
timeout_used);
if (rc != pcmk_rc_ok) {
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Could not connect to Pacemaker Remote node %s: %s",
lrm_state->node_name, pcmk_rc_str(rc));
}
return rc;
}
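/*!
 * \internal
 * \brief Mainloop trigger handler that executes queued remote commands
 *
 * Dispatch queued commands one at a time, stopping early whenever a command
 * must wait on a timer or an asynchronous connection event.
 *
 * \param[in,out] user_data Executor state for the remote connection
 *
 * \return TRUE (to keep the trigger registered)
 */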
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
if (handle_remote_ra_start(lrm_state, cmd,
cmd->timeout) == pcmk_rc_ok) {
/* Take care of this later, when we get the async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, pcmk_strerror(rc));
}
} else {
rc = -1;
pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
PCMK_EXEC_DONE, "Remote connection inactive");
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = pcmk__create_timer(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {
if (pcmk_is_set(ra_data->status, expect_takeover)) {
/* Briefly wait on stop for an expected takeover to occur. If
* the takeover does not occur during the wait, that's fine; it
* just means that the remote node's resource history will be
* cleared, which will require probing all resources on the
* remote node. If the takeover does occur successfully, then we
* can leave the status section intact.
*/
cmd->takeover_timeout_id = pcmk__create_timer((cmd->timeout/2),
connection_takeover_timeout_cb,
cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
handle_remote_ra_stop(lrm_state, cmd);
} else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
lrm_remote_clear_flags(lrm_state, takeover_complete);
lrm_remote_set_flags(lrm_state, expect_takeover);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Currently the only reloadable parameter is
* PCMK_REMOTE_RA_RECONNECT_INTERVAL, which is only used by the
* scheduler via the CIB, so reloads are a no-op.
*
* @COMPAT DC <2.1.0: We only need to check for "reload" in case
* we're in a rolling upgrade with a DC scheduling "reload" instead
* of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
* so this would work for that purpose as well.
*/
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
free_cmd(cmd);
}
return TRUE;
}
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = NULL;
if (lrm_state->remote_ra_data) {
return;
}
ra_data = pcmk__assert_alloc(1, sizeof(remote_ra_data_t));
ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
lrm_state->remote_ra_data = ra_data;
}
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (!ra_data) {
return;
}
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
mainloop_destroy_trigger(ra_data->work);
free(ra_data);
lrm_state->remote_ra_data = NULL;
}
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
return TRUE;
}
return (id != NULL) && (controld_get_executor_state(id, false) != NULL)
&& !controld_is_local_node(id);
}
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
lrmd_rsc_info_t *info = NULL;
CRM_CHECK(rsc_id != NULL, return NULL);
if (controld_get_executor_state(rsc_id, false) != NULL) {
info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t));
info->id = pcmk__str_copy(rsc_id);
info->type = pcmk__str_copy(REMOTE_LRMD_RA);
info->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_OCF);
info->provider = pcmk__str_copy("pacemaker");
}
return info;
}
static gboolean
is_remote_ra_supported_action(const char *action)
{
return pcmk__str_any_of(action,
PCMK_ACTION_START,
PCMK_ACTION_STOP,
PCMK_ACTION_MONITOR,
PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_RELOAD,
NULL);
}
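/*!
 * \internal
 * \brief Report failure for all recurring monitors in a command list
 *
 * \param[in,out] list List of remote commands to check
 *
 * \return Updated list, with the failed monitors removed and freed
 */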
static GList *
fail_all_monitor_cmds(GList * list)
{
GList *rm_list = NULL;
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms > 0)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
rm_list = g_list_append(rm_list, cmd);
}
}
for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, "Lost connection to remote executor");
crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
report_remote_ra_result(cmd);
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
/* frees only the list data, not the cmds */
g_list_free(rm_list);
return list;
}
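/*!
 * \internal
 * \brief Remove a command matching an action and interval from a list
 *
 * The first matching command (if any) is removed from the list and freed.
 *
 * \param[in,out] list        List of remote commands to check
 * \param[in]     action      Action to match
 * \param[in]     interval_ms Interval to match
 *
 * \return Updated command list
 */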
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
break;
}
cmd = NULL;
}
if (cmd) {
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
return list;
}
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_data_t *ra_data = NULL;
CRM_CHECK(rsc_id != NULL, return -EINVAL);
connection_rsc = controld_get_executor_state(rsc_id, false);
if (!connection_rsc || !connection_rsc->remote_ra_data) {
return -EINVAL;
}
ra_data = connection_rsc->remote_ra_data;
ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
interval_ms);
if (ra_data->cur_cmd &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
(pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
}
return 0;
}
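/*!
 * \internal
 * \brief Merge a newly requested monitor with a queued or in-flight duplicate
 *
 * If a monitor with the same interval already exists (recurring, queued, or
 * in flight), update its user data, reset its call ID if it has already
 * reported success, and reschedule it to run immediately if it was waiting
 * on its next interval.
 *
 * \param[in,out] ra_data     Remote connection data to search
 * \param[in]     interval_ms Interval of requested monitor
 * \param[in]     userdata    User data of requested monitor
 *
 * \return Matching existing monitor command if found, otherwise NULL
 */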
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
const char *userdata)
{
GList *gIter = NULL;
remote_ra_cmd_t *cmd = NULL;
/* A potential duplicate monitor operation could exist in three places:
 * 1. the recurring_cmds list, where the op is waiting for its next interval
 * 2. the cmds list, where the op is queued to be executed immediately
 * 3. cur_cmd, meaning the monitor op is in flight right now
 */
if (interval_ms == 0) {
return NULL;
}
if (ra_data->cur_cmd &&
!pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
(ra_data->cur_cmd->interval_ms == interval_ms)
&& pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
cmd = ra_data->cur_cmd;
goto handle_dup;
}
for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
return NULL;
handle_dup:
crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms);
/* update the userdata */
if (userdata) {
free(cmd->userdata);
cmd->userdata = pcmk__str_copy(userdata);
}
/* if we've already reported success, generate a new call id */
if (pcmk_is_set(cmd->status, cmd_reported_success)) {
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
cmd_clear_flags(cmd, cmd_reported_success);
}
/* If interval_id is set, we are waiting for this command's next interval.
 * Instead of waiting, cancel the timer and execute the action
 * immediately */
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
cmd->interval_id = 0;
recurring_helper(cmd);
}
return cmd;
}
/*!
* \internal
* \brief Execute an action using the (internal) ocf:pacemaker:remote agent
*
* \param[in] lrm_state Executor state object for remote connection
* \param[in] rsc_id Connection resource ID
* \param[in] action Action to execute
* \param[in] userdata String to copy and pass to execution callback
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] timeout_ms Action timeout (in milliseconds)
* \param[in] start_delay_ms Delay (in milliseconds) before executing action
* \param[in,out] params Connection resource parameters
* \param[out] call_id Where to store call ID on success
*
* \return Standard Pacemaker return code
* \note This takes ownership of \p params, which should not be used or freed
* after calling this function.
*/
int
controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
const char *action, const char *userdata,
guint interval_ms, int timeout_ms,
int start_delay_ms, lrmd_key_value_t *params,
int *call_id)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_cmd_t *cmd = NULL;
remote_ra_data_t *ra_data = NULL;
CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
          && (userdata != NULL) && (call_id != NULL),
          lrmd_key_value_freeall(params); return EINVAL);
*call_id = 0;
if (!is_remote_ra_supported_action(action)) {
lrmd_key_value_freeall(params);
return EOPNOTSUPP;
}
connection_rsc = controld_get_executor_state(rsc_id, false);
if (connection_rsc == NULL) {
lrmd_key_value_freeall(params);
return ENOTCONN;
}
remote_ra_data_init(connection_rsc);
ra_data = connection_rsc->remote_ra_data;
cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
if (cmd) {
*call_id = cmd->call_id;
lrmd_key_value_freeall(params);
return pcmk_rc_ok;
}
cmd = pcmk__assert_alloc(1, sizeof(remote_ra_cmd_t));
cmd->owner = pcmk__str_copy(lrm_state->node_name);
cmd->rsc_id = pcmk__str_copy(rsc_id);
cmd->action = pcmk__str_copy(action);
cmd->userdata = pcmk__str_copy(userdata);
cmd->interval_ms = interval_ms;
cmd->timeout = timeout_ms;
cmd->start_delay = start_delay_ms;
cmd->params = params;
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
if (cmd->start_delay) {
cmd->delay_id = pcmk__create_timer(cmd->start_delay, start_delay_helper, cmd);
}
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
*call_id = cmd->call_id;
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = NULL;
CRM_CHECK(node_name != NULL, return);
lrm_state = controld_get_executor_state(node_name, false);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing looks like:
*
* <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
* on_node="lxc1" on_node_uuid="lxc1">
* <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1"
* CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/>
* <downed>
* <node id="lxc1"/>
* </downed>
* </pseudo_event>
*/
#define XPATH_PSEUDO_FENCE "/" PCMK__XE_PSEUDO_EVENT \
"[@" PCMK_XA_OPERATION "='stonith']/" PCMK__XE_DOWNED "/" PCMK_XE_NODE
/*!
* \internal
* \brief Check a pseudo-action for Pacemaker Remote node side effects
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_pseudo(xmlNode *xml)
{
xmlXPathObject *search = pcmk__xpath_search(xml->doc, XPATH_PSEUDO_FENCE);
if (pcmk__xpath_num_results(search) == 1) {
xmlNode *result = pcmk__xpath_result(search, 0);
/* Normally, we handle the necessary side effects of a guest node stop
* action when reporting the remote agent's result. However, if the stop
* is implied due to fencing, it will be a fencing pseudo-event, and
* there won't be a result to report. Handle that case here.
*
* This will result in a duplicate call to remote_node_down() if the
* guest stop was real instead of implied, but that shouldn't hurt.
*
* There is still one corner case that isn't handled: if a guest node
* isn't running any resources when its host is fenced, it will appear
* to be cleanly stopped, so there will be no pseudo-fence, and our
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
if (result) {
const char *remote = pcmk__xe_id(result);
if (remote) {
remote_node_down(remote, DOWN_ERASE_LRM);
}
}
}
xmlXPathFreeObject(search);
}
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
xmlNode *update, *state;
int call_opt;
pcmk__node_status_t *node = NULL;
call_opt = crmd_cib_smart_opt();
node = pcmk__cluster_lookup_remote_node(lrm_state->node_name);
CRM_CHECK(node != NULL, return);
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
state = create_node_state_update(node, controld_node_update_none, update,
__func__);
crm_xml_add(state, PCMK__XA_NODE_IN_MAINTENANCE, (maintenance? "1" : "0"));
if (controld_update_cib(PCMK_XE_STATUS, update, call_opt,
NULL) == pcmk_rc_ok) {
/* TODO: still not 100% sure that async update will succeed ... */
if (maintenance) {
lrm_remote_set_flags(lrm_state, remote_in_maint);
} else {
lrm_remote_clear_flags(lrm_state, remote_in_maint);
}
}
pcmk__xml_free(update);
}
#define XPATH_PSEUDO_MAINTENANCE "//" PCMK__XE_PSEUDO_EVENT \
"[@" PCMK_XA_OPERATION "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \
PCMK__XE_MAINTENANCE
/*!
* \internal
* \brief Check a pseudo-action holding updates for maintenance state
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
xmlXPathObject *search = pcmk__xpath_search(xml->doc,
XPATH_PSEUDO_MAINTENANCE);
if (pcmk__xpath_num_results(search) == 1) {
xmlNode *node;
int cnt = 0, cnt_remote = 0;
for (node = pcmk__xe_first_child(pcmk__xpath_result(search, 0),
PCMK_XE_NODE, NULL, NULL);
node != NULL; node = pcmk__xe_next(node, PCMK_XE_NODE)) {
lrm_state_t *lrm_state = NULL;
const char *id = pcmk__xe_id(node);
cnt++;
if (id == NULL) {
continue; // Shouldn't be possible
}
lrm_state = controld_get_executor_state(id, false);
if (lrm_state && lrm_state->remote_ra_data &&
pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
const char *in_maint_s = NULL;
int in_maint;
cnt_remote++;
in_maint_s = crm_element_value(node,
PCMK__XA_NODE_IN_MAINTENANCE);
pcmk__scan_min_int(in_maint_s, &in_maint, 0);
remote_ra_maintenance(lrm_state, in_maint);
}
}
crm_trace("Action holds %d nodes (%d remotes found) adjusting "
PCMK_OPT_MAINTENANCE_MODE,
cnt, cnt_remote);
}
xmlXPathFreeObject(search);
}
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, remote_in_maint);
}
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, controlling_guest);
}
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
index e6338882e7..262e0d1f39 100644
--- a/daemons/controld/controld_utils.h
+++ b/daemons/controld/controld_utils.h
@@ -1,87 +1,87 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CRMD_UTILS__H
#define CRMD_UTILS__H
#include <stdint.h> // UINT32_C(), uint32_t
#include <glib.h> // gboolean
#include <libxml/tree.h> // xmlNode
#include <crm/crm.h>
#include <crm/cluster.h> // enum controld_join_phase
#include <crm/cluster/internal.h> // pcmk__node_status_t
#include <crm/common/xml.h>
# define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
//! Flags determining how the controller updates node information in the CIB
enum controld_node_update_flags {
//! This flag has no effect
controld_node_update_none = UINT32_C(0),
//! Skip loading the node list from the cluster layer
controld_node_update_quick = (UINT32_C(1) << 0),
//! Update \c PCMK__XA_IN_CCM with the time the node became a cluster member
controld_node_update_cluster = (UINT32_C(1) << 1),
//! Update \c PCMK_XA_CRMD with the time the node joined the CPG
controld_node_update_peer = (UINT32_C(1) << 2),
//! Update \c PCMK__XA_JOIN with the node's join state
controld_node_update_join = (UINT32_C(1) << 3),
//! Update \c PCMK_XA_EXPECTED with the node's expected join state
controld_node_update_expected = (UINT32_C(1) << 4),
//! Convenience alias to update all of the attributes mentioned above
controld_node_update_all = controld_node_update_cluster
|controld_node_update_peer
|controld_node_update_join
|controld_node_update_expected,
};
crm_exit_t crmd_exit(crm_exit_t exit_code);
_Noreturn void crmd_fast_exit(crm_exit_t exit_code);
void controld_shutdown_schedulerd_ipc(void);
void controld_stop_sched_timer(void);
void controld_free_sched_timer(void);
void controld_expect_sched_reply(char *ref);
void fsa_dump_actions(uint64_t action, const char *text);
void fsa_dump_inputs(int log_level, const char *text, long long input_register);
gboolean update_dc(xmlNode * msg);
void crm_update_peer_join(const char *source, pcmk__node_status_t *node,
enum controld_join_phase phase);
xmlNode *create_node_state_update(pcmk__node_status_t *node, uint32_t flags,
xmlNode *parent, const char *source);
void populate_cib_nodes(uint32_t flags, const char *source);
void crm_update_quorum(gboolean quorum, gboolean force_update);
void controld_close_attrd_ipc(void);
void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
void update_attrd_list(GList *attrs, uint32_t opts);
-void update_attrd_remote_node_removed(const char *host, const char *user_name);
+void controld_purge_node_attrs(const char *node_name, bool from_cache);
void update_attrd_clear_failures(const char *host, const char *rsc,
const char *op, const char *interval_spec,
gboolean is_remote_node);
int crmd_join_phase_count(enum controld_join_phase phase);
void crmd_join_phase_log(int level);
void crmd_peer_down(pcmk__node_status_t *peer, bool full);
bool feature_set_compatible(const char *dc_version, const char *join_version);
bool controld_is_local_node(const char *name);
pcmk__node_status_t *controld_get_local_node_status(void);
const char *get_node_id(xmlNode *lrm_rsc_op);
#endif
